Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CALCITE-6009] Add optimization to remove redundant LIMIT that is more than input row count #3449

Merged
merged 1 commit into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -727,8 +727,9 @@ private CoreRules() {}
public static final SortRemoveConstantKeysRule SORT_REMOVE_CONSTANT_KEYS =
SortRemoveConstantKeysRule.Config.DEFAULT.toRule();

/** Rule that removes redundant {@link Sort} if its input max row number
* is less than or equal to one. */
/** Rule that removes redundant {@code Order By} or {@code Limit} when its input RelNode's
* max row count is less than or equal to specified row count.All of them
* are represented by {@link Sort}*/
public static final SortRemoveRedundantRule SORT_REMOVE_REDUNDANT =
SortRemoveRedundantRule.Config.DEFAULT.toRule();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,15 +17,21 @@
package org.apache.calcite.rel.rules;

import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.plan.RelRule;
import org.apache.calcite.rel.core.Sort;
import org.apache.calcite.rex.RexLiteral;

import org.immutables.value.Value;

/**
* Planner rule that removes
* the redundant {@link org.apache.calcite.rel.core.Sort} if its input
* max row number is less than or equal to one.
import java.util.Optional;

/** Rule that removes redundant {@code Order By} or {@code Limit} when its input RelNode's
* max row count is less than or equal to specified row count.All of them
* are represented by {@link Sort}
*
* <p> If a {@code Sort} is pure order by,and its offset is null,when its input RelNode's
* max row count is less than or equal to 1,then we could remove the redundant sort.
*
* <p> For example:
* <blockquote><pre>{@code
Expand All @@ -37,6 +43,23 @@
* select max(totalprice) from orders}
* </pre></blockquote>
*
* <p> If a {@code Sort} is pure limit,and its offset is null, when its input
* RelNode's max row count is less than or equal to the limit's fetch,then we could
* remove the redundant sort.
*
* <p> For example:
* <blockquote><pre>{@code
* SELECT * FROM (VALUES 1,2,3,4,5,6) AS t1 LIMIT 10}
* </pre></blockquote>
*
* <p> The above values max row count is 6 rows, and the limit's fetch is 10,
* so we could remove the redundant sort.
*
* <p> It could be converted to:
* <blockquote><pre>{@code
* SELECT * FROM (VALUES 1,2,3,4,5,6) AS t1}
* </pre></blockquote>
*
* @see CoreRules#SORT_REMOVE_REDUNDANT
*/
@Value.Enclosing
Expand All @@ -49,20 +72,43 @@ protected SortRemoveRedundantRule(final SortRemoveRedundantRule.Config config) {

@Override public void onMatch(final RelOptRuleCall call) {
final Sort sort = call.rel(0);
if (sort.offset != null || sort.fetch != null) {
// Don't remove sort if it has explicit OFFSET and LIMIT
if (RelOptUtil.isOffset(sort)) {
// Don't remove sort if it has explicit OFFSET
return;
}

// Get the max row count for sort's input RelNode.
final Double maxRowCount = call.getMetadataQuery().getMaxRowCount(sort.getInput());
// If the max row count is not null and less than or equal to 1,
// then we could remove the sort.
if (maxRowCount != null && maxRowCount <= 1D) {
final Double inputMaxRowCount = call.getMetadataQuery().getMaxRowCount(sort.getInput());

// Get the target max row count with sort's semantics.
// If sort is pure order by, the target max row count is 1.
// If sort is pure limit, the target max row count is the limit's fetch.
final Optional<Double> targetMaxRowCount = getSortInputSpecificMaxRowCount(sort);

if (!targetMaxRowCount.isPresent()) {
return;
}

// If the max row count is not null and less than or equal to targetMaxRowCount,
// then we could remove the redundant sort.
if (inputMaxRowCount != null && inputMaxRowCount <= targetMaxRowCount.get()) {
call.transformTo(sort.getInput());
}
}

private Optional<Double> getSortInputSpecificMaxRowCount(Sort sort) {
// If the sort is pure limit, the specific max row count is limit's fetch.
if (RelOptUtil.isPureLimit(sort)) {
final double limit =
sort.fetch instanceof RexLiteral ? RexLiteral.intValue(sort.fetch) : -1D;
return Optional.of(limit);
} else if (RelOptUtil.isPureOrder(sort)) {
// If the sort is pure order by, the specific max row count is 1.
return Optional.of(1D);
}
return Optional.empty();
}

/** Rule configuration. */
@Value.Immutable
public interface Config extends RelRule.Config {
Expand Down
36 changes: 36 additions & 0 deletions core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,42 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) {
.check();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6009">[CALCITE-6009]
* Add optimization to remove redundant LIMIT that is more than
* input row count</a>. */
@Test void testSortRemoveWhenInputValuesMaxRowCntLessOrEqualLimitFetch() {
final String sql = "select * from\n"
+ "(VALUES 1,2,3,4,5,6) as t1 limit 10";
sql(sql)
.withRule(CoreRules.SORT_REMOVE_REDUNDANT)
.check();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6009">[CALCITE-6009]
* Add optimization to remove redundant LIMIT that is more than input
* row count</a>. */
@Test void testSortRemoveWhenInputAggregateMaxRowCntLessOrEqualLimitFetch() {
final String sql = "select count(*) as c\n"
+ "from sales.emp limit 20";
sql(sql)
.withRule(CoreRules.SORT_REMOVE_REDUNDANT)
.check();
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6009">[CALCITE-6009]
* Add optimization to remove redundant LIMIT that is more than input
* row count</a>. */
@Test void testSortRemoveWhenHasOffset() {
final String sql = "select * from\n"
+ "(select * from sales.emp limit 10) t limit 20 offset 1";
sql(sql)
.withRule(CoreRules.SORT_REMOVE_REDUNDANT)
.checkUnchanged();
}

/** Tests that an {@link EnumerableLimit} and {@link EnumerableSort} are
* replaced by an {@link EnumerableLimitSort}, per
* <a href="https://issues.apache.org/jira/browse/CALCITE-3920">[CALCITE-3920]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14142,6 +14142,61 @@ LogicalSort(sort0=[$0], dir0=[ASC])
LogicalAggregate(group=[{}], C=[COUNT()])
LogicalProject($f0=[0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testSortRemoveWhenHasOffset">
<Resource name="sql">
<![CDATA[select * from
(select * from sales.emp limit 10) t limit 20 offset 1]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalSort(offset=[1], fetch=[20])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalSort(fetch=[10])
LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testSortRemoveWhenInputAggregateMaxRowCntLessOrEqualLimitFetch">
<Resource name="sql">
<![CDATA[select count(*) as c
from sales.emp limit 20]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalSort(fetch=[20])
LogicalAggregate(group=[{}], C=[COUNT()])
LogicalProject($f0=[0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalAggregate(group=[{}], C=[COUNT()])
LogicalProject($f0=[0])
LogicalTableScan(table=[[CATALOG, SALES, EMP]])
]]>
</Resource>
</TestCase>
<TestCase name="testSortRemoveWhenInputValuesMaxRowCntLessOrEqualLimitFetch">
<Resource name="sql">
<![CDATA[select * from
(VALUES 1,2,3,4,5,6) as t1 limit 10]]>
</Resource>
<Resource name="planBefore">
<![CDATA[
LogicalSort(fetch=[10])
LogicalProject(T1=[$0])
LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }, { 6 }]])
]]>
</Resource>
<Resource name="planAfter">
<![CDATA[
LogicalProject(T1=[$0])
LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }, { 6 }]])
]]>
</Resource>
</TestCase>
Expand Down
Loading