apache · JiajunBernoulli · Oct 3, 2023 · Sep 29, 2023
diff --git a/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java b/core/src/main/java/org/apache/calcite/rel/rules/CoreRules.java
@@ -727,8 +727,9 @@ private CoreRules() {}
   public static final SortRemoveConstantKeysRule SORT_REMOVE_CONSTANT_KEYS =
       SortRemoveConstantKeysRule.Config.DEFAULT.toRule();
 
-  /** Rule that removes redundant {@link Sort} if its input max row number
-   * is less than or equal to one. */
+  /** Rule that removes redundant {@code Order By} or {@code Limit} when its input RelNode's
+   * max row count is less than or equal to specified row count.All of them
+   * are represented by {@link Sort}*/
   public static final SortRemoveRedundantRule SORT_REMOVE_REDUNDANT =
       SortRemoveRedundantRule.Config.DEFAULT.toRule();
 

diff --git a/core/src/main/java/org/apache/calcite/rel/rules/SortRemoveRedundantRule.java b/core/src/main/java/org/apache/calcite/rel/rules/SortRemoveRedundantRule.java
@@ -17,15 +17,21 @@
 package org.apache.calcite.rel.rules;
 
 import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.RelRule;
 import org.apache.calcite.rel.core.Sort;
+import org.apache.calcite.rex.RexLiteral;
 
 import org.immutables.value.Value;
 
-/**
- * Planner rule that removes
- * the redundant {@link org.apache.calcite.rel.core.Sort} if its input
- * max row number is less than or equal to one.
+import java.util.Optional;
+
+/** Rule that removes redundant {@code Order By} or {@code Limit} when its input RelNode's
+ * max row count is less than or equal to specified row count.All of them
+ * are represented by {@link Sort}
+ *
+ * <p> If a {@code Sort} is pure order by,and its offset is null,when its input RelNode's
+ * max row count is less than or equal to 1,then we could remove the redundant sort.
  *
  * <p> For example:
  * <blockquote><pre>{@code
@@ -37,6 +43,23 @@
  *  select max(totalprice) from orders}
  *  </pre></blockquote>
  *
+ * <p> If a {@code Sort} is pure limit,and its offset is null, when its input
+ * RelNode's max row count is less than or equal to the limit's fetch,then we could
+ * remove the redundant sort.
+ *
+ * <p> For example:
+ * <blockquote><pre>{@code
+ * SELECT * FROM (VALUES 1,2,3,4,5,6) AS t1 LIMIT 10}
+ * </pre></blockquote>
+ *
+ * <p> The above values max row count is 6 rows, and the limit's fetch is 10,
+ * so we could remove the redundant sort.
+ *
+ * <p> It could be converted to:
+ * <blockquote><pre>{@code
+ * SELECT * FROM (VALUES 1,2,3,4,5,6) AS t1}
+ * </pre></blockquote>
+ *
  * @see CoreRules#SORT_REMOVE_REDUNDANT
  */
 @Value.Enclosing
@@ -49,20 +72,43 @@ protected SortRemoveRedundantRule(final SortRemoveRedundantRule.Config config) {
 
   @Override public void onMatch(final RelOptRuleCall call) {
     final Sort sort = call.rel(0);
-    if (sort.offset != null || sort.fetch != null) {
-      // Don't remove sort if it has explicit OFFSET and LIMIT
+    if (RelOptUtil.isOffset(sort)) {
+      // Don't remove sort if it has explicit OFFSET
       return;
     }
 
     // Get the max row count for sort's input RelNode.
-    final Double maxRowCount = call.getMetadataQuery().getMaxRowCount(sort.getInput());
-    // If the max row count is not null and less than or equal to 1,
-    // then we could remove the sort.
-    if (maxRowCount != null && maxRowCount <= 1D) {
+    final Double inputMaxRowCount = call.getMetadataQuery().getMaxRowCount(sort.getInput());
+
+    // Get the target max row count with sort's semantics.
+    // If sort is pure order by, the target max row count is 1.
+    // If sort is pure limit, the target max row count is the limit's fetch.
+    final Optional<Double> targetMaxRowCount = getSortInputSpecificMaxRowCount(sort);
+
+    if (!targetMaxRowCount.isPresent()) {
+      return;
+    }
+
+    // If the max row count is not null and less than or equal to targetMaxRowCount,
+    // then we could remove the redundant sort.
+    if (inputMaxRowCount != null && inputMaxRowCount <= targetMaxRowCount.get()) {
       call.transformTo(sort.getInput());
     }
   }
 
+  private Optional<Double> getSortInputSpecificMaxRowCount(Sort sort) {
+    // If the sort is pure limit, the specific max row count is limit's fetch.
+    if (RelOptUtil.isPureLimit(sort)) {
+      final double limit =
+          sort.fetch instanceof RexLiteral ? RexLiteral.intValue(sort.fetch) : -1D;
+      return Optional.of(limit);
+    } else if (RelOptUtil.isPureOrder(sort)) {
+      // If the sort is pure order by, the specific max row count is 1.
+      return Optional.of(1D);
+    }
+    return Optional.empty();
+  }
+
   /** Rule configuration. */
   @Value.Immutable
   public interface Config extends RelRule.Config {

diff --git a/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java b/core/src/test/java/org/apache/calcite/test/RelOptRulesTest.java
@@ -1261,6 +1261,42 @@ private void checkSemiOrAntiJoinProjectTranspose(JoinRelType type) {
         .check();
   }
 
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-6009">[CALCITE-6009]
+   * Add optimization to remove redundant LIMIT that is more than
+   * input row count</a>. */
+  @Test void testSortRemoveWhenInputValuesMaxRowCntLessOrEqualLimitFetch() {
+    final String sql = "select * from\n"
+        + "(VALUES 1,2,3,4,5,6) as t1 limit 10";
+    sql(sql)
+        .withRule(CoreRules.SORT_REMOVE_REDUNDANT)
+        .check();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-6009">[CALCITE-6009]
+   * Add optimization to remove redundant LIMIT that is more than input
+   * row count</a>. */
+  @Test void testSortRemoveWhenInputAggregateMaxRowCntLessOrEqualLimitFetch() {
+    final String sql = "select count(*) as c\n"
+        + "from sales.emp limit 20";
+    sql(sql)
+        .withRule(CoreRules.SORT_REMOVE_REDUNDANT)
+        .check();
+  }
+
+  /** Test case for
+   * <a href="https://issues.apache.org/jira/browse/CALCITE-6009">[CALCITE-6009]
+   * Add optimization to remove redundant LIMIT that is more than input
+   * row count</a>. */
+  @Test void testSortRemoveWhenHasOffset() {
+    final String sql = "select * from\n"
+        + "(select * from sales.emp limit 10) t limit 20 offset 1";
+    sql(sql)
+        .withRule(CoreRules.SORT_REMOVE_REDUNDANT)
+        .checkUnchanged();
+  }
+
   /** Tests that an {@link EnumerableLimit} and {@link EnumerableSort} are
    * replaced by an {@link EnumerableLimitSort}, per
    * <a href="https://issues.apache.org/jira/browse/CALCITE-3920">[CALCITE-3920]

diff --git a/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml b/core/src/test/resources/org/apache/calcite/test/RelOptRulesTest.xml
@@ -14142,6 +14142,61 @@ LogicalSort(sort0=[$0], dir0=[ASC])
 LogicalAggregate(group=[{}], C=[COUNT()])
   LogicalProject($f0=[0])
     LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testSortRemoveWhenHasOffset">
+    <Resource name="sql">
+      <![CDATA[select * from
+(select * from sales.emp limit 10) t limit 20 offset 1]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(offset=[1], fetch=[20])
+  LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+    LogicalSort(fetch=[10])
+      LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], SLACKER=[$8])
+        LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testSortRemoveWhenInputAggregateMaxRowCntLessOrEqualLimitFetch">
+    <Resource name="sql">
+      <![CDATA[select count(*) as c
+from sales.emp limit 20]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(fetch=[20])
+  LogicalAggregate(group=[{}], C=[COUNT()])
+    LogicalProject($f0=[0])
+      LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalAggregate(group=[{}], C=[COUNT()])
+  LogicalProject($f0=[0])
+    LogicalTableScan(table=[[CATALOG, SALES, EMP]])
+]]>
+    </Resource>
+  </TestCase>
+  <TestCase name="testSortRemoveWhenInputValuesMaxRowCntLessOrEqualLimitFetch">
+    <Resource name="sql">
+      <![CDATA[select * from
+(VALUES 1,2,3,4,5,6) as t1 limit 10]]>
+    </Resource>
+    <Resource name="planBefore">
+      <![CDATA[
+LogicalSort(fetch=[10])
+  LogicalProject(T1=[$0])
+    LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }, { 6 }]])
+]]>
+    </Resource>
+    <Resource name="planAfter">
+      <![CDATA[
+LogicalProject(T1=[$0])
+  LogicalValues(tuples=[[{ 1 }, { 2 }, { 3 }, { 4 }, { 5 }, { 6 }]])
 ]]>
     </Resource>
   </TestCase>