diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java index 1a86e933a511d8..b0d47f9e64f966 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAgg.java @@ -36,6 +36,7 @@ public Rule build() { return logicalJoin(logicalAggregate(), any()) .whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder()) .when(join -> join.getJoinType().isLeftSemiOrAntiJoin()) + .whenNot(join -> join.isMarkJoin()) .then(join -> { LogicalAggregate aggregate = join.left(); if (!canTranspose(aggregate, join)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java index 24ca535eed871b..0bbc65a1f2af0d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/TransposeSemiJoinAggProject.java @@ -34,6 +34,7 @@ public Rule build() { return logicalJoin(logicalProject(logicalAggregate()), any()) .whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder()) .when(join -> join.getJoinType().isLeftSemiOrAntiJoin()) + .whenNot(join -> join.isMarkJoin()) .when(join -> join.left().isAllSlots()) .when(join -> join.left().getProjects().stream().allMatch(n -> n instanceof Slot)) .then(join -> { diff --git a/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out new file mode 100644 index 00000000000000..79378dff1bf35b --- /dev/null +++ b/regression-test/data/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.out @@ 
-0,0 +1,89 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !groupby_positive_case -- +PhysicalResultSink +--hashAgg[LOCAL] +----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a) +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] +------filter((T2.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T2] + +-- !groupby_negative_case -- +PhysicalResultSink +--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT)) +----hashAgg[LOCAL] +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] + +-- !grouping_positive_case -- +PhysicalResultSink +--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a) +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] + +-- !grouping_negative_case -- +PhysicalResultSink +--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT)) +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] + +-- !groupby_positive_case2 -- +PhysicalResultSink +--hashAgg[LOCAL] +----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a) +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] +------filter((T2.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T2] + +-- !groupby_negative_case2 -- +PhysicalResultSink +--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT)) +----hashAgg[LOCAL] +------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +--------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] + +-- !grouping_positive_case2 -- +PhysicalResultSink +--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a) +----hashAgg[GLOBAL] +------hashAgg[LOCAL] 
+--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] + +-- !grouping_negative_case2 -- +PhysicalResultSink +--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT)) +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalRepeat +----------filter((T1.__DORIS_DELETE_SIGN__ = 0)) +------------PhysicalOlapScan[T1] +----filter((T2.__DORIS_DELETE_SIGN__ = 0)) +------PhysicalOlapScan[T2] + +-- !groupby_negative_case3 -- +PhysicalResultSink +--hashJoin[LEFT_SEMI_JOIN](T3.len = T3.len) +----hashAgg[GLOBAL] +------hashAgg[LOCAL] +--------PhysicalOlapScan[T3] +----PhysicalOlapScan[T3] + diff --git a/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy new file mode 100644 index 00000000000000..3a84754bbaac1f --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/transposeJoin/transposeSemiJoinAgg.groovy @@ -0,0 +1,151 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("transposeSemiJoinAgg") { + // Plan-shape regression suite for the TransposeSemiJoinAgg and TransposeSemiJoinAggProject rewrite rules; every qt_ case below runs "explain shape plan". The filter on the invisible column "DORIS_DELETE_SIGN = 0" has no impact on partition pruning. NOTE(review): ignore_shape_nodes is set twice below with the same value, so the second statement looks redundant - confirm. + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "SET enable_nereids_planner=true" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "SET enable_fallback_to_original_planner=false" + sql "set partition_pruning_expand_threshold=10;" + sql "set ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + sql "drop table if exists T1;" + sql """ + CREATE TABLE T1 ( + a INT NULL, + b INT NULL, + c INT NULL + ) ENGINE=OLAP + UNIQUE KEY(`a`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`a`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); """ + + sql "drop table if exists T2;" + sql """ + CREATE TABLE T2 ( + a INT NULL, + b INT NULL, + c INT NULL + ) ENGINE=OLAP + UNIQUE KEY(`a`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`a`) BUCKETS 10 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "set enable_runtime_filter_prune=false;" + sql ''' + alter table T1 modify column a set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709'); + ''' + sql ''' + alter table T1 modify column b set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709'); + ''' + sql ''' + alter table T2 modify column a set stats ('ndv'='100', 'num_nulls'='0', 'row_count'='100'); + ''' + + sql "drop table if exists T3;" + sql """ + CREATE TABLE T3 ( + str varchar(100), + len int + ) DUPLICATE KEY(str) + DISTRIBUTED BY HASH(str) BUCKETS 10 + PROPERTIES("replication_num" = "1"); + """ + + // RULE: TransposeSemiJoinAggProject (matches a left semi/anti join whose left child is a project over an aggregate) + // 1. 
group-by(without grouping sets) + // positive: the recorded plan has the aggregate above the semi join (agg-leftSemi => leftSemi-agg) + qt_groupby_positive_case """ + explain shape plan + select T3.a + from (select a, b, sum(c) from T1 group by a, b) T3 + left semi join T2 on T3.a=T2.a; + """ + + // negative: the join key T3.D is the aggregate result sum(c); the recorded plan keeps the semi join above the aggregate (agg-leftSemi: agg not pushed down) + qt_groupby_negative_case """ + explain shape plan + select T3.a + from (select a, b, sum(c) as d from T1 group by a, b) T3 + left semi join T2 on T3.D=T2.a; + """ + + // 2 grouping sets + // agg-leftSemi => leftSemi-agg. NOTE(review): the recorded plan for this case still shows the semi join above the aggregate and PhysicalRepeat - confirm this is the intended expectation + qt_grouping_positive_case """ + explain shape plan + select T3.a + from (select a, b, sum(c) from T1 group by grouping sets ((a, b), (a))) T3 + left semi join T2 on T3.a=T2.a; + """ + + // negative: the join key T3.D is the aggregate result sum(c); the recorded plan keeps the semi join above the aggregate (agg-leftSemi: agg not pushed down) + qt_grouping_negative_case """ + explain shape plan + select T3.a + from (select a, b, sum(c) as D from T1 group by grouping sets ((a, b), (a), ())) T3 + left semi join T2 on T3.D=T2.a; + """ + + // RULE: TransposeSemiJoinAgg (matches a left semi/anti join whose left child is an aggregate) + // 1. group-by(without grouping sets) + // positive: the recorded plan has the aggregate above the semi join (agg-leftSemi => leftSemi-agg) + qt_groupby_positive_case2 """ + explain shape plan + select T3.a + from (select a from T1 group by a) T3 + left semi join T2 on T3.a=T2.a; + """ + + // negative: the join key T3.D is the aggregate result sum(c); the recorded plan keeps the semi join above the aggregate (agg-leftSemi: agg not pushed down) + qt_groupby_negative_case2 """ + explain shape plan + select T3.D + from (select sum(c) as D from T1 group by a) T3 + left semi join T2 on T3.D=T2.a; + """ + + // 2 grouping sets + // agg-leftSemi => leftSemi-agg. NOTE(review): the recorded plan for this case still shows the semi join above the aggregate and PhysicalRepeat - confirm this is the intended expectation + qt_grouping_positive_case2 """ + explain shape plan + select T3.a + from (select a from T1 group by grouping sets ((a, b), (a))) T3 + left semi join T2 on T3.a=T2.a; + """ + // negative: the join key T3.D is the aggregate result sum(C); the recorded plan keeps the semi join above the aggregate (agg-leftSemi: agg not pushed down) + qt_grouping_negative_case2 """ + explain shape plan + select T3.D + from (select sum(C) as D from T1 group by grouping sets ((a, b), (a), ())) T3 + left semi join T2 on T3.D=T2.a; + """ + // https://github.com/apache/doris/issues/31308 - IN-subquery inside CASE WHEN, presumably planned as a mark join (TODO confirm), which both rules skip via whenNot(isMarkJoin); the recorded plan keeps the semi join above the aggregate + qt_groupby_negative_case3 """ + explain shape plan + select case when len in (select len from T3) then 1 else 1 end c1 from T3 
group by len; + """ +} \ No newline at end of file