Skip to content

Commit

Permalink
[fix](nereids) do not transpose semi join agg when mark join (apache#…
Browse files Browse the repository at this point in the history
  • Loading branch information
starocean999 authored and weixingyu12 committed Apr 30, 2024
1 parent 98b590c commit e683b9c
Show file tree
Hide file tree
Showing 4 changed files with 242 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ public Rule build() {
return logicalJoin(logicalAggregate(), any())
.whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder())
.when(join -> join.getJoinType().isLeftSemiOrAntiJoin())
.whenNot(join -> join.isMarkJoin())
.then(join -> {
LogicalAggregate<Plan> aggregate = join.left();
if (!canTranspose(aggregate, join)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ public Rule build() {
return logicalJoin(logicalProject(logicalAggregate()), any())
.whenNot(join -> ConnectContext.get().getSessionVariable().isDisableJoinReorder())
.when(join -> join.getJoinType().isLeftSemiOrAntiJoin())
.whenNot(join -> join.isMarkJoin())
.when(join -> join.left().isAllSlots())
.when(join -> join.left().getProjects().stream().allMatch(n -> n instanceof Slot))
.then(join -> {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !groupby_positive_case --
PhysicalResultSink
--hashAgg[LOCAL]
----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
------filter((T1.__DORIS_DELETE_SIGN__ = 0))
--------PhysicalOlapScan[T1]
------filter((T2.__DORIS_DELETE_SIGN__ = 0))
--------PhysicalOlapScan[T2]

-- !groupby_negative_case --
PhysicalResultSink
--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
----hashAgg[LOCAL]
------filter((T1.__DORIS_DELETE_SIGN__ = 0))
--------PhysicalOlapScan[T1]
----filter((T2.__DORIS_DELETE_SIGN__ = 0))
------PhysicalOlapScan[T2]

-- !grouping_positive_case --
PhysicalResultSink
--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
----hashAgg[GLOBAL]
------hashAgg[LOCAL]
--------PhysicalRepeat
----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
------------PhysicalOlapScan[T1]
----filter((T2.__DORIS_DELETE_SIGN__ = 0))
------PhysicalOlapScan[T2]

-- !grouping_negative_case --
PhysicalResultSink
--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
----hashAgg[GLOBAL]
------hashAgg[LOCAL]
--------PhysicalRepeat
----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
------------PhysicalOlapScan[T1]
----filter((T2.__DORIS_DELETE_SIGN__ = 0))
------PhysicalOlapScan[T2]

-- !groupby_positive_case2 --
PhysicalResultSink
--hashAgg[LOCAL]
----hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
------filter((T1.__DORIS_DELETE_SIGN__ = 0))
--------PhysicalOlapScan[T1]
------filter((T2.__DORIS_DELETE_SIGN__ = 0))
--------PhysicalOlapScan[T2]

-- !groupby_negative_case2 --
PhysicalResultSink
--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
----hashAgg[LOCAL]
------filter((T1.__DORIS_DELETE_SIGN__ = 0))
--------PhysicalOlapScan[T1]
----filter((T2.__DORIS_DELETE_SIGN__ = 0))
------PhysicalOlapScan[T2]

-- !grouping_positive_case2 --
PhysicalResultSink
--hashJoin[LEFT_SEMI_JOIN](T3.a = T2.a)
----hashAgg[GLOBAL]
------hashAgg[LOCAL]
--------PhysicalRepeat
----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
------------PhysicalOlapScan[T1]
----filter((T2.__DORIS_DELETE_SIGN__ = 0))
------PhysicalOlapScan[T2]

-- !grouping_negative_case2 --
PhysicalResultSink
--hashJoin[LEFT_SEMI_JOIN](T3.D = expr_cast(a as BIGINT))
----hashAgg[GLOBAL]
------hashAgg[LOCAL]
--------PhysicalRepeat
----------filter((T1.__DORIS_DELETE_SIGN__ = 0))
------------PhysicalOlapScan[T1]
----filter((T2.__DORIS_DELETE_SIGN__ = 0))
------PhysicalOlapScan[T2]

-- !groupby_negative_case3 --
PhysicalResultSink
--hashJoin[LEFT_SEMI_JOIN](T3.len = T3.len)
----hashAgg[GLOBAL]
------hashAgg[LOCAL]
--------PhysicalOlapScan[T3]
----PhysicalOlapScan[T3]

Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("transposeSemiJoinAgg") {
// Plan-shape regression suite for the Nereids rewrite rules TransposeSemiJoinAggProject and
// TransposeSemiJoinAgg. Every qt_* case runs "explain shape plan" on a query whose left side
// is an aggregated subquery joined to another relation, so the recorded plan shows whether
// the semi join sits below or above the aggregation.

// Note: the filter on the invisible column "__DORIS_DELETE_SIGN__ = 0" (visible in the expected
// plans for T1 and T2) has no impact on partition pruning.
String db = context.config.getDbNameByFile(context.file)
sql "use ${db}"
// Force the Nereids planner and forbid falling back to the original planner.
sql "SET enable_nereids_planner=true"
sql "set runtime_filter_mode=OFF"
// Hide distribute/project nodes from the printed plan shape. This only needs to be set once.
sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'"
sql "SET enable_fallback_to_original_planner=false"
sql "set partition_pruning_expand_threshold=10;"

// T1 and T2 share the same schema: UNIQUE KEY tables keyed and hash-distributed on column a.
sql "drop table if exists T1;"
sql """
CREATE TABLE T1 (
a INT NULL,
b INT NULL,
c INT NULL
) ENGINE=OLAP
UNIQUE KEY(`a`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`a`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
); """

sql "drop table if exists T2;"
sql """
CREATE TABLE T2 (
a INT NULL,
b INT NULL,
c INT NULL
) ENGINE=OLAP
UNIQUE KEY(`a`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`a`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""

sql "set enable_runtime_filter_prune=false;"
// Inject column statistics instead of loading data: T1 is declared with 5999989709 rows and
// T2 with 100 rows.
// NOTE(review): presumably this keeps the cost-based plan choice stable - confirm.
sql '''
alter table T1 modify column a set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709');
'''
sql '''
alter table T1 modify column b set stats ('ndv'='5999989709', 'num_nulls'='0', 'row_count'='5999989709');
'''
sql '''
alter table T2 modify column a set stats ('ndv'='100', 'num_nulls'='0', 'row_count'='100');
'''

// T3 is a DUPLICATE KEY table used only by the last case (groupby_negative_case3).
sql "drop table if exists T3;"
sql """
CREATE TABLE T3 (
str varchar(100),
len int
) DUPLICATE KEY(str)
DISTRIBUTED BY HASH(str) BUCKETS 10
PROPERTIES("replication_num" = "1");
"""

// RULE: TransposeSemiJoinAggProject
// 1. group-by (without grouping sets)
// Positive case: the join condition uses only the group-by key a.
// agg-leftSemi => leftSemi-agg, i.e. the semi join is expected to move below the aggregation.
qt_groupby_positive_case """
explain shape plan
select T3.a
from (select a, b, sum(c) from T1 group by a, b) T3
left semi join T2 on T3.a=T2.a;
"""

// Negative case: the join condition uses the aggregate output sum(c) (alias d), so the
// semi join is not pushed down and stays above the aggregation.
qt_groupby_negative_case """
explain shape plan
select T3.a
from (select a, b, sum(c) as d from T1 group by a, b) T3
left semi join T2 on T3.D=T2.a;
"""

// 2. grouping sets
// Positive case: the join condition uses column a, which appears in every grouping set.
// agg-leftSemi => leftSemi-agg
// NOTE(review): the recorded expected plan for this case keeps the semi join above the
// aggregation - confirm that this is the intended shape.
qt_grouping_positive_case """
explain shape plan
select T3.a
from (select a, b, sum(c) from T1 group by grouping sets ((a, b), (a))) T3
left semi join T2 on T3.a=T2.a;
"""

// Negative case: the join condition uses the aggregate output sum(c) (alias D);
// the semi join is not pushed down.
qt_grouping_negative_case """
explain shape plan
select T3.a
from (select a, b, sum(c) as D from T1 group by grouping sets ((a, b), (a), ())) T3
left semi join T2 on T3.D=T2.a;
"""

// RULE: TransposeSemiJoinAgg
// 1. group-by (without grouping sets)
// Positive case: the join condition uses only the group-by key a.
// agg-leftSemi => leftSemi-agg
qt_groupby_positive_case2 """
explain shape plan
select T3.a
from (select a from T1 group by a) T3
left semi join T2 on T3.a=T2.a;
"""

// Negative case: the join condition uses the aggregate output sum(c) (alias D);
// the semi join is not pushed down.
qt_groupby_negative_case2 """
explain shape plan
select T3.D
from (select sum(c) as D from T1 group by a) T3
left semi join T2 on T3.D=T2.a;
"""

// 2. grouping sets
// Positive case: the join condition uses column a, which appears in every grouping set.
// agg-leftSemi => leftSemi-agg
// NOTE(review): the recorded expected plan for this case keeps the semi join above the
// aggregation - confirm that this is the intended shape.
qt_grouping_positive_case2 """
explain shape plan
select T3.a
from (select a from T1 group by grouping sets ((a, b), (a))) T3
left semi join T2 on T3.a=T2.a;
"""
// Negative case: the join condition uses the aggregate output sum(C) (alias D);
// the semi join is not pushed down.
qt_grouping_negative_case2 """
explain shape plan
select T3.D
from (select sum(C) as D from T1 group by grouping sets ((a, b), (a), ())) T3
left semi join T2 on T3.D=T2.a;
"""
// Regression case for https://github.com/apache/doris/issues/31308
// The IN-subquery sits inside a CASE WHEN on top of a group-by (presumably planned as a
// mark join - confirm); the join must stay above the aggregation and not be transposed.
qt_groupby_negative_case3 """
explain shape plan
select case when len in (select len from T3) then 1 else 1 end c1 from T3 group by len;
"""
}

0 comments on commit e683b9c

Please sign in to comment.