From 76935803931733543e8de86802782c9fdf097202 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Thu, 25 Apr 2024 16:42:35 +0800 Subject: [PATCH] [fix](Nereids) support aggregate function only in having statement (#34086) pick from master PR #34086 commit id dfe30f542d89f74c008f05d1b7241c8d33c94d6c SQL like > SELECT 1 AS c1 FROM t HAVING count(1) > 0 OR c1 IS NOT NULL --- .../rules/analysis/FillUpMissingSlots.java | 75 ++++++++++++++----- .../analysis/FillUpMissingSlotsTest.java | 2 +- .../test_having_with_aggregate_function.out | 4 + ...test_having_with_aggregate_function.groovy | 32 ++++++++ 4 files changed, 95 insertions(+), 18 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out create mode 100644 regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java index 1a59ba00adf456..0a70ccaee9283c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlots.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.algebra.Aggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalHaving; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import org.apache.doris.nereids.trees.plans.logical.LogicalSort; @@ -157,20 +158,52 @@ public List buildRules() { ), RuleType.FILL_UP_HAVING_PROJECT.build( logicalHaving(logicalProject()).then(having -> { - LogicalProject project = having.child(); - Set projectOutputSet = project.getOutputSet(); - Set notExistedInProject = having.getExpressions().stream() - .map(Expression::getInputSlots) - .flatMap(Set::stream) - .filter(s -> !projectOutputSet.contains(s)) - .collect(Collectors.toSet()); - if (notExistedInProject.size() == 0) { - return null; + if (having.getExpressions().stream().anyMatch(e -> e.containsType(AggregateFunction.class))) { + // This is very weird pattern. + // There are some aggregate functions in having, but its child is project. + // There are some slot from project in having too. + // Having should execute after project. + // But aggregate function should execute before project. + // Since no aggregate here, we should add an empty aggregate before project. + // We should push aggregate function into aggregate node first. + // Then put aggregate result slots and original project slots into new project. + // The final plan should be + // Having + // +-- Project + // +-- Aggregate + // Since aggregate node have no group by key. + // So project should not contain any slot from its original child. + // Or otherwise slot cannot find will be thrown. + LogicalProject project = having.child(); + // new an empty agg here + LogicalAggregate agg = new LogicalAggregate<>( + ImmutableList.of(), ImmutableList.of(), project.child()); + // avoid throw exception even if having have slot from its child. + // because we will add a project between having and project. + Resolver resolver = new Resolver(agg, false); + having.getConjuncts().forEach(resolver::resolve); + agg = agg.withAggOutput(resolver.getNewOutputSlots()); + Set newConjuncts = ExpressionUtils.replace( + having.getConjuncts(), resolver.getSubstitution()); + ImmutableList.Builder projects = ImmutableList.builder(); + projects.addAll(project.getOutputs()).addAll(agg.getOutput()); + return new LogicalHaving<>(newConjuncts, new LogicalProject<>(projects.build(), agg)); + } else { + LogicalProject project = having.child(); + Set projectOutputSet = project.getOutputSet(); + Set notExistedInProject = having.getExpressions().stream() + .map(Expression::getInputSlots) + .flatMap(Set::stream) + .filter(s -> !projectOutputSet.contains(s)) + .collect(Collectors.toSet()); + if (notExistedInProject.isEmpty()) { + return null; + } + List projects = ImmutableList.builder() + .addAll(project.getProjects()).addAll(notExistedInProject).build(); + return new LogicalProject<>(ImmutableList.copyOf(project.getOutput()), + having.withChildren(new LogicalProject<>(projects, project.child()))); } - List projects = ImmutableList.builder() - .addAll(project.getProjects()).addAll(notExistedInProject).build(); - return new LogicalProject<>(ImmutableList.copyOf(project.getOutput()), - having.withChildren(new LogicalProject<>(projects, project.child()))); }) ) ); @@ -183,13 +216,19 @@ static class Resolver { private final Map substitution = Maps.newHashMap(); private final List newOutputSlots = Lists.newArrayList(); private final Map outputSubstitutionMap; + private final boolean checkSlot; - Resolver(Aggregate aggregate) { + Resolver(Aggregate aggregate, boolean checkSlot) { outputExpressions = aggregate.getOutputExpressions(); groupByExpressions = aggregate.getGroupByExpressions(); outputSubstitutionMap = outputExpressions.stream().filter(Alias.class::isInstance) - .collect(Collectors.toMap(alias -> alias.toSlot(), alias -> alias.child(0), - (k1, k2) -> k1)); + .collect(Collectors.toMap(NamedExpression::toSlot, alias -> alias.child(0), + (k1, k2) -> k1)); + this.checkSlot = checkSlot; + } + + Resolver(Aggregate aggregate) { + this(aggregate, true); } public void resolve(Expression expression) { @@ -217,7 +256,9 @@ public void resolve(Expression expression) { // We couldn't find the equivalent expression in output expressions and group-by expressions, // so we should check whether the expression is valid. if (expression instanceof SlotReference) { - throw new AnalysisException(expression.toSql() + " in having clause should be grouped by."); + if (checkSlot) { + throw new AnalysisException(expression.toSql() + " should be grouped by."); + } } else if (expression instanceof AggregateFunction) { if (checkWhetherNestedAggregateFunctionsExist((AggregateFunction) expression)) { throw new AnalysisException("Aggregate functions in having clause can't be nested: " diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java index 534833754bde28..7e6c6b0c794a27 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/analysis/FillUpMissingSlotsTest.java @@ -306,7 +306,7 @@ void testJoinWithHaving() { void testInvalidHaving() { ExceptionChecker.expectThrowsWithMsg( AnalysisException.class, - "a2 in having clause should be grouped by.", + "a2 should be grouped by", () -> PlanChecker.from(connectContext).analyze( "SELECT a1 FROM t1 GROUP BY a1 HAVING a2 > 0" )); diff --git a/regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out b/regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out new file mode 100644 index 00000000000000..2bef87ab2b7a2d --- /dev/null +++ b/regression-test/data/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !having_project_with_having_count_1_and_slot_from_project -- +1 + diff --git a/regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy b/regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy new file mode 100644 index 00000000000000..9047a7c85bf456 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/fill_up_missing_slots/test_having_with_aggregate_function.groovy @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_having_project") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + + sql """ + DROP TABLE IF EXISTS t + """ + sql """ + create table t(id smallint) distributed by random properties('replication_num'='1'); + """ + + qt_having_project_with_having_count_1_and_slot_from_project """ + SELECT 1 AS c1 FROM t HAVING count(1) > 0 OR c1 IS NOT NULL + """ +}