Skip to content

Commit

Permalink
[fix](mtmv) Fix select literal result wrongly in group by when use ma…
Browse files Browse the repository at this point in the history
…terialized view (apache#38958)

This bug was introduced by apache#34274

if mv def is
select o_orderdate from  orders group by o_orderdate;

and the query is as follows, then the result is wrong:
select 1 from orders group by o_orderdate;
  • Loading branch information
seawinde committed Aug 21, 2024
1 parent 2256298 commit c365cb6
Show file tree
Hide file tree
Showing 11 changed files with 1,137 additions and 369 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -214,7 +214,7 @@ protected LogicalAggregate<Plan> doRewriteQueryByView(
LogicalAggregate<Plan> queryAggregate = queryTopPlanAndAggPair.value();
List<Expression> queryGroupByExpressions = queryAggregate.getGroupByExpressions();
// handle the scene that query top plan not use the group by in query bottom aggregate
if (queryGroupByExpressions.size() != queryTopPlanGroupBySet.size()) {
if (needCompensateGroupBy(queryTopPlanGroupBySet, queryGroupByExpressions)) {
for (Expression expression : queryGroupByExpressions) {
if (queryTopPlanGroupBySet.contains(expression)) {
continue;
Expand Down Expand Up @@ -263,6 +263,42 @@ protected LogicalAggregate<Plan> doRewriteQueryByView(
return new LogicalAggregate<>(finalGroupExpressions, finalOutputExpressions, tempRewritedPlan);
}

/**
 * Decide whether the group by of the query bottom aggregate should be compensated,
 * this handles the scene that query top plan does not use all the group by in query bottom aggregate.
 * Compensation is needed in either of the following cases:
 * 1. the number of distinct query group by expressions is different from the number of
 *    expressions in queryTopProject.
 * 2. the named expressions collected from queryTopProject can not cover the named expressions
 *    collected from the query group by expressions.
 * For example:
 * mv is select o_orderdate from orders group by o_orderdate;
 * query is select 1 from orders group by o_orderdate.
 * Or
 * mv is select o_orderdate from orders group by o_orderdate;
 * query is select o_orderdate from orders group by o_orderdate, o_orderkey;
 * in the second example query top project is o_orderdate and
 * query bottom aggregate group by is o_orderdate, o_orderkey.
 *
 * @param queryTopProject the expressions which query top plan uses, such as o_orderdate
 * @param queryGroupByExpressions query bottom aggregate group by, such as o_orderdate, o_orderkey
 * @return true if need to compensate group by, false if not need
 */
private static boolean needCompensateGroupBy(Set<? extends Expression> queryTopProject,
        List<Expression> queryGroupByExpressions) {
    // deduplicate first so that repeated group by expressions are only counted once
    Set<Expression> distinctGroupByExpressions = new HashSet<>(queryGroupByExpressions);
    boolean sameExpressionCount = distinctGroupByExpressions.size() == queryTopProject.size();
    if (!sameExpressionCount) {
        return true;
    }
    // named expressions which query bottom aggregate group by uses
    Set<NamedExpression> groupByNamedExpressions = new HashSet<>();
    for (Expression groupByExpression : distinctGroupByExpressions) {
        groupByNamedExpressions.addAll(groupByExpression.collect(NamedExpression.class::isInstance));
    }
    // named expressions which query top project uses
    Set<NamedExpression> topProjectNamedExpressions = new HashSet<>();
    for (Expression topProjectExpression : queryTopProject) {
        topProjectNamedExpressions.addAll(topProjectExpression.collect(NamedExpression.class::isInstance));
    }
    // compensate when some named expression used by group by is missing in query top project
    boolean groupByCovered = topProjectNamedExpressions.containsAll(groupByNamedExpressions);
    return !groupByCovered;
}

/**
* Try to rewrite query expression by view, contains both group by dimension and aggregate function
*/
Expand Down
Loading

0 comments on commit c365cb6

Please sign in to comment.