Skip to content

Commit

Permalink
[fix](nereids) fix bug for A>n, where A.max is infinity apache#39936 (a…
Browse files Browse the repository at this point in the history
…pache#40368)

## Proposed changes
Cherry-pick of apache#39936.
Issue Number: close #xxx

<!--Describe your changes.-->
  • Loading branch information
englefly authored Sep 5, 2024
1 parent cc20ecd commit c6771da
Show file tree
Hide file tree
Showing 15 changed files with 246 additions and 196 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -622,7 +622,9 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType
.setMaxExpr(intersectRange.getHighExpr())
.setNdv(intersectRange.getDistinctValues())
.setNumNulls(0);
double sel = leftRange.overlapPercentWith(rightRange);
double sel = leftRange.getDistinctValues() == 0
? 1.0
: intersectRange.getDistinctValues() / leftRange.getDistinctValues();
if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) {
sel = DEFAULT_INEQUALITY_COEFFICIENT;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,10 @@ public boolean isInfinite() {
return Double.isInfinite(low) || Double.isInfinite(high);
}

/**
 * Reports whether this range is infinite on one side only.
 *
 * @return {@code true} when {@link #isInfinite()} holds (at least one bound is infinite)
 *         and {@code isBothInfinite()} does not; {@code false} otherwise
 */
public boolean isOneSideInfinite() {
    // A range with no infinite bound can never be one-side infinite.
    if (!isInfinite()) {
        return false;
    }
    // At least one bound is infinite; exclude the case where both are.
    return !isBothInfinite();
}

/**
 * Reports whether both bounds of this range are finite.
 *
 * @return {@code true} only if {@code Double.isFinite} accepts both {@code low} and
 *         {@code high}, i.e. neither bound is infinite or NaN
 */
public boolean isFinite() {
    if (!Double.isFinite(low)) {
        return false;
    }
    return Double.isFinite(high);
}
Expand Down Expand Up @@ -175,8 +179,7 @@ public Pair<Double, LiteralExpr> maxPair(double r1, LiteralExpr e1, double r2, L
}

public StatisticRange cover(StatisticRange other) {
// double newLow = Math.max(low, other.low);
// double newHigh = Math.min(high, other.high);
StatisticRange resultRange;
Pair<Double, LiteralExpr> biggerLow = maxPair(low, lowExpr, other.low, other.lowExpr);
double newLow = biggerLow.first;
LiteralExpr newLowExpr = biggerLow.second;
Expand All @@ -188,9 +191,18 @@ public StatisticRange cover(StatisticRange other) {
double overlapPercentOfLeft = overlapPercentWith(other);
double overlapDistinctValuesLeft = overlapPercentOfLeft * distinctValues;
double coveredDistinctValues = minExcludeNaN(distinctValues, overlapDistinctValuesLeft);
return new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr, coveredDistinctValues, dataType);
if (this.isBothInfinite() && other.isOneSideInfinite()) {
resultRange = new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr,
distinctValues * INFINITE_TO_INFINITE_RANGE_INTERSECT_OVERLAP_HEURISTIC_FACTOR,
dataType);
} else {
resultRange = new StatisticRange(newLow, newLowExpr, newHigh, newHighExpr, coveredDistinctValues,
dataType);
}
} else {
resultRange = empty(dataType);
}
return empty(dataType);
return resultRange;
}

public StatisticRange union(StatisticRange other) {
Expand Down Expand Up @@ -241,6 +253,6 @@ public double getDistinctValues() {

@Override
public String toString() {
return "(" + lowExpr + "," + highExpr + ")";
return "range=(" + lowExpr + "," + highExpr + "), ndv=" + distinctValues;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1365,4 +1365,39 @@ public void testStringRangeColToCol() {
Statistics agrtc = new FilterEstimation().estimate(new GreaterThan(a, c), baseStats);
Assertions.assertEquals(50, agrtc.getRowCount());
}

/**
 * Estimates {@code a >= '2024-05-14'}, {@code b > 0} and their conjunction over a
 * 1000-row input whose column statistics set no min/max values (presumably leaving
 * the value ranges unbounded, as the test name suggests).
 * Expects each single predicate to keep 500 rows and the conjunction to keep 250.
 */
@Test
void testAndWithInfinity() {
    final double totalRows = 1000.0;

    SlotReference varcharSlot = new SlotReference("a", new VarcharType(25));
    SlotReference intSlot = new SlotReference("b", IntegerType.INSTANCE);

    // Neither column statistic sets a min or max value; only ndv, size, nulls and count.
    StatisticsBuilder inputBuilder = new StatisticsBuilder();
    inputBuilder.setRowCount(totalRows);
    inputBuilder.putColumnStatistics(varcharSlot,
            new ColumnStatisticBuilder()
                    .setNdv(10)
                    .setAvgSizeByte(4)
                    .setNumNulls(0)
                    .setCount(totalRows)
                    .build());
    inputBuilder.putColumnStatistics(intSlot,
            new ColumnStatisticBuilder()
                    .setNdv(488)
                    .setAvgSizeByte(25)
                    .setNumNulls(0)
                    .setCount(totalRows)
                    .build());

    Expression varcharAtLeast = new GreaterThanEqual(varcharSlot,
            new org.apache.doris.nereids.trees.expressions.literal.StringLiteral("2024-05-14"));
    Expression intAboveZero = new GreaterThan(intSlot, new IntegerLiteral(0));

    // Each one-sided comparison on its own is expected to keep half of the rows.
    Statistics varcharResult = new FilterEstimation().estimate(varcharAtLeast, inputBuilder.build());
    Assertions.assertEquals(500, varcharResult.getRowCount());

    Statistics intResult = new FilterEstimation().estimate(intAboveZero, inputBuilder.build());
    Assertions.assertEquals(500, intResult.getRowCount());

    // The conjunction is expected to keep a quarter of the rows.
    Expression conjunction = new And(varcharAtLeast, intAboveZero);
    Statistics conjunctionResult = new FilterEstimation().estimate(conjunction, inputBuilder.build());
    Assertions.assertEquals(250, conjunctionResult.getRowCount());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,18 +69,18 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
--------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
----------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=()
--------------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
------------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
--------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------PhysicalProject
------------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's'))
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=()
----------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------PhysicalProject
--------------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000))
--------------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------------PhysicalDistribute[DistributionSpecHash]
------------------------------PhysicalProject
--------------------------------filter((t_s_secyear.dyear = 2000) and (t_s_secyear.sale_type = 's'))
--------------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000))
----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------PhysicalProject
------------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000) > if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000)))
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000) > if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000)))
--------------PhysicalProject
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=()
------------------PhysicalDistribute[DistributionSpecHash]
--------------------PhysicalProject
----------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
--------------------PhysicalDistribute[DistributionSpecHash]
Expand All @@ -59,4 +55,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------filter((t_s_firstyear.dyear = 2001) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -51,28 +51,20 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_secyear.dyear = 2000) and (t_w_secyear.sale_type = 'w'))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
--------------PhysicalProject
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=()
------------------PhysicalDistribute[DistributionSpecHash]
--------------------PhysicalProject
----------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
----------------------filter((t_w_secyear.dyear = 2000) and (t_w_secyear.sale_type = 'w'))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
----------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=((if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL) > if((year_total > 0.000000), (cast(year_total as DECIMALV3(38, 16)) / year_total), NULL)))
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_firstyear.customer_id)) otherCondition=()
------------------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_c_secyear.customer_id)) otherCondition=()
--------------------------PhysicalDistribute[DistributionSpecHash]
----------------------------PhysicalProject
------------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
------------------------------filter((t_c_secyear.dyear = 2000) and (t_c_secyear.sale_type = 'c'))
--------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
----------------------------PhysicalDistribute[DistributionSpecHash]
Expand All @@ -83,4 +75,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------------------------------PhysicalProject
--------------------------------filter((t_s_firstyear.dyear = 1999) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.000000))
----------------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
----------------------PhysicalDistribute[DistributionSpecHash]
------------------------PhysicalProject
--------------------------filter((t_c_firstyear.dyear = 1999) and (t_c_firstyear.sale_type = 'c') and (t_c_firstyear.year_total > 0.000000))
----------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_firstyear.dyear = 1999) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.000000))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) > if((year_total > 0.0), (year_total / year_total), NULL)))
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year = 2000))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=((if((year_total > 0.0), (year_total / year_total), NULL) > if((year_total > 0.0), (year_total / year_total), NULL)))
--------------PhysicalProject
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=()
------------------PhysicalDistribute[DistributionSpecHash]
--------------------PhysicalProject
----------------------filter((t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.0))
----------------------filter((t_w_secyear.sale_type = 'w') and (t_w_secyear.year = 2000))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
--------------------PhysicalDistribute[DistributionSpecHash]
Expand All @@ -59,4 +55,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------filter((t_s_firstyear.sale_type = 's') and (t_s_firstyear.year = 1999) and (t_s_firstyear.year_total > 0.0))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year = 1999) and (t_w_firstyear.year_total > 0.0))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Original file line number Diff line number Diff line change
Expand Up @@ -39,16 +39,12 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
------PhysicalDistribute[DistributionSpecGather]
--------PhysicalTopN[LOCAL_SORT]
----------PhysicalProject
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000) > if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000)))
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=((if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000) > if((year_total > 0.00), (cast(year_total as DECIMALV3(38, 8)) / year_total), 0.000000)))
--------------PhysicalProject
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_firstyear.customer_id)) otherCondition=()
----------------hashJoin[INNER_JOIN] hashCondition=((t_s_firstyear.customer_id = t_w_secyear.customer_id)) otherCondition=()
------------------PhysicalDistribute[DistributionSpecHash]
--------------------PhysicalProject
----------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
----------------------filter((t_w_secyear.dyear = 2002) and (t_w_secyear.sale_type = 'w'))
------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
------------------hashJoin[INNER_JOIN] hashCondition=((t_s_secyear.customer_id = t_s_firstyear.customer_id)) otherCondition=()
--------------------PhysicalDistribute[DistributionSpecHash]
Expand All @@ -59,4 +55,8 @@ PhysicalCteAnchor ( cteId=CTEId#0 )
----------------------PhysicalProject
------------------------filter((t_s_firstyear.dyear = 2001) and (t_s_firstyear.sale_type = 's') and (t_s_firstyear.year_total > 0.00))
--------------------------PhysicalCteConsumer ( cteId=CTEId#0 )
--------------PhysicalDistribute[DistributionSpecHash]
----------------PhysicalProject
------------------filter((t_w_firstyear.dyear = 2001) and (t_w_firstyear.sale_type = 'w') and (t_w_firstyear.year_total > 0.00))
--------------------PhysicalCteConsumer ( cteId=CTEId#0 )

Loading

0 comments on commit c6771da

Please sign in to comment.