Skip to content

Commit

Permalink
[opt](nereids) refine expression estimation
Browse files (browse the repository at this point in the history)
  • Loading branch information
zhongjian.xzj authored and zhongjian.xzj committed Sep 20, 2024
1 parent 368facd commit f05e9f1
Show file tree
Hide file tree
Showing 6 changed files with 126 additions and 128 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -334,8 +334,8 @@ private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic stats
} else {
double val = statsForRight.maxValue;
if (val > statsForLeft.maxValue || val < statsForLeft.minValue) {
// do a lower bound protection to avoid using 0 directly
selectivity = RANGE_SELECTIVITY_THRESHOLD;
// TODO: will fix this in the next pr by adding RangeScalable protection
selectivity = 0.0;
} else if (ndv >= 1.0) {
selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv);
} else {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,6 @@ private Statistics computeOlapScan(OlapScan olapScan) {
if (derivedStats.findColumnStatistics(slot) == null) {
derivedStats.addColumnStats(slot,
new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN, derivedRowCount)
.setAvgSizeByte(slot.getDataType().width())
.build());
}
}
Expand Down Expand Up @@ -433,7 +432,6 @@ private Statistics computeOlapScan(OlapScan olapScan) {
for (Slot slot : ((Plan) olapScan).getOutput()) {
builder.putColumnStatistics(slot,
new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN, tableRowCount)
.setAvgSizeByte(slot.getDataType().width())
.build());
}
setHasUnknownColStatsInStatementContext();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,63 +8,63 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecGather]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 ss_item_sk->[i_item_sk]
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk]
------------------PhysicalProject
--------------------filter((item.i_category = 'Jewelry'))
----------------------PhysicalOlapScan[item] apply RFs: RF10
--------------------filter((customer_address.ca_gmt_offset = -7.00))
----------------------PhysicalOlapScan[customer_address] apply RFs: RF10
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF9 c_current_addr_sk->[ca_address_sk]
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk]
----------------------PhysicalProject
------------------------filter((customer_address.ca_gmt_offset = -7.00))
--------------------------PhysicalOlapScan[customer_address] apply RFs: RF9
------------------------PhysicalOlapScan[customer] apply RFs: RF9
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF8 ss_customer_sk->[c_customer_sk]
------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 ss_item_sk->[i_item_sk]
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[customer] apply RFs: RF8
----------------------------filter((item.i_category = 'Jewelry'))
------------------------------PhysicalOlapScan[item] apply RFs: RF8
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF7 ss_sold_date_sk->[d_date_sk]
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF7
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF6 ss_promo_sk->[p_promo_sk]
----------------------------------PhysicalProject
------------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y')))
--------------------------------------PhysicalOlapScan[promotion] apply RFs: RF6
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk]
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7
--------------------------------------PhysicalProject
----------------------------------------filter((store.s_gmt_offset = -7.00))
------------------------------------------PhysicalOlapScan[store]
----------------------------------PhysicalProject
------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
--------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalProject
--------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y')))
----------------------------------PhysicalOlapScan[promotion]
--------hashAgg[GLOBAL]
----------PhysicalDistribute[DistributionSpecGather]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 ss_item_sk->[i_item_sk]
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 c_current_addr_sk->[ca_address_sk]
------------------PhysicalProject
--------------------filter((item.i_category = 'Jewelry'))
----------------------PhysicalOlapScan[item] apply RFs: RF4
--------------------filter((customer_address.ca_gmt_offset = -7.00))
----------------------PhysicalOlapScan[customer_address] apply RFs: RF4
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk]
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk]
----------------------PhysicalProject
------------------------filter((customer_address.ca_gmt_offset = -7.00))
--------------------------PhysicalOlapScan[customer_address] apply RFs: RF3
------------------------PhysicalOlapScan[customer] apply RFs: RF3
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ss_customer_sk->[c_customer_sk]
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[customer] apply RFs: RF2
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 ss_sold_date_sk->[d_date_sk]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
----------------------------------PhysicalProject
------------------------------------filter((store.s_gmt_offset = -7.00))
--------------------------------------PhysicalOlapScan[store]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalProject
----------------------------filter((item.i_category = 'Jewelry'))
------------------------------PhysicalOlapScan[item]

62 changes: 31 additions & 31 deletions regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query61.out
Original file line number Diff line number Diff line change
Expand Up @@ -8,63 +8,63 @@ PhysicalResultSink
----------PhysicalDistribute[DistributionSpecGather]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 ss_item_sk->[i_item_sk]
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk]
------------------PhysicalProject
--------------------filter((item.i_category = 'Jewelry'))
----------------------PhysicalOlapScan[item] apply RFs: RF10
--------------------filter((customer_address.ca_gmt_offset = -7.00))
----------------------PhysicalOlapScan[customer_address] apply RFs: RF10
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF9 c_current_addr_sk->[ca_address_sk]
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk]
----------------------PhysicalProject
------------------------filter((customer_address.ca_gmt_offset = -7.00))
--------------------------PhysicalOlapScan[customer_address] apply RFs: RF9
------------------------PhysicalOlapScan[customer] apply RFs: RF9
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF8 ss_customer_sk->[c_customer_sk]
------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 ss_item_sk->[i_item_sk]
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[customer] apply RFs: RF8
----------------------------filter((item.i_category = 'Jewelry'))
------------------------------PhysicalOlapScan[item] apply RFs: RF8
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF7 ss_sold_date_sk->[d_date_sk]
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF7
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF6 ss_promo_sk->[p_promo_sk]
----------------------------------PhysicalProject
------------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y')))
--------------------------------------PhysicalOlapScan[promotion] apply RFs: RF6
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk]
----------------------------------PhysicalProject
------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk]
--------------------------------------PhysicalProject
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5
----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7
--------------------------------------PhysicalProject
----------------------------------------filter((store.s_gmt_offset = -7.00))
------------------------------------------PhysicalOlapScan[store]
----------------------------------PhysicalProject
------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
--------------------------------------PhysicalOlapScan[date_dim]
------------------------------PhysicalProject
--------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y')))
----------------------------------PhysicalOlapScan[promotion]
--------hashAgg[GLOBAL]
----------PhysicalDistribute[DistributionSpecGather]
------------hashAgg[LOCAL]
--------------PhysicalProject
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 ss_item_sk->[i_item_sk]
----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 c_current_addr_sk->[ca_address_sk]
------------------PhysicalProject
--------------------filter((item.i_category = 'Jewelry'))
----------------------PhysicalOlapScan[item] apply RFs: RF4
--------------------filter((customer_address.ca_gmt_offset = -7.00))
----------------------PhysicalOlapScan[customer_address] apply RFs: RF4
------------------PhysicalProject
--------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk]
--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk]
----------------------PhysicalProject
------------------------filter((customer_address.ca_gmt_offset = -7.00))
--------------------------PhysicalOlapScan[customer_address] apply RFs: RF3
------------------------PhysicalOlapScan[customer] apply RFs: RF3
----------------------PhysicalProject
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ss_customer_sk->[c_customer_sk]
--------------------------PhysicalProject
----------------------------PhysicalOlapScan[customer] apply RFs: RF2
------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk]
--------------------------PhysicalProject
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 ss_sold_date_sk->[d_date_sk]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1
----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk]
------------------------------PhysicalProject
--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk]
----------------------------------PhysicalProject
------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0
------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2
----------------------------------PhysicalProject
------------------------------------filter((store.s_gmt_offset = -7.00))
--------------------------------------PhysicalOlapScan[store]
------------------------------PhysicalProject
--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999))
----------------------------------PhysicalOlapScan[date_dim]
--------------------------PhysicalProject
----------------------------filter((item.i_category = 'Jewelry'))
------------------------------PhysicalOlapScan[item]

Loading

0 comments on commit f05e9f1

Please sign in to comment.