From f05e9f13783dc9b1145e972d054d5c4e69bd26d1 Mon Sep 17 00:00:00 2001 From: "zhongjian.xzj" Date: Fri, 20 Sep 2024 12:05:30 +0800 Subject: [PATCH] [opt](nereids) refine expression estimation --- .../doris/nereids/stats/FilterEstimation.java | 4 +- .../doris/nereids/stats/StatsCalculator.java | 2 - .../rf_prune/query61.out | 62 +++++++++---------- .../shape/query61.out | 62 +++++++++---------- .../tpcds_sf100/rf_prune/query61.out | 62 +++++++++---------- .../tpcds_sf100/shape/query61.out | 62 +++++++++---------- 6 files changed, 126 insertions(+), 128 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 7a8a698713ac35e..e7a62dcd4848460 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -334,8 +334,8 @@ private Statistics estimateEqualTo(ComparisonPredicate cp, ColumnStatistic stats } else { double val = statsForRight.maxValue; if (val > statsForLeft.maxValue || val < statsForLeft.minValue) { - // do a lower bound protection to avoid using 0 directly - selectivity = RANGE_SELECTIVITY_THRESHOLD; + // TODO: will fix this in the next pr by adding RangeScalable protection + selectivity = 0.0; } else if (ndv >= 1.0) { selectivity = StatsMathUtil.minNonNaN(1.0, 1.0 / ndv); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 8cb0d38238c3497..2ccf7c0f75eea09 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -404,7 +404,6 @@ private Statistics computeOlapScan(OlapScan olapScan) { if (derivedStats.findColumnStatistics(slot) == null) { derivedStats.addColumnStats(slot, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN, derivedRowCount) - .setAvgSizeByte(slot.getDataType().width()) .build()); } } @@ -433,7 +432,6 @@ private Statistics computeOlapScan(OlapScan olapScan) { for (Slot slot : ((Plan) olapScan).getOutput()) { builder.putColumnStatistics(slot, new ColumnStatisticBuilder(ColumnStatistic.UNKNOWN, tableRowCount) - .setAvgSizeByte(slot.getDataType().width()) .build()); } setHasUnknownColStatsInStatementContext(); diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query61.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query61.out index d034c5b43231d70..66b73da1257da6f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query61.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/rf_prune/query61.out @@ -8,63 +8,63 @@ PhysicalResultSink ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF10 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF10 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF9 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +------------------------PhysicalOlapScan[customer] apply RFs: RF9 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF8 ss_customer_sk->[c_customer_sk] +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 ss_item_sk->[i_item_sk] --------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF8 +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item] apply RFs: RF8 --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF7 ss_sold_date_sk->[d_date_sk] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF7 -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF6 ss_promo_sk->[p_promo_sk] -----------------------------------PhysicalProject -------------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) ---------------------------------------PhysicalOlapScan[promotion] apply RFs: RF6 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] ----------------------------------PhysicalProject ------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] --------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7 --------------------------------------PhysicalProject ----------------------------------------filter((store.s_gmt_offset = -7.00)) ------------------------------------------PhysicalOlapScan[store] +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) +----------------------------------PhysicalOlapScan[promotion] --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF4 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF4 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +------------------------PhysicalOlapScan[customer] apply RFs: RF3 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ss_customer_sk->[c_customer_sk] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF2 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 ss_sold_date_sk->[d_date_sk] -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject --------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk] ----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ----------------------------------PhysicalProject ------------------------------------filter((store.s_gmt_offset = -7.00)) --------------------------------------PhysicalOlapScan[store] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query61.out b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query61.out index d034c5b43231d70..66b73da1257da6f 100644 --- a/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query61.out +++ b/regression-test/data/nereids_tpcds_shape_sf100_p0/shape/query61.out @@ -8,63 +8,63 @@ PhysicalResultSink ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF10 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF10 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF9 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +------------------------PhysicalOlapScan[customer] apply RFs: RF9 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF8 ss_customer_sk->[c_customer_sk] +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 ss_item_sk->[i_item_sk] --------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF8 +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item] apply RFs: RF8 --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF7 ss_sold_date_sk->[d_date_sk] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF7 -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF6 ss_promo_sk->[p_promo_sk] -----------------------------------PhysicalProject -------------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) ---------------------------------------PhysicalOlapScan[promotion] apply RFs: RF6 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] ----------------------------------PhysicalProject ------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] --------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7 --------------------------------------PhysicalProject ----------------------------------------filter((store.s_gmt_offset = -7.00)) ------------------------------------------PhysicalOlapScan[store] +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) +----------------------------------PhysicalOlapScan[promotion] --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF4 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF4 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +------------------------PhysicalOlapScan[customer] apply RFs: RF3 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ss_customer_sk->[c_customer_sk] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF2 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 ss_sold_date_sk->[d_date_sk] -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject --------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk] ----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ----------------------------------PhysicalProject ------------------------------------filter((store.s_gmt_offset = -7.00)) --------------------------------------PhysicalOlapScan[store] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query61.out b/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query61.out index d034c5b43231d70..66b73da1257da6f 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query61.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/rf_prune/query61.out @@ -8,63 +8,63 @@ PhysicalResultSink ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF10 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF10 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF9 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +------------------------PhysicalOlapScan[customer] apply RFs: RF9 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF8 ss_customer_sk->[c_customer_sk] +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 ss_item_sk->[i_item_sk] --------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF8 +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item] apply RFs: RF8 --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF7 ss_sold_date_sk->[d_date_sk] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF7 -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF6 ss_promo_sk->[p_promo_sk] -----------------------------------PhysicalProject -------------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) ---------------------------------------PhysicalOlapScan[promotion] apply RFs: RF6 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] ----------------------------------PhysicalProject ------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] --------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7 --------------------------------------PhysicalProject ----------------------------------------filter((store.s_gmt_offset = -7.00)) ------------------------------------------PhysicalOlapScan[store] +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) +----------------------------------PhysicalOlapScan[promotion] --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF4 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF4 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +------------------------PhysicalOlapScan[customer] apply RFs: RF3 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ss_customer_sk->[c_customer_sk] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF2 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 ss_sold_date_sk->[d_date_sk] -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject --------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk] ----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ----------------------------------PhysicalProject ------------------------------------filter((store.s_gmt_offset = -7.00)) --------------------------------------PhysicalOlapScan[store] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query61.out b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query61.out index d034c5b43231d70..66b73da1257da6f 100644 --- a/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query61.out +++ b/regression-test/data/new_shapes_p0/tpcds_sf100/shape/query61.out @@ -8,63 +8,63 @@ PhysicalResultSink ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF10 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF10 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF10 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF10 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF9 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF9 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF9 +------------------------PhysicalOlapScan[customer] apply RFs: RF9 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF8 ss_customer_sk->[c_customer_sk] +------------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF8 ss_item_sk->[i_item_sk] --------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF8 +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item] apply RFs: RF8 --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF7 ss_sold_date_sk->[d_date_sk] +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF7 p_promo_sk->[ss_promo_sk] ------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF7 -------------------------------PhysicalProject ---------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF6 ss_promo_sk->[p_promo_sk] -----------------------------------PhysicalProject -------------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) ---------------------------------------PhysicalOlapScan[promotion] apply RFs: RF6 +--------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF6 d_date_sk->[ss_sold_date_sk] ----------------------------------PhysicalProject ------------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF5 s_store_sk->[ss_store_sk] --------------------------------------PhysicalProject -----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 +----------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF5 RF6 RF7 --------------------------------------PhysicalProject ----------------------------------------filter((store.s_gmt_offset = -7.00)) ------------------------------------------PhysicalOlapScan[store] +----------------------------------PhysicalProject +------------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +--------------------------------------PhysicalOlapScan[date_dim] +------------------------------PhysicalProject +--------------------------------filter((((promotion.p_channel_dmail = 'Y') OR (promotion.p_channel_email = 'Y')) OR (promotion.p_channel_tv = 'Y'))) +----------------------------------PhysicalOlapScan[promotion] --------hashAgg[GLOBAL] ----------PhysicalDistribute[DistributionSpecGather] ------------hashAgg[LOCAL] --------------PhysicalProject -----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF4 ss_item_sk->[i_item_sk] +----------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF4 c_current_addr_sk->[ca_address_sk] ------------------PhysicalProject ---------------------filter((item.i_category = 'Jewelry')) -----------------------PhysicalOlapScan[item] apply RFs: RF4 +--------------------filter((customer_address.ca_gmt_offset = -7.00)) +----------------------PhysicalOlapScan[customer_address] apply RFs: RF4 ------------------PhysicalProject ---------------------hashJoin[INNER_JOIN bucketShuffle] hashCondition=((customer_address.ca_address_sk = customer.c_current_addr_sk)) otherCondition=() build RFs:RF3 c_current_addr_sk->[ca_address_sk] +--------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF3 ss_customer_sk->[c_customer_sk] ----------------------PhysicalProject -------------------------filter((customer_address.ca_gmt_offset = -7.00)) ---------------------------PhysicalOlapScan[customer_address] apply RFs: RF3 +------------------------PhysicalOlapScan[customer] apply RFs: RF3 ----------------------PhysicalProject -------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_customer_sk = customer.c_customer_sk)) otherCondition=() build RFs:RF2 ss_customer_sk->[c_customer_sk] ---------------------------PhysicalProject -----------------------------PhysicalOlapScan[customer] apply RFs: RF2 +------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] --------------------------PhysicalProject -----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 ss_sold_date_sk->[d_date_sk] -------------------------------PhysicalProject ---------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) -----------------------------------PhysicalOlapScan[date_dim] apply RFs: RF1 +----------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF1 d_date_sk->[ss_sold_date_sk] ------------------------------PhysicalProject --------------------------------hashJoin[INNER_JOIN broadcast] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF0 s_store_sk->[ss_store_sk] ----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 +------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 ----------------------------------PhysicalProject ------------------------------------filter((store.s_gmt_offset = -7.00)) --------------------------------------PhysicalOlapScan[store] +------------------------------PhysicalProject +--------------------------------filter((date_dim.d_moy = 11) and (date_dim.d_year = 1999)) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalProject +----------------------------filter((item.i_category = 'Jewelry')) +------------------------------PhysicalOlapScan[item]