diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java index 33b7e02b332507..65db271e39481e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/FilterEstimation.java @@ -71,6 +71,9 @@ */ public class FilterEstimation extends ExpressionVisitor { public static final double DEFAULT_INEQUALITY_COEFFICIENT = 0.5; + // Range selectivity is prone to producing outliers, so we clamp it from below with this threshold. + // The threshold approximates selecting one month out of fifty years, i.e. roughly 1 / (50 * 12). + public static final double RANGE_SELECTIVITY_THRESHOLD = 0.0016; public static final double DEFAULT_IN_COEFFICIENT = 1.0 / 3.0; public static final double DEFAULT_HAVING_COEFFICIENT = 0.01; @@ -627,6 +630,8 @@ private Statistics estimateBinaryComparisonFilter(Expression leftExpr, DataType : intersectRange.getDistinctValues() / leftRange.getDistinctValues(); if (!(dataType instanceof RangeScalable) && (sel != 0.0 && sel != 1.0)) { sel = DEFAULT_INEQUALITY_COEFFICIENT; + } else if (sel < RANGE_SELECTIVITY_THRESHOLD) { + sel = RANGE_SELECTIVITY_THRESHOLD; } sel = getNotNullSelectivity(leftStats, sel); updatedStatistics = context.statistics.withSel(sel); diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out b/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out index 40646f2dda1412..03274a28fef3b6 100644 --- a/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out +++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query12.out @@ -13,16 +13,15 @@ PhysicalResultSink --------------------hashAgg[LOCAL] ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ws_item_sk] 
---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------PhysicalProject -------------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +--------------------------PhysicalProject +----------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +------------------------------PhysicalProject +--------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +------------------------------PhysicalDistribute[DistributionSpecReplicated] --------------------------------PhysicalProject -----------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 ---------------------------------PhysicalDistribute[DistributionSpecReplicated] -----------------------------------PhysicalProject -------------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15')) ---------------------------------------PhysicalOlapScan[date_dim] ---------------------------PhysicalDistribute[DistributionSpecHash] +----------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15')) +------------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------PhysicalProject ------------------------------filter(i_category IN ('Books', 'Electronics', 'Men')) --------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out b/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out index 88976f6717b532..9981b31b55dbba 100644 --- a/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out +++ b/regression-test/data/nereids_hint_tpcds_p0/shape/query80.out @@ -16,34 +16,31 @@ 
PhysicalResultSink --------------------------hashAgg[LOCAL] ----------------------------PhysicalProject ------------------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5 ss_ticket_number->[sr_ticket_number] ---------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5 ---------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] ---------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] -----------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk] -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 -------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------PhysicalProject 
-----------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14')) -------------------------------------------------------PhysicalOlapScan[date_dim] -----------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((promotion.p_channel_tv = 'N')) -----------------------------------------------------PhysicalOlapScan[promotion] -----------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------PhysicalProject ---------------------------------------------filter((item.i_current_price > 50.00)) -----------------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk] +------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------------------PhysicalProject +----------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 
+--------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +----------------------------------------------PhysicalProject +------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14')) +--------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------PhysicalProject +----------------------------------------------filter((promotion.p_channel_tv = 'N')) +------------------------------------------------PhysicalOlapScan[promotion] --------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[store] +------------------------------------------filter((item.i_current_price > 50.00)) +--------------------------------------------PhysicalOlapScan[item] +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store] --------------------PhysicalProject ----------------------hashAgg[GLOBAL] ------------------------PhysicalDistribute[DistributionSpecHash] diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out index 03682c1c406c9a..837bd33960d442 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query12.out @@ -13,15 +13,14 @@ PhysicalResultSink --------------------hashAgg[LOCAL] ----------------------PhysicalProject ------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF1 i_item_sk->[ws_item_sk] 
---------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +--------------------------hashJoin[INNER_JOIN] hashCondition=((web_sales.ws_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ws_sold_date_sk] +----------------------------PhysicalProject +------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 +----------------------------PhysicalDistribute[DistributionSpecReplicated] ------------------------------PhysicalProject ---------------------------------PhysicalOlapScan[web_sales] apply RFs: RF0 RF1 -------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------PhysicalProject -----------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15')) -------------------------------------PhysicalOlapScan[date_dim] ---------------------------PhysicalDistribute[DistributionSpecHash] +--------------------------------filter((date_dim.d_date <= '2001-07-15') and (date_dim.d_date >= '2001-06-15')) +----------------------------------PhysicalOlapScan[date_dim] +--------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------PhysicalProject ------------------------------filter(i_category IN ('Books', 'Electronics', 'Men')) --------------------------------PhysicalOlapScan[item] diff --git a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out index 88976f6717b532..9981b31b55dbba 100644 --- a/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out +++ b/regression-test/data/nereids_tpcds_shape_sf1000_p0/shape/query80.out @@ -16,34 +16,31 @@ PhysicalResultSink --------------------------hashAgg[LOCAL] 
----------------------------PhysicalProject ------------------------------hashJoin[RIGHT_OUTER_JOIN] hashCondition=((store_sales.ss_item_sk = store_returns.sr_item_sk) and (store_sales.ss_ticket_number = store_returns.sr_ticket_number)) otherCondition=() build RFs:RF4 ss_item_sk->[sr_item_sk];RF5 ss_ticket_number->[sr_ticket_number] ---------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------PhysicalProject -------------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5 ---------------------------------PhysicalDistribute[DistributionSpecHash] -----------------------------------PhysicalProject -------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] ---------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] -----------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------PhysicalProject ---------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk] -----------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] -------------------------------------------------PhysicalProject ---------------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 -------------------------------------------------PhysicalDistribute[DistributionSpecReplicated] ---------------------------------------------------PhysicalProject -----------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and 
(date_dim.d_date >= '2002-08-14')) -------------------------------------------------------PhysicalOlapScan[date_dim] -----------------------------------------------PhysicalDistribute[DistributionSpecReplicated] -------------------------------------------------PhysicalProject ---------------------------------------------------filter((promotion.p_channel_tv = 'N')) -----------------------------------------------------PhysicalOlapScan[promotion] -----------------------------------------PhysicalDistribute[DistributionSpecHash] -------------------------------------------PhysicalProject ---------------------------------------------filter((item.i_current_price > 50.00)) -----------------------------------------------PhysicalOlapScan[item] +--------------------------------PhysicalProject +----------------------------------PhysicalOlapScan[store_returns] apply RFs: RF4 RF5 +--------------------------------PhysicalProject +----------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_store_sk = store.s_store_sk)) otherCondition=() build RFs:RF3 s_store_sk->[ss_store_sk] +------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_item_sk = item.i_item_sk)) otherCondition=() build RFs:RF2 i_item_sk->[ss_item_sk] +--------------------------------------PhysicalProject +----------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_promo_sk = promotion.p_promo_sk)) otherCondition=() build RFs:RF1 p_promo_sk->[ss_promo_sk] +------------------------------------------hashJoin[INNER_JOIN] hashCondition=((store_sales.ss_sold_date_sk = date_dim.d_date_sk)) otherCondition=() build RFs:RF0 d_date_sk->[ss_sold_date_sk] +--------------------------------------------PhysicalProject +----------------------------------------------PhysicalOlapScan[store_sales] apply RFs: RF0 RF1 RF2 RF3 +--------------------------------------------PhysicalDistribute[DistributionSpecReplicated] 
+----------------------------------------------PhysicalProject +------------------------------------------------filter((date_dim.d_date <= '2002-09-13') and (date_dim.d_date >= '2002-08-14')) +--------------------------------------------------PhysicalOlapScan[date_dim] +------------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------------PhysicalProject +----------------------------------------------filter((promotion.p_channel_tv = 'N')) +------------------------------------------------PhysicalOlapScan[promotion] --------------------------------------PhysicalDistribute[DistributionSpecReplicated] ----------------------------------------PhysicalProject -------------------------------------------PhysicalOlapScan[store] +------------------------------------------filter((item.i_current_price > 50.00)) +--------------------------------------------PhysicalOlapScan[item] +------------------------------------PhysicalDistribute[DistributionSpecReplicated] +--------------------------------------PhysicalProject +----------------------------------------PhysicalOlapScan[store] --------------------PhysicalProject ----------------------hashAgg[GLOBAL] ------------------------PhysicalDistribute[DistributionSpecHash]