From 182b052e2ae6a53683016e1cdab34bb466cda6cd Mon Sep 17 00:00:00 2001 From: Zhidong Guo <52783948+Gun9niR@users.noreply.github.com> Date: Wed, 1 May 2024 17:44:00 -0400 Subject: [PATCH] fix: correctly handle `null_frac` in LIKE (#179) --- optd-datafusion-repr/src/cost/base_cost/filter/like.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/optd-datafusion-repr/src/cost/base_cost/filter/like.rs b/optd-datafusion-repr/src/cost/base_cost/filter/like.rs index fd4541aa..0906c44b 100644 --- a/optd-datafusion-repr/src/cost/base_cost/filter/like.rs +++ b/optd-datafusion-repr/src/cost/base_cost/filter/like.rs @@ -97,14 +97,15 @@ impl< (0.0, 0.0) }; - // Postgres clamps the result after histogram and before MCV. See Postgres `patternsel_common`. - let result = ((non_mcv_sel + mcv_freq) * (1.0 - null_frac)).clamp(0.0001, 0.9999); + let result = non_mcv_sel + mcv_freq; if like_expr.negated() { 1.0 - result - null_frac } else { result } + // Postgres clamps the result after histogram and before MCV. See Postgres `patternsel_common`. + .clamp(0.0001, 0.9999) } else { UNIMPLEMENTED_SEL } @@ -170,13 +171,12 @@ mod tests { )]; assert_approx_eq::assert_approx_eq!( cost_model.get_like_selectivity(&like(0, "%abcd%", false), &column_refs), - (0.1 + FULL_WILDCARD_SEL_FACTOR.powi(2) * FIXED_CHAR_SEL_FACTOR.powi(4)) * null_frac + 0.1 + FULL_WILDCARD_SEL_FACTOR.powi(2) * FIXED_CHAR_SEL_FACTOR.powi(4) ); assert_approx_eq::assert_approx_eq!( cost_model.get_like_selectivity(&like(0, "%abcd%", true), &column_refs), 1.0 - (0.1 + FULL_WILDCARD_SEL_FACTOR.powi(2) * FIXED_CHAR_SEL_FACTOR.powi(4)) - * null_frac - - 0.5 + - null_frac ); } }