Skip to content

Commit

Permalink
Merge branch 'main' into phw2/csv-to-parquet
Browse files (browse the repository at this point in the history)
  • Loading branch information
wangpatrick57 committed May 1, 2024
2 parents b10de9f + 182b052 commit 92d0333
Show file tree
Hide file tree
Showing 23 changed files with 1,288 additions and 808 deletions.
2 changes: 1 addition & 1 deletion optd-datafusion-repr/src/cost/base_cost/agg.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ impl<
let group_col_refs = optimizer
.get_property_by_group::<ColumnRefPropertyBuilder>(context.group_id, 1);
group_col_refs
.column_refs()
.base_table_column_refs()
.iter()
.take(group_by.len())
.map(|col_ref| match col_ref {
Expand Down
386 changes: 84 additions & 302 deletions optd-datafusion-repr/src/cost/base_cost/filter.rs

Large diffs are not rendered by default.

17 changes: 7 additions & 10 deletions optd-datafusion-repr/src/cost/base_cost/filter/in_list.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use crate::{
OptCostModel,
},
plan_nodes::{ColumnRefExpr, ConstantExpr, InListExpr, OptRelNode, OptRelNodeTyp},
properties::column_ref::{BaseTableColumnRef, ColumnRef, GroupColumnRefs},
properties::column_ref::{BaseTableColumnRef, BaseTableColumnRefs, ColumnRef},
};

impl<
Expand All @@ -22,7 +22,7 @@ impl<
pub(super) fn get_in_list_selectivity(
&self,
expr: &InListExpr,
column_refs: &GroupColumnRefs,
column_refs: &BaseTableColumnRefs,
) -> f64 {
let child = expr.child();

Expand Down Expand Up @@ -84,7 +84,7 @@ mod tests {
create_one_column_cost_model, in_list, TestDistribution, TestMostCommonValues,
TestPerColumnStats, TABLE1_NAME,
},
properties::column_ref::{ColumnRef, GroupColumnRefs},
properties::column_ref::ColumnRef,
};

#[test]
Expand All @@ -95,13 +95,10 @@ mod tests {
0.0,
Some(TestDistribution::empty()),
));
let column_refs = GroupColumnRefs::new_test(
vec![ColumnRef::base_table_column_ref(
String::from(TABLE1_NAME),
0,
)],
None,
);
let column_refs = vec![ColumnRef::base_table_column_ref(
String::from(TABLE1_NAME),
0,
)];
assert_approx_eq::assert_approx_eq!(
cost_model
.get_in_list_selectivity(&in_list(0, vec![Value::Int32(1)], false), &column_refs),
Expand Down
38 changes: 16 additions & 22 deletions optd-datafusion-repr/src/cost/base_cost/filter/like.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ use crate::{
OptCostModel,
},
plan_nodes::{ColumnRefExpr, ConstantExpr, LikeExpr, OptRelNode, OptRelNodeTyp},
properties::column_ref::{BaseTableColumnRef, ColumnRef, GroupColumnRefs},
properties::column_ref::{BaseTableColumnRef, BaseTableColumnRefs, ColumnRef},
};

// Used for estimating pattern selectivity character-by-character. These numbers
Expand Down Expand Up @@ -40,7 +40,7 @@ impl<
pub(super) fn get_like_selectivity(
&self,
like_expr: &LikeExpr,
column_refs: &GroupColumnRefs,
column_refs: &BaseTableColumnRefs,
) -> f64 {
let child = like_expr.child();

Expand Down Expand Up @@ -97,14 +97,15 @@ impl<
(0.0, 0.0)
};

// Postgres clamps the result after histogram and before MCV. See Postgres `patternsel_common`.
let result = ((non_mcv_sel + mcv_freq) * (1.0 - null_frac)).clamp(0.0001, 0.9999);
let result = non_mcv_sel + mcv_freq;

if like_expr.negated() {
1.0 - result - null_frac
} else {
result
}
// Postgres clamps the result after histogram and before MCV. See Postgres `patternsel_common`.
.clamp(0.0001, 0.9999)
} else {
UNIMPLEMENTED_SEL
}
Expand All @@ -123,7 +124,7 @@ mod tests {
TestPerColumnStats, TABLE1_NAME,
},
},
properties::column_ref::{ColumnRef, GroupColumnRefs},
properties::column_ref::ColumnRef,
};

#[test]
Expand All @@ -137,13 +138,10 @@ mod tests {
0.0,
Some(TestDistribution::empty()),
));
let column_refs = GroupColumnRefs::new_test(
vec![ColumnRef::base_table_column_ref(
String::from(TABLE1_NAME),
0,
)],
None,
);
let column_refs = vec![ColumnRef::base_table_column_ref(
String::from(TABLE1_NAME),
0,
)];
assert_approx_eq::assert_approx_eq!(
cost_model.get_like_selectivity(&like(0, "%abcd%", false), &column_refs),
0.1 + FULL_WILDCARD_SEL_FACTOR.powi(2) * FIXED_CHAR_SEL_FACTOR.powi(4)
Expand All @@ -167,22 +165,18 @@ mod tests {
null_frac,
Some(TestDistribution::empty()),
));
let column_refs = GroupColumnRefs::new_test(
vec![ColumnRef::base_table_column_ref(
String::from(TABLE1_NAME),
0,
)],
None,
);
let column_refs = vec![ColumnRef::base_table_column_ref(
String::from(TABLE1_NAME),
0,
)];
assert_approx_eq::assert_approx_eq!(
cost_model.get_like_selectivity(&like(0, "%abcd%", false), &column_refs),
(0.1 + FULL_WILDCARD_SEL_FACTOR.powi(2) * FIXED_CHAR_SEL_FACTOR.powi(4)) * null_frac
0.1 + FULL_WILDCARD_SEL_FACTOR.powi(2) * FIXED_CHAR_SEL_FACTOR.powi(4)
);
assert_approx_eq::assert_approx_eq!(
cost_model.get_like_selectivity(&like(0, "%abcd%", true), &column_refs),
1.0 - (0.1 + FULL_WILDCARD_SEL_FACTOR.powi(2) * FIXED_CHAR_SEL_FACTOR.powi(4))
* null_frac
- 0.5
- null_frac
);
}
}
Loading

0 comments on commit 92d0333

Please sign in to comment.