From 947397e999429104ce941e13df1d7369f4077160 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Sat, 10 Aug 2024 18:27:31 +0800 Subject: [PATCH] [Bug](rf) fix rf of in filter cast data as different class type maybe return wrong result (#39026) Two points have changed: 1. In the batch_assign function: const std::string& string_value = column.stringval(); calling **insert(&string_value)** makes the set reinterpret the pointer as a StringRef: reinterpret_cast<const StringRef*>(data). The pointer actually refers to a std::string, which is a different class type, so the cast may read wrong data: ``` void insert(const void* data) override { if (data == nullptr) { _contains_null = true; return; } const auto* value = reinterpret_cast<const StringRef*>(data); std::string str_value(value->data, value->size); _set.insert(str_value); } ``` 2. In the batch_copy function, void_value is cast to T*, but the value returned by it->get_value() is a StringRef, so T must be changed to StringRef: ``` template <class T> void batch_copy(PInFilter* filter, HybridSetBase::IteratorBase* it, void (*set_func)(PColumnValue*, const T*)) { while (it->has_next()) { const void* void_value = it->get_value(); auto origin_value = reinterpret_cast<const T*>(void_value); set_func(filter->add_values(), origin_value); it->next(); } } ``` --- be/src/exprs/runtime_filter.cpp | 12 ++++++++---- .../data/query_p0/join/test_runtimefilter_2.out | 9 +++++++++ .../suites/query_p0/join/test_runtimefilter_2.groovy | 11 +++++++++++ 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index c6fd3338b14656..5a241326f9099b 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -694,8 +694,10 @@ class RuntimePredicateWrapper { case TYPE_CHAR: case TYPE_STRING: { batch_assign(in_filter, [](std::shared_ptr& set, PColumnValue& column) { - const auto& string_val_ref = column.stringval(); - set->insert(&string_val_ref); + const std::string& string_value = column.stringval(); + // string_value is std::string, call insert(data, size) function in StringSet will not cast as 
StringRef + // so could avoid some cast error at different class object. + set->insert((void*)string_value.data(), string_value.size()); }); break; } @@ -1630,8 +1632,10 @@ void IRuntimeFilter::to_protobuf(PInFilter* filter) { case TYPE_CHAR: case TYPE_VARCHAR: case TYPE_STRING: { - batch_copy(filter, it, [](PColumnValue* column, const std::string* value) { - column->set_stringval(*value); + //const void* void_value = it->get_value(); + //Now the get_value return void* is StringRef + batch_copy(filter, it, [](PColumnValue* column, const StringRef* value) { + column->set_stringval(value->to_string()); }); return; } diff --git a/regression-test/data/query_p0/join/test_runtimefilter_2.out b/regression-test/data/query_p0/join/test_runtimefilter_2.out index d6cc7fc59a016b..005406e6793fa0 100644 --- a/regression-test/data/query_p0/join/test_runtimefilter_2.out +++ b/regression-test/data/query_p0/join/test_runtimefilter_2.out @@ -2,3 +2,12 @@ -- !select_1 -- aaa +-- !select_2 -- +aaa + +-- !select_3 -- +BSDSAE1018 1 1 true BSDSAE1018 1 true true + +-- !select_4 -- +2 3 BSDSAE1018 + diff --git a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy index 6e6e57c6c2da29..50a61a366b1bd2 100644 --- a/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy +++ b/regression-test/suites/query_p0/join/test_runtimefilter_2.groovy @@ -30,4 +30,15 @@ qt_select_1 """ select "aaa" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID ) jpa4 on tpisyncjpa4.USER_ID = jpa4.USER_ID; """ + sql """set runtime_filter_type='IN';""" + qt_select_2 """ + select "aaa" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID ) jpa4 on tpisyncjpa4.USER_ID = jpa4.USER_ID; + """ + qt_select_3 """ + 
select *, tpisyncjpp1.POST_ID=jpp1.POST_ID, tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE from ( select tpisyncjpp1.POST_ID,tpisyncjpp1.INTERNAL_CODE as INTERNAL_CODE, tpisyncjpp1.STATE_ID, tpisyncjpp1.STATE_ID ='1' from ( select tpisyncjpa4.* from t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join [broadcast] ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID )jpa4 on tpisyncjpa4.USER_ID=jpa4.USER_ID and tpisyncjpa4.INTERNAL_CODE=jpa4.INTERNAL_CODE where tpisyncjpa4.STATE_ID ='1' ) tpisyncjpa4 inner join [broadcast] t_ods_tpisyncjpp1_2 tpisyncjpp1 where tpisyncjpa4.USER_ID = tpisyncjpp1.USER_ID AND tpisyncjpp1.STATE_ID ='1' AND tpisyncjpp1.POST_ID='BSDSAE1018' ) tpisyncjpp1 inner join [broadcast] ( SELECT POST_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpp1_2 WHERE STATE_ID = '1' GROUP BY POST_ID )jpp1 on tpisyncjpp1.POST_ID=jpp1.POST_ID and tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE; + """ + qt_select_4 """ + select DISTINCT tpisyncjpa4.USER_ID as USER_ID, tpisyncjpa4.USER_NAME as USER_NAME, tpisyncjpp1.POST_ID AS "T4_POST_ID" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 cross join [shuffle] t_ods_tpisyncjpp1_2 tpisyncjpp1 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID )jpa4 on tpisyncjpa4.USER_ID=jpa4.USER_ID and tpisyncjpa4.INTERNAL_CODE=jpa4.INTERNAL_CODE inner join [shuffle] ( SELECT POST_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpp1_2 WHERE STATE_ID = '1' GROUP BY POST_ID )jpp1 on tpisyncjpp1.POST_ID=jpp1.POST_ID and tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE where tpisyncjpa4.USER_ID = tpisyncjpp1.USER_ID AND tpisyncjpp1.STATE_ID ='1' AND tpisyncjpa4.STATE_ID ='1' AND tpisyncjpp1.POST_ID='BSDSAE1018'; + """ + } \ No newline at end of file