Skip to content

Commit

Permalink
[Bug](rf) fix rf of in filter cast data as different class type maybe…
Browse files Browse the repository at this point in the history
… return wrong result (#39026)

Two points have changed:
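1. In the batch_assign function:
const std::string& string_value = column.stringval();
Calling **insert(&string_value)** passes a pointer to a std::string, but insert() casts that pointer to a StringRef:
reinterpret_cast<const **StringRef***>(data). The std::string object is then read as a different class type, which may produce a wrong result;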
```
    // Inserts one string value into the set; a null `data` only records that
    // the set contains NULL.
    // `data` is reinterpreted as a `const StringRef*`, so the caller must pass
    // a pointer to a StringRef. A pointer to any other type (e.g. a
    // std::string) would be read through the wrong class layout.
    void insert(const void* data) override {
        if (data == nullptr) {
            _contains_null = true;
            return;
        }

        // Unchecked cast: nothing here verifies what `data` really points to.
        const auto* value = reinterpret_cast<const StringRef*>(data);
        // Copy the referenced bytes into a std::string before storing it.
        std::string str_value(value->data, value->size);
        _set.insert(str_value);
    }
```

2. In the batch_copy function, void_value is cast to const T*,
but the pointer returned by it->get_value() actually points to a StringRef, so T needs to be
StringRef instead of std::string.
```
// Walks `it` until it has no more elements and, for each element, calls
// `set_func` with filter->add_values() and the element's value.
// T must be the type that it->get_value() actually points to: the pointer is
// reinterpreted as `const T*` without any check.
template <typename T>
void batch_copy(PInFilter* filter, HybridSetBase::IteratorBase* it,
                void (*set_func)(PColumnValue*, const T*)) {
    while (it->has_next()) {
        const void* void_value = it->get_value();
        // Unchecked cast from the type-erased iterator value to the requested T.
        auto origin_value = reinterpret_cast<const T*>(void_value);
        // The result of filter->add_values() is the PColumnValue* that set_func receives.
        set_func(filter->add_values(), origin_value);
        it->next();
    }
}
```
  • Loading branch information
zhangstar333 authored Aug 10, 2024
1 parent 7bab58a commit 947397e
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 4 deletions.
12 changes: 8 additions & 4 deletions be/src/exprs/runtime_filter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -694,8 +694,10 @@ class RuntimePredicateWrapper {
case TYPE_CHAR:
case TYPE_STRING: {
batch_assign(in_filter, [](std::shared_ptr<HybridSetBase>& set, PColumnValue& column) {
const auto& string_val_ref = column.stringval();
set->insert(&string_val_ref);
const std::string& string_value = column.stringval();
// string_value is std::string, call insert(data, size) function in StringSet will not cast as StringRef
// so could avoid some cast error at different class object.
set->insert((void*)string_value.data(), string_value.size());
});
break;
}
Expand Down Expand Up @@ -1630,8 +1632,10 @@ void IRuntimeFilter::to_protobuf(PInFilter* filter) {
case TYPE_CHAR:
case TYPE_VARCHAR:
case TYPE_STRING: {
batch_copy<std::string>(filter, it, [](PColumnValue* column, const std::string* value) {
column->set_stringval(*value);
//const void* void_value = it->get_value();
//Now the get_value return void* is StringRef
batch_copy<StringRef>(filter, it, [](PColumnValue* column, const StringRef* value) {
column->set_stringval(value->to_string());
});
return;
}
Expand Down
9 changes: 9 additions & 0 deletions regression-test/data/query_p0/join/test_runtimefilter_2.out
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,12 @@
-- !select_1 --
aaa

-- !select_2 --
aaa

-- !select_3 --
BSDSAE1018 1 1 true BSDSAE1018 1 true true

-- !select_4 --
2 3 BSDSAE1018

11 changes: 11 additions & 0 deletions regression-test/suites/query_p0/join/test_runtimefilter_2.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,15 @@
qt_select_1 """
select "aaa" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID ) jpa4 on tpisyncjpa4.USER_ID = jpa4.USER_ID;
"""
sql """set runtime_filter_type='IN';"""
qt_select_2 """
select "aaa" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID ) jpa4 on tpisyncjpa4.USER_ID = jpa4.USER_ID;
"""
qt_select_3 """
select *, tpisyncjpp1.POST_ID=jpp1.POST_ID, tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE from ( select tpisyncjpp1.POST_ID,tpisyncjpp1.INTERNAL_CODE as INTERNAL_CODE, tpisyncjpp1.STATE_ID, tpisyncjpp1.STATE_ID ='1' from ( select tpisyncjpa4.* from t_ods_tpisyncjpa4_2 tpisyncjpa4 inner join [broadcast] ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID )jpa4 on tpisyncjpa4.USER_ID=jpa4.USER_ID and tpisyncjpa4.INTERNAL_CODE=jpa4.INTERNAL_CODE where tpisyncjpa4.STATE_ID ='1' ) tpisyncjpa4 inner join [broadcast] t_ods_tpisyncjpp1_2 tpisyncjpp1 where tpisyncjpa4.USER_ID = tpisyncjpp1.USER_ID AND tpisyncjpp1.STATE_ID ='1' AND tpisyncjpp1.POST_ID='BSDSAE1018' ) tpisyncjpp1 inner join [broadcast] ( SELECT POST_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpp1_2 WHERE STATE_ID = '1' GROUP BY POST_ID )jpp1 on tpisyncjpp1.POST_ID=jpp1.POST_ID and tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE;
"""
qt_select_4 """
select DISTINCT tpisyncjpa4.USER_ID as USER_ID, tpisyncjpa4.USER_NAME as USER_NAME, tpisyncjpp1.POST_ID AS "T4_POST_ID" FROM t_ods_tpisyncjpa4_2 tpisyncjpa4 cross join [shuffle] t_ods_tpisyncjpp1_2 tpisyncjpp1 inner join ( SELECT USER_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpa4_2 WHERE STATE_ID = '1' GROUP BY USER_ID )jpa4 on tpisyncjpa4.USER_ID=jpa4.USER_ID and tpisyncjpa4.INTERNAL_CODE=jpa4.INTERNAL_CODE inner join [shuffle] ( SELECT POST_ID, MAX(INTERNAL_CODE) as INTERNAL_CODE FROM t_ods_tpisyncjpp1_2 WHERE STATE_ID = '1' GROUP BY POST_ID )jpp1 on tpisyncjpp1.POST_ID=jpp1.POST_ID and tpisyncjpp1.INTERNAL_CODE=jpp1.INTERNAL_CODE where tpisyncjpa4.USER_ID = tpisyncjpp1.USER_ID AND tpisyncjpp1.STATE_ID ='1' AND tpisyncjpa4.STATE_ID ='1' AND tpisyncjpp1.POST_ID='BSDSAE1018';
"""

}

0 comments on commit 947397e

Please sign in to comment.