-
Notifications
You must be signed in to change notification settings - Fork 3.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[feature](Paimon) support deletion vector for Paimon naive reader (#3…
…4743) (#35241) bp #34743 Co-authored-by: 苏小刚 <[email protected]>
- Loading branch information
1 parent
50f50cf
commit adc364a
Showing
20 changed files
with
585 additions
and
169 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#pragma once | ||
|
||
#include <algorithm> | ||
#include <cstdint> | ||
#include <cstring> | ||
#include <stdexcept> | ||
|
||
#include "common/status.h" | ||
#include "roaring/roaring.hh" | ||
|
||
namespace doris { | ||
class DeletionVector { | ||
public: | ||
const static uint32_t MAGIC_NUMBER = 1581511376; | ||
DeletionVector(roaring::Roaring roaring_bitmap) : _roaring_bitmap(std::move(roaring_bitmap)) {}; | ||
~DeletionVector() = default; | ||
|
||
bool checked_delete(uint32_t postition) { return _roaring_bitmap.addChecked(postition); } | ||
|
||
bool is_delete(uint32_t postition) const { return _roaring_bitmap.contains(postition); } | ||
|
||
bool is_empty() const { return _roaring_bitmap.isEmpty(); } | ||
|
||
uint32_t maximum() const { return _roaring_bitmap.maximum(); } | ||
|
||
uint32_t minimum() const { return _roaring_bitmap.minimum(); } | ||
|
||
static Result<DeletionVector> deserialize(const char* buf, size_t length) { | ||
uint32_t actual_length; | ||
std::memcpy(reinterpret_cast<char*>(&actual_length), buf, 4); | ||
// change byte order to big endian | ||
std::reverse(reinterpret_cast<char*>(&actual_length), | ||
reinterpret_cast<char*>(&actual_length) + 4); | ||
buf += 4; | ||
if (actual_length != length - 4) { | ||
return ResultError( | ||
Status::RuntimeError("DeletionVector deserialize error: length not match, " | ||
"actual length: {}, expect length: {}", | ||
actual_length, length - 4)); | ||
} | ||
uint32_t magic_number; | ||
std::memcpy(reinterpret_cast<char*>(&magic_number), buf, 4); | ||
// change byte order to big endian | ||
std::reverse(reinterpret_cast<char*>(&magic_number), | ||
reinterpret_cast<char*>(&magic_number) + 4); | ||
buf += 4; | ||
if (magic_number != MAGIC_NUMBER) { | ||
return ResultError(Status::RuntimeError( | ||
"DeletionVector deserialize error: invalid magic number {}", magic_number)); | ||
} | ||
roaring::Roaring roaring_bitmap; | ||
try { | ||
roaring_bitmap = roaring::Roaring::readSafe(buf, length); | ||
} catch (std::runtime_error) { | ||
return ResultError(Status::RuntimeError( | ||
"DeletionVector deserialize error: failed to deserialize roaring bitmap")); | ||
} | ||
return DeletionVector(roaring_bitmap); | ||
} | ||
|
||
private: | ||
roaring::Roaring _roaring_bitmap; | ||
}; | ||
} // namespace doris |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#include "paimon_jni_reader.h" | ||
|
||
#include <map> | ||
#include <ostream> | ||
|
||
#include "runtime/descriptors.h" | ||
#include "runtime/types.h" | ||
#include "vec/core/types.h" | ||
|
||
namespace doris { | ||
class RuntimeProfile; | ||
class RuntimeState; | ||
|
||
namespace vectorized { | ||
class Block; | ||
} // namespace vectorized | ||
} // namespace doris | ||
|
||
namespace doris::vectorized { | ||
|
||
const std::string PaimonJniReader::PAIMON_OPTION_PREFIX = "paimon_option_prefix."; | ||
|
||
PaimonJniReader::PaimonJniReader(const std::vector<SlotDescriptor*>& file_slot_descs, | ||
RuntimeState* state, RuntimeProfile* profile, | ||
const TFileRangeDesc& range) | ||
: _file_slot_descs(file_slot_descs), _state(state), _profile(profile) { | ||
std::vector<std::string> column_names; | ||
std::vector<std::string> column_types; | ||
for (auto& desc : _file_slot_descs) { | ||
column_names.emplace_back(desc->col_name()); | ||
column_types.emplace_back(JniConnector::get_jni_type(desc->type())); | ||
} | ||
std::map<String, String> params; | ||
params["db_name"] = range.table_format_params.paimon_params.db_name; | ||
params["table_name"] = range.table_format_params.paimon_params.table_name; | ||
params["paimon_split"] = range.table_format_params.paimon_params.paimon_split; | ||
params["paimon_column_names"] = range.table_format_params.paimon_params.paimon_column_names; | ||
params["paimon_predicate"] = range.table_format_params.paimon_params.paimon_predicate; | ||
params["ctl_id"] = std::to_string(range.table_format_params.paimon_params.ctl_id); | ||
params["db_id"] = std::to_string(range.table_format_params.paimon_params.db_id); | ||
params["tbl_id"] = std::to_string(range.table_format_params.paimon_params.tbl_id); | ||
params["last_update_time"] = | ||
std::to_string(range.table_format_params.paimon_params.last_update_time); | ||
params["required_fields"] = join(column_names, ","); | ||
params["columns_types"] = join(column_types, "#"); | ||
|
||
// Used to create paimon option | ||
for (auto& kv : range.table_format_params.paimon_params.paimon_options) { | ||
params[PAIMON_OPTION_PREFIX + kv.first] = kv.second; | ||
} | ||
_jni_connector = std::make_unique<JniConnector>("org/apache/doris/paimon/PaimonJniScanner", | ||
params, column_names); | ||
} | ||
|
||
Status PaimonJniReader::get_next_block(Block* block, size_t* read_rows, bool* eof) { | ||
RETURN_IF_ERROR(_jni_connector->get_next_block(block, read_rows, eof)); | ||
if (*eof) { | ||
RETURN_IF_ERROR(_jni_connector->close()); | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
Status PaimonJniReader::get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type, | ||
std::unordered_set<std::string>* missing_cols) { | ||
for (auto& desc : _file_slot_descs) { | ||
name_to_type->emplace(desc->col_name(), desc->type()); | ||
} | ||
return Status::OK(); | ||
} | ||
|
||
Status PaimonJniReader::init_reader( | ||
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) { | ||
_colname_to_value_range = colname_to_value_range; | ||
RETURN_IF_ERROR(_jni_connector->init(colname_to_value_range)); | ||
return _jni_connector->open(_state, _profile); | ||
} | ||
} // namespace doris::vectorized |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
// Licensed to the Apache Software Foundation (ASF) under one | ||
// or more contributor license agreements. See the NOTICE file | ||
// distributed with this work for additional information | ||
// regarding copyright ownership. The ASF licenses this file | ||
// to you under the Apache License, Version 2.0 (the | ||
// "License"); you may not use this file except in compliance | ||
// with the License. You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, | ||
// software distributed under the License is distributed on an | ||
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
// KIND, either express or implied. See the License for the | ||
// specific language governing permissions and limitations | ||
// under the License. | ||
|
||
#pragma once | ||
|
||
#include <cstddef> | ||
#include <memory> | ||
#include <string> | ||
#include <unordered_map> | ||
#include <unordered_set> | ||
#include <vector> | ||
|
||
#include "common/status.h" | ||
#include "exec/olap_common.h" | ||
#include "vec/exec/format/generic_reader.h" | ||
#include "vec/exec/format/table/table_format_reader.h" | ||
#include "vec/exec/jni_connector.h" | ||
|
||
namespace doris { | ||
class RuntimeProfile; | ||
class RuntimeState; | ||
class SlotDescriptor; | ||
namespace vectorized { | ||
class Block; | ||
} // namespace vectorized | ||
struct TypeDescriptor; | ||
} // namespace doris | ||
|
||
namespace doris::vectorized { | ||
|
||
/** | ||
* The demo usage of JniReader, showing how to read data from java scanner. | ||
* The java side is also a mock reader that provide values for each type. | ||
* This class will only be retained during the functional testing phase to verify that | ||
* the communication and data exchange with the jvm are correct. | ||
*/ | ||
class PaimonJniReader : public GenericReader { | ||
ENABLE_FACTORY_CREATOR(PaimonJniReader); | ||
|
||
public: | ||
static const std::string PAIMON_OPTION_PREFIX; | ||
PaimonJniReader(const std::vector<SlotDescriptor*>& file_slot_descs, RuntimeState* state, | ||
RuntimeProfile* profile, const TFileRangeDesc& range); | ||
|
||
~PaimonJniReader() override = default; | ||
|
||
Status get_next_block(Block* block, size_t* read_rows, bool* eof) override; | ||
|
||
Status get_columns(std::unordered_map<std::string, TypeDescriptor>* name_to_type, | ||
std::unordered_set<std::string>* missing_cols) override; | ||
|
||
Status init_reader( | ||
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range); | ||
|
||
private: | ||
const std::vector<SlotDescriptor*>& _file_slot_descs; | ||
RuntimeState* _state = nullptr; | ||
RuntimeProfile* _profile = nullptr; | ||
std::unordered_map<std::string, ColumnValueRangeType>* _colname_to_value_range; | ||
std::unique_ptr<JniConnector> _jni_connector; | ||
}; | ||
|
||
} // namespace doris::vectorized |
Oops, something went wrong.