diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index f2c426e72b2f0c..f827addab1e6bb 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -51,6 +51,7 @@ #include "olap/olap_common.h" #include "olap/rowset/rowset.h" #include "olap/rowset/rowset_factory.h" +#include "olap/rowset/rowset_fwd.h" #include "olap/storage_engine.h" #include "olap/tablet_meta.h" #include "runtime/client_cache.h" @@ -751,22 +752,127 @@ Status CloudMetaMgr::commit_rowset(const RowsetMeta& rs_meta, TEST_INJECTION_POINT_RETURN_WITH_VALUE("CloudMetaMgr::commit_rowset", ret_st); } if (config::enable_table_size_correctness_check) { - if (rs_meta.data_disk_size() + rs_meta.index_disk_size() != rs_meta.total_disk_size()) { - LOG(FATAL) << "[Cloud table size check failed]:" - << " tablet id: " << rs_meta.tablet_id() - << ", rowset id:" << rs_meta.rowset_id() - << ", rowset data disk size:" << rs_meta.data_disk_size() - << ", rowset index disk size:" << rs_meta.index_disk_size() - << ", rowset total disk size:" << rs_meta.total_disk_size() << "."; + const auto fs = const_cast(rs_meta).fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta.resource_id(); } - if (rs_meta.index_disk_size() > - config::max_table_index_data_ratio * rs_meta.data_disk_size()) { - LOG(FATAL) << "[Cloud table size check failed]:" - << " tablet id: " << rs_meta.tablet_id() - << ", rowset id:" << rs_meta.rowset_id() - << ", rowset data disk size:" << rs_meta.data_disk_size() - << ", rowset index disk size:" << rs_meta.index_disk_size() - << ", rowset total disk size:" << rs_meta.total_disk_size() << "."; + int64_t total_segment_size = 0; + int64_t total_inverted_index_size = 0; + for (int64_t seg_id = 0; seg_id < rs_meta.num_segments(); seg_id++) { + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + int64_t segment_file_size = 0; + auto st = fs->file_size(segment_path, &segment_file_size); + if (!st.ok()) { + segment_file_size = 0; + LOG(WARNING) << "table size correctness check get segment size failed! msg:" + << st.msg() << ", segment path:" << segment_path; + } + total_segment_size += segment_file_size; + } + + if (rs_meta.tablet_schema()->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + auto indices = rs_meta.tablet_schema()->indexes(); + for (auto& index : indices) { + // only get file_size for inverted index + if (index.index_type() != IndexType::INVERTED) { + continue; + } + for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) { + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + int64_t file_size = 0; + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix( + segment_path), + index.index_id(), index.get_index_suffix()); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + LOG(WARNING) + << "table size correctness check get inverted index v1 " + "size failed! msg:" + << st.msg() << ", inverted index path:" << inverted_index_file_path; + } + total_inverted_index_size += file_size; + } + } + } else { + for (int seg_id = 0; seg_id < rs_meta.num_segments(); ++seg_id) { + int64_t file_size = 0; + std::string segment_path = StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), seg_id); + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix(segment_path)); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + LOG(WARNING) << "table size correctness check get inverted index v2 size " + "failed! msg:" + << st.msg() + << ", inverted index path:" << inverted_index_file_path; + } + total_inverted_index_size += file_size; + } + } + LOG(INFO) << "[Cloud table segment size check info]:" + << " tablet id: " << rs_meta.tablet_id() << ", rowset id:" << rs_meta.rowset_id() + << ", rowset data disk size:" << rs_meta.data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta.index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta.total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path(rs_meta.tablet_id(), + rs_meta.rowset_id().to_string(), 0) + << "."; + if (rs_meta.data_disk_size() != total_segment_size) { + LOG(WARNING) << "[Cloud table segment size check failed]:" + << " tablet id: " << rs_meta.tablet_id() + << ", rowset id:" << rs_meta.rowset_id() + << ", rowset data disk size:" << rs_meta.data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta.index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta.total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), 0) + << "."; + DCHECK(false); + } + if (rs_meta.index_disk_size() != total_inverted_index_size) { + LOG(WARNING) << "[Cloud table index size check failed]:" + << " tablet id: " << rs_meta.tablet_id() + << ", rowset id:" << rs_meta.rowset_id() + << ", rowset data disk size:" << rs_meta.data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta.index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta.total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), 0) + << "."; + DCHECK(false); + } + if (rs_meta.data_disk_size() + rs_meta.index_disk_size() != rs_meta.total_disk_size()) { + LOG(WARNING) << "[Cloud table size check failed]:" + << " tablet id: " << rs_meta.tablet_id() + << ", rowset id:" << rs_meta.rowset_id() + << ", rowset data disk size:" << rs_meta.data_disk_size() + << ", rowset index disk size:" << rs_meta.index_disk_size() + << ", rowset total disk size:" << rs_meta.total_disk_size() + << ", rowset segment path:" + << StorageResource().remote_segment_path( + rs_meta.tablet_id(), rs_meta.rowset_id().to_string(), 0) + << "."; + DCHECK(false); } } CreateRowsetRequest req; diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index 1b0b0e21c512fa..0fe37c490f9b51 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -597,7 +597,7 @@ Status BaseBetaRowsetWriter::add_rowset(RowsetSharedPtr rowset) { assert(rowset->rowset_meta()->rowset_type() == BETA_ROWSET); RETURN_IF_ERROR(rowset->link_files_to(_context.tablet_path, _context.rowset_id)); _num_rows_written += rowset->num_rows(); - _total_data_size += rowset->rowset_meta()->total_disk_size(); + _total_data_size += rowset->rowset_meta()->data_disk_size(); _total_index_size += rowset->rowset_meta()->index_disk_size(); _num_segment += rowset->num_segments(); // append key_bounds to current rowset @@ -1009,8 +1009,8 @@ Status BetaRowsetWriter::flush_segment_writer_for_segcompaction( SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + (*writer)->get_inverted_index_total_size(); - segstat.index_size = index_size + (*writer)->get_inverted_index_total_size(); + segstat.data_size = segment_size; + segstat.index_size = (*writer)->get_inverted_index_total_size(); segstat.key_bounds = key_bounds; { std::lock_guard lock(_segid_statistics_map_mutex); diff --git a/be/src/olap/rowset/segment_creator.cpp b/be/src/olap/rowset/segment_creator.cpp index 1afd3215db42f6..5f0390d58a8920 100644 --- a/be/src/olap/rowset/segment_creator.cpp +++ b/be/src/olap/rowset/segment_creator.cpp @@ -225,9 +225,9 @@ Status SegmentFlusher::_flush_segment_writer( if (row_num == 0) { return Status::OK(); } - uint64_t segment_size; - uint64_t index_size; - Status s = writer->finalize(&segment_size, &index_size); + uint64_t segment_file_size; + uint64_t common_index_size; + Status s = writer->finalize(&segment_file_size, &common_index_size); if (!s.ok()) { return Status::Error(s.code(), "failed to finalize segment: {}", s.to_string()); } @@ -245,9 +245,18 @@ Status SegmentFlusher::_flush_segment_writer( uint32_t segment_id = writer->segment_id(); SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + writer->get_inverted_index_total_size(); - segstat.index_size = index_size + writer->get_inverted_index_total_size(); + // Attention: Data size = segment file size(.dat file size, which includes + // common index like zone map index but not include inverted index because + // inverted index has its own file). + // Index size = inverted index file size(.idx file size, which only includes + // inverted index.) + segstat.data_size = segment_file_size; + segstat.index_size = writer->get_inverted_index_total_size(); segstat.key_bounds = key_bounds; + LOG(INFO) << "tablet_id:" << _context.tablet_id + << ", flushing rowset_dir: " << _context.tablet_path + << ", rowset_id:" << _context.rowset_id << ", data size:" << segstat.data_size + << ", index size:" << segstat.index_size; _idx_files_info.add_file_info(segment_id, writer->get_inverted_index_file_info()); writer.reset(); @@ -255,7 +264,7 @@ Status SegmentFlusher::_flush_segment_writer( RETURN_IF_ERROR(_context.segment_collector->add(segment_id, segstat, flush_schema)); if (flush_size) { - *flush_size = segment_size + index_size; + *flush_size = segment_file_size; } return Status::OK(); } @@ -271,9 +280,9 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptrfinalize(&segment_size, &index_size); + uint64_t segment_file_size; + uint64_t common_index_size; + Status s = writer->finalize(&segment_file_size, &common_index_size); if (!s.ok()) { return Status::Error(s.code(), "failed to finalize segment: {}", s.to_string()); } @@ -291,9 +300,13 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptrget_segment_id(); SegmentStatistics segstat; segstat.row_num = row_num; - segstat.data_size = segment_size + writer->get_inverted_index_total_size(); - segstat.index_size = index_size + writer->get_inverted_index_total_size(); + segstat.data_size = segment_file_size; + segstat.index_size = writer->get_inverted_index_total_size(); segstat.key_bounds = key_bounds; + LOG(INFO) << "tablet_id:" << _context.tablet_id + << ", flushing rowset_dir: " << _context.tablet_path + << ", rowset_id:" << _context.rowset_id << ", data size:" << segstat.data_size + << ", index size:" << segstat.index_size; _idx_files_info.add_file_info(segment_id, writer->get_inverted_index_file_info()); writer.reset(); @@ -301,7 +314,7 @@ Status SegmentFlusher::_flush_segment_writer(std::unique_ptradd(segment_id, segstat, flush_schema)); if (flush_size) { - *flush_size = segment_size + index_size; + *flush_size = segment_file_size; } return Status::OK(); } diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index af68731ff413dd..6c1711c5a2b197 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -35,6 +36,7 @@ #include #include #include +#include #include #include "common/compiler_util.h" // IWYU pragma: keep @@ -333,23 +335,130 @@ void Tablet::save_meta() { if (config::enable_table_size_correctness_check) { const std::vector& all_rs_metas = _tablet_meta->all_rs_metas(); for (const auto& rs_meta : all_rs_metas) { + const auto& fs = rs_meta->fs(); + if (!fs) { + LOG(WARNING) << "get fs failed, resource_id={}" << rs_meta->resource_id(); + } + int64_t total_segment_size = 0; + int64_t total_inverted_index_size = 0; + for (int64_t seg_id = 0; seg_id < rs_meta->num_segments(); seg_id++) { + std::string segment_path = fmt::format("{}/{}_{}.dat", _tablet_path, + rs_meta->rowset_id().to_string(), seg_id); + int64_t segment_file_size = 0; + auto st = fs->file_size(segment_path, &segment_file_size); + if (!st.ok()) { + segment_file_size = 0; + LOG(WARNING) << "table size correctness check get segment size failed! msg:" + << st.msg() << ", segment path:" << segment_path; + } + total_segment_size += segment_file_size; + } + + if (tablet_schema()->get_inverted_index_storage_format() == + InvertedIndexStorageFormatPB::V1) { + auto indices = tablet_schema()->indexes(); + for (auto& index : indices) { + // only get file_size for inverted index + if (index.index_type() != IndexType::INVERTED) { + continue; + } + for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) { + std::string segment_path = + fmt::format("{}/{}_{}.dat", _tablet_path, + rs_meta->rowset_id().to_string(), seg_id); + int64_t file_size = 0; + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v1( + InvertedIndexDescriptor::get_index_file_path_prefix( + segment_path), + index.index_id(), index.get_index_suffix()); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + LOG(WARNING) << "table size correctness check get inverted index v1 " + "size failed! msg:" + << st.msg() + << ", inverted index path:" << inverted_index_file_path; + } + total_inverted_index_size += file_size; + } + } + } else { + for (int seg_id = 0; seg_id < rs_meta->num_segments(); ++seg_id) { + int64_t file_size = 0; + std::string segment_path = fmt::format( + "{}/{}_{}.dat", _tablet_path, rs_meta->rowset_id().to_string(), seg_id); + + std::string inverted_index_file_path = + InvertedIndexDescriptor::get_index_file_path_v2( + InvertedIndexDescriptor::get_index_file_path_prefix( + segment_path)); + auto st = fs->file_size(inverted_index_file_path, &file_size); + if (!st.ok()) { + file_size = 0; + LOG(WARNING) + << "table size correctness check get inverted index v2 size " + "failed! msg:" + << st.msg() << ", inverted index path:" << inverted_index_file_path; + } + total_inverted_index_size += file_size; + } + } + LOG(INFO) << "[Local table segment size info]:" + << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", rowset data disk size:" << rs_meta->data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta->index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta->total_disk_size() + << ", rowset segment path:" + << fmt::format("{}/{}_x.dat", _tablet_path, rs_meta->rowset_id().to_string()) + << "."; + if (rs_meta->data_disk_size() != total_segment_size) { + LOG(WARNING) << "[Local table segment size check failed]:" + << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", rowset data disk size:" << rs_meta->data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta->index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta->total_disk_size() + << ", rowset segment path:" + << fmt::format("{}/{}_x.dat", _tablet_path, + rs_meta->rowset_id().to_string()) + << "."; + DCHECK(false); + } + if (rs_meta->index_disk_size() != total_inverted_index_size) { + LOG(WARNING) << "[Local table index size check failed]:" + << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", rowset data disk size:" << rs_meta->data_disk_size() + << ", rowset real data disk size:" << total_segment_size + << ", rowset index disk size:" << rs_meta->index_disk_size() + << ", rowset real index disk size:" << total_inverted_index_size + << ", rowset total disk size:" << rs_meta->total_disk_size() + << ", rowset segment path:" + << fmt::format("{}/{}_x.dat", _tablet_path, + rs_meta->rowset_id().to_string()) + << "."; + DCHECK(false); + } if (rs_meta->data_disk_size() + rs_meta->index_disk_size() != rs_meta->total_disk_size()) { - LOG(FATAL) << "[Local table size check failed]:" - << " tablet id: " << get_tablet_info().tablet_id - << ", rowset id:" << rs_meta->rowset_id() - << ", rowset data disk size:" << rs_meta->data_disk_size() - << ", rowset index disk size:" << rs_meta->index_disk_size() - << ", rowset total disk size:" << rs_meta->total_disk_size() << "."; - } - if (rs_meta->index_disk_size() > - config::max_table_index_data_ratio * rs_meta->data_disk_size()) { - LOG(FATAL) << "[Local table index size check failed]:" - << " tablet id: " << get_tablet_info().tablet_id - << ", rowset id:" << rs_meta->rowset_id() - << ", rowset data disk size:" << rs_meta->data_disk_size() - << ", rowset index disk size:" << rs_meta->index_disk_size() - << ", rowset total disk size:" << rs_meta->total_disk_size() << "."; + LOG(WARNING) << "[Local table size check failed]:" + << " tablet id: " << get_tablet_info().tablet_id + << ", rowset id:" << rs_meta->rowset_id() + << ", rowset data disk size:" << rs_meta->data_disk_size() + << ", rowset index disk size:" << rs_meta->index_disk_size() + << ", rowset total disk size:" << rs_meta->total_disk_size() + << ", rowset segment path:" + << fmt::format("{}/{}_x.dat", _tablet_path, + rs_meta->rowset_id().to_string()) + << "."; + DCHECK(false); } } } diff --git a/be/src/runtime/load_stream_writer.cpp b/be/src/runtime/load_stream_writer.cpp index 37243fab14bdb3..2e987edc7bd3ba 100644 --- a/be/src/runtime/load_stream_writer.cpp +++ b/be/src/runtime/load_stream_writer.cpp @@ -201,7 +201,7 @@ Status LoadStreamWriter::add_segment(uint32_t segid, const SegmentStatistics& st } DBUG_EXECUTE_IF("LoadStreamWriter.add_segment.size_not_match", { segment_file_size++; }); - if (segment_file_size + inverted_file_size != stat.data_size) { + if (segment_file_size != stat.data_size) { return Status::Corruption( "add_segment failed, segment stat {} does not match, file size={}, inverted file " "size={}, stat.data_size={}, tablet id={}", diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index 25628449c41fec..daeb5ddfee5d34 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -217,7 +217,4 @@ CONF_Int32(max_tablet_index_num_per_batch, "1000"); // Max aborted txn num for the same label name CONF_mInt64(max_num_aborted_txn, "100"); - -// Enables validation to check the correctness of table size in cloud mode. -CONF_Bool(enable_cloud_table_size_correctness_check, "false"); } // namespace doris::cloud::config