diff --git a/.asf.yaml b/.asf.yaml index 6ff16967c2e415..e71e55de23fc83 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -49,6 +49,7 @@ github: strict: false contexts: - License Check + - Clang Formatter - CheckStyle - P0 Regression (Doris Regression) - External Regression (Doris External Regression) @@ -86,6 +87,7 @@ github: strict: false contexts: - License Check + - Clang Formatter - CheckStyle - Build Broker - ShellCheck @@ -107,6 +109,7 @@ github: strict: false contexts: - License Check + - Clang Formatter - CheckStyle - P0 Regression (Doris Regression) - External Regression (Doris External Regression) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 2757578827c2bb..bac487c65627a9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -15,5 +15,6 @@ # limitations under the License. # be/src/io/* @platoneko @gavinchou @dataroaring +be/src/agent/be_exec_version_manager.cpp @BiteTheDDDDt fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @dataroaring @CalvinKirs @morningman **/pom.xml @CalvinKirs @morningman diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index f49f1987b9ed41..adc77450d78c01 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -64,7 +64,7 @@ jobs: git clone https://github.com/DoozyX/clang-format-lint-action .github/actions/clang-format-lint-action pushd .github/actions/clang-format-lint-action &>/dev/null - git checkout 6adbe14579e5b8e19eb3e31e5ff2479f3bd302c7 + git checkout c71d0bf4e21876ebec3e5647491186f8797fde31 # v0.18.2 popd &>/dev/null - name: Install Python dependencies diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index 27921888774f9b..d4944711720a59 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -102,7 +102,7 @@ namespace { std::mutex s_task_signatures_mtx; std::unordered_map> s_task_signatures; -std::atomic_ulong s_report_version(time(nullptr) * 10000); +std::atomic_ulong s_report_version(time(nullptr) * 100000); void increase_report_version() { s_report_version.fetch_add(1, std::memory_order_relaxed); @@ -1074,6 +1074,7 @@ void report_tablet_callback(StorageEngine& engine, const TMasterInfo& master_inf request.__set_backend(BackendOptions::get_local_backend()); request.__isset.tablets = true; + increase_report_version(); uint64_t report_version; for (int i = 0; i < 5; i++) { request.tablets.clear(); diff --git a/be/src/cloud/cloud_meta_mgr.cpp b/be/src/cloud/cloud_meta_mgr.cpp index 8138ea52421102..816f1108299cb8 100644 --- a/be/src/cloud/cloud_meta_mgr.cpp +++ b/be/src/cloud/cloud_meta_mgr.cpp @@ -906,7 +906,7 @@ Status CloudMetaMgr::precommit_txn(const StreamLoadContext& ctx) { return retry_rpc("precommit txn", req, &res, &MetaService_Stub::precommit_txn); } -Status CloudMetaMgr::get_storage_vault_info(StorageVaultInfos* vault_infos) { +Status CloudMetaMgr::get_storage_vault_info(StorageVaultInfos* vault_infos, bool* is_vault_mode) { GetObjStoreInfoRequest req; GetObjStoreInfoResponse resp; req.set_cloud_unique_id(config::cloud_unique_id); @@ -916,6 +916,8 @@ Status CloudMetaMgr::get_storage_vault_info(StorageVaultInfos* vault_infos) { return s; } + *is_vault_mode = resp.enable_storage_vault(); + auto add_obj_store = [&vault_infos](const auto& obj_store) { vault_infos->emplace_back(obj_store.id(), S3Conf::get_s3_conf(obj_store), StorageVaultPB_PathFormat {}); @@ -931,6 +933,7 @@ Status CloudMetaMgr::get_storage_vault_info(StorageVaultInfos* vault_infos) { } }); + // desensitization, hide 
secret for (int i = 0; i < resp.obj_info_size(); ++i) { resp.mutable_obj_info(i)->set_sk(resp.obj_info(i).sk().substr(0, 2) + "xxx"); } @@ -940,7 +943,8 @@ Status CloudMetaMgr::get_storage_vault_info(StorageVaultInfos* vault_infos) { j->mutable_obj_info()->set_sk(j->obj_info().sk().substr(0, 2) + "xxx"); } - LOG(INFO) << "get storage vault response: " << resp.ShortDebugString(); + LOG(INFO) << "get storage vault, enable_storage_vault=" << *is_vault_mode + << " response=" << resp.ShortDebugString(); return Status::OK(); } diff --git a/be/src/cloud/cloud_meta_mgr.h b/be/src/cloud/cloud_meta_mgr.h index 6f6cc9c26b47b4..2f776b056866aa 100644 --- a/be/src/cloud/cloud_meta_mgr.h +++ b/be/src/cloud/cloud_meta_mgr.h @@ -73,7 +73,14 @@ class CloudMetaMgr { Status precommit_txn(const StreamLoadContext& ctx); - Status get_storage_vault_info(StorageVaultInfos* vault_infos); + /** + * Gets storage vault (storage backends) from meta-service + * + * @param vault_infos output param, all storage backends + * @param is_vault_mode output param, true for pure vault mode, false for legacy mode + * @return status + */ + Status get_storage_vault_info(StorageVaultInfos* vault_infos, bool* is_vault_mode); Status prepare_tablet_job(const TabletJobInfoPB& job, StartTabletJobResponse* res); diff --git a/be/src/cloud/cloud_storage_engine.cpp b/be/src/cloud/cloud_storage_engine.cpp index 3e56c23d1d3e79..b98b2e3d0efc45 100644 --- a/be/src/cloud/cloud_storage_engine.cpp +++ b/be/src/cloud/cloud_storage_engine.cpp @@ -161,8 +161,9 @@ struct RefreshFSVaultVisitor { Status CloudStorageEngine::open() { cloud::StorageVaultInfos vault_infos; + bool enable_storage_vault = false; do { - auto st = _meta_mgr->get_storage_vault_info(&vault_infos); + auto st = _meta_mgr->get_storage_vault_info(&vault_infos, &enable_storage_vault); if (st.ok()) { break; } @@ -177,7 +178,11 @@ Status CloudStorageEngine::open() { return vault_process_error(id, vault_info, std::move(st)); } } - set_latest_fs(get_filesystem(std::get<0>(vault_infos.back()))); + + // vault mode should not support latest_fs, to avoid choosing an unexpected storage backend + if (!enable_storage_vault) { + set_latest_fs(get_filesystem(std::get<0>(vault_infos.back()))); + } // TODO(plat1ko): DeleteBitmapTxnManager @@ -340,7 +345,8 @@ void CloudStorageEngine::_check_file_cache_ttl_block_valid() { void CloudStorageEngine::sync_storage_vault() { cloud::StorageVaultInfos vault_infos; - auto st = _meta_mgr->get_storage_vault_info(&vault_infos); + bool enable_storage_vault = false; + auto st = _meta_mgr->get_storage_vault_info(&vault_infos, &enable_storage_vault); if (!st.ok()) { LOG(WARNING) << "failed to get storage vault info. err=" << st; return; } @@ -363,7 +369,7 @@ void CloudStorageEngine::sync_storage_vault() { } if (auto& id = std::get<0>(vault_infos.back()); - latest_fs() == nullptr || latest_fs()->id() != id) { + (latest_fs() == nullptr || latest_fs()->id() != id) && !enable_storage_vault) { set_latest_fs(get_filesystem(id)); } } diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 00f8a042cbcbb7..0c00bd1a38f0da 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -95,6 +95,9 @@ DEFINE_String(mem_limit, "90%"); // Soft memory limit as a fraction of hard memory limit. DEFINE_Double(soft_mem_limit_frac, "0.9"); +// Cache capacity reduce mem limit as a fraction of soft mem limit. +DEFINE_mDouble(cache_capacity_reduce_mem_limit_frac, "0.6"); + // Schema change memory limit as a fraction of soft memory limit.
DEFINE_Double(schema_change_mem_limit_frac, "0.6"); @@ -286,7 +289,7 @@ DEFINE_mInt32(exchg_buffer_queue_capacity_factor, "64"); DEFINE_mInt64(memory_limitation_per_thread_for_schema_change_bytes, "2147483648"); DEFINE_mInt32(cache_prune_interval_sec, "10"); -DEFINE_mInt32(cache_periodic_prune_stale_sweep_sec, "300"); +DEFINE_mInt32(cache_periodic_prune_stale_sweep_sec, "60"); // the clean interval of tablet lookup cache DEFINE_mInt32(tablet_lookup_cache_stale_sweep_time_sec, "30"); DEFINE_mInt32(point_query_row_cache_stale_sweep_time_sec, "300"); @@ -565,7 +568,7 @@ DEFINE_String(pprof_profile_dir, "${DORIS_HOME}/log"); // for jeprofile in jemalloc DEFINE_mString(jeprofile_dir, "${DORIS_HOME}/log"); DEFINE_mBool(enable_je_purge_dirty_pages, "true"); -DEFINE_mString(je_dirty_pages_mem_limit_percent, "5%"); +DEFINE_mString(je_dirty_pages_mem_limit_percent, "2%"); // to forward compatibility, will be removed later DEFINE_mBool(enable_token_check, "true"); @@ -582,17 +585,12 @@ DEFINE_Int32(num_cores, "0"); DEFINE_Bool(ignore_broken_disk, "false"); // Sleep time in milliseconds between memory maintenance iterations -DEFINE_mInt32(memory_maintenance_sleep_time_ms, "100"); +DEFINE_mInt32(memory_maintenance_sleep_time_ms, "20"); // After full gc, no longer full gc and minor gc during sleep. // After minor gc, no minor gc during sleep, but full gc is possible. DEFINE_mInt32(memory_gc_sleep_time_ms, "500"); -// Sleep time in milliseconds between memtbale flush mgr refresh iterations -DEFINE_mInt64(memtable_mem_tracker_refresh_interval_ms, "5"); - -DEFINE_mInt64(wg_weighted_memory_ratio_refresh_interval_ms, "50"); - // percent of (active memtables size / all memtables size) when reach hard limit DEFINE_mInt32(memtable_hard_limit_active_percent, "50"); diff --git a/be/src/common/config.h b/be/src/common/config.h index bd2aa4f51be1a9..720f4f72cb4bf7 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -133,6 +133,9 @@ DECLARE_String(mem_limit); // Soft memory limit as a fraction of hard memory limit. DECLARE_Double(soft_mem_limit_frac); +// Cache capacity reduce mem limit as a fraction of soft mem limit. +DECLARE_mDouble(cache_capacity_reduce_mem_limit_frac); + // Schema change memory limit as a fraction of soft memory limit. DECLARE_Double(schema_change_mem_limit_frac); @@ -641,12 +644,6 @@ DECLARE_mInt32(memory_maintenance_sleep_time_ms); // After minor gc, no minor gc during sleep, but full gc is possible. 
DECLARE_mInt32(memory_gc_sleep_time_ms); -// Sleep time in milliseconds between memtbale flush mgr memory refresh iterations -DECLARE_mInt64(memtable_mem_tracker_refresh_interval_ms); - -// Sleep time in milliseconds between refresh iterations of workload group weighted memory ratio -DECLARE_mInt64(wg_weighted_memory_ratio_refresh_interval_ms); - // percent of (active memtables size / all memtables size) when reach hard limit DECLARE_mInt32(memtable_hard_limit_active_percent); diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index d8245f4045ce81..713813b4a334f9 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -73,6 +73,12 @@ namespace doris { namespace { +int64_t last_print_proc_mem = 0; +int32_t refresh_cache_capacity_sleep_time_ms = 0; +#ifdef USE_JEMALLOC +int32_t je_purge_dirty_pages_sleep_time_ms = 0; +#endif + void update_rowsets_and_segments_num_metrics() { if (config::is_cloud_mode()) { // TODO(plat1ko): CloudStorageEngine @@ -204,42 +210,104 @@ void Daemon::tcmalloc_gc_thread() { #endif } -void Daemon::memory_maintenance_thread() { - int32_t interval_milliseconds = config::memory_maintenance_sleep_time_ms; - int64_t last_print_proc_mem = PerfCounters::get_vm_rss(); - while (!_stop_background_threads_latch.wait_for( - std::chrono::milliseconds(interval_milliseconds))) { - // Refresh process memory metrics. - doris::PerfCounters::refresh_proc_status(); - doris::MemInfo::refresh_proc_meminfo(); - doris::GlobalMemoryArbitrator::reset_refresh_interval_memory_growth(); - ExecEnv::GetInstance()->brpc_iobuf_block_memory_tracker()->set_consumption( - butil::IOBuf::block_memory()); - // Refresh allocator memory metrics. +void refresh_process_memory_metrics() { + doris::PerfCounters::refresh_proc_status(); + doris::MemInfo::refresh_proc_meminfo(); + doris::GlobalMemoryArbitrator::reset_refresh_interval_memory_growth(); + ExecEnv::GetInstance()->brpc_iobuf_block_memory_tracker()->set_consumption( + butil::IOBuf::block_memory()); +} + +void refresh_common_allocator_metrics() { #if !defined(ADDRESS_SANITIZER) && !defined(LEAK_SANITIZER) && !defined(THREAD_SANITIZER) - doris::MemInfo::refresh_allocator_mem(); -#ifdef USE_JEMALLOC - if (doris::MemInfo::je_dirty_pages_mem() > doris::MemInfo::je_dirty_pages_mem_limit() && - GlobalMemoryArbitrator::is_exceed_soft_mem_limit()) { - doris::MemInfo::notify_je_purge_dirty_pages(); - } + doris::MemInfo::refresh_allocator_mem(); + if (config::enable_system_metrics) { + DorisMetrics::instance()->system_metrics()->update_allocator_metrics(); + } #endif - if (config::enable_system_metrics) { - DorisMetrics::instance()->system_metrics()->update_allocator_metrics(); + MemInfo::refresh_memory_bvar(); +} + +void refresh_memory_state_after_memory_change() { + if (abs(last_print_proc_mem - PerfCounters::get_vm_rss()) > 268435456) { + last_print_proc_mem = PerfCounters::get_vm_rss(); + doris::MemTrackerLimiter::clean_tracker_limiter_group(); + doris::MemTrackerLimiter::enable_print_log_process_usage(); + // Refresh mem tracker each type counter. 
+ doris::MemTrackerLimiter::refresh_global_counter(); + LOG(INFO) << doris::GlobalMemoryArbitrator:: process_mem_log_str(); // print mem log when memory state by 256M + } +} + +void refresh_cache_capacity() { + if (refresh_cache_capacity_sleep_time_ms <= 0) { + auto cache_capacity_reduce_mem_limit = uint64_t( + doris::MemInfo::soft_mem_limit() * config::cache_capacity_reduce_mem_limit_frac); + int64_t process_memory_usage = doris::GlobalMemoryArbitrator::process_memory_usage(); + double new_cache_capacity_adjust_weighted = + process_memory_usage <= cache_capacity_reduce_mem_limit + ? 1 + : std::min( + 1 - (process_memory_usage - cache_capacity_reduce_mem_limit) / + (doris::MemInfo::soft_mem_limit() - + cache_capacity_reduce_mem_limit), + 0); + if (new_cache_capacity_adjust_weighted != + doris::GlobalMemoryArbitrator::last_cache_capacity_adjust_weighted) { + doris::GlobalMemoryArbitrator::last_cache_capacity_adjust_weighted = + new_cache_capacity_adjust_weighted; + doris::GlobalMemoryArbitrator::notify_cache_adjust_capacity(); + refresh_cache_capacity_sleep_time_ms = config::memory_gc_sleep_time_ms; } + } + refresh_cache_capacity_sleep_time_ms -= config::memory_maintenance_sleep_time_ms; +} + +void je_purge_dirty_pages() { +#ifdef USE_JEMALLOC + if (je_purge_dirty_pages_sleep_time_ms <= 0 && + doris::MemInfo::je_dirty_pages_mem() > doris::MemInfo::je_dirty_pages_mem_limit() && + GlobalMemoryArbitrator::is_exceed_soft_mem_limit()) { + doris::MemInfo::notify_je_purge_dirty_pages(); + je_purge_dirty_pages_sleep_time_ms = config::memory_gc_sleep_time_ms; + } + je_purge_dirty_pages_sleep_time_ms -= config::memory_maintenance_sleep_time_ms; #endif - MemInfo::refresh_memory_bvar(); - - // Update and print memory stat when the memory changes by 256M. - if (abs(last_print_proc_mem - PerfCounters::get_vm_rss()) > 268435456) { - last_print_proc_mem = PerfCounters::get_vm_rss(); - doris::MemTrackerLimiter::clean_tracker_limiter_group(); - doris::MemTrackerLimiter::enable_print_log_process_usage(); - // Refresh mem tracker each type counter. - doris::MemTrackerLimiter::refresh_global_counter(); - LOG(INFO) << doris::GlobalMemoryArbitrator:: - process_mem_log_str(); // print mem log when memory state by 256M - } +} + +void Daemon::memory_maintenance_thread() { + while (!_stop_background_threads_latch.wait_for( + std::chrono::milliseconds(config::memory_maintenance_sleep_time_ms))) { + // step 1. Refresh process memory metrics. + refresh_process_memory_metrics(); + + // step 2. Refresh jemalloc/tcmalloc metrics. + refresh_common_allocator_metrics(); + + // step 3. Update and print memory stat when the memory changes by 256M. + refresh_memory_state_after_memory_change(); + + // step 4. Async refresh cache capacity + // TODO adjust cache capacity based on smoothstep (smooth gradient). + refresh_cache_capacity(); + + // step 5. Cancel top memory task when process memory exceeds hard limit. + // TODO replace memory_gc_thread. + + // step 6. Refresh weighted memory ratio of workload groups. + doris::ExecEnv::GetInstance()->workload_group_mgr()->refresh_wg_weighted_memory_limit(); + + // step 7. Analyze blocking queries. + // TODO sort the operators that can spill, wake up the pipeline task spill + // or continue execution according to certain rules or cancel query. + + // step 8. Flush memtable + doris::GlobalMemoryArbitrator::notify_memtable_memory_refresh(); + // TODO notify flush memtable + + // step 9.
Jemalloc purge all arena dirty pages + je_purge_dirty_pages(); } } @@ -301,10 +369,21 @@ void Daemon::memory_gc_thread() { void Daemon::memtable_memory_refresh_thread() { // Refresh the memory statistics of the load channel tracker more frequently, // which helps to accurately control the memory of LoadChannelMgr. - while (!_stop_background_threads_latch.wait_for( - std::chrono::milliseconds(config::memtable_mem_tracker_refresh_interval_ms))) { + do { + std::unique_lock l(doris::GlobalMemoryArbitrator::memtable_memory_refresh_lock); + while (_stop_background_threads_latch.count() != 0 && + !doris::GlobalMemoryArbitrator::memtable_memory_refresh_notify.load( + std::memory_order_relaxed)) { + doris::GlobalMemoryArbitrator::memtable_memory_refresh_cv.wait_for( + l, std::chrono::seconds(1)); + } + if (_stop_background_threads_latch.count() == 0) { + break; + } doris::ExecEnv::GetInstance()->memtable_memory_limiter()->refresh_mem_tracker(); - } + doris::GlobalMemoryArbitrator::memtable_memory_refresh_notify.store( + false, std::memory_order_relaxed); + } while (true); } /* @@ -396,6 +475,35 @@ void Daemon::je_purge_dirty_pages_thread() const { } while (true); } +void Daemon::cache_adjust_capacity_thread() { + do { + std::unique_lock l(doris::GlobalMemoryArbitrator::cache_adjust_capacity_lock); + while (_stop_background_threads_latch.count() != 0 && + !doris::GlobalMemoryArbitrator::cache_adjust_capacity_notify.load( + std::memory_order_relaxed)) { + doris::GlobalMemoryArbitrator::cache_adjust_capacity_cv.wait_for( + l, std::chrono::seconds(1)); + } + double adjust_weighted = GlobalMemoryArbitrator::last_cache_capacity_adjust_weighted; + if (_stop_background_threads_latch.count() == 0) { + break; + } + if (config::disable_memory_gc) { + continue; + } + std::unique_ptr profile = std::make_unique(""); + auto freed_mem = CacheManager::instance()->for_each_cache_refresh_capacity(adjust_weighted, + profile.get()); + std::stringstream ss; + profile->pretty_print(&ss); + LOG(INFO) << fmt::format( + "[MemoryGC] refresh cache capacity end, free memory {}, details: {}", + PrettyPrinter::print(freed_mem, TUnit::BYTES), ss.str()); + doris::GlobalMemoryArbitrator::cache_adjust_capacity_notify.store( + false, std::memory_order_relaxed); + } while (true); +} + void Daemon::cache_prune_stale_thread() { int32_t interval = config::cache_periodic_prune_stale_sweep_sec; while (!_stop_background_threads_latch.wait_for(std::chrono::seconds(interval))) { @@ -411,14 +519,6 @@ void Daemon::cache_prune_stale_thread() { } } -void Daemon::wg_weighted_memory_ratio_refresh_thread() { - // Refresh weighted memory ratio of workload groups - while (!_stop_background_threads_latch.wait_for( - std::chrono::milliseconds(config::wg_weighted_memory_ratio_refresh_interval_ms))) { - doris::ExecEnv::GetInstance()->workload_group_mgr()->refresh_wg_weighted_memory_limit(); - } -} - void Daemon::be_proc_monitor_thread() { while (!_stop_background_threads_latch.wait_for( std::chrono::milliseconds(config::be_proc_monitor_interval_ms))) { @@ -455,6 +555,10 @@ void Daemon::start() { "Daemon", "je_purge_dirty_pages_thread", [this]() { this->je_purge_dirty_pages_thread(); }, &_threads.emplace_back()); CHECK(st.ok()) << st; + st = Thread::create( + "Daemon", "cache_adjust_capacity_thread", + [this]() { this->cache_adjust_capacity_thread(); }, &_threads.emplace_back()); + CHECK(st.ok()) << st; st = Thread::create( "Daemon", "cache_prune_stale_thread", [this]() { this->cache_prune_stale_thread(); }, &_threads.emplace_back()); @@ -464,11 +568,6 @@ 
void Daemon::start() { [this]() { this->report_runtime_query_statistics_thread(); }, &_threads.emplace_back()); CHECK(st.ok()) << st; - st = Thread::create( - "Daemon", "wg_weighted_memory_ratio_refresh_thread", - [this]() { this->wg_weighted_memory_ratio_refresh_thread(); }, - &_threads.emplace_back()); - if (config::enable_be_proc_monitor) { st = Thread::create( "Daemon", "be_proc_monitor_thread", [this]() { this->be_proc_monitor_thread(); }, diff --git a/be/src/common/daemon.h b/be/src/common/daemon.h index 64c9f0c8993ae3..fe723877dcd027 100644 --- a/be/src/common/daemon.h +++ b/be/src/common/daemon.h @@ -43,9 +43,9 @@ class Daemon { void memtable_memory_refresh_thread(); void calculate_metrics_thread(); void je_purge_dirty_pages_thread() const; + void cache_adjust_capacity_thread(); void cache_prune_stale_thread(); void report_runtime_query_statistics_thread(); - void wg_weighted_memory_ratio_refresh_thread(); void be_proc_monitor_thread(); CountDownLatch _stop_background_threads_latch; diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp index cce150670353d9..90140e748f5d6b 100644 --- a/be/src/exec/schema_scanner.cpp +++ b/be/src/exec/schema_scanner.cpp @@ -453,6 +453,17 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, vectorized: break; } + case TYPE_DATETIME: { + std::vector datas(1); + VecDateTimeValue src[1]; + src[0].from_date_str(cell.stringVal.data(), cell.stringVal.size()); + datas[0] = src; + auto data = datas[0]; + reinterpret_cast*>(col_ptr)->insert_data( + reinterpret_cast(data), 0); + nullable_column->get_null_map_data().emplace_back(0); + break; + } default: { std::stringstream ss; ss << "unsupported column type:" << type; diff --git a/be/src/exec/schema_scanner/schema_active_queries_scanner.cpp b/be/src/exec/schema_scanner/schema_active_queries_scanner.cpp index 2b516fc6fdac2b..6aa6e758999fb0 100644 --- a/be/src/exec/schema_scanner/schema_active_queries_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_active_queries_scanner.cpp @@ -17,7 +17,6 @@ #include "exec/schema_scanner/schema_active_queries_scanner.h" -#include "exec/schema_scanner/schema_scanner_helper.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" @@ -101,27 +100,10 @@ Status SchemaActiveQueriesScanner::_get_active_queries_block_from_fe() { for (int i = 0; i < result_data.size(); i++) { TRow row = result_data[i]; - - SchemaScannerHelper::insert_string_value(0, row.column_value[0].stringVal, - _active_query_block.get()); - SchemaScannerHelper::insert_string_value(1, row.column_value[1].stringVal, - _active_query_block.get()); - SchemaScannerHelper::insert_int_value(2, row.column_value[2].longVal, - _active_query_block.get()); - SchemaScannerHelper::insert_int_value(3, row.column_value[3].longVal, - _active_query_block.get()); - SchemaScannerHelper::insert_string_value(4, row.column_value[4].stringVal, - _active_query_block.get()); - SchemaScannerHelper::insert_string_value(5, row.column_value[5].stringVal, - _active_query_block.get()); - SchemaScannerHelper::insert_string_value(6, row.column_value[6].stringVal, - _active_query_block.get()); - SchemaScannerHelper::insert_string_value(7, row.column_value[7].stringVal, - _active_query_block.get()); - SchemaScannerHelper::insert_string_value(8, row.column_value[8].stringVal, - _active_query_block.get()); - SchemaScannerHelper::insert_string_value(9, row.column_value[9].stringVal, - _active_query_block.get()); + for (int j = 0; j < _s_tbls_columns.size(); j++) { + 
RETURN_IF_ERROR(insert_block_column(row.column_value[j], j, _active_query_block.get(), + _s_tbls_columns[j].type)); + } } return Status::OK(); } diff --git a/be/src/exec/schema_scanner/schema_partitions_scanner.cpp b/be/src/exec/schema_scanner/schema_partitions_scanner.cpp index 9f86fe6feb49d9..ebe2bd3b70ec0e 100644 --- a/be/src/exec/schema_scanner/schema_partitions_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_partitions_scanner.cpp @@ -22,7 +22,6 @@ #include #include "exec/schema_scanner/schema_helper.h" -#include "exec/schema_scanner/schema_scanner_helper.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" @@ -147,22 +146,9 @@ Status SchemaPartitionsScanner::get_onedb_info_from_fe(int64_t dbId) { for (int i = 0; i < result_data.size(); i++) { TRow row = result_data[i]; - for (int j = 0; j < _s_tbls_columns.size(); j++) { - if ((_s_tbls_columns[j].type == TYPE_BIGINT) || _s_tbls_columns[j].type == TYPE_INT) { - SchemaScannerHelper::insert_int_value(j, row.column_value[j].longVal, - _partitions_block.get()); - } else if (_s_tbls_columns[j].type == TYPE_DATETIME) { - std::vector datas(1); - VecDateTimeValue src[1]; - src[0].from_date_str(row.column_value[j].stringVal.data(), - row.column_value[j].stringVal.size()); - datas[0] = src; - SchemaScannerHelper::insert_datetime_value(j, datas, _partitions_block.get()); - } else { - SchemaScannerHelper::insert_string_value(j, row.column_value[j].stringVal, - _partitions_block.get()); - } + RETURN_IF_ERROR(insert_block_column(row.column_value[j], j, _partitions_block.get(), + _s_tbls_columns[j].type)); } } return Status::OK(); diff --git a/be/src/exec/schema_scanner/schema_routine_scanner.cpp b/be/src/exec/schema_scanner/schema_routine_scanner.cpp index adb18450f26490..e8d95f0abd6d36 100644 --- a/be/src/exec/schema_scanner/schema_routine_scanner.cpp +++ b/be/src/exec/schema_scanner/schema_routine_scanner.cpp @@ -17,7 +17,6 @@ #include "exec/schema_scanner/schema_routine_scanner.h" -#include "exec/schema_scanner/schema_scanner_helper.h" #include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" @@ -103,19 +102,9 @@ Status SchemaRoutinesScanner::get_block_from_fe() { for (int i = 0; i < result_data.size(); i++) { TRow row = result_data[i]; - for (int j = 0; j < _s_tbls_columns.size(); j++) { - if (_s_tbls_columns[j].type == TYPE_DATETIME) { - std::vector datas(1); - VecDateTimeValue src[1]; - src[0].from_date_str(row.column_value[j].stringVal.data(), - row.column_value[j].stringVal.size()); - datas[0] = src; - SchemaScannerHelper::insert_datetime_value(j, datas, _routines_block.get()); - } else { - SchemaScannerHelper::insert_string_value(j, row.column_value[j].stringVal, - _routines_block.get()); - } + RETURN_IF_ERROR(insert_block_column(row.column_value[j], j, _routines_block.get(), + _s_tbls_columns[j].type)); } } return Status::OK(); diff --git a/be/src/http/action/compaction_score_action.cpp b/be/src/http/action/compaction_score_action.cpp new file mode 100644 index 00000000000000..10b8cc6bdbab04 --- /dev/null +++ b/be/src/http/action/compaction_score_action.cpp @@ -0,0 +1,236 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +#include "http/action/compaction_score_action.h" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cloud/cloud_storage_engine.h" +#include "cloud/cloud_tablet.h" +#include "cloud/cloud_tablet_mgr.h" +#include "cloud/config.h" +#include "common/status.h" +#include "http/http_channel.h" +#include "http/http_handler_with_auth.h" +#include "http/http_headers.h" +#include "http/http_request.h" +#include "http/http_status.h" +#include "olap/tablet_fwd.h" +#include "olap/tablet_manager.h" +#include "util/stopwatch.hpp" + +namespace doris { + +const std::string TOP_N = "top_n"; +const std::string SYNC_META = "sync_meta"; +const std::string COMPACTION_SCORE = "compaction_score"; +constexpr size_t DEFAULT_TOP_N = std::numeric_limits::max(); +constexpr bool DEFAULT_SYNC_META = false; +constexpr std::string_view TABLET_ID = "tablet_id"; + +template +concept CompactionScoreAccessble = requires(T t) { + { t.get_real_compaction_score() } -> std::same_as; +}; + +template +std::vector calculate_compaction_scores( + std::span> tablets) { + std::vector result; + result.reserve(tablets.size()); + std::ranges::transform(tablets, std::back_inserter(result), + [](const std::shared_ptr& tablet) -> CompactionScoreResult { + return {.tablet_id = tablet->tablet_id(), + .compaction_score = tablet->get_real_compaction_score()}; + }); + return result; +} + +struct LocalCompactionScoreAccessor final : CompactionScoresAccessor { + LocalCompactionScoreAccessor(TabletManager* tablet_mgr) : tablet_mgr(tablet_mgr) {} + + std::vector get_all_tablet_compaction_scores() override { + auto tablets = tablet_mgr->get_all_tablet(); + std::span s = {tablets.begin(), tablets.end()}; + return calculate_compaction_scores(s); + } + + TabletManager* tablet_mgr; +}; + +struct CloudCompactionScoresAccessor final : CompactionScoresAccessor { + CloudCompactionScoresAccessor(CloudTabletMgr& tablet_mgr) : tablet_mgr(tablet_mgr) {} + + std::vector get_all_tablet_compaction_scores() override { + auto tablets = get_all_tablets(); + std::span s = {tablets.begin(), tablets.end()}; + return calculate_compaction_scores(s); + } + + Status sync_meta() { + auto tablets = get_all_tablets(); + LOG(INFO) << "start to sync meta from ms"; + + MonotonicStopWatch stopwatch; + stopwatch.start(); + + for (const auto& tablet : tablets) { + RETURN_IF_ERROR(tablet->sync_meta()); + RETURN_IF_ERROR(tablet->sync_rowsets()); + } + + stopwatch.stop(); + LOG(INFO) << "sync meta finish, time=" << stopwatch.elapsed_time() << "ns"; + + return Status::OK(); + } + + std::vector get_all_tablets() { + auto weak_tablets = tablet_mgr.get_weak_tablets(); + std::vector tablets; + tablets.reserve(weak_tablets.size()); + for (auto& weak_tablet : weak_tablets) { + if (auto tablet = weak_tablet.lock(); + tablet != nullptr and tablet->tablet_state() == 
TABLET_RUNNING) { + tablets.push_back(std::move(tablet)); + } + } + return tablets; + } + + CloudTabletMgr& tablet_mgr; +}; + +static rapidjson::Value jsonfy_tablet_compaction_score( + const CompactionScoreResult& result, rapidjson::MemoryPoolAllocator<>& allocator) { + rapidjson::Value node; + node.SetObject(); + + rapidjson::Value tablet_id_key; + tablet_id_key.SetString(TABLET_ID.data(), TABLET_ID.length(), allocator); + rapidjson::Value tablet_id_val; + auto tablet_id_str = std::to_string(result.tablet_id); + tablet_id_val.SetString(tablet_id_str.c_str(), tablet_id_str.length(), allocator); + + rapidjson::Value score_key; + score_key.SetString(COMPACTION_SCORE.data(), COMPACTION_SCORE.size()); + rapidjson::Value score_val; + auto score_str = std::to_string(result.compaction_score); + score_val.SetString(score_str.c_str(), score_str.length(), allocator); + node.AddMember(score_key, score_val, allocator); + + node.AddMember(tablet_id_key, tablet_id_val, allocator); + return node; +} + +CompactionScoreAction::CompactionScoreAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type, TabletManager* tablet_mgr) + : HttpHandlerWithAuth(exec_env, hier, type), + _accessor(std::make_unique(tablet_mgr)) {} + +CompactionScoreAction::CompactionScoreAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type, CloudTabletMgr& tablet_mgr) + : HttpHandlerWithAuth(exec_env, hier, type), + _accessor(std::make_unique(tablet_mgr)) {} + +void CompactionScoreAction::handle(HttpRequest* req) { + req->add_output_header(HttpHeaders::CONTENT_TYPE, HttpHeaders::JsonType.data()); + auto top_n_param = req->param(TOP_N); + + size_t top_n = DEFAULT_TOP_N; + if (!top_n_param.empty()) { + try { + auto tmp_top_n = std::stoll(top_n_param); + if (tmp_top_n < 0) { + throw std::invalid_argument("`top_n` cannot less than 0"); + } + top_n = tmp_top_n; + } catch (const std::exception& e) { + LOG(WARNING) << "convert failed:" << e.what(); + auto msg = fmt::format("invalid argument: top_n={}", top_n_param); + HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, msg); + return; + } + } + + auto sync_meta_param = req->param(SYNC_META); + bool sync_meta = DEFAULT_SYNC_META; + if (!sync_meta_param.empty() and !config::is_cloud_mode()) { + HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, + "param `sync_meta` is only available for cloud mode"); + return; + } + if (sync_meta_param == "true") { + sync_meta = true; + } else if (sync_meta_param == "false") { + sync_meta = false; + } else if (!sync_meta_param.empty()) { + auto msg = fmt::format("invalid argument: sync_meta={}", sync_meta_param); + HttpChannel::send_reply(req, HttpStatus::BAD_REQUEST, msg); + return; + } + + std::string result; + if (auto st = _handle(top_n, sync_meta, &result); !st) { + HttpChannel::send_reply(req, HttpStatus::INTERNAL_SERVER_ERROR, st.to_json()); + return; + } + HttpChannel::send_reply(req, HttpStatus::OK, result); +} + +Status CompactionScoreAction::_handle(size_t top_n, bool sync_meta, std::string* result) { + if (sync_meta) { + DCHECK(config::is_cloud_mode()); + RETURN_IF_ERROR(static_cast(_accessor.get())->sync_meta()); + } + + auto scores = _accessor->get_all_tablet_compaction_scores(); + top_n = std::min(top_n, scores.size()); + std::partial_sort(scores.begin(), scores.begin() + top_n, scores.end(), std::greater<>()); + + rapidjson::Document root; + root.SetArray(); + auto& allocator = root.GetAllocator(); + std::for_each(scores.begin(), scores.begin() + top_n, [&](const auto& score) { + 
root.PushBack(jsonfy_tablet_compaction_score(score, allocator), allocator); + }); + rapidjson::StringBuffer str_buf; + rapidjson::PrettyWriter writer(str_buf); + root.Accept(writer); + *result = str_buf.GetString(); + return Status::OK(); +} + +} // namespace doris diff --git a/be/src/http/action/compaction_score_action.h b/be/src/http/action/compaction_score_action.h new file mode 100644 index 00000000000000..1c345a4ae24c65 --- /dev/null +++ b/be/src/http/action/compaction_score_action.h @@ -0,0 +1,66 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include +#include +#include + +#include "cloud/cloud_tablet_mgr.h" +#include "common/status.h" +#include "http/http_handler_with_auth.h" +#include "http/http_request.h" +#include "olap/storage_engine.h" +#include "runtime/exec_env.h" +namespace doris { + +struct CompactionScoreResult { + int64_t tablet_id; + size_t compaction_score; +}; + +inline bool operator>(const CompactionScoreResult& lhs, const CompactionScoreResult& rhs) { + return lhs.compaction_score > rhs.compaction_score; +} + +struct CompactionScoresAccessor { + virtual ~CompactionScoresAccessor() = default; + + virtual std::vector get_all_tablet_compaction_scores() = 0; +}; + +// topn, sync +class CompactionScoreAction : public HttpHandlerWithAuth { +public: + explicit CompactionScoreAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type, TabletManager* tablet_mgr); + + explicit CompactionScoreAction(ExecEnv* exec_env, TPrivilegeHier::type hier, + TPrivilegeType::type type, CloudTabletMgr& tablet_mgr); + + void handle(HttpRequest* req) override; + +private: + Status _handle(size_t top_n, bool sync_meta, std::string* result); + + std::unique_ptr _accessor; +}; + +} // namespace doris diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index 143c1ad706bbe7..1fd3b785b9072f 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -29,6 +29,7 @@ #include "olap/rowid_conversion.h" #include "olap/rowset/beta_rowset.h" #include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_fwd.h" #include "olap/rowset/rowset_reader.h" #include "olap/tablet_fwd.h" #include "olap/txn_manager.h" @@ -182,6 +183,14 @@ Status BaseTablet::update_by_least_common_schema(const TabletSchemaSPtr& update_ return Status::OK(); } +uint32_t BaseTablet::get_real_compaction_score() const { + const auto& rs_metas = _tablet_meta->all_rs_metas(); + return std::accumulate(rs_metas.begin(), rs_metas.end(), 0, + [](uint32_t score, const RowsetMetaSharedPtr& rs_meta) { + return score + rs_meta->get_compaction_score(); + }); +} + Status BaseTablet::capture_rs_readers_unlocked(const Versions& version_path, std::vector* rs_splits) const { DCHECK(rs_splits != nullptr && 
rs_splits->empty()); diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index cfaf536902e03e..943f815581809a 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -105,6 +105,10 @@ class BaseTablet { virtual size_t tablet_footprint() = 0; + // this method just return the compaction sum on each rowset + // note(tsy): we should unify the compaction score calculation finally + uint32_t get_real_compaction_score() const; + // MUST hold shared meta lock Status capture_rs_readers_unlocked(const Versions& version_path, std::vector* rs_splits) const; diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index 741c2423915ede..6e5bb2fa31578f 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -177,6 +177,51 @@ LRUCache::~LRUCache() { prune(); } +PrunedInfo LRUCache::set_capacity(size_t capacity) { + LRUHandle* last_ref_list = nullptr; + { + std::lock_guard l(_mutex); + _capacity = capacity; + _evict_from_lru(0, &last_ref_list); + } + + int64_t pruned_count = 0; + int64_t pruned_size = 0; + while (last_ref_list != nullptr) { + ++pruned_count; + pruned_size += last_ref_list->total_size; + LRUHandle* next = last_ref_list->next; + last_ref_list->free(); + last_ref_list = next; + } + return {pruned_count, pruned_size}; +} + +uint64_t LRUCache::get_lookup_count() { + std::lock_guard l(_mutex); + return _lookup_count; +} + +uint64_t LRUCache::get_hit_count() { + std::lock_guard l(_mutex); + return _hit_count; +} + +size_t LRUCache::get_usage() { + std::lock_guard l(_mutex); + return _usage; +} + +size_t LRUCache::get_capacity() { + std::lock_guard l(_mutex); + return _capacity; +} + +size_t LRUCache::get_element_count() { + std::lock_guard l(_mutex); + return _table.element_count(); +} + bool LRUCache::_unref(LRUHandle* e) { DCHECK(e->refs > 0); e->refs--; @@ -515,19 +560,19 @@ inline uint32_t ShardedLRUCache::_hash_slice(const CacheKey& s) { return s.hash(s.data(), s.size(), 0); } -ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, +ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type, uint32_t num_shards, uint32_t total_element_count_capacity) : _name(name), _num_shard_bits(Bits::FindLSBSetNonZero(num_shards)), _num_shards(num_shards), _shards(nullptr), _last_id(1), - _total_capacity(total_capacity) { + _capacity(capacity) { CHECK(num_shards > 0) << "num_shards cannot be 0"; CHECK_EQ((num_shards & (num_shards - 1)), 0) << "num_shards should be power of two, but got " << num_shards; - const size_t per_shard = (total_capacity + (_num_shards - 1)) / _num_shards; + const size_t per_shard = (capacity + (_num_shards - 1)) / _num_shards; const size_t per_shard_element_count_capacity = (total_element_count_capacity + (_num_shards - 1)) / _num_shards; LRUCache** shards = new (std::nothrow) LRUCache*[_num_shards]; @@ -557,12 +602,12 @@ ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, "doris_cache", _name + "_persecond", _lookup_count_bvar.get(), 60)); } -ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, +ShardedLRUCache::ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type, uint32_t num_shards, CacheValueTimeExtractor cache_value_time_extractor, bool cache_value_check_timestamp, uint32_t total_element_count_capacity) - : ShardedLRUCache(name, total_capacity, type, num_shards, total_element_count_capacity) { + : ShardedLRUCache(name, capacity, type, 
num_shards, total_element_count_capacity) { for (int s = 0; s < _num_shards; s++) { _shards[s]->set_cache_value_time_extractor(cache_value_time_extractor); _shards[s]->set_cache_value_check_timestamp(cache_value_check_timestamp); @@ -580,6 +625,24 @@ ShardedLRUCache::~ShardedLRUCache() { } } +PrunedInfo ShardedLRUCache::set_capacity(size_t capacity) { + std::lock_guard l(_mutex); + PrunedInfo pruned_info; + const size_t per_shard = (capacity + (_num_shards - 1)) / _num_shards; + for (int s = 0; s < _num_shards; s++) { + PrunedInfo info = _shards[s]->set_capacity(per_shard); + pruned_info.pruned_count += info.pruned_count; + pruned_info.pruned_size += info.pruned_size; + } + _capacity = capacity; + return pruned_info; +} + +size_t ShardedLRUCache::get_capacity() { + std::lock_guard l(_mutex); + return _capacity; +} + Cache::Handle* ShardedLRUCache::insert(const CacheKey& key, void* value, size_t charge, CachePriority priority) { const uint32_t hash = _hash_slice(key); @@ -638,25 +701,25 @@ int64_t ShardedLRUCache::get_usage() { } void ShardedLRUCache::update_cache_metrics() const { - size_t total_capacity = 0; + size_t capacity = 0; size_t total_usage = 0; size_t total_lookup_count = 0; size_t total_hit_count = 0; size_t total_element_count = 0; for (int i = 0; i < _num_shards; i++) { - total_capacity += _shards[i]->get_capacity(); + capacity += _shards[i]->get_capacity(); total_usage += _shards[i]->get_usage(); total_lookup_count += _shards[i]->get_lookup_count(); total_hit_count += _shards[i]->get_hit_count(); total_element_count += _shards[i]->get_element_count(); } - cache_capacity->set_value(total_capacity); + cache_capacity->set_value(capacity); cache_usage->set_value(total_usage); cache_element_count->set_value(total_element_count); cache_lookup_count->set_value(total_lookup_count); cache_hit_count->set_value(total_hit_count); - cache_usage_ratio->set_value(total_capacity == 0 ? 0 : ((double)total_usage / total_capacity)); + cache_usage_ratio->set_value(capacity == 0 ? 0 : ((double)total_usage / capacity)); cache_hit_ratio->set_value( total_lookup_count == 0 ? 
0 : ((double)total_hit_count / total_lookup_count)); } diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index 059020deab58f5..de7084382d7398 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -227,7 +227,8 @@ class Cache { virtual int64_t get_usage() = 0; - virtual size_t get_total_capacity() = 0; + virtual PrunedInfo set_capacity(size_t capacity) = 0; + virtual size_t get_capacity() = 0; private: DISALLOW_COPY_AND_ASSIGN(Cache); @@ -327,7 +328,7 @@ class LRUCache { ~LRUCache(); // Separate from constructor so caller can easily make an array of LRUCache - void set_capacity(size_t capacity) { _capacity = capacity; } + PrunedInfo set_capacity(size_t capacity); void set_element_count_capacity(uint32_t element_count_capacity) { _element_count_capacity = element_count_capacity; } @@ -345,11 +346,11 @@ class LRUCache { void set_cache_value_time_extractor(CacheValueTimeExtractor cache_value_time_extractor); void set_cache_value_check_timestamp(bool cache_value_check_timestamp); - uint64_t get_lookup_count() const { return _lookup_count; } - uint64_t get_hit_count() const { return _hit_count; } - size_t get_usage() const { return _usage; } - size_t get_capacity() const { return _capacity; } - size_t get_element_count() const { return _table.element_count(); } + uint64_t get_lookup_count(); + uint64_t get_hit_count(); + size_t get_usage(); + size_t get_capacity(); + size_t get_element_count(); private: void _lru_remove(LRUHandle* e); @@ -403,15 +404,16 @@ class ShardedLRUCache : public Cache { PrunedInfo prune() override; PrunedInfo prune_if(CachePrunePredicate pred, bool lazy_mode = false) override; int64_t get_usage() override; - size_t get_total_capacity() override { return _total_capacity; }; + PrunedInfo set_capacity(size_t capacity) override; + size_t get_capacity() override; private: // LRUCache can only be created and managed with LRUCachePolicy. 
friend class LRUCachePolicy; - explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, + explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type, uint32_t num_shards, uint32_t element_count_capacity); - explicit ShardedLRUCache(const std::string& name, size_t total_capacity, LRUCacheType type, + explicit ShardedLRUCache(const std::string& name, size_t capacity, LRUCacheType type, uint32_t num_shards, CacheValueTimeExtractor cache_value_time_extractor, bool cache_value_check_timestamp, uint32_t element_count_capacity); @@ -429,7 +431,8 @@ class ShardedLRUCache : public Cache { const uint32_t _num_shards; LRUCache** _shards = nullptr; std::atomic _last_id; - size_t _total_capacity; + std::mutex _mutex; + size_t _capacity {0}; std::shared_ptr _entity; IntGauge* cache_capacity = nullptr; @@ -462,7 +465,8 @@ class DummyLRUCache : public Cache { return {0, 0}; }; int64_t get_usage() override { return 0; }; - size_t get_total_capacity() override { return 0; }; + PrunedInfo set_capacity(size_t capacity) override { return {0, 0}; }; + size_t get_capacity() override { return 0; }; }; } // namespace doris diff --git a/be/src/olap/tablet.cpp b/be/src/olap/tablet.cpp index da7a4ec8a6e260..66278afdb666ee 100644 --- a/be/src/olap/tablet.cpp +++ b/be/src/olap/tablet.cpp @@ -1023,6 +1023,9 @@ uint32_t Tablet::calc_cold_data_compaction_score() const { uint32_t Tablet::_calc_cumulative_compaction_score( std::shared_ptr cumulative_compaction_policy) { + if (cumulative_compaction_policy == nullptr) [[unlikely]] { + return 0; + } #ifndef BE_TEST if (_cumulative_compaction_policy == nullptr || _cumulative_compaction_policy->name() != cumulative_compaction_policy->name()) { diff --git a/be/src/pipeline/exec/aggregation_sink_operator.cpp b/be/src/pipeline/exec/aggregation_sink_operator.cpp index 8c96b4d744c83d..260a599a947a0d 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/aggregation_sink_operator.cpp @@ -775,7 +775,7 @@ Status AggSinkOperatorX::open(RuntimeState* state) { _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); DCHECK_EQ(_intermediate_tuple_desc->slots().size(), _output_tuple_desc->slots().size()); RETURN_IF_ERROR(vectorized::VExpr::prepare( - _probe_expr_ctxs, state, DataSinkOperatorX::_child_x->row_desc())); + _probe_expr_ctxs, state, DataSinkOperatorX::_child->row_desc())); int j = _probe_expr_ctxs.size(); for (int i = 0; i < j; ++i) { @@ -790,7 +790,7 @@ Status AggSinkOperatorX::open(RuntimeState* state) { SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[j]; SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[j]; RETURN_IF_ERROR(_aggregate_evaluators[i]->prepare( - state, DataSinkOperatorX::_child_x->row_desc(), + state, DataSinkOperatorX::_child->row_desc(), intermediate_slot_desc, output_slot_desc)); _aggregate_evaluators[i]->set_version(state->be_exec_version()); } diff --git a/be/src/pipeline/exec/aggregation_sink_operator.h b/be/src/pipeline/exec/aggregation_sink_operator.h index d55b382931d74b..97440de3f09e4c 100644 --- a/be/src/pipeline/exec/aggregation_sink_operator.h +++ b/be/src/pipeline/exec/aggregation_sink_operator.h @@ -143,7 +143,7 @@ class AggSinkOperatorX final : public DataSinkOperatorX { DataDistribution required_data_distribution() const override { if (_probe_expr_ctxs.empty()) { - return _needs_finalize || DataSinkOperatorX::_child_x + return _needs_finalize || DataSinkOperatorX::_child 
->ignore_data_distribution() ? DataDistribution(ExchangeType::PASSTHROUGH) : DataSinkOperatorX::required_data_distribution(); diff --git a/be/src/pipeline/exec/analytic_sink_operator.cpp b/be/src/pipeline/exec/analytic_sink_operator.cpp index cc219ecbe642f0..85d7773bdbd025 100644 --- a/be/src/pipeline/exec/analytic_sink_operator.cpp +++ b/be/src/pipeline/exec/analytic_sink_operator.cpp @@ -234,11 +234,11 @@ Status AnalyticSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) Status AnalyticSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(DataSinkOperatorX::open(state)); for (const auto& ctx : _agg_expr_ctxs) { - RETURN_IF_ERROR(vectorized::VExpr::prepare(ctx, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(ctx, state, _child->row_desc())); } if (!_partition_by_eq_expr_ctxs.empty() || !_order_by_eq_expr_ctxs.empty()) { vector tuple_ids; - tuple_ids.push_back(_child_x->row_desc().tuple_descriptors()[0]->id()); + tuple_ids.push_back(_child->row_desc().tuple_descriptors()[0]->id()); tuple_ids.push_back(_buffered_tuple_id); RowDescriptor cmp_row_desc(state->desc_tbl(), tuple_ids, vector(2, false)); if (!_partition_by_eq_expr_ctxs.empty()) { diff --git a/be/src/pipeline/exec/analytic_source_operator.cpp b/be/src/pipeline/exec/analytic_source_operator.cpp index 1996b9af58d2c4..b521a9b583fa94 100644 --- a/be/src/pipeline/exec/analytic_source_operator.cpp +++ b/be/src/pipeline/exec/analytic_source_operator.cpp @@ -562,13 +562,13 @@ Status AnalyticLocalState::close(RuntimeState* state) { Status AnalyticSourceOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(OperatorX::open(state)); - DCHECK(_child_x->row_desc().is_prefix_of(_row_descriptor)); + DCHECK(_child->row_desc().is_prefix_of(_row_descriptor)); _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); for (size_t i = 0; i < _agg_functions.size(); ++i) { SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[i]; SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[i]; - RETURN_IF_ERROR(_agg_functions[i]->prepare(state, _child_x->row_desc(), + RETURN_IF_ERROR(_agg_functions[i]->prepare(state, _child->row_desc(), intermediate_slot_desc, output_slot_desc)); _agg_functions[i]->set_version(state->be_exec_version()); _change_to_nullable_flags.push_back(output_slot_desc->is_nullable() && diff --git a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp index 96212f7fd2ff00..5127605097f4c5 100644 --- a/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp +++ b/be/src/pipeline/exec/distinct_streaming_aggregation_operator.cpp @@ -374,7 +374,7 @@ Status DistinctStreamingAggOperatorX::open(RuntimeState* state) { _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); DCHECK_EQ(_intermediate_tuple_desc->slots().size(), _output_tuple_desc->slots().size()); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_probe_expr_ctxs, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_probe_expr_ctxs, state, _child->row_desc())); int j = _probe_expr_ctxs.size(); for (int i = 0; i < j; ++i) { @@ -389,7 +389,7 @@ Status DistinctStreamingAggOperatorX::open(RuntimeState* state) { SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[j]; 
SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[j]; RETURN_IF_ERROR(_aggregate_evaluators[i]->prepare( - state, _child_x->row_desc(), intermediate_slot_desc, output_slot_desc)); + state, _child->row_desc(), intermediate_slot_desc, output_slot_desc)); _aggregate_evaluators[i]->set_version(state->be_exec_version()); } diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index 71649aa21ec3d4..366b3c682f7dd5 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -648,10 +648,10 @@ Status ExchangeSinkLocalState::close(RuntimeState* state, Status exec_status) { } DataDistribution ExchangeSinkOperatorX::required_data_distribution() const { - if (_child_x && _enable_local_merge_sort) { + if (_child && _enable_local_merge_sort) { // SORT_OPERATOR -> DATA_STREAM_SINK_OPERATOR // SORT_OPERATOR -> LOCAL_MERGE_SORT -> DATA_STREAM_SINK_OPERATOR - if (auto sort_source = std::dynamic_pointer_cast(_child_x); + if (auto sort_source = std::dynamic_pointer_cast(_child); sort_source && sort_source->use_local_merge()) { // Sort the data local return ExchangeType::LOCAL_MERGE_SORT; diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index d4ca54da637673..0bee88ed537ea6 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -488,7 +488,7 @@ Status HashJoinBuildSinkOperatorX::open(RuntimeState* state) { _shared_hash_table_context = _shared_hashtable_controller->get_context(node_id()); } } - RETURN_IF_ERROR(vectorized::VExpr::prepare(_build_expr_ctxs, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_build_expr_ctxs, state, _child->row_desc())); return vectorized::VExpr::open(_build_expr_ctxs, state); } @@ -505,7 +505,7 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, vectorized::Block* if (local_state._build_side_mutable_block.empty()) { auto tmp_build_block = vectorized::VectorizedUtils::create_empty_columnswithtypename( - _child_x->row_desc()); + _child->row_desc()); tmp_build_block = *(tmp_build_block.create_same_struct_block(1, false)); local_state._build_col_ids.resize(_build_expr_ctxs.size()); RETURN_IF_ERROR(local_state._do_evaluate(tmp_build_block, local_state._build_expr_ctxs, diff --git a/be/src/pipeline/exec/hashjoin_build_sink.h b/be/src/pipeline/exec/hashjoin_build_sink.h index c373af5d6622ff..cf677833fb5b64 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.h +++ b/be/src/pipeline/exec/hashjoin_build_sink.h @@ -132,9 +132,8 @@ class HashJoinBuildSinkOperatorX final if (_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { return {ExchangeType::NOOP}; } else if (_is_broadcast_join) { - return _child_x->ignore_data_distribution() - ? DataDistribution(ExchangeType::PASS_TO_ONE) - : DataDistribution(ExchangeType::NOOP); + return _child->ignore_data_distribution() ? 
DataDistribution(ExchangeType::PASS_TO_ONE) + : DataDistribution(ExchangeType::NOOP); } return _join_distribution == TJoinDistributionType::BUCKET_SHUFFLE || _join_distribution == TJoinDistributionType::COLOCATE diff --git a/be/src/pipeline/exec/hashjoin_probe_operator.cpp b/be/src/pipeline/exec/hashjoin_probe_operator.cpp index 7008397db770ae..f91e1eaa2a1b17 100644 --- a/be/src/pipeline/exec/hashjoin_probe_operator.cpp +++ b/be/src/pipeline/exec/hashjoin_probe_operator.cpp @@ -276,7 +276,7 @@ Status HashJoinProbeOperatorX::pull(doris::RuntimeState* state, vectorized::Bloc /// increase the output rows count(just same as `_probe_block`'s rows count). RETURN_IF_ERROR(local_state.filter_data_and_build_output(state, output_block, eos, &local_state._probe_block, false)); - local_state._probe_block.clear_column_data(_child_x->row_desc().num_materialized_slots()); + local_state._probe_block.clear_column_data(_child->row_desc().num_materialized_slots()); return Status::OK(); } @@ -597,7 +597,7 @@ Status HashJoinProbeOperatorX::open(RuntimeState* state) { } } }; - init_output_slots_flags(_child_x->row_desc().tuple_descriptors(), _left_output_slot_flags); + init_output_slots_flags(_child->row_desc().tuple_descriptors(), _left_output_slot_flags); init_output_slots_flags(_build_side_child->row_desc().tuple_descriptors(), _right_output_slot_flags); // _other_join_conjuncts are evaluated in the context of the rows produced by this node @@ -609,12 +609,12 @@ Status HashJoinProbeOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(conjunct->prepare(state, *_intermediate_row_desc)); } - RETURN_IF_ERROR(vectorized::VExpr::prepare(_probe_expr_ctxs, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_probe_expr_ctxs, state, _child->row_desc())); DCHECK(_build_side_child != nullptr); // right table data types _right_table_data_types = vectorized::VectorizedUtils::get_data_types(_build_side_child->row_desc()); - _left_table_data_types = vectorized::VectorizedUtils::get_data_types(_child_x->row_desc()); + _left_table_data_types = vectorized::VectorizedUtils::get_data_types(_child->row_desc()); _right_table_column_names = vectorized::VectorizedUtils::get_column_names(_build_side_child->row_desc()); diff --git a/be/src/pipeline/exec/join_probe_operator.h b/be/src/pipeline/exec/join_probe_operator.h index 65b7a2694e4b47..3f68c73d04b161 100644 --- a/be/src/pipeline/exec/join_probe_operator.h +++ b/be/src/pipeline/exec/join_probe_operator.h @@ -85,12 +85,12 @@ class JoinProbeOperatorX : public StatefulOperatorX { } Status set_child(OperatorPtr child) override { - if (OperatorX::_child_x && _build_side_child == nullptr) { + if (OperatorX::_child && _build_side_child == nullptr) { // when there already (probe) child, others is build child. 
set_build_side_child(child); } else { // first child which is probe side is in this pipeline - OperatorX::_child_x = std::move(child); + OperatorX::_child = std::move(child); } return Status::OK(); } diff --git a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp index 515c151c3c1fa8..793a37c7396a61 100644 --- a/be/src/pipeline/exec/nested_loop_join_build_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_build_operator.cpp @@ -109,14 +109,14 @@ Status NestedLoopJoinBuildSinkOperatorX::init(const TPlanNode& tnode, RuntimeSta Status NestedLoopJoinBuildSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(JoinBuildSinkOperatorX::open(state)); - int num_build_tuples = _child_x->row_desc().tuple_descriptors().size(); + int num_build_tuples = _child->row_desc().tuple_descriptors().size(); for (int i = 0; i < num_build_tuples; ++i) { - TupleDescriptor* build_tuple_desc = _child_x->row_desc().tuple_descriptors()[i]; + TupleDescriptor* build_tuple_desc = _child->row_desc().tuple_descriptors()[i]; auto tuple_idx = _row_descriptor.get_tuple_idx(build_tuple_desc->id()); RETURN_IF_INVALID_TUPLE_IDX(build_tuple_desc->id(), tuple_idx); } - RETURN_IF_ERROR(vectorized::VExpr::prepare(_filter_src_expr_ctxs, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_filter_src_expr_ctxs, state, _child->row_desc())); return vectorized::VExpr::open(_filter_src_expr_ctxs, state); } diff --git a/be/src/pipeline/exec/nested_loop_join_build_operator.h b/be/src/pipeline/exec/nested_loop_join_build_operator.h index da7712e3e17685..f2ca259754b661 100644 --- a/be/src/pipeline/exec/nested_loop_join_build_operator.h +++ b/be/src/pipeline/exec/nested_loop_join_build_operator.h @@ -76,8 +76,8 @@ class NestedLoopJoinBuildSinkOperatorX final if (_join_op == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { return {ExchangeType::NOOP}; } - return _child_x->ignore_data_distribution() ? DataDistribution(ExchangeType::BROADCAST) - : DataDistribution(ExchangeType::NOOP); + return _child->ignore_data_distribution() ? DataDistribution(ExchangeType::BROADCAST) + : DataDistribution(ExchangeType::NOOP); } private: diff --git a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp index 5a0b6680eee765..9546ed8df56671 100644 --- a/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp +++ b/be/src/pipeline/exec/nested_loop_join_probe_operator.cpp @@ -450,7 +450,7 @@ Status NestedLoopJoinProbeOperatorX::open(RuntimeState* state) { for (auto& conjunct : _join_conjuncts) { RETURN_IF_ERROR(conjunct->prepare(state, *_intermediate_row_desc)); } - _num_probe_side_columns = _child_x->row_desc().num_materialized_slots(); + _num_probe_side_columns = _child->row_desc().num_materialized_slots(); _num_build_side_columns = _build_side_child->row_desc().num_materialized_slots(); return vectorized::VExpr::open(_join_conjuncts, state); } diff --git a/be/src/pipeline/exec/operator.cpp b/be/src/pipeline/exec/operator.cpp index 217c3219d5c36d..d65769254b9dfc 100644 --- a/be/src/pipeline/exec/operator.cpp +++ b/be/src/pipeline/exec/operator.cpp @@ -116,12 +116,12 @@ std::string PipelineXSinkLocalState::name_suffix() { } DataDistribution DataSinkOperatorXBase::required_data_distribution() const { - return _child_x && _child_x->ignore_data_distribution() + return _child && _child->ignore_data_distribution() ? 
DataDistribution(ExchangeType::PASSTHROUGH) : DataDistribution(ExchangeType::NOOP); } const RowDescriptor& OperatorBase::row_desc() const { - return _child_x->row_desc(); + return _child->row_desc(); } template @@ -220,15 +220,15 @@ Status OperatorXBase::open(RuntimeState* state) { for (auto& projections : _intermediate_projections) { RETURN_IF_ERROR(vectorized::VExpr::open(projections, state)); } - if (_child_x && !is_source()) { - RETURN_IF_ERROR(_child_x->open(state)); + if (_child && !is_source()) { + RETURN_IF_ERROR(_child->open(state)); } return Status::OK(); } Status OperatorXBase::close(RuntimeState* state) { - if (_child_x && !is_source()) { - RETURN_IF_ERROR(_child_x->close(state)); + if (_child && !is_source()) { + RETURN_IF_ERROR(_child->close(state)); } auto result = state->get_local_state_result(operator_id()); if (!result) { @@ -572,8 +572,7 @@ Status PipelineXSinkLocalState::close(RuntimeState* state, Status e template Status StreamingOperatorX::get_block(RuntimeState* state, vectorized::Block* block, bool* eos) { - RETURN_IF_ERROR( - OperatorX::_child_x->get_block_after_projects(state, block, eos)); + RETURN_IF_ERROR(OperatorX::_child->get_block_after_projects(state, block, eos)); return pull(state, block, eos); } @@ -583,8 +582,8 @@ Status StatefulOperatorX::get_block(RuntimeState* state, vectori auto& local_state = get_local_state(state); if (need_more_input_data(state)) { local_state._child_block->clear_column_data( - OperatorX::_child_x->row_desc().num_materialized_slots()); - RETURN_IF_ERROR(OperatorX::_child_x->get_block_after_projects( + OperatorX::_child->row_desc().num_materialized_slots()); + RETURN_IF_ERROR(OperatorX::_child->get_block_after_projects( state, local_state._child_block.get(), &local_state._child_eos)); *eos = local_state._child_eos; if (local_state._child_block->rows() == 0 && !local_state._child_eos) { @@ -668,66 +667,66 @@ Status AsyncWriterSink::close(RuntimeState* state, Status exec_s return Base::close(state, exec_status); } -#define DECLARE_OPERATOR_X(LOCAL_STATE) template class DataSinkOperatorX; -DECLARE_OPERATOR_X(HashJoinBuildSinkLocalState) -DECLARE_OPERATOR_X(ResultSinkLocalState) -DECLARE_OPERATOR_X(JdbcTableSinkLocalState) -DECLARE_OPERATOR_X(MemoryScratchSinkLocalState) -DECLARE_OPERATOR_X(ResultFileSinkLocalState) -DECLARE_OPERATOR_X(OlapTableSinkLocalState) -DECLARE_OPERATOR_X(OlapTableSinkV2LocalState) -DECLARE_OPERATOR_X(HiveTableSinkLocalState) -DECLARE_OPERATOR_X(IcebergTableSinkLocalState) -DECLARE_OPERATOR_X(AnalyticSinkLocalState) -DECLARE_OPERATOR_X(SortSinkLocalState) -DECLARE_OPERATOR_X(SpillSortSinkLocalState) -DECLARE_OPERATOR_X(LocalExchangeSinkLocalState) -DECLARE_OPERATOR_X(AggSinkLocalState) -DECLARE_OPERATOR_X(PartitionedAggSinkLocalState) -DECLARE_OPERATOR_X(ExchangeSinkLocalState) -DECLARE_OPERATOR_X(NestedLoopJoinBuildSinkLocalState) -DECLARE_OPERATOR_X(UnionSinkLocalState) -DECLARE_OPERATOR_X(MultiCastDataStreamSinkLocalState) -DECLARE_OPERATOR_X(PartitionSortSinkLocalState) -DECLARE_OPERATOR_X(SetProbeSinkLocalState) -DECLARE_OPERATOR_X(SetProbeSinkLocalState) -DECLARE_OPERATOR_X(SetSinkLocalState) -DECLARE_OPERATOR_X(SetSinkLocalState) -DECLARE_OPERATOR_X(PartitionedHashJoinSinkLocalState) -DECLARE_OPERATOR_X(GroupCommitBlockSinkLocalState) - -#undef DECLARE_OPERATOR_X - -#define DECLARE_OPERATOR_X(LOCAL_STATE) template class OperatorX; -DECLARE_OPERATOR_X(HashJoinProbeLocalState) -DECLARE_OPERATOR_X(OlapScanLocalState) -DECLARE_OPERATOR_X(GroupCommitLocalState) -DECLARE_OPERATOR_X(JDBCScanLocalState) 
-DECLARE_OPERATOR_X(FileScanLocalState) -DECLARE_OPERATOR_X(EsScanLocalState) -DECLARE_OPERATOR_X(AnalyticLocalState) -DECLARE_OPERATOR_X(SortLocalState) -DECLARE_OPERATOR_X(SpillSortLocalState) -DECLARE_OPERATOR_X(AggLocalState) -DECLARE_OPERATOR_X(PartitionedAggLocalState) -DECLARE_OPERATOR_X(TableFunctionLocalState) -DECLARE_OPERATOR_X(ExchangeLocalState) -DECLARE_OPERATOR_X(RepeatLocalState) -DECLARE_OPERATOR_X(NestedLoopJoinProbeLocalState) -DECLARE_OPERATOR_X(AssertNumRowsLocalState) -DECLARE_OPERATOR_X(EmptySetLocalState) -DECLARE_OPERATOR_X(UnionSourceLocalState) -DECLARE_OPERATOR_X(MultiCastDataStreamSourceLocalState) -DECLARE_OPERATOR_X(PartitionSortSourceLocalState) -DECLARE_OPERATOR_X(SetSourceLocalState) -DECLARE_OPERATOR_X(SetSourceLocalState) -DECLARE_OPERATOR_X(DataGenLocalState) -DECLARE_OPERATOR_X(SchemaScanLocalState) -DECLARE_OPERATOR_X(MetaScanLocalState) -DECLARE_OPERATOR_X(LocalExchangeSourceLocalState) -DECLARE_OPERATOR_X(PartitionedHashJoinProbeLocalState) - -#undef DECLARE_OPERATOR_X +#define DECLARE_OPERATOR(LOCAL_STATE) template class DataSinkOperatorX; +DECLARE_OPERATOR(HashJoinBuildSinkLocalState) +DECLARE_OPERATOR(ResultSinkLocalState) +DECLARE_OPERATOR(JdbcTableSinkLocalState) +DECLARE_OPERATOR(MemoryScratchSinkLocalState) +DECLARE_OPERATOR(ResultFileSinkLocalState) +DECLARE_OPERATOR(OlapTableSinkLocalState) +DECLARE_OPERATOR(OlapTableSinkV2LocalState) +DECLARE_OPERATOR(HiveTableSinkLocalState) +DECLARE_OPERATOR(IcebergTableSinkLocalState) +DECLARE_OPERATOR(AnalyticSinkLocalState) +DECLARE_OPERATOR(SortSinkLocalState) +DECLARE_OPERATOR(SpillSortSinkLocalState) +DECLARE_OPERATOR(LocalExchangeSinkLocalState) +DECLARE_OPERATOR(AggSinkLocalState) +DECLARE_OPERATOR(PartitionedAggSinkLocalState) +DECLARE_OPERATOR(ExchangeSinkLocalState) +DECLARE_OPERATOR(NestedLoopJoinBuildSinkLocalState) +DECLARE_OPERATOR(UnionSinkLocalState) +DECLARE_OPERATOR(MultiCastDataStreamSinkLocalState) +DECLARE_OPERATOR(PartitionSortSinkLocalState) +DECLARE_OPERATOR(SetProbeSinkLocalState) +DECLARE_OPERATOR(SetProbeSinkLocalState) +DECLARE_OPERATOR(SetSinkLocalState) +DECLARE_OPERATOR(SetSinkLocalState) +DECLARE_OPERATOR(PartitionedHashJoinSinkLocalState) +DECLARE_OPERATOR(GroupCommitBlockSinkLocalState) + +#undef DECLARE_OPERATOR + +#define DECLARE_OPERATOR(LOCAL_STATE) template class OperatorX; +DECLARE_OPERATOR(HashJoinProbeLocalState) +DECLARE_OPERATOR(OlapScanLocalState) +DECLARE_OPERATOR(GroupCommitLocalState) +DECLARE_OPERATOR(JDBCScanLocalState) +DECLARE_OPERATOR(FileScanLocalState) +DECLARE_OPERATOR(EsScanLocalState) +DECLARE_OPERATOR(AnalyticLocalState) +DECLARE_OPERATOR(SortLocalState) +DECLARE_OPERATOR(SpillSortLocalState) +DECLARE_OPERATOR(AggLocalState) +DECLARE_OPERATOR(PartitionedAggLocalState) +DECLARE_OPERATOR(TableFunctionLocalState) +DECLARE_OPERATOR(ExchangeLocalState) +DECLARE_OPERATOR(RepeatLocalState) +DECLARE_OPERATOR(NestedLoopJoinProbeLocalState) +DECLARE_OPERATOR(AssertNumRowsLocalState) +DECLARE_OPERATOR(EmptySetLocalState) +DECLARE_OPERATOR(UnionSourceLocalState) +DECLARE_OPERATOR(MultiCastDataStreamSourceLocalState) +DECLARE_OPERATOR(PartitionSortSourceLocalState) +DECLARE_OPERATOR(SetSourceLocalState) +DECLARE_OPERATOR(SetSourceLocalState) +DECLARE_OPERATOR(DataGenLocalState) +DECLARE_OPERATOR(SchemaScanLocalState) +DECLARE_OPERATOR(MetaScanLocalState) +DECLARE_OPERATOR(LocalExchangeSourceLocalState) +DECLARE_OPERATOR(PartitionedHashJoinProbeLocalState) + +#undef DECLARE_OPERATOR template class StreamingOperatorX; template class StreamingOperatorX; diff 
--git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h index 0863550dc192e4..48f8a2d1836574 100644 --- a/be/src/pipeline/exec/operator.h +++ b/be/src/pipeline/exec/operator.h @@ -39,7 +39,6 @@ #include "vec/runtime/vdata_stream_recvr.h" namespace doris { -class DataSink; class RowDescriptor; class RuntimeState; class TDataSink; @@ -82,7 +81,7 @@ struct LocalSinkStateInfo { class OperatorBase { public: - explicit OperatorBase() : _child_x(nullptr), _is_closed(false) {} + explicit OperatorBase() : _child(nullptr), _is_closed(false) {} virtual ~OperatorBase() = default; virtual bool is_sink() const { return false; } @@ -98,7 +97,7 @@ class OperatorBase { [[nodiscard]] virtual Status close(RuntimeState* state); [[nodiscard]] virtual Status set_child(OperatorPtr child) { - _child_x = std::move(child); + _child = std::move(child); return Status::OK(); } @@ -108,7 +107,7 @@ class OperatorBase { virtual Status revoke_memory(RuntimeState* state) { return Status::OK(); } [[nodiscard]] virtual bool require_data_distribution() const { return false; } - OperatorPtr child_x() { return _child_x; } + OperatorPtr child() { return _child; } [[nodiscard]] bool followed_by_shuffled_join() const { return _followed_by_shuffled_join; } void set_followed_by_shuffled_join(bool followed_by_shuffled_join) { _followed_by_shuffled_join = followed_by_shuffled_join; @@ -116,7 +115,7 @@ class OperatorBase { [[nodiscard]] virtual bool require_shuffled_data_distribution() const { return false; } protected: - OperatorPtr _child_x = nullptr; + OperatorPtr _child = nullptr; bool _is_closed; bool _followed_by_shuffled_join = false; @@ -645,15 +644,15 @@ class OperatorXBase : public OperatorBase { } [[nodiscard]] std::string get_name() const override { return _op_name; } [[nodiscard]] virtual DataDistribution required_data_distribution() const { - return _child_x && _child_x->ignore_data_distribution() && !is_source() + return _child && _child->ignore_data_distribution() && !is_source() ? DataDistribution(ExchangeType::PASSTHROUGH) : DataDistribution(ExchangeType::NOOP); } [[nodiscard]] virtual bool ignore_data_distribution() const { - return _child_x ? _child_x->ignore_data_distribution() : _ignore_data_distribution; + return _child ? _child->ignore_data_distribution() : _ignore_data_distribution; } [[nodiscard]] bool ignore_data_hash_distribution() const { - return _child_x ? _child_x->ignore_data_hash_distribution() : _ignore_data_distribution; + return _child ? 
_child->ignore_data_hash_distribution() : _ignore_data_distribution; } [[nodiscard]] virtual bool need_more_input_data(RuntimeState* state) const { return true; } void set_ignore_data_distribution() { _ignore_data_distribution = true; } @@ -708,7 +707,7 @@ class OperatorXBase : public OperatorBase { return reinterpret_cast(*this); } - [[nodiscard]] OperatorPtr get_child() { return _child_x; } + [[nodiscard]] OperatorPtr get_child() { return _child; } [[nodiscard]] vectorized::VExprContextSPtrs& conjuncts() { return _conjuncts; } [[nodiscard]] virtual RowDescriptor& row_descriptor() { return _row_descriptor; } diff --git a/be/src/pipeline/exec/partition_sort_sink_operator.cpp b/be/src/pipeline/exec/partition_sort_sink_operator.cpp index 80808185fa8980..94c51e160da2a2 100644 --- a/be/src/pipeline/exec/partition_sort_sink_operator.cpp +++ b/be/src/pipeline/exec/partition_sort_sink_operator.cpp @@ -117,7 +117,7 @@ Status PartitionSortSinkLocalState::init(RuntimeState* state, LocalSinkStateInfo ADD_COUNTER(_profile, "SortedPartitionInputRows", TUnit::UNIT); _partition_sort_info = std::make_shared( &_vsort_exec_exprs, p._limit, 0, p._pool, p._is_asc_order, p._nulls_first, - p._child_x->row_desc(), state, _profile, p._has_global_limit, p._partition_inner_limit, + p._child->row_desc(), state, _profile, p._has_global_limit, p._partition_inner_limit, p._top_n_algorithm, p._topn_phase); RETURN_IF_ERROR(_init_hash_method()); return Status::OK(); @@ -156,8 +156,8 @@ Status PartitionSortSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* st Status PartitionSortSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(DataSinkOperatorX::open(state)); - RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _child_x->row_desc(), _row_descriptor)); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_partition_expr_ctxs, state, _child_x->row_desc())); + RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _child->row_desc(), _row_descriptor)); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_partition_expr_ctxs, state, _child->row_desc())); RETURN_IF_ERROR(_vsort_exec_exprs.open(state)); RETURN_IF_ERROR(vectorized::VExpr::open(_partition_expr_ctxs, state)); return Status::OK(); @@ -175,7 +175,7 @@ Status PartitionSortSinkOperatorX::sink(RuntimeState* state, vectorized::Block* local_state._value_places.push_back(_pool->add(new PartitionBlocks( local_state._partition_sort_info, local_state._value_places.empty()))); } - local_state._value_places[0]->append_whole_block(input_block, _child_x->row_desc()); + local_state._value_places[0]->append_whole_block(input_block, _child->row_desc()); } else { //just simply use partition num to check //if is TWO_PHASE_GLOBAL, must be sort all data thought partition num threshold have been exceeded. 
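With the rename, consumers simply switch from child_x() / _child_x to child() / _child; the ExchangeSinkOperatorX::required_data_distribution() hunk earlier in this patch is the typical consumer. A minimal sketch of that pattern, with the dynamic_pointer_cast template argument (stripped in the hunk text) written out; it assumes the SortSourceOperatorX / use_local_merge() API shown there and is a fragment, not a standalone translation unit:

    if (_child && _enable_local_merge_sort) {
        // _child is the generic OperatorPtr; downcast to see whether it is the
        // sort source that can feed a local merge-sort exchange.
        if (auto sort_source = std::dynamic_pointer_cast<SortSourceOperatorX>(_child);
            sort_source && sort_source->use_local_merge()) {
            return ExchangeType::LOCAL_MERGE_SORT;
        }
    }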
diff --git a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp index 448d3239949a8a..469716b7a22182 100644 --- a/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_aggregation_sink_operator.cpp @@ -141,8 +141,8 @@ Status PartitionedAggSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* s } _agg_sink_operator->set_dests_id(DataSinkOperatorX::dests_id()); - RETURN_IF_ERROR(_agg_sink_operator->set_child( - DataSinkOperatorX::_child_x)); + RETURN_IF_ERROR( + _agg_sink_operator->set_child(DataSinkOperatorX::_child)); return _agg_sink_operator->init(tnode, state); } diff --git a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp index 6529d1eb6540c5..018d63a6deebb1 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp +++ b/be/src/pipeline/exec/partitioned_hash_join_probe_operator.cpp @@ -525,15 +525,15 @@ Status PartitionedHashJoinProbeOperatorX::init(const TPlanNode& tnode, RuntimeSt } Status PartitionedHashJoinProbeOperatorX::open(RuntimeState* state) { - // to avoid open _child_x twice - auto child_x = std::move(_child_x); + // to avoid open _child twice + auto child = std::move(_child); RETURN_IF_ERROR(JoinProbeOperatorX::open(state)); - RETURN_IF_ERROR(_inner_probe_operator->set_child(child_x)); + RETURN_IF_ERROR(_inner_probe_operator->set_child(child)); DCHECK(_build_side_child != nullptr); _inner_probe_operator->set_build_side_child(_build_side_child); RETURN_IF_ERROR(_inner_probe_operator->open(state)); - _child_x = std::move(child_x); - RETURN_IF_ERROR(_partitioner->prepare(state, _child_x->row_desc())); + _child = std::move(child); + RETURN_IF_ERROR(_partitioner->prepare(state, _child->row_desc())); RETURN_IF_ERROR(_partitioner->open(state)); return Status::OK(); } @@ -820,8 +820,8 @@ Status PartitionedHashJoinProbeOperatorX::get_block(RuntimeState* state, vectori return _revoke_memory(state); } - RETURN_IF_ERROR(_child_x->get_block_after_projects(state, local_state._child_block.get(), - &local_state._child_eos)); + RETURN_IF_ERROR(_child->get_block_after_projects(state, local_state._child_block.get(), + &local_state._child_eos)); if (need_to_spill && local_state._child_eos) { RETURN_IF_ERROR(local_state.finish_spilling(0)); diff --git a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp index 7c29fdc6ed08dd..a7297be493f804 100644 --- a/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp +++ b/be/src/pipeline/exec/partitioned_hash_join_sink_operator.cpp @@ -102,7 +102,7 @@ size_t PartitionedHashJoinSinkLocalState::revocable_mem_size(RuntimeState* state Status PartitionedHashJoinSinkLocalState::_revoke_unpartitioned_block(RuntimeState* state) { auto& p = _parent->cast(); _shared_state->inner_shared_state->hash_table_variants.reset(); - auto row_desc = p._child_x->row_desc(); + auto row_desc = p._child->row_desc(); const auto num_slots = row_desc.num_slots(); vectorized::Block build_block; auto inner_sink_state_ = _shared_state->inner_runtime_state->get_sink_local_state(); @@ -426,8 +426,8 @@ Status PartitionedHashJoinSinkOperatorX::init(const TPlanNode& tnode, RuntimeSta Status PartitionedHashJoinSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(JoinBuildSinkOperatorX::open(state)); - RETURN_IF_ERROR(_inner_sink_operator->set_child(_child_x)); - 
RETURN_IF_ERROR(_partitioner->prepare(state, _child_x->row_desc())); + RETURN_IF_ERROR(_inner_sink_operator->set_child(_child)); + RETURN_IF_ERROR(_partitioner->prepare(state, _child->row_desc())); RETURN_IF_ERROR(_partitioner->open(state)); return _inner_sink_operator->open(state); } diff --git a/be/src/pipeline/exec/repeat_operator.cpp b/be/src/pipeline/exec/repeat_operator.cpp index b6761186c82fb4..d355d99c2e352f 100644 --- a/be/src/pipeline/exec/repeat_operator.cpp +++ b/be/src/pipeline/exec/repeat_operator.cpp @@ -59,7 +59,7 @@ Status RepeatOperatorX::open(RuntimeState* state) { if (_output_tuple_desc == nullptr) { return Status::InternalError("Failed to get tuple descriptor."); } - RETURN_IF_ERROR(vectorized::VExpr::prepare(_expr_ctxs, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_expr_ctxs, state, _child->row_desc())); for (const auto& slot_desc : _output_tuple_desc->slots()) { _output_slots.push_back(slot_desc); } @@ -211,7 +211,7 @@ Status RepeatOperatorX::pull(doris::RuntimeState* state, vectorized::Block* outp int size = _repeat_id_list.size(); if (_repeat_id_idx >= size) { _intermediate_block->clear(); - _child_block.clear_column_data(_child_x->row_desc().num_materialized_slots()); + _child_block.clear_column_data(_child->row_desc().num_materialized_slots()); _repeat_id_idx = 0; } } else if (local_state._expr_ctxs.empty()) { @@ -225,7 +225,7 @@ Status RepeatOperatorX::pull(doris::RuntimeState* state, vectorized::Block* outp RETURN_IF_ERROR( local_state.add_grouping_id_column(rows, cur_col, columns, repeat_id_idx)); } - _child_block.clear_column_data(_child_x->row_desc().num_materialized_slots()); + _child_block.clear_column_data(_child->row_desc().num_materialized_slots()); } RETURN_IF_ERROR(vectorized::VExprContext::filter_block(local_state._conjuncts, output_block, output_block->columns())); diff --git a/be/src/pipeline/exec/set_probe_sink_operator.cpp b/be/src/pipeline/exec/set_probe_sink_operator.cpp index bd4b7481aac240..955f956f60d6fe 100644 --- a/be/src/pipeline/exec/set_probe_sink_operator.cpp +++ b/be/src/pipeline/exec/set_probe_sink_operator.cpp @@ -57,7 +57,7 @@ Status SetProbeSinkOperatorX::init(const TPlanNode& tnode, Runtime template Status SetProbeSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(DataSinkOperatorX>::open(state)); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_child_exprs, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_child_exprs, state, _child->row_desc())); return vectorized::VExpr::open(_child_exprs, state); } diff --git a/be/src/pipeline/exec/set_probe_sink_operator.h b/be/src/pipeline/exec/set_probe_sink_operator.h index 3b3ed2f6a2cabd..ab53f5358c2a91 100644 --- a/be/src/pipeline/exec/set_probe_sink_operator.h +++ b/be/src/pipeline/exec/set_probe_sink_operator.h @@ -111,7 +111,7 @@ class SetProbeSinkOperatorX final : public DataSinkOperatorX _partition_exprs; - using OperatorBase::_child_x; + using OperatorBase::_child; }; } // namespace pipeline diff --git a/be/src/pipeline/exec/set_sink_operator.cpp b/be/src/pipeline/exec/set_sink_operator.cpp index 9cebcf8611edc0..38667293d4854b 100644 --- a/be/src/pipeline/exec/set_sink_operator.cpp +++ b/be/src/pipeline/exec/set_sink_operator.cpp @@ -209,7 +209,7 @@ Status SetSinkOperatorX::init(const TPlanNode& tnode, RuntimeState template Status SetSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(Base::open(state)); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_child_exprs, state, _child_x->row_desc())); + 
RETURN_IF_ERROR(vectorized::VExpr::prepare(_child_exprs, state, _child->row_desc())); return vectorized::VExpr::open(_child_exprs, state); } diff --git a/be/src/pipeline/exec/set_sink_operator.h b/be/src/pipeline/exec/set_sink_operator.h index 48fd7f400dd5b7..1c08eddc141f2e 100644 --- a/be/src/pipeline/exec/set_sink_operator.h +++ b/be/src/pipeline/exec/set_sink_operator.h @@ -111,7 +111,7 @@ class SetSinkOperatorX final : public DataSinkOperatorX _partition_exprs; - using OperatorBase::_child_x; + using OperatorBase::_child; }; } // namespace pipeline diff --git a/be/src/pipeline/exec/sort_sink_operator.cpp b/be/src/pipeline/exec/sort_sink_operator.cpp index bb7c38d2b709bb..b07942b9ab1c05 100644 --- a/be/src/pipeline/exec/sort_sink_operator.cpp +++ b/be/src/pipeline/exec/sort_sink_operator.cpp @@ -46,19 +46,19 @@ Status SortSinkLocalState::open(RuntimeState* state) { case TSortAlgorithm::HEAP_SORT: { _shared_state->sorter = vectorized::HeapSorter::create_unique( _vsort_exec_exprs, p._limit, p._offset, p._pool, p._is_asc_order, p._nulls_first, - p._child_x->row_desc()); + p._child->row_desc()); break; } case TSortAlgorithm::TOPN_SORT: { _shared_state->sorter = vectorized::TopNSorter::create_unique( _vsort_exec_exprs, p._limit, p._offset, p._pool, p._is_asc_order, p._nulls_first, - p._child_x->row_desc(), state, _profile); + p._child->row_desc(), state, _profile); break; } case TSortAlgorithm::FULL_SORT: { _shared_state->sorter = vectorized::FullSorter::create_unique( _vsort_exec_exprs, p._limit, p._offset, p._pool, p._is_asc_order, p._nulls_first, - p._child_x->row_desc(), state, _profile); + p._child->row_desc(), state, _profile); break; } default: { @@ -108,7 +108,7 @@ Status SortSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { Status SortSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(DataSinkOperatorX::open(state)); - RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _child_x->row_desc(), _row_descriptor)); + RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _child->row_desc(), _row_descriptor)); return _vsort_exec_exprs.open(state); } diff --git a/be/src/pipeline/exec/sort_source_operator.cpp b/be/src/pipeline/exec/sort_source_operator.cpp index 17c936846e5c56..02a99e183c852e 100644 --- a/be/src/pipeline/exec/sort_source_operator.cpp +++ b/be/src/pipeline/exec/sort_source_operator.cpp @@ -42,9 +42,9 @@ Status SortSourceOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { Status SortSourceOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(Base::open(state)); - // spill sort _child_x may be nullptr. - if (_child_x) { - RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _child_x->row_desc(), _row_descriptor)); + // spill sort _child may be nullptr. 
+ if (_child) { + RETURN_IF_ERROR(_vsort_exec_exprs.prepare(state, _child->row_desc(), _row_descriptor)); RETURN_IF_ERROR(_vsort_exec_exprs.open(state)); } return Status::OK(); diff --git a/be/src/pipeline/exec/spill_sort_sink_operator.cpp b/be/src/pipeline/exec/spill_sort_sink_operator.cpp index 5f767f2b6e3ab8..4bf1ab04efb628 100644 --- a/be/src/pipeline/exec/spill_sort_sink_operator.cpp +++ b/be/src/pipeline/exec/spill_sort_sink_operator.cpp @@ -120,7 +120,7 @@ Status SpillSortSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) _name = "SPILL_SORT_SINK_OPERATOR"; _sort_sink_operator->set_dests_id(DataSinkOperatorX::dests_id()); - RETURN_IF_ERROR(_sort_sink_operator->set_child(DataSinkOperatorX::_child_x)); + RETURN_IF_ERROR(_sort_sink_operator->set_child(DataSinkOperatorX::_child)); return _sort_sink_operator->init(tnode, state); } diff --git a/be/src/pipeline/exec/streaming_aggregation_operator.cpp b/be/src/pipeline/exec/streaming_aggregation_operator.cpp index 59e11583f003c2..dfbe42c637ea56 100644 --- a/be/src/pipeline/exec/streaming_aggregation_operator.cpp +++ b/be/src/pipeline/exec/streaming_aggregation_operator.cpp @@ -1182,7 +1182,7 @@ Status StreamingAggOperatorX::open(RuntimeState* state) { _intermediate_tuple_desc = state->desc_tbl().get_tuple_descriptor(_intermediate_tuple_id); _output_tuple_desc = state->desc_tbl().get_tuple_descriptor(_output_tuple_id); DCHECK_EQ(_intermediate_tuple_desc->slots().size(), _output_tuple_desc->slots().size()); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_probe_expr_ctxs, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_probe_expr_ctxs, state, _child->row_desc())); int j = _probe_expr_ctxs.size(); for (int i = 0; i < j; ++i) { @@ -1197,7 +1197,7 @@ Status StreamingAggOperatorX::open(RuntimeState* state) { SlotDescriptor* intermediate_slot_desc = _intermediate_tuple_desc->slots()[j]; SlotDescriptor* output_slot_desc = _output_tuple_desc->slots()[j]; RETURN_IF_ERROR(_aggregate_evaluators[i]->prepare( - state, _child_x->row_desc(), intermediate_slot_desc, output_slot_desc)); + state, _child->row_desc(), intermediate_slot_desc, output_slot_desc)); _aggregate_evaluators[i]->set_version(state->be_exec_version()); } @@ -1295,7 +1295,7 @@ Status StreamingAggOperatorX::push(RuntimeState* state, vectorized::Block* in_bl if (in_block->rows() > 0) { RETURN_IF_ERROR(local_state.do_pre_agg(in_block, local_state._pre_aggregated_block.get())); } - in_block->clear_column_data(_child_x->row_desc().num_materialized_slots()); + in_block->clear_column_data(_child->row_desc().num_materialized_slots()); return Status::OK(); } diff --git a/be/src/pipeline/exec/table_function_operator.cpp b/be/src/pipeline/exec/table_function_operator.cpp index 02f61aa8fa94ea..ff9dfe632faec6 100644 --- a/be/src/pipeline/exec/table_function_operator.cpp +++ b/be/src/pipeline/exec/table_function_operator.cpp @@ -215,7 +215,7 @@ void TableFunctionLocalState::process_next_child_row() { } _child_block->clear_column_data(_parent->cast() - ._child_x->row_desc() + ._child->row_desc() .num_materialized_slots()); _cur_child_offset = -1; return; @@ -285,7 +285,7 @@ Status TableFunctionOperatorX::open(doris::RuntimeState* state) { } // get all input slots - for (const auto& child_tuple_desc : _child_x->row_desc().tuple_descriptors()) { + for (const auto& child_tuple_desc : _child->row_desc().tuple_descriptors()) { for (const auto& child_slot_desc : child_tuple_desc->slots()) { _child_slots.push_back(child_slot_desc); } diff --git 
a/be/src/pipeline/exec/union_sink_operator.cpp b/be/src/pipeline/exec/union_sink_operator.cpp index 06f301bc75ba40..288fc131037fab 100644 --- a/be/src/pipeline/exec/union_sink_operator.cpp +++ b/be/src/pipeline/exec/union_sink_operator.cpp @@ -74,7 +74,7 @@ Status UnionSinkOperatorX::init(const TPlanNode& tnode, RuntimeState* state) { Status UnionSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(DataSinkOperatorX::open(state)); - RETURN_IF_ERROR(vectorized::VExpr::prepare(_child_expr, state, _child_x->row_desc())); + RETURN_IF_ERROR(vectorized::VExpr::prepare(_child_expr, state, _child->row_desc())); RETURN_IF_ERROR(vectorized::VExpr::check_expr_output_type(_child_expr, _row_descriptor)); // open const expr lists. RETURN_IF_ERROR(vectorized::VExpr::open(_const_expr, state)); diff --git a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp index f5f3155b2d3d4d..19c37f3649bcc7 100644 --- a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp +++ b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp @@ -71,7 +71,7 @@ Status LocalExchangeSinkOperatorX::init(ExchangeType type, const int num_buckets Status LocalExchangeSinkOperatorX::open(RuntimeState* state) { RETURN_IF_ERROR(DataSinkOperatorX::open(state)); if (_type == ExchangeType::HASH_SHUFFLE || _type == ExchangeType::BUCKET_HASH_SHUFFLE) { - RETURN_IF_ERROR(_partitioner->prepare(state, _child_x->row_desc())); + RETURN_IF_ERROR(_partitioner->prepare(state, _child->row_desc())); RETURN_IF_ERROR(_partitioner->open(state)); } diff --git a/be/src/pipeline/local_exchange/local_exchange_source_operator.h b/be/src/pipeline/local_exchange/local_exchange_source_operator.h index ad23cb96aef6fe..c0da5c8120c1e9 100644 --- a/be/src/pipeline/local_exchange/local_exchange_source_operator.h +++ b/be/src/pipeline/local_exchange/local_exchange_source_operator.h @@ -72,10 +72,10 @@ class LocalExchangeSourceOperatorX final : public OperatorXintermediate_row_desc(); + return _child->intermediate_row_desc(); } - RowDescriptor& row_descriptor() override { return _child_x->row_descriptor(); } - const RowDescriptor& row_desc() const override { return _child_x->row_desc(); } + RowDescriptor& row_descriptor() override { return _child->row_descriptor(); } + const RowDescriptor& row_desc() const override { return _child->row_desc(); } Status get_block(RuntimeState* state, vectorized::Block* block, bool* eos) override; diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index 38a99dd66d421f..a489273b68d129 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -706,7 +706,7 @@ Status PipelineFragmentContext::_add_local_exchange_impl( const std::map& bucket_seq_to_instance_idx, const std::map& shuffle_idx_to_instance_idx, const bool ignore_data_hash_distribution) { - auto& operator_xs = cur_pipe->operators(); + auto& operators = cur_pipe->operators(); const auto downstream_pipeline_id = cur_pipe->id(); auto local_exchange_id = next_operator_id(); // 1. Create a new pipeline with local exchange sink. @@ -717,8 +717,8 @@ Status PipelineFragmentContext::_add_local_exchange_impl( * `bucket_seq_to_instance_idx` is empty if no scan operator is contained in this fragment. * So co-located operators(e.g. Agg, Analytic) should use `HASH_SHUFFLE` instead of `BUCKET_HASH_SHUFFLE`. */ - const bool followed_by_shuffled_join = operator_xs.size() > idx - ? 
operator_xs[idx]->followed_by_shuffled_join() + const bool followed_by_shuffled_join = operators.size() > idx + ? operators[idx]->followed_by_shuffled_join() : cur_pipe->sink()->followed_by_shuffled_join(); const bool should_disable_bucket_shuffle = bucket_seq_to_instance_idx.empty() && @@ -790,7 +790,7 @@ Status PipelineFragmentContext::_add_local_exchange_impl( } break; case ExchangeType::LOCAL_MERGE_SORT: { - auto child_op = cur_pipe->sink()->child_x(); + auto child_op = cur_pipe->sink()->child(); auto sort_source = std::dynamic_pointer_cast(child_op); if (!sort_source) { return Status::InternalError( @@ -825,21 +825,21 @@ Status PipelineFragmentContext::_add_local_exchange_impl( // pipeline1 [Scan - LocalExchangeSink] and pipeline2 [LocalExchangeSource - AggSink]. // 3.1 Initialize new pipeline's operator list. - std::copy(operator_xs.begin(), operator_xs.begin() + idx, + std::copy(operators.begin(), operators.begin() + idx, std::inserter(new_pip->operators(), new_pip->operators().end())); // 3.2 Erase unused operators in previous pipeline. - operator_xs.erase(operator_xs.begin(), operator_xs.begin() + idx); + operators.erase(operators.begin(), operators.begin() + idx); // 4. Initialize LocalExchangeSource and insert it into this pipeline. OperatorPtr source_op; source_op.reset(new LocalExchangeSourceOperatorX(pool, local_exchange_id)); RETURN_IF_ERROR(source_op->set_child(new_pip->operators().back())); RETURN_IF_ERROR(source_op->init(data_distribution.distribution_type)); - if (!operator_xs.empty()) { - RETURN_IF_ERROR(operator_xs.front()->set_child(source_op)); + if (!operators.empty()) { + RETURN_IF_ERROR(operators.front()->set_child(source_op)); } - operator_xs.insert(operator_xs.begin(), source_op); + operators.insert(operators.begin(), source_op); shared_state->create_dependencies(local_exchange_id); @@ -896,8 +896,8 @@ Status PipelineFragmentContext::_add_local_exchange( } *do_local_exchange = true; - auto& operator_xs = cur_pipe->operators(); - auto total_op_num = operator_xs.size(); + auto& operators = cur_pipe->operators(); + auto total_op_num = operators.size(); auto new_pip = add_pipeline(cur_pipe, pip_idx + 1); RETURN_IF_ERROR(_add_local_exchange_impl( idx, pool, cur_pipe, new_pip, data_distribution, do_local_exchange, num_buckets, @@ -1653,8 +1653,8 @@ void PipelineFragmentContext::_close_fragment_instance() { } if (_query_ctx->enable_profile()) { - _query_ctx->add_fragment_profile(_fragment_id, collect_realtime_profile_x(), - collect_realtime_load_channel_profile_x()); + _query_ctx->add_fragment_profile(_fragment_id, collect_realtime_profile(), + collect_realtime_load_channel_profile()); } // all submitted tasks done @@ -1724,7 +1724,7 @@ std::string PipelineFragmentContext::debug_string() { } std::vector> -PipelineFragmentContext::collect_realtime_profile_x() const { +PipelineFragmentContext::collect_realtime_profile() const { std::vector> res; // we do not have mutex to protect pipeline_id_to_profile @@ -1749,7 +1749,7 @@ PipelineFragmentContext::collect_realtime_profile_x() const { } std::shared_ptr -PipelineFragmentContext::collect_realtime_load_channel_profile_x() const { +PipelineFragmentContext::collect_realtime_load_channel_profile() const { // we do not have mutex to protect pipeline_id_to_profile // so we need to make sure this funciton is invoked after fragment context // has already been prepared. 
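The operator-list splice in _add_local_exchange_impl is easier to see on a toy container. A self-contained sketch of steps 3.1, 3.2 and 4 above; plain ints stand in for OperatorPtr, and idx and the id 99 are made up:

    #include <algorithm>
    #include <cassert>
    #include <iterator>
    #include <vector>

    int main() {
        std::vector<int> operators = {10, 11, 12, 13}; // current pipeline's operator list
        int idx = 2;                                   // split point chosen by the planner
        std::vector<int> new_pip_ops;                  // operator list of the new upstream pipeline

        // 3.1 the new pipeline takes the operators in front of the split point
        std::copy(operators.begin(), operators.begin() + idx, std::back_inserter(new_pip_ops));
        // 3.2 erase them from the current pipeline
        operators.erase(operators.begin(), operators.begin() + idx);
        // 4. prepend the local exchange source to what is left in the current pipeline
        operators.insert(operators.begin(), 99);

        assert((new_pip_ops == std::vector<int>{10, 11}));
        assert((operators == std::vector<int>{99, 12, 13}));
        return 0;
    }

The set_child(new_pip->operators().back()) call in the hunk then wires the new local exchange source to the last operator that moved into the upstream pipeline.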
diff --git a/be/src/pipeline/pipeline_fragment_context.h b/be/src/pipeline/pipeline_fragment_context.h index 75f3f22c68131c..f46835e95e0647 100644 --- a/be/src/pipeline/pipeline_fragment_context.h +++ b/be/src/pipeline/pipeline_fragment_context.h @@ -69,8 +69,8 @@ class PipelineFragmentContext : public TaskExecutionContext { ~PipelineFragmentContext(); - std::vector> collect_realtime_profile_x() const; - std::shared_ptr collect_realtime_load_channel_profile_x() const; + std::vector> collect_realtime_profile() const; + std::shared_ptr collect_realtime_load_channel_profile() const; bool is_timeout(timespec now) const; diff --git a/be/src/runtime/memory/cache_manager.cpp b/be/src/runtime/memory/cache_manager.cpp index a6516c40a35770..ec57ffba50d318 100644 --- a/be/src/runtime/memory/cache_manager.cpp +++ b/be/src/runtime/memory/cache_manager.cpp @@ -59,11 +59,26 @@ int64_t CacheManager::for_each_cache_prune_all(RuntimeProfile* profile, bool for int64_t CacheManager::cache_prune_all(CachePolicy::CacheType type, bool force) { std::lock_guard l(_caches_lock); auto* cache_policy = _caches[type]; - if (!cache_policy->enable_prune()) { - return -1; - } cache_policy->prune_all(force); return cache_policy->profile()->get_counter("FreedMemory")->value(); } +int64_t CacheManager::for_each_cache_refresh_capacity(double adjust_weighted, + RuntimeProfile* profile) { + int64_t freed_size = 0; + std::lock_guard l(_caches_lock); + for (const auto& pair : _caches) { + auto* cache_policy = pair.second; + if (!cache_policy->enable_prune()) { + continue; + } + cache_policy->adjust_capacity_weighted(adjust_weighted); + freed_size += cache_policy->profile()->get_counter("FreedMemory")->value(); + if (cache_policy->profile()->get_counter("FreedMemory")->value() != 0 && profile) { + profile->add_child(cache_policy->profile(), true, nullptr); + } + } + return freed_size; +} + } // namespace doris diff --git a/be/src/runtime/memory/cache_manager.h b/be/src/runtime/memory/cache_manager.h index d94dca501670bf..a2a089b929dbdf 100644 --- a/be/src/runtime/memory/cache_manager.h +++ b/be/src/runtime/memory/cache_manager.h @@ -81,6 +81,9 @@ class CacheManager { return false; } + int64_t for_each_cache_refresh_capacity(double adjust_weighted, + RuntimeProfile* profile = nullptr); + private: std::mutex _caches_lock; std::unordered_map _caches; diff --git a/be/src/runtime/memory/cache_policy.cpp b/be/src/runtime/memory/cache_policy.cpp index 4e50d64d88eed1..46b9db1b35ad5f 100644 --- a/be/src/runtime/memory/cache_policy.cpp +++ b/be/src/runtime/memory/cache_policy.cpp @@ -21,8 +21,12 @@ namespace doris { -CachePolicy::CachePolicy(CacheType type, uint32_t stale_sweep_time_s, bool enable_prune) - : _type(type), _stale_sweep_time_s(stale_sweep_time_s), _enable_prune(enable_prune) { +CachePolicy::CachePolicy(CacheType type, size_t capacity, uint32_t stale_sweep_time_s, + bool enable_prune) + : _type(type), + _initial_capacity(capacity), + _stale_sweep_time_s(stale_sweep_time_s), + _enable_prune(enable_prune) { CacheManager::instance()->register_cache(this); init_profile(); } diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index c457afd86898f2..c43ca0b2fb7e0a 100644 --- a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -17,13 +17,12 @@ #pragma once -#include "runtime/exec_env.h" #include "util/runtime_profile.h" namespace doris { -static constexpr int32_t CACHE_MIN_FREE_SIZE = 67108864; // 64M -static constexpr int32_t CACHE_MIN_FREE_NUMBER = 1024; +static 
constexpr int32_t CACHE_MIN_PRUNE_SIZE = 67108864; // 64M +static constexpr int32_t CACHE_MIN_PRUNE_NUMBER = 1024; // Base of all caches. register to CacheManager when cache is constructed. class CachePolicy { @@ -42,12 +41,13 @@ class CachePolicy { TABLET_VERSION_CACHE = 10, LAST_SUCCESS_CHANNEL_CACHE = 11, COMMON_OBJ_LRU_CACHE = 12, - FOR_UT = 13, + FOR_UT_CACHE_SIZE = 13, TABLET_SCHEMA_CACHE = 14, CREATE_TABLET_RR_IDX_CACHE = 15, CLOUD_TABLET_CACHE = 16, CLOUD_TXN_DELETE_BITMAP_CACHE = 17, NONE = 18, // not be used + FOR_UT_CACHE_NUMBER = 19, }; static std::string type_string(CacheType type) { @@ -78,8 +78,8 @@ class CachePolicy { return "LastSuccessChannelCache"; case CacheType::COMMON_OBJ_LRU_CACHE: return "CommonObjLRUCache"; - case CacheType::FOR_UT: - return "ForUT"; + case CacheType::FOR_UT_CACHE_SIZE: + return "ForUTCacheSize"; case CacheType::TABLET_SCHEMA_CACHE: return "TabletSchemaCache"; case CacheType::CREATE_TABLET_RR_IDX_CACHE: @@ -88,6 +88,8 @@ class CachePolicy { return "CloudTabletCache"; case CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE: return "CloudTxnDeleteBitmapCache"; + case CacheType::FOR_UT_CACHE_NUMBER: + return "ForUTCacheNumber"; default: LOG(FATAL) << "not match type of cache policy :" << static_cast(type); } @@ -109,11 +111,12 @@ class CachePolicy { {"MowTabletVersionCache", CacheType::TABLET_VERSION_CACHE}, {"LastSuccessChannelCache", CacheType::LAST_SUCCESS_CHANNEL_CACHE}, {"CommonObjLRUCache", CacheType::COMMON_OBJ_LRU_CACHE}, - {"ForUT", CacheType::FOR_UT}, + {"ForUTCacheSize", CacheType::FOR_UT_CACHE_SIZE}, {"TabletSchemaCache", CacheType::TABLET_SCHEMA_CACHE}, {"CreateTabletRRIdxCache", CacheType::CREATE_TABLET_RR_IDX_CACHE}, {"CloudTabletCache", CacheType::CLOUD_TABLET_CACHE}, - {"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE}}; + {"CloudTxnDeleteBitmapCache", CacheType::CLOUD_TXN_DELETE_BITMAP_CACHE}, + {"ForUTCacheNumber", CacheType::FOR_UT_CACHE_NUMBER}}; static CacheType string_to_type(std::string type) { if (StringToType.contains(type)) { @@ -123,13 +126,16 @@ class CachePolicy { } } - CachePolicy(CacheType type, uint32_t stale_sweep_time_s, bool enable_prune); + CachePolicy(CacheType type, size_t capacity, uint32_t stale_sweep_time_s, bool enable_prune); virtual ~CachePolicy(); virtual void prune_stale() = 0; virtual void prune_all(bool force) = 0; + virtual int64_t adjust_capacity_weighted(double adjust_weighted) = 0; + virtual size_t get_capacity() = 0; CacheType type() { return _type; } + size_t initial_capacity() const { return _initial_capacity; } bool enable_prune() const { return _enable_prune; } RuntimeProfile* profile() { return _profile.get(); } @@ -139,16 +145,20 @@ class CachePolicy { std::make_unique(fmt::format("Cache type={}", type_string(_type))); _prune_stale_number_counter = ADD_COUNTER(_profile, "PruneStaleNumber", TUnit::UNIT); _prune_all_number_counter = ADD_COUNTER(_profile, "PruneAllNumber", TUnit::UNIT); + _adjust_capacity_weighted_number_counter = + ADD_COUNTER(_profile, "SetCapacityNumber", TUnit::UNIT); _freed_memory_counter = ADD_COUNTER(_profile, "FreedMemory", TUnit::BYTES); _freed_entrys_counter = ADD_COUNTER(_profile, "FreedEntrys", TUnit::UNIT); _cost_timer = ADD_TIMER(_profile, "CostTime"); } CacheType _type; + size_t _initial_capacity {0}; std::unique_ptr _profile; RuntimeProfile::Counter* _prune_stale_number_counter = nullptr; RuntimeProfile::Counter* _prune_all_number_counter = nullptr; + RuntimeProfile::Counter* _adjust_capacity_weighted_number_counter = nullptr; // Reset before each gc 
RuntimeProfile::Counter* _freed_memory_counter = nullptr; RuntimeProfile::Counter* _freed_entrys_counter = nullptr; diff --git a/be/src/runtime/memory/global_memory_arbitrator.cpp b/be/src/runtime/memory/global_memory_arbitrator.cpp index 344bcbc59846d9..82b69ca02ef9f3 100644 --- a/be/src/runtime/memory/global_memory_arbitrator.cpp +++ b/be/src/runtime/memory/global_memory_arbitrator.cpp @@ -38,6 +38,13 @@ bvar::PassiveStatus g_sys_mem_avail( std::atomic GlobalMemoryArbitrator::_s_process_reserved_memory = 0; std::atomic GlobalMemoryArbitrator::refresh_interval_memory_growth = 0; +std::mutex GlobalMemoryArbitrator::cache_adjust_capacity_lock; +std::condition_variable GlobalMemoryArbitrator::cache_adjust_capacity_cv; +std::atomic GlobalMemoryArbitrator::cache_adjust_capacity_notify {false}; +std::atomic GlobalMemoryArbitrator::last_cache_capacity_adjust_weighted {1}; +std::mutex GlobalMemoryArbitrator::memtable_memory_refresh_lock; +std::condition_variable GlobalMemoryArbitrator::memtable_memory_refresh_cv; +std::atomic GlobalMemoryArbitrator::memtable_memory_refresh_notify {false}; bool GlobalMemoryArbitrator::try_reserve_process_memory(int64_t bytes) { if (sys_mem_available() - bytes < MemInfo::sys_mem_available_warning_water_mark()) { @@ -79,4 +86,12 @@ void GlobalMemoryArbitrator::release_process_reserved_memory(int64_t bytes) { } } +int64_t GlobalMemoryArbitrator::sub_thread_reserve_memory(int64_t bytes) { + doris::ThreadContext* thread_context = doris::thread_context(true); + if (thread_context) { + return bytes - doris::thread_context()->thread_mem_tracker_mgr->reserved_mem(); + } + return bytes; +} + } // namespace doris diff --git a/be/src/runtime/memory/global_memory_arbitrator.h b/be/src/runtime/memory/global_memory_arbitrator.h index f8fda18d0e9a0c..f804452956786d 100644 --- a/be/src/runtime/memory/global_memory_arbitrator.h +++ b/be/src/runtime/memory/global_memory_arbitrator.h @@ -124,12 +124,27 @@ class GlobalMemoryArbitrator { return _s_process_reserved_memory.load(std::memory_order_relaxed); } + // `process_memory_usage` includes all reserved memory. if a thread has `reserved_memory`, + // and the memory allocated by thread is less than the thread `reserved_memory`, + // even if `process_memory_usage` is greater than `process_mem_limit`, memory can still be allocated. + // At this time, `process_memory_usage` will not increase, process physical memory will increase, + // and `reserved_memory` will be reduced. + static int64_t sub_thread_reserve_memory(int64_t bytes); + static bool is_exceed_soft_mem_limit(int64_t bytes = 0) { + bytes = sub_thread_reserve_memory(bytes); + if (bytes <= 0) { + return false; + } return process_memory_usage() + bytes >= MemInfo::soft_mem_limit() || sys_mem_available() - bytes < MemInfo::sys_mem_available_warning_water_mark(); } static bool is_exceed_hard_mem_limit(int64_t bytes = 0) { + bytes = sub_thread_reserve_memory(bytes); + if (bytes <= 0) { + return false; + } // Limit process memory usage using the actual physical memory of the process in `/proc/self/status`. // This is independent of the consumption value of the mem tracker, which counts the virtual memory // of the process malloc. @@ -173,6 +188,23 @@ class GlobalMemoryArbitrator { // avoid multiple threads starting at the same time and causing OOM. 
static std::atomic refresh_interval_memory_growth; + static std::mutex cache_adjust_capacity_lock; + static std::condition_variable cache_adjust_capacity_cv; + static std::atomic cache_adjust_capacity_notify; + static std::atomic last_cache_capacity_adjust_weighted; + static void notify_cache_adjust_capacity() { + cache_adjust_capacity_notify.store(true, std::memory_order_relaxed); + cache_adjust_capacity_cv.notify_all(); + } + + static std::mutex memtable_memory_refresh_lock; + static std::condition_variable memtable_memory_refresh_cv; + static std::atomic memtable_memory_refresh_notify; + static void notify_memtable_memory_refresh() { + memtable_memory_refresh_notify.store(true, std::memory_order_relaxed); + memtable_memory_refresh_cv.notify_all(); + } + private: static std::atomic _s_process_reserved_memory; diff --git a/be/src/runtime/memory/lru_cache_policy.h b/be/src/runtime/memory/lru_cache_policy.h index 1b6c9ead6d0086..419825c85c4538 100644 --- a/be/src/runtime/memory/lru_cache_policy.h +++ b/be/src/runtime/memory/lru_cache_policy.h @@ -37,7 +37,8 @@ class LRUCachePolicy : public CachePolicy { uint32_t stale_sweep_time_s, uint32_t num_shards = DEFAULT_LRU_CACHE_NUM_SHARDS, uint32_t element_count_capacity = DEFAULT_LRU_CACHE_ELEMENT_COUNT_CAPACITY, bool enable_prune = true) - : CachePolicy(type, stale_sweep_time_s, enable_prune), _lru_cache_type(lru_cache_type) { + : CachePolicy(type, capacity, stale_sweep_time_s, enable_prune), + _lru_cache_type(lru_cache_type) { if (check_capacity(capacity, num_shards)) { _cache = std::shared_ptr( new ShardedLRUCache(type_string(type), capacity, lru_cache_type, num_shards, @@ -53,7 +54,8 @@ class LRUCachePolicy : public CachePolicy { uint32_t element_count_capacity, CacheValueTimeExtractor cache_value_time_extractor, bool cache_value_check_timestamp, bool enable_prune = true) - : CachePolicy(type, stale_sweep_time_s, enable_prune), _lru_cache_type(lru_cache_type) { + : CachePolicy(type, capacity, stale_sweep_time_s, enable_prune), + _lru_cache_type(lru_cache_type) { if (check_capacity(capacity, num_shards)) { _cache = std::shared_ptr( new ShardedLRUCache(type_string(type), capacity, lru_cache_type, num_shards, @@ -106,18 +108,19 @@ class LRUCachePolicy : public CachePolicy { int64_t get_usage() { return _cache->get_usage(); } - size_t get_total_capacity() { return _cache->get_total_capacity(); } + size_t get_capacity() override { return _cache->get_capacity(); } uint64_t new_id() { return _cache->new_id(); }; // Subclass can override this method to determine whether to do the minor or full gc virtual bool exceed_prune_limit() { - return _lru_cache_type == LRUCacheType::SIZE ? mem_consumption() > CACHE_MIN_FREE_SIZE - : get_usage() > CACHE_MIN_FREE_NUMBER; + return _lru_cache_type == LRUCacheType::SIZE ? mem_consumption() > CACHE_MIN_PRUNE_SIZE + : get_usage() > CACHE_MIN_PRUNE_NUMBER; } // Try to prune the cache if expired. 
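// prune_stale() only does work once the cache is past its prune threshold
// (mem_consumption() > CACHE_MIN_PRUNE_SIZE for LRUCacheType::SIZE caches,
// get_usage() > CACHE_MIN_PRUNE_NUMBER for LRUCacheType::NUMBER caches); it then
// evicts entries whose last_visit_time is older than _stale_sweep_time_s seconds,
// in lazy mode so the cache's write lock is held as briefly as possible. With
// this change it also takes _lock, serializing it against prune_all() and
// adjust_capacity_weighted().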
void prune_stale() override { + std::lock_guard l(_lock); COUNTER_SET(_freed_entrys_counter, (int64_t)0); COUNTER_SET(_freed_memory_counter, (int64_t)0); if (_stale_sweep_time_s <= 0 && _cache == ExecEnv::GetInstance()->get_dummy_lru_cache()) { @@ -125,7 +128,6 @@ class LRUCachePolicy : public CachePolicy { } if (exceed_prune_limit()) { COUNTER_SET(_cost_timer, (int64_t)0); - SCOPED_TIMER(_cost_timer); const int64_t curtime = UnixMillis(); auto pred = [this, curtime](const LRUHandle* handle) -> bool { return static_cast((handle->last_visit_time + _stale_sweep_time_s * 1000) < @@ -134,33 +136,38 @@ class LRUCachePolicy : public CachePolicy { LOG(INFO) << fmt::format("[MemoryGC] {} prune stale start, consumption {}, usage {}", type_string(_type), mem_consumption(), get_usage()); - // Prune cache in lazy mode to save cpu and minimize the time holding write lock - PrunedInfo pruned_info = _cache->prune_if(pred, true); - COUNTER_SET(_freed_entrys_counter, pruned_info.pruned_count); - COUNTER_SET(_freed_memory_counter, pruned_info.pruned_size); + { + SCOPED_TIMER(_cost_timer); + // Prune cache in lazy mode to save cpu and minimize the time holding write lock + PrunedInfo pruned_info = _cache->prune_if(pred, true); + COUNTER_SET(_freed_entrys_counter, pruned_info.pruned_count); + COUNTER_SET(_freed_memory_counter, pruned_info.pruned_size); + } COUNTER_UPDATE(_prune_stale_number_counter, 1); LOG(INFO) << fmt::format( - "[MemoryGC] {} prune stale {} entries, {} bytes, {} times prune", + "[MemoryGC] {} prune stale {} entries, {} bytes, cost {}, {} times prune", type_string(_type), _freed_entrys_counter->value(), - _freed_memory_counter->value(), _prune_stale_number_counter->value()); + _freed_memory_counter->value(), _cost_timer->value(), + _prune_stale_number_counter->value()); } else { if (_lru_cache_type == LRUCacheType::SIZE) { LOG(INFO) << fmt::format( "[MemoryGC] {} not need prune stale, LRUCacheType::SIZE consumption {} " "less " - "than CACHE_MIN_FREE_SIZE {}", - type_string(_type), mem_consumption(), CACHE_MIN_FREE_SIZE); + "than CACHE_MIN_PRUNE_SIZE {}", + type_string(_type), mem_consumption(), CACHE_MIN_PRUNE_SIZE); } else if (_lru_cache_type == LRUCacheType::NUMBER) { LOG(INFO) << fmt::format( "[MemoryGC] {} not need prune stale, LRUCacheType::NUMBER usage {} less " "than " - "CACHE_MIN_FREE_NUMBER {}", - type_string(_type), get_usage(), CACHE_MIN_FREE_NUMBER); + "CACHE_MIN_PRUNE_NUMBER {}", + type_string(_type), get_usage(), CACHE_MIN_PRUNE_NUMBER); } } } void prune_all(bool force) override { + std::lock_guard l(_lock); COUNTER_SET(_freed_entrys_counter, (int64_t)0); COUNTER_SET(_freed_memory_counter, (int64_t)0); if (_cache == ExecEnv::GetInstance()->get_dummy_lru_cache()) { @@ -168,37 +175,73 @@ class LRUCachePolicy : public CachePolicy { } if ((force && mem_consumption() != 0) || exceed_prune_limit()) { COUNTER_SET(_cost_timer, (int64_t)0); - SCOPED_TIMER(_cost_timer); LOG(INFO) << fmt::format("[MemoryGC] {} prune all start, consumption {}, usage {}", type_string(_type), mem_consumption(), get_usage()); - PrunedInfo pruned_info = _cache->prune(); - COUNTER_SET(_freed_entrys_counter, pruned_info.pruned_count); - COUNTER_SET(_freed_memory_counter, pruned_info.pruned_size); + { + SCOPED_TIMER(_cost_timer); + PrunedInfo pruned_info = _cache->prune(); + COUNTER_SET(_freed_entrys_counter, pruned_info.pruned_count); + COUNTER_SET(_freed_memory_counter, pruned_info.pruned_size); + } COUNTER_UPDATE(_prune_all_number_counter, 1); LOG(INFO) << fmt::format( - "[MemoryGC] {} prune all {} entries, 
{} bytes, {} times prune, is force: {}", + "[MemoryGC] {} prune all {} entries, {} bytes, cost {}, {} times prune, is " + "force: {}", type_string(_type), _freed_entrys_counter->value(), - _freed_memory_counter->value(), _prune_all_number_counter->value(), force); + _freed_memory_counter->value(), _cost_timer->value(), + _prune_all_number_counter->value(), force); } else { if (_lru_cache_type == LRUCacheType::SIZE) { LOG(INFO) << fmt::format( "[MemoryGC] {} not need prune all, force is {}, LRUCacheType::SIZE " "consumption {}, " - "CACHE_MIN_FREE_SIZE {}", - type_string(_type), force, mem_consumption(), CACHE_MIN_FREE_SIZE); + "CACHE_MIN_PRUNE_SIZE {}", + type_string(_type), force, mem_consumption(), CACHE_MIN_PRUNE_SIZE); } else if (_lru_cache_type == LRUCacheType::NUMBER) { LOG(INFO) << fmt::format( "[MemoryGC] {} not need prune all, force is {}, LRUCacheType::NUMBER " - "usage {}, CACHE_MIN_FREE_NUMBER {}", - type_string(_type), force, get_usage(), CACHE_MIN_FREE_NUMBER); + "usage {}, CACHE_MIN_PRUNE_NUMBER {}", + type_string(_type), force, get_usage(), CACHE_MIN_PRUNE_NUMBER); } } } + int64_t adjust_capacity_weighted(double adjust_weighted) override { + std::lock_guard l(_lock); + auto capacity = static_cast(_initial_capacity * adjust_weighted); + COUNTER_SET(_freed_entrys_counter, (int64_t)0); + COUNTER_SET(_freed_memory_counter, (int64_t)0); + COUNTER_SET(_cost_timer, (int64_t)0); + if (_cache == ExecEnv::GetInstance()->get_dummy_lru_cache()) { + return 0; + } + + size_t old_capacity = get_capacity(); + int64_t old_mem_consumption = mem_consumption(); + int64_t old_usage = get_usage(); + { + SCOPED_TIMER(_cost_timer); + PrunedInfo pruned_info = _cache->set_capacity(capacity); + COUNTER_SET(_freed_entrys_counter, pruned_info.pruned_count); + COUNTER_SET(_freed_memory_counter, pruned_info.pruned_size); + } + COUNTER_UPDATE(_adjust_capacity_weighted_number_counter, 1); + LOG(INFO) << fmt::format( + "[MemoryGC] {} update capacity, old , " + "adjust_weighted {}, new , prune {} " + "entries, {} bytes, cost {}, {} times prune", + type_string(_type), old_capacity, old_mem_consumption, old_usage, adjust_weighted, + get_capacity(), mem_consumption(), get_usage(), _freed_entrys_counter->value(), + _freed_memory_counter->value(), _cost_timer->value(), + _adjust_capacity_weighted_number_counter->value()); + return _freed_entrys_counter->value(); + } + protected: // if check_capacity failed, will return dummy lru cache, // compatible with ShardedLRUCache usage, but will not actually cache. 
std::shared_ptr _cache; + std::mutex _lock; LRUCacheType _lru_cache_type; }; diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index a8aa44414ebf87..59546b11d51a8a 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -739,10 +739,10 @@ int64_t MemTrackerLimiter::free_top_overcommit_query( LOG(INFO) << log_prefix << "finished, no task need be canceled."; return 0; } - if (query_consumption.size() == 1) { + if (small_num == 0 && canceling_task.empty() && query_consumption.size() == 1) { auto iter = query_consumption.begin(); - LOG(INFO) << log_prefix << "finished, only one task: " << iter->first - << ", memory consumption: " << iter->second << ", no cancel."; + LOG(INFO) << log_prefix << "finished, only one overcommit task: " << iter->first + << ", memory consumption: " << iter->second << ", no other tasks, so no cancel."; return 0; } diff --git a/be/src/runtime/memory/memory_reclamation.cpp b/be/src/runtime/memory/memory_reclamation.cpp index 3adf1d1ac75718..17f5a41f462b50 100644 --- a/be/src/runtime/memory/memory_reclamation.cpp +++ b/be/src/runtime/memory/memory_reclamation.cpp @@ -37,7 +37,6 @@ bool MemoryReclamation::process_minor_gc(std::string mem_info) { std::unique_ptr profile = std::make_unique(""); Defer defer {[&]() { - MemInfo::notify_je_purge_dirty_pages(); std::stringstream ss; profile->pretty_print(&ss); LOG(INFO) << fmt::format( @@ -46,11 +45,6 @@ bool MemoryReclamation::process_minor_gc(std::string mem_info) { ss.str()); }}; - freed_mem += CacheManager::instance()->for_each_cache_prune_stale(profile.get()); - if (freed_mem > MemInfo::process_minor_gc_size()) { - return true; - } - if (config::enable_workload_group_memory_gc) { RuntimeProfile* tg_profile = profile->create_child("WorkloadGroup", true, true); freed_mem += tg_enable_overcommit_group_gc(MemInfo::process_minor_gc_size() - freed_mem, @@ -87,7 +81,6 @@ bool MemoryReclamation::process_full_gc(std::string mem_info) { std::unique_ptr profile = std::make_unique(""); Defer defer {[&]() { - MemInfo::notify_je_purge_dirty_pages(); std::stringstream ss; profile->pretty_print(&ss); LOG(INFO) << fmt::format( @@ -96,11 +89,6 @@ bool MemoryReclamation::process_full_gc(std::string mem_info) { ss.str()); }}; - freed_mem += CacheManager::instance()->for_each_cache_prune_all(profile.get()); - if (freed_mem > MemInfo::process_full_gc_size()) { - return true; - } - if (config::enable_workload_group_memory_gc) { RuntimeProfile* tg_profile = profile->create_child("WorkloadGroup", true, true); freed_mem += tg_enable_overcommit_group_gc(MemInfo::process_full_gc_size() - freed_mem, diff --git a/be/src/runtime/query_context.cpp b/be/src/runtime/query_context.cpp index 97aba2cae286c8..b9430d3899b8d3 100644 --- a/be/src/runtime/query_context.cpp +++ b/be/src/runtime/query_context.cpp @@ -401,7 +401,7 @@ QueryContext::_collect_realtime_query_profile() const { continue; } - auto profile = fragment_ctx->collect_realtime_profile_x(); + auto profile = fragment_ctx->collect_realtime_profile(); if (profile.empty()) { std::string err_msg = fmt::format( diff --git a/be/src/service/http_service.cpp b/be/src/service/http_service.cpp index 9f98a86bda4c98..f2c325bebc7806 100644 --- a/be/src/service/http_service.cpp +++ b/be/src/service/http_service.cpp @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -37,6 +38,7 @@ #include "http/action/checksum_action.h" #include "http/action/clear_cache_action.h" #include 
"http/action/compaction_action.h" +#include "http/action/compaction_score_action.h" #include "http/action/config_action.h" #include "http/action/debug_point_action.h" #include "http/action/download_action.h" @@ -381,6 +383,11 @@ void HttpService::register_local_handler(StorageEngine& engine) { new ShowNestedIndexFileAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/show_nested_index_file", show_nested_index_file_action); + + CompactionScoreAction* compaction_score_action = _pool.add(new CompactionScoreAction( + _env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN, engine.tablet_manager())); + _ev_http_server->register_handler(HttpMethod::GET, "/api/compaction_score", + compaction_score_action); } void HttpService::register_cloud_handler(CloudStorageEngine& engine) { @@ -417,6 +424,10 @@ void HttpService::register_cloud_handler(CloudStorageEngine& engine) { new ShowNestedIndexFileAction(_env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN)); _ev_http_server->register_handler(HttpMethod::GET, "/api/show_nested_index_file", show_nested_index_file_action); + CompactionScoreAction* compaction_score_action = _pool.add(new CompactionScoreAction( + _env, TPrivilegeHier::GLOBAL, TPrivilegeType::ADMIN, engine.tablet_mgr())); + _ev_http_server->register_handler(HttpMethod::GET, "/api/compaction_score", + compaction_score_action); } // NOLINTEND(readability-function-size) diff --git a/be/src/service/point_query_executor.cpp b/be/src/service/point_query_executor.cpp index 8058e1f1be6302..0a27c415a48c0a 100644 --- a/be/src/service/point_query_executor.cpp +++ b/be/src/service/point_query_executor.cpp @@ -234,6 +234,12 @@ void RowCache::erase(const RowCacheKey& key) { LRUCachePolicy::erase(encoded_key); } +LookupConnectionCache::CacheValue::~CacheValue() { + SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER( + ExecEnv::GetInstance()->point_query_executor_mem_tracker()); + item.reset(); +} + PointQueryExecutor::~PointQueryExecutor() { SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER( ExecEnv::GetInstance()->point_query_executor_mem_tracker()); diff --git a/be/src/service/point_query_executor.h b/be/src/service/point_query_executor.h index 19954479c97ec7..6c6fb28f95a378 100644 --- a/be/src/service/point_query_executor.h +++ b/be/src/service/point_query_executor.h @@ -246,8 +246,8 @@ class LookupConnectionCache : public LRUCachePolicyTrackingManual { auto* value = new CacheValue; value->item = item; LOG(INFO) << "Add item mem" - << ", cache_capacity: " << get_total_capacity() - << ", cache_usage: " << get_usage() << ", mem_consum: " << mem_consumption(); + << ", cache_capacity: " << get_capacity() << ", cache_usage: " << get_usage() + << ", mem_consum: " << mem_consumption(); auto* lru_handle = insert(key, value, 1, sizeof(Reusable), CachePriority::NORMAL); release(lru_handle); } @@ -265,6 +265,7 @@ class LookupConnectionCache : public LRUCachePolicyTrackingManual { class CacheValue : public LRUCacheValueBase { public: + ~CacheValue() override; std::shared_ptr item; }; }; diff --git a/be/src/util/simd/vstring_function.h b/be/src/util/simd/vstring_function.h index 579da50d2df230..99313132382e5c 100644 --- a/be/src/util/simd/vstring_function.h +++ b/be/src/util/simd/vstring_function.h @@ -187,6 +187,23 @@ class VStringFunctions { return p; } + // Iterate a UTF-8 string without exceeding a given length n. + // The function returns two values: + // the first represents the byte length traversed, and the second represents the char length traversed. 
+ static inline std::pair iterate_utf8_with_limit_length(const char* begin, + const char* end, + size_t n) { + const char* p = begin; + int char_size = 0; + + size_t i = 0; + for (; i < n && p < end; ++i, p += char_size) { + char_size = UTF8_BYTE_LENGTH[static_cast(*p)]; + } + + return {p - begin, i}; + } + // Gcc will do auto simd in this function static bool is_ascii(const StringRef& str) { #ifdef __AVX2__ diff --git a/be/src/vec/common/allocator.cpp b/be/src/vec/common/allocator.cpp index dff1330888f82d..2619c0bafffb16 100644 --- a/be/src/vec/common/allocator.cpp +++ b/be/src/vec/common/allocator.cpp @@ -106,9 +106,6 @@ void Allocator::sys_mem return; } - // no significant impact on performance is expected. - doris::MemInfo::notify_je_purge_dirty_pages(); - if (doris::thread_context()->thread_mem_tracker_mgr->is_attach_query() && doris::thread_context()->thread_mem_tracker_mgr->wait_gc()) { int64_t wait_milliseconds = 0; diff --git a/be/src/vec/common/sort/sorter.h b/be/src/vec/common/sort/sorter.h index aa7d88dfbc2a3a..a290de65bb6534 100644 --- a/be/src/vec/common/sort/sorter.h +++ b/be/src/vec/common/sort/sorter.h @@ -177,8 +177,8 @@ class FullSorter final : public Sorter { private: bool _reach_limit() { - return _state->unsorted_block_->rows() > buffered_block_size_ || - _state->unsorted_block_->bytes() > buffered_block_bytes_; + return _enable_spill && (_state->unsorted_block_->rows() > buffered_block_size_ || + _state->unsorted_block_->bytes() > buffered_block_bytes_); } Status _do_sort(); diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp index 5cc20c214c103b..cbb3d0f572365b 100644 --- a/be/src/vec/exec/scan/scanner_context.cpp +++ b/be/src/vec/exec/scan/scanner_context.cpp @@ -152,7 +152,7 @@ Status ScannerContext::init() { for (int i = 0; i < _max_thread_num; ++i) { std::weak_ptr next_scanner; if (_scanners.try_dequeue(next_scanner)) { - submit_scan_task(std::make_shared(next_scanner)); + RETURN_IF_ERROR(submit_scan_task(std::make_shared(next_scanner))); _num_running_scanners++; } } @@ -196,10 +196,10 @@ bool ScannerContext::empty_in_queue(int id) { return _blocks_queue.empty(); } -void ScannerContext::submit_scan_task(std::shared_ptr scan_task) { +Status ScannerContext::submit_scan_task(std::shared_ptr scan_task) { _scanner_sched_counter->update(1); _num_scheduled_scanners++; - _scanner_scheduler->submit(shared_from_this(), scan_task); + return _scanner_scheduler->submit(shared_from_this(), scan_task); } void ScannerContext::append_block_to_queue(std::shared_ptr scan_task) { @@ -247,10 +247,15 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo auto scan_task = _blocks_queue.front(); DCHECK(scan_task); + // The abnormal status of scanner may come from the execution of the scanner itself, + // or come from the scanner scheduler, such as TooManyTasks. if (!scan_task->status_ok()) { + // TODO: If the scanner status is TooManyTasks, maybe we can retry the scanner after a while. + _process_status = scan_task->get_status(); _set_scanner_done(); - return scan_task->get_status(); + return _process_status; } + if (!scan_task->cached_blocks.empty()) { auto [current_block, block_size] = std::move(scan_task->cached_blocks.front()); scan_task->cached_blocks.pop_front(); @@ -263,13 +268,20 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo block->swap(*current_block); return_free_block(std::move(current_block)); } else { + // This scan task do not have any cached blocks. 
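The `iterate_utf8_with_limit_length` helper added to `vstring_function.h` above walks at most `n` UTF-8 characters without reading past `end` and reports both the byte and the character distance covered, which is what the reworked pad code uses to truncate by character count. A hedged standalone sketch of the same idea follows; `utf8_lead_byte_len` is a local stand-in for Doris' `UTF8_BYTE_LENGTH` table.

```
// Standalone sketch: walk a UTF-8 string by at most n characters.
#include <cstddef>
#include <cstdio>
#include <string>
#include <utility>

// Local stand-in for the UTF8_BYTE_LENGTH lookup table.
static size_t utf8_lead_byte_len(unsigned char b) {
    if (b < 0x80) return 1;         // ASCII
    if ((b >> 5) == 0x6) return 2;  // 110xxxxx
    if ((b >> 4) == 0xE) return 3;  // 1110xxxx
    if ((b >> 3) == 0x1E) return 4; // 11110xxx
    return 1;                       // invalid lead byte: treat as one byte
}

// Returns {bytes traversed, characters traversed}, stopping at either limit.
static std::pair<size_t, size_t> iterate_utf8_with_limit(const char* begin, const char* end,
                                                         size_t n) {
    const char* p = begin;
    size_t chars = 0;
    while (chars < n && p < end) {
        p += utf8_lead_byte_len(static_cast<unsigned char>(*p));
        ++chars;
    }
    return {static_cast<size_t>(p - begin), chars};
}

int main() {
    std::string s = "héllo"; // 'é' is 2 bytes, so 6 bytes / 5 characters
    auto [bytes, chars] = iterate_utf8_with_limit(s.data(), s.data() + s.size(), 3);
    // Truncate to the first 3 characters without splitting a multi-byte sequence.
    printf("bytes=%zu chars=%zu prefix=%s\n", bytes, chars, s.substr(0, bytes).c_str());
    return 0;
}
```

If the traversed character count comes back equal to `n`, the input already has at least `n` characters and the byte count gives a safe truncation point, which is exactly how the pad rewrite uses it.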
_blocks_queue.pop_front(); - if (scan_task->is_eos()) { // current scanner is finished, and no more data to read + // current scanner is finished, and no more data to read + if (scan_task->is_eos()) { _num_finished_scanners++; std::weak_ptr next_scanner; // submit one of the remaining scanners if (_scanners.try_dequeue(next_scanner)) { - submit_scan_task(std::make_shared(next_scanner)); + auto submit_status = submit_scan_task(std::make_shared(next_scanner)); + if (!submit_status.ok()) { + _process_status = submit_status; + _set_scanner_done(); + return _process_status; + } } else { // no more scanner to be scheduled // `_free_blocks` serve all running scanners, maybe it's too large for the remaining scanners @@ -284,11 +296,16 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo } } else { // resubmit current running scanner to read the next block - submit_scan_task(scan_task); + Status submit_status = submit_scan_task(scan_task); + if (!submit_status.ok()) { + _process_status = submit_status; + _set_scanner_done(); + return _process_status; + } } } // scale up - _try_to_scale_up(); + RETURN_IF_ERROR(_try_to_scale_up()); } if (_num_finished_scanners == _all_scanners.size() && _blocks_queue.empty()) { @@ -303,7 +320,7 @@ Status ScannerContext::get_block_from_queue(RuntimeState* state, vectorized::Blo return Status::OK(); } -void ScannerContext::_try_to_scale_up() { +Status ScannerContext::_try_to_scale_up() { // Four criteria to determine whether to increase the parallelism of the scanners // 1. It ran for at least `SCALE_UP_DURATION` ms after last scale up // 2. Half(`WAIT_BLOCK_DURATION_RATIO`) of the duration is waiting to get blocks @@ -320,7 +337,7 @@ void ScannerContext::_try_to_scale_up() { // when _last_wait_duration_ratio > 0, it has scaled up before. // we need to determine if the scale-up is effective: // the wait duration ratio after last scaling up should less than 80% of `_last_wait_duration_ratio` - return; + return Status::OK(); } bool is_scale_up = false; @@ -335,7 +352,10 @@ void ScannerContext::_try_to_scale_up() { // get enough memory to launch one more scanner. std::weak_ptr scale_up_scanner; if (_scanners.try_dequeue(scale_up_scanner)) { - submit_scan_task(std::make_shared(scale_up_scanner)); + // Just return error to caller. + // Because _try_to_scale_up is called under _transfer_lock locked, if we add the scanner + // to the block queue, we will get a deadlock. 
+ RETURN_IF_ERROR(submit_scan_task(std::make_shared(scale_up_scanner))); _num_running_scanners++; _scale_up_scanners_counter->update(1); is_scale_up = true; @@ -350,6 +370,8 @@ void ScannerContext::_try_to_scale_up() { _total_wait_block_time = 0; } } + + return Status::OK(); } Status ScannerContext::validate_block_schema(Block* block) { diff --git a/be/src/vec/exec/scan/scanner_context.h b/be/src/vec/exec/scan/scanner_context.h index f93d01eef88427..03c4e5a4f1bba7 100644 --- a/be/src/vec/exec/scan/scanner_context.h +++ b/be/src/vec/exec/scan/scanner_context.h @@ -139,7 +139,7 @@ class ScannerContext : public std::enable_shared_from_this, // set the next scanned block to `ScanTask::current_block` // set the error state to `ScanTask::status` // set the `eos` to `ScanTask::eos` if there is no more data in current scanner - void submit_scan_task(std::shared_ptr scan_task); + Status submit_scan_task(std::shared_ptr scan_task); // append the running scanner and its cached block to `_blocks_queue` void append_block_to_queue(std::shared_ptr scan_task); @@ -186,7 +186,7 @@ class ScannerContext : public std::enable_shared_from_this, /// 3. `_free_blocks_memory_usage` < `_max_bytes_in_queue`, remains enough memory to scale up /// 4. At most scale up `MAX_SCALE_UP_RATIO` times to `_max_thread_num` void _set_scanner_done(); - void _try_to_scale_up(); + Status _try_to_scale_up(); RuntimeState* _state = nullptr; pipeline::ScanLocalStateBase* _local_state = nullptr; diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index e30983932ee244..444ff4dbb0cd9f 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -120,23 +120,23 @@ Status ScannerScheduler::init(ExecEnv* env) { return Status::OK(); } -void ScannerScheduler::submit(std::shared_ptr ctx, - std::shared_ptr scan_task) { +Status ScannerScheduler::submit(std::shared_ptr ctx, + std::shared_ptr scan_task) { scan_task->last_submit_time = GetCurrentTimeNanos(); if (ctx->done()) { - return; + return Status::OK(); } auto task_lock = ctx->task_exec_ctx(); if (task_lock == nullptr) { LOG(INFO) << "could not lock task execution context, query " << ctx->debug_string() << " maybe finished"; - return; + return Status::OK(); } if (ctx->thread_token != nullptr) { std::shared_ptr scanner_delegate = scan_task->scanner.lock(); if (scanner_delegate == nullptr) { - return; + return Status::OK(); } scanner_delegate->_scanner->start_wait_worker_timer(); @@ -153,13 +153,12 @@ void ScannerScheduler::submit(std::shared_ptr ctx, }); if (!s.ok()) { scan_task->set_status(s); - ctx->append_block_to_queue(scan_task); - return; + return s; } } else { std::shared_ptr scanner_delegate = scan_task->scanner.lock(); if (scanner_delegate == nullptr) { - return; + return Status::OK(); } scanner_delegate->_scanner->start_wait_worker_timer(); @@ -187,14 +186,18 @@ void ScannerScheduler::submit(std::shared_ptr ctx, return scan_sched->submit_scan_task(simple_scan_task); }; - if (auto ret = sumbit_task(); !ret) { - scan_task->set_status(Status::InternalError( - "Failed to submit scanner to scanner pool reason:" + std::string(ret.msg()) + - "|type:" + std::to_string(type))); - ctx->append_block_to_queue(scan_task); - return; + Status submit_status = sumbit_task(); + if (!submit_status.ok()) { + // User will see TooManyTasks error. It looks like a more reasonable error. 
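The scanner hunks above change `submit_scan_task` and `ScannerScheduler::submit` from fire-and-forget into `Status`-returning calls: a pool rejection is surfaced to the caller instead of being appended to the block queue, and the caller records it as the query's terminal status, which also avoids the deadlock noted in the comment about `_transfer_lock`. A simplified sketch of that propagation pattern, with stand-in `Status`, `Scheduler`, and `ScannerContext` types, follows.

```
// Sketch of the submit-status propagation pattern (stand-in types only).
#include <iostream>
#include <optional>
#include <string>
#include <utility>

struct Status {
    bool ok = true;
    std::string msg;
    static Status OK() { return {true, ""}; }
    static Status TooManyTasks(std::string m) { return {false, std::move(m)}; }
};

struct Scheduler {
    bool pool_full = false;
    Status submit() {
        // Surface a scheduler-side rejection to the caller as a status
        // instead of enqueueing a failed task.
        if (pool_full) return Status::TooManyTasks("scan pool is full");
        return Status::OK();
    }
};

struct ScannerContext {
    Scheduler* scheduler;
    std::optional<Status> process_status; // terminal status once set

    Status submit_scan_task() { return scheduler->submit(); }

    // Mirrors the get_block_from_queue() handling: a failed submit fails the context.
    Status get_block() {
        if (Status st = submit_scan_task(); !st.ok) {
            process_status = st; // remember why we stopped
            return st;
        }
        return Status::OK();
    }
};

int main() {
    Scheduler sched{/*pool_full=*/true};
    ScannerContext ctx{&sched, std::nullopt};
    Status st = ctx.get_block();
    std::cout << (st.ok ? std::string("ok") : "failed: " + st.msg) << "\n";
    return 0;
}
```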
+ Status scan_task_status = Status::TooManyTasks( + "Failed to submit scanner to scanner pool reason:" + + std::string(submit_status.msg()) + "|type:" + std::to_string(type)); + scan_task->set_status(scan_task_status); + return scan_task_status; } } + + return Status::OK(); } std::unique_ptr ScannerScheduler::new_limited_scan_pool_token( diff --git a/be/src/vec/exec/scan/scanner_scheduler.h b/be/src/vec/exec/scan/scanner_scheduler.h index ddc61396e23f15..439291f2107185 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.h +++ b/be/src/vec/exec/scan/scanner_scheduler.h @@ -57,7 +57,7 @@ class ScannerScheduler { [[nodiscard]] Status init(ExecEnv* env); - void submit(std::shared_ptr ctx, std::shared_ptr scan_task); + Status submit(std::shared_ptr ctx, std::shared_ptr scan_task); void stop(); diff --git a/be/src/vec/functions/function_convert_tz.h b/be/src/vec/functions/function_convert_tz.h index af118c80583769..d0a600a9e41a86 100644 --- a/be/src/vec/functions/function_convert_tz.h +++ b/be/src/vec/functions/function_convert_tz.h @@ -53,6 +53,13 @@ #include "vec/runtime/vdatetime_value.h" namespace doris::vectorized { +struct ConvertTzState { + bool use_state = false; + bool is_valid = false; + cctz::time_zone from_tz; + cctz::time_zone to_tz; +}; + template class FunctionConvertTZ : public IFunction { using DateValueType = date_cast::TypeToValueTypeV; @@ -88,8 +95,62 @@ class FunctionConvertTZ : public IFunction { std::make_shared()}; } + Status open(FunctionContext* context, FunctionContext::FunctionStateScope scope) override { + if (scope == FunctionContext::THREAD_LOCAL) { + return Status::OK(); + } + std::shared_ptr state = std::make_shared(); + + context->set_function_state(scope, state); + DCHECK_EQ(context->get_num_args(), 3); + const auto* const_from_tz = context->get_constant_col(1); + const auto* const_to_tz = context->get_constant_col(2); + + // ConvertTzState is used only when both the second and third parameters are constants + if (const_from_tz != nullptr && const_to_tz != nullptr) { + state->use_state = true; + init_convert_tz_state(state, const_from_tz, const_to_tz); + } else { + state->use_state = false; + } + + return IFunction::open(context, scope); + } + + void init_convert_tz_state(std::shared_ptr state, + const ColumnPtrWrapper* const_from_tz, + const ColumnPtrWrapper* const_to_tz) { + auto const_data_from_tz = const_from_tz->column_ptr->get_data_at(0); + auto const_data_to_tz = const_to_tz->column_ptr->get_data_at(0); + + // from_tz and to_tz must both be non-null. 
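The `FunctionConvertTZ` hunks above introduce `ConvertTzState` so that, when both time-zone arguments are constants, the zone lookup happens once in `open()` and every row reuses the parsed result (or yields NULL if either zone failed to resolve). The sketch below shows that open-once, execute-many caching shape with simplified stand-ins; the minute-offset arithmetic is illustrative only and is not how the actual cctz conversion works.

```
// Sketch of caching an expensive constant-argument parse in function state.
#include <iostream>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct ParsedZone {
    std::string name;
    int offset_minutes = 0;
};

// Stand-in for the real time-zone lookup (cctz in Doris); assume it is costly.
static std::optional<ParsedZone> lookup_time_zone(const std::string& name) {
    if (name == "UTC") return ParsedZone{name, 0};
    if (name == "Asia/Shanghai") return ParsedZone{name, 8 * 60};
    return std::nullopt; // unknown zone
}

struct ConvertTzState {
    bool use_state = false; // both zone arguments were constant
    bool is_valid = false;  // both zones resolved successfully
    ParsedZone from_tz;
    ParsedZone to_tz;
};

// open(): runs once per fragment; parse the constant arguments up front.
std::shared_ptr<ConvertTzState> open_state(const std::optional<std::string>& const_from,
                                           const std::optional<std::string>& const_to) {
    auto state = std::make_shared<ConvertTzState>();
    if (const_from && const_to) {
        state->use_state = true;
        auto from = lookup_time_zone(*const_from);
        auto to = lookup_time_zone(*const_to);
        if (from && to) {
            state->from_tz = *from;
            state->to_tz = *to;
            state->is_valid = true;
        }
    }
    return state;
}

// execute(): per-row work only applies the precomputed offsets.
std::vector<std::optional<int>> execute(const ConvertTzState& state,
                                        const std::vector<int>& minutes) {
    std::vector<std::optional<int>> out;
    for (int v : minutes) {
        if (!state.is_valid) {
            out.push_back(std::nullopt); // invalid zone -> NULL for every row
            continue;
        }
        out.push_back(v - state.from_tz.offset_minutes + state.to_tz.offset_minutes);
    }
    return out;
}

int main() {
    auto state = open_state(std::string("UTC"), std::string("Asia/Shanghai"));
    for (auto v : execute(*state, {0, 60})) {
        std::cout << (v ? std::to_string(*v) : std::string("NULL")) << "\n"; // 480, 540
    }
    return 0;
}
```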
+ if (const_data_from_tz.data == nullptr || const_data_to_tz.data == nullptr) { + state->is_valid = false; + return; + } + + auto from_tz_name = const_data_from_tz.to_string(); + auto to_tz_name = const_data_to_tz.to_string(); + + if (!TimezoneUtils::find_cctz_time_zone(from_tz_name, state->from_tz)) { + state->is_valid = false; + return; + } + if (!TimezoneUtils::find_cctz_time_zone(to_tz_name, state->to_tz)) { + state->is_valid = false; + return; + } + state->is_valid = true; + } + Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments, size_t result, size_t input_rows_count) const override { + auto* convert_tz_state = reinterpret_cast( + context->get_function_state(FunctionContext::FRAGMENT_LOCAL)); + if (!convert_tz_state) { + return Status::RuntimeError( + "funciton context for function '{}' must have ConvertTzState;", get_name()); + } auto result_null_map_column = ColumnUInt8::create(input_rows_count, 0); bool col_const[3]; @@ -106,12 +167,16 @@ class FunctionConvertTZ : public IFunction { if (col_const[1] && col_const[2]) { auto result_column = ColumnType::create(); - execute_tz_const(context, assert_cast(argument_columns[0].get()), - assert_cast(argument_columns[1].get()), - assert_cast(argument_columns[2].get()), - assert_cast(result_column.get()), - assert_cast(result_null_map_column.get())->get_data(), - input_rows_count); + if (convert_tz_state->use_state) { + execute_tz_const_with_state( + convert_tz_state, assert_cast(argument_columns[0].get()), + assert_cast(result_column.get()), + assert_cast(result_null_map_column.get())->get_data(), + input_rows_count); + } else { + return Status::RuntimeError("ConvertTzState is not initialized in function {}", + get_name()); + } block.get_by_position(result).column = ColumnNullable::create( std::move(result_column), std::move(result_null_map_column)); } else { @@ -144,18 +209,55 @@ class FunctionConvertTZ : public IFunction { } } - static void execute_tz_const(FunctionContext* context, const ColumnType* date_column, - const ColumnString* from_tz_column, - const ColumnString* to_tz_column, ReturnColumnType* result_column, - NullMap& result_null_map, size_t input_rows_count) { - auto from_tz = from_tz_column->get_data_at(0).to_string(); - auto to_tz = to_tz_column->get_data_at(0).to_string(); + static void execute_tz_const_with_state(ConvertTzState* convert_tz_state, + const ColumnType* date_column, + ReturnColumnType* result_column, + NullMap& result_null_map, size_t input_rows_count) { + cctz::time_zone& from_tz = convert_tz_state->from_tz; + cctz::time_zone& to_tz = convert_tz_state->to_tz; + auto push_null = [&](int row) { + result_null_map[row] = true; + result_column->insert_default(); + }; + if (!convert_tz_state->is_valid) { + // If an invalid timezone is present, return null + for (size_t i = 0; i < input_rows_count; i++) { + push_null(i); + } + return; + } for (size_t i = 0; i < input_rows_count; i++) { if (result_null_map[i]) { result_column->insert_default(); continue; } - execute_inner_loop(date_column, from_tz, to_tz, result_column, result_null_map, i); + + DateValueType ts_value = + binary_cast(date_column->get_element(i)); + ReturnDateValueType ts_value2; + + if constexpr (std::is_same_v) { + std::pair timestamp; + if (!ts_value.unix_timestamp(×tamp, from_tz)) { + push_null(i); + continue; + } + ts_value2.from_unixtime(timestamp, to_tz); + } else { + int64_t timestamp; + if (!ts_value.unix_timestamp(×tamp, from_tz)) { + push_null(i); + continue; + } + ts_value2.from_unixtime(timestamp, 
to_tz); + } + + if (!ts_value2.is_valid_date()) [[unlikely]] { + push_null(i); + continue; + } + + result_column->insert(binary_cast(ts_value2)); } } diff --git a/be/src/vec/functions/function_string.h b/be/src/vec/functions/function_string.h index 618641dcfb227e..2e33dba3f332fe 100644 --- a/be/src/vec/functions/function_string.h +++ b/be/src/vec/functions/function_string.h @@ -1556,64 +1556,92 @@ class FunctionStringPad : public IFunction { const auto* padcol = assert_cast(col[2].get()); const auto& padcol_offsets = padcol->get_offsets(); const auto& padcol_chars = padcol->get_chars(); + std::visit( + [&](auto str_const, auto len_const, auto pad_const) { + execute_utf8( + strcol_offsets, strcol_chars, col_len_data, padcol_offsets, + padcol_chars, res_offsets, res_chars, null_map_data, input_rows_count); + }, + vectorized::make_bool_variant(col_const[0]), + vectorized::make_bool_variant(col_const[1]), + vectorized::make_bool_variant(col_const[2])); - std::vector str_index; + block.get_by_position(result).column = + ColumnNullable::create(std::move(res), std::move(null_map)); + return Status::OK(); + } + + template + void execute_utf8(const ColumnString::Offsets& strcol_offsets, + const ColumnString::Chars& strcol_chars, + const ColumnInt32::Container& col_len_data, + const ColumnString::Offsets& padcol_offsets, + const ColumnString::Chars& padcol_chars, ColumnString::Offsets& res_offsets, + ColumnString::Chars& res_chars, ColumnUInt8::Container& null_map_data, + size_t input_rows_count) const { std::vector pad_index; + size_t const_pad_char_size = 0; + // If pad_const = true, initialize pad_index only once. + // The same logic applies to the if constexpr (!pad_const) condition below. + if constexpr (pad_const) { + const_pad_char_size = simd::VStringFunctions::get_char_len( + (const char*)padcol_chars.data(), padcol_offsets[0], pad_index); + } fmt::memory_buffer buffer; - const bool str_const = col_const[0]; - const bool len_const = col_const[1]; - const bool pad_const = col_const[2]; + buffer.reserve(strcol_chars.size()); + size_t buffer_len = 0; + for (size_t i = 0; i < input_rows_count; ++i) { - str_index.clear(); - pad_index.clear(); + if constexpr (!pad_const) { + pad_index.clear(); + } buffer.clear(); - const auto len = col_len_data[index_check_const(i, len_const)]; + const auto len = col_len_data[index_check_const(i)]; if (len < 0) { // return NULL when input length is invalid number null_map_data[i] = true; - StringOP::push_empty_string(i, res_chars, res_offsets); + res_offsets[i] = buffer_len; } else { - const auto str_idx = index_check_const(i, str_const); + const auto str_idx = index_check_const(i); const int str_len = strcol_offsets[str_idx] - strcol_offsets[str_idx - 1]; const auto* str_data = &strcol_chars[strcol_offsets[str_idx - 1]]; - const auto pad_idx = index_check_const(i, pad_const); + const auto pad_idx = index_check_const(i); const int pad_len = padcol_offsets[pad_idx] - padcol_offsets[pad_idx - 1]; const auto* pad_data = &padcol_chars[padcol_offsets[pad_idx - 1]]; - // get utf8 len - size_t str_char_size = simd::VStringFunctions::get_char_len((const char*)str_data, - str_len, str_index); - size_t pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, - pad_len, pad_index); - - if (len <= str_char_size) { - // truncate the input string - if (len < str_char_size) { - buffer.append(str_data, str_data + str_index[len]); - } else { - buffer.append(str_data, str_data + str_len); - } - StringOP::push_value_string(std::string_view(buffer.data(), 
buffer.size()), i, - res_chars, res_offsets); + auto [iterate_byte_len, iterate_char_len] = + simd::VStringFunctions::iterate_utf8_with_limit_length( + (const char*)str_data, (const char*)str_data + str_len, len); + // If iterate_char_len equals len, it indicates that the str length is greater than or equal to len + if (iterate_char_len == len) { + buffer.reserve(buffer_len + iterate_byte_len); + memcpy(buffer.data() + buffer_len, str_data, iterate_byte_len); + buffer_len += iterate_byte_len; + res_offsets[i] = buffer_len; continue; } + size_t pad_char_size; + if constexpr (!pad_const) { + pad_char_size = simd::VStringFunctions::get_char_len((const char*)pad_data, + pad_len, pad_index); + } else { + pad_char_size = const_pad_char_size; + } // make compatible with mysql. return empty string if pad is empty if (pad_char_size == 0) { - StringOP::push_empty_string(i, res_chars, res_offsets); + res_offsets[i] = buffer_len; continue; } - - const int32_t pad_times = (len - str_char_size) / pad_char_size; - const int32_t pad_remainder = (len - str_char_size) % pad_char_size; - size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; + const size_t str_char_size = iterate_char_len; + const size_t pad_times = (len - str_char_size) / pad_char_size; + const size_t pad_remainder_len = pad_index[(len - str_char_size) % pad_char_size]; + const size_t new_capacity = str_len + size_t(pad_times + 1) * pad_len; ColumnString::check_chars_length(new_capacity, 0); - buffer.reserve(new_capacity); - auto* buffer_data = buffer.data(); - int32_t buffer_len = 0; + buffer.reserve(buffer_len + new_capacity); if constexpr (!Impl::is_lpad) { - memcpy(buffer_data, str_data, str_len); + memcpy(buffer.data() + buffer_len, str_data, str_len); buffer_len += str_len; } // Prepend chars of pad. 
@@ -1621,21 +1649,17 @@ class FunctionStringPad : public IFunction { pad_times); buffer_len += pad_times * pad_len; - memcpy(buffer_data + buffer_len, pad_data, pad_index[pad_remainder]); - buffer_len += pad_index[pad_remainder]; + memcpy(buffer.data() + buffer_len, pad_data, pad_remainder_len); + buffer_len += pad_remainder_len; if constexpr (Impl::is_lpad) { - memcpy(buffer_data + buffer_len, str_data, str_len); + memcpy(buffer.data() + buffer_len, str_data, str_len); buffer_len += str_len; } - StringOP::push_value_string(std::string_view(buffer_data, buffer_len), i, res_chars, - res_offsets); + res_offsets[i] = buffer_len; } } - - block.get_by_position(result).column = - ColumnNullable::create(std::move(res), std::move(null_map)); - return Status::OK(); + res_chars.insert(buffer.data(), buffer.data() + buffer_len); } }; diff --git a/be/src/vec/runtime/vdata_stream_mgr.cpp b/be/src/vec/runtime/vdata_stream_mgr.cpp index 80cc2d93f8e103..a5db9a6150dcfa 100644 --- a/be/src/vec/runtime/vdata_stream_mgr.cpp +++ b/be/src/vec/runtime/vdata_stream_mgr.cpp @@ -109,6 +109,8 @@ Status VDataStreamMgr::transmit_block(const PTransmitDataParams* request, t_finst_id.hi = finst_id.hi(); t_finst_id.lo = finst_id.lo(); std::shared_ptr recvr = nullptr; + ThreadCpuStopWatch cpu_time_stop_watch; + cpu_time_stop_watch.start(); static_cast(find_recvr(t_finst_id, request->node_id(), &recvr)); if (recvr == nullptr) { // The receiver may remove itself from the receiver map via deregister_recvr() @@ -137,9 +139,9 @@ Status VDataStreamMgr::transmit_block(const PTransmitDataParams* request, bool eos = request->eos(); if (request->has_block()) { - RETURN_IF_ERROR(recvr->add_block(request->block(), request->sender_id(), - request->be_number(), request->packet_seq(), - eos ? nullptr : done, wait_for_worker)); + RETURN_IF_ERROR(recvr->add_block( + request->block(), request->sender_id(), request->be_number(), request->packet_seq(), + eos ? 
nullptr : done, wait_for_worker, cpu_time_stop_watch.elapsed_time())); } if (eos) { diff --git a/be/src/vec/runtime/vdata_stream_recvr.cpp b/be/src/vec/runtime/vdata_stream_recvr.cpp index 5326f2b7d0ab0a..1ca6bb7f2c5931 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.cpp +++ b/be/src/vec/runtime/vdata_stream_recvr.cpp @@ -134,7 +134,8 @@ void VDataStreamRecvr::SenderQueue::try_set_dep_ready_without_lock() { Status VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_number, int64_t packet_seq, ::google::protobuf::Closure** done, - const int64_t wait_for_worker) { + const int64_t wait_for_worker, + const uint64_t time_to_find_recvr) { { std::lock_guard l(_lock); if (_is_cancelled) { @@ -189,6 +190,10 @@ Status VDataStreamRecvr::SenderQueue::add_block(const PBlock& pblock, int be_num _recvr->_max_wait_worker_time->set(wait_for_worker); } + if (_recvr->_max_find_recvr_time->value() < time_to_find_recvr) { + _recvr->_max_find_recvr_time->set((int64_t)time_to_find_recvr); + } + _block_queue.emplace_back(std::move(block), block_byte_size); COUNTER_UPDATE(_recvr->_remote_bytes_received_counter, block_byte_size); _record_debug_info(); @@ -363,6 +368,7 @@ VDataStreamRecvr::VDataStreamRecvr(VDataStreamMgr* stream_mgr, RuntimeState* sta _blocks_produced_counter = ADD_COUNTER(_profile, "BlocksProduced", TUnit::UNIT); _max_wait_worker_time = ADD_COUNTER(_profile, "MaxWaitForWorkerTime", TUnit::UNIT); _max_wait_to_process_time = ADD_COUNTER(_profile, "MaxWaitToProcessTime", TUnit::UNIT); + _max_find_recvr_time = ADD_COUNTER(_profile, "MaxFindRecvrTime(NS)", TUnit::UNIT); } VDataStreamRecvr::~VDataStreamRecvr() { @@ -391,11 +397,12 @@ Status VDataStreamRecvr::create_merger(const VExprContextSPtrs& ordering_expr, Status VDataStreamRecvr::add_block(const PBlock& pblock, int sender_id, int be_number, int64_t packet_seq, ::google::protobuf::Closure** done, - const int64_t wait_for_worker) { + const int64_t wait_for_worker, + const uint64_t time_to_find_recvr) { SCOPED_ATTACH_TASK(_query_thread_context); int use_sender_id = _is_merging ? 
sender_id : 0; return _sender_queues[use_sender_id]->add_block(pblock, be_number, packet_seq, done, - wait_for_worker); + wait_for_worker, time_to_find_recvr); } void VDataStreamRecvr::add_block(Block* block, int sender_id, bool use_move) { diff --git a/be/src/vec/runtime/vdata_stream_recvr.h b/be/src/vec/runtime/vdata_stream_recvr.h index 7eebdf0249b958..e8dcfdedba5fb9 100644 --- a/be/src/vec/runtime/vdata_stream_recvr.h +++ b/be/src/vec/runtime/vdata_stream_recvr.h @@ -83,7 +83,8 @@ class VDataStreamRecvr : public HasTaskExecutionCtx { std::vector sender_queues() const { return _sender_queues; } Status add_block(const PBlock& pblock, int sender_id, int be_number, int64_t packet_seq, - ::google::protobuf::Closure** done, const int64_t wait_for_worker); + ::google::protobuf::Closure** done, const int64_t wait_for_worker, + const uint64_t time_to_find_recvr); void add_block(Block* block, int sender_id, bool use_move); @@ -160,6 +161,7 @@ class VDataStreamRecvr : public HasTaskExecutionCtx { RuntimeProfile::Counter* _blocks_produced_counter = nullptr; RuntimeProfile::Counter* _max_wait_worker_time = nullptr; RuntimeProfile::Counter* _max_wait_to_process_time = nullptr; + RuntimeProfile::Counter* _max_find_recvr_time = nullptr; std::vector> _sender_to_local_channel_dependency; }; @@ -178,7 +180,8 @@ class VDataStreamRecvr::SenderQueue { Status get_batch(Block* next_block, bool* eos); Status add_block(const PBlock& pblock, int be_number, int64_t packet_seq, - ::google::protobuf::Closure** done, const int64_t wait_for_worker); + ::google::protobuf::Closure** done, const int64_t wait_for_worker, + const uint64_t time_to_find_recvr); void add_block(Block* block, bool use_move); diff --git a/be/src/vec/sink/vrow_distribution.cpp b/be/src/vec/sink/vrow_distribution.cpp index d45aa2ea911f2e..3a4c7e911f4c14 100644 --- a/be/src/vec/sink/vrow_distribution.cpp +++ b/be/src/vec/sink/vrow_distribution.cpp @@ -68,8 +68,10 @@ Status VRowDistribution::_save_missing_values( } cur_row_values.push_back(node); } - //For duplicate cur_values, they will be filtered in FE - _partitions_need_create.emplace_back(cur_row_values); + if (!_deduper.contains(cur_row_values)) { + _deduper.insert(cur_row_values); + _partitions_need_create.emplace_back(cur_row_values); + } } // to avoid too large mem use diff --git a/be/src/vec/sink/vrow_distribution.h b/be/src/vec/sink/vrow_distribution.h index 5267b488400b8f..fffe0e3f7f1887 100644 --- a/be/src/vec/sink/vrow_distribution.h +++ b/be/src/vec/sink/vrow_distribution.h @@ -24,7 +24,9 @@ #include #include +#include #include +#include #include #include "common/status.h" @@ -133,6 +135,10 @@ class VRowDistribution { Status automatic_create_partition(); void clear_batching_stats(); + // for auto partition + std::unique_ptr _batching_block; + bool _deal_batched = false; // If true, send batched block before any block's append. 
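The `vrow_distribution` hunks above stop relying on FE to filter duplicate partition values and instead deduplicate on the BE side with an `unordered_set` keyed by the row of partition values; the hash functor itself is defined in the header hunk that follows. Here is a standalone sketch of the same dedup idea, with `NullableString` as a simplified stand-in for `TNullableStringLiteral`.

```
// Sketch of deduplicating rows of (possibly NULL) partition values.
#include <cstddef>
#include <functional>
#include <iostream>
#include <string>
#include <unordered_set>
#include <vector>

struct NullableString {
    bool is_null = false;
    std::string value;
    bool operator==(const NullableString& o) const {
        return is_null == o.is_null && (is_null || value == o.value);
    }
};

struct NullableStringListHash {
    static std::size_t hash_one(const NullableString& v) {
        return v.is_null ? 0 : std::hash<std::string>()(v.value);
    }
    std::size_t operator()(const std::vector<NullableString>& row) const {
        std::size_t result = 0;
        for (const auto& v : row) {
            result = (result << 1) ^ hash_one(v); // shift-xor combine, like the real functor
        }
        return result;
    }
};

int main() {
    std::unordered_set<std::vector<NullableString>, NullableStringListHash> deduper;
    std::vector<std::vector<NullableString>> incoming = {
            {{false, "2024-07-01"}, {false, "cn"}},
            {{false, "2024-07-01"}, {false, "cn"}}, // duplicate row
            {{true, ""}, {false, "cn"}},            // NULL partition value
    };
    std::vector<std::vector<NullableString>> partitions_need_create;
    for (const auto& row : incoming) {
        if (deduper.insert(row).second) { // insert() reports whether the row was new
            partitions_need_create.push_back(row);
        }
    }
    std::cout << "to create: " << partitions_need_create.size() << "\n"; // 2
    return 0;
}
```

Keeping the dedup set next to `_partitions_need_create` means a batch with many rows hitting the same missing partition produces a single create request per distinct value list.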
+ private: std::pair _get_partition_function(); @@ -170,17 +176,29 @@ class VRowDistribution { int64_t rows); void _reset_find_tablets(int64_t rows); + struct NullableStringListHash { + std::size_t _hash(const TNullableStringLiteral& arg) const { + if (arg.is_null) { + return 0; + } + return std::hash()(arg.value); + } + std::size_t operator()(const std::vector& arg) const { + std::size_t result = 0; + for (const auto& v : arg) { + result = (result << 1) ^ _hash(v); + } + return result; + } + }; + RuntimeState* _state = nullptr; int _batch_size = 0; // for auto partitions std::vector> _partitions_need_create; - -public: - std::unique_ptr _batching_block; - bool _deal_batched = false; // If true, send batched block before any block's append. -private: size_t _batching_rows = 0, _batching_bytes = 0; + std::unordered_set, NullableStringListHash> _deduper; OlapTableBlockConvertor* _block_convertor = nullptr; OlapTabletFinder* _tablet_finder = nullptr; diff --git a/be/src/vec/utils/util.hpp b/be/src/vec/utils/util.hpp index 3c5f4f194aac9b..8d17b2787a53da 100644 --- a/be/src/vec/utils/util.hpp +++ b/be/src/vec/utils/util.hpp @@ -173,12 +173,12 @@ inline std::string remove_suffix(const std::string& name, const std::string& suf }; inline ColumnPtr create_always_true_column(size_t size, bool is_nullable) { - auto res_data_column = ColumnUInt8::create(size, 1); + ColumnPtr res_data_column = ColumnUInt8::create(1, 1); if (is_nullable) { - auto null_map = ColumnVector::create(size, 0); - return ColumnNullable::create(std::move(res_data_column), std::move(null_map)); + auto null_map = ColumnVector::create(1, 0); + res_data_column = ColumnNullable::create(res_data_column, std::move(null_map)); } - return res_data_column; + return ColumnConst::create(std::move(res_data_column), size); } // change null element to true element diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp index 4fc096380c754b..9adb30b93054f4 100644 --- a/be/test/olap/lru_cache_test.cpp +++ b/be/test/olap/lru_cache_test.cpp @@ -88,25 +88,46 @@ class CacheTest : public testing::Test { void* value; }; - class CacheTestPolicy : public LRUCachePolicyTrackingManual { + class CacheTestSizePolicy : public LRUCachePolicyTrackingManual { public: - CacheTestPolicy(size_t capacity) - : LRUCachePolicyTrackingManual(CachePolicy::CacheType::FOR_UT, capacity, + CacheTestSizePolicy(size_t capacity) + : LRUCachePolicyTrackingManual(CachePolicy::CacheType::FOR_UT_CACHE_SIZE, capacity, LRUCacheType::SIZE, -1) {} }; + class CacheTestNumberPolicy : public LRUCachePolicyTrackingManual { + public: + CacheTestNumberPolicy(size_t capacity, uint32_t num_shards) + : LRUCachePolicyTrackingManual(CachePolicy::CacheType::FOR_UT_CACHE_NUMBER, + capacity, LRUCacheType::NUMBER, -1, num_shards) {} + }; + // there is 16 shards in ShardedLRUCache // And the LRUHandle size is about 100B. So the cache size should big enough // to run the UT. 
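For the `create_always_true_column` change above: rather than filling `size` identical rows (plus a same-sized null map), the new code builds a one-row column and wraps it in `ColumnConst`, which only stores the logical row count. A simplified sketch of why that is cheap follows; `ConstColumn` and `UInt8Column` are toy stand-ins, not the Doris column classes.

```
// Sketch: a constant column stores one row plus a logical size.
#include <cstdint>
#include <iostream>
#include <vector>

struct UInt8Column {
    std::vector<uint8_t> data;
};

struct ConstColumn {
    UInt8Column payload; // exactly one row
    size_t rows = 0;     // logical size; no per-row storage
    uint8_t at(size_t) const { return payload.data.at(0); } // every row reads the same value
};

ConstColumn create_always_true_column(size_t size) {
    return ConstColumn{UInt8Column{{uint8_t(1)}}, size};
}

int main() {
    auto col = create_always_true_column(1'000'000);
    std::cout << "rows=" << col.rows << " first=" << int(col.at(0))
              << " backing_bytes=" << col.payload.data.size() << "\n"; // 1 byte backs 1M rows
    return 0;
}
```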
static const int kCacheSize = 1000 * 16; std::vector _deleted_keys; std::vector _deleted_values; - CacheTestPolicy* _cache; + LRUCachePolicy* _cache = nullptr; - CacheTest() : _cache(new CacheTestPolicy(kCacheSize)) { _s_current = this; } + CacheTest() { _s_current = this; } ~CacheTest() override { delete _cache; } + void init_size_cache(size_t capacity = kCacheSize) { + if (_cache != nullptr) { + delete _cache; + } + _cache = new CacheTestSizePolicy(capacity); + } + + void init_number_cache(size_t capacity = kCacheSize, uint32_t num_shards = 1) { + if (_cache != nullptr) { + delete _cache; + } + _cache = new CacheTestNumberPolicy(capacity, num_shards); + } + LRUCachePolicy* cache() const { return _cache; } int Lookup(int key) const { @@ -149,7 +170,25 @@ class CacheTest : public testing::Test { }; CacheTest* CacheTest::_s_current; +static void insert_LRUCache(LRUCache& cache, const CacheKey& key, int value, + CachePriority priority) { + uint32_t hash = key.hash(key.data(), key.size(), 0); + auto* cache_value = new CacheTest::CacheValue(EncodeValue(value)); + cache.release(cache.insert(key, hash, cache_value, value, priority)); +} + +static void insert_number_LRUCache(LRUCache& cache, const CacheKey& key, int value, int charge, + CachePriority priority) { + uint32_t hash = key.hash(key.data(), key.size(), 0); + auto* cache_value = new CacheTest::CacheValue(EncodeValue(value)); + cache.release(cache.insert(key, hash, cache_value, charge, priority)); +} + +// https://stackoverflow.com/questions/42756443/undefined-reference-with-gtest +const int CacheTest::kCacheSize; + TEST_F(CacheTest, HitAndMiss) { + init_size_cache(); EXPECT_EQ(-1, Lookup(100)); Insert(100, 101, 1); @@ -173,6 +212,7 @@ TEST_F(CacheTest, HitAndMiss) { } TEST_F(CacheTest, Erase) { + init_size_cache(); Erase(200); EXPECT_EQ(0, _deleted_keys.size()); @@ -192,6 +232,7 @@ TEST_F(CacheTest, Erase) { } TEST_F(CacheTest, EntriesArePinned) { + init_size_cache(); Insert(100, 101, 1); std::string result1; Cache::Handle* h1 = cache()->lookup(EncodeKey(&result1, 100)); @@ -219,6 +260,7 @@ TEST_F(CacheTest, EntriesArePinned) { } TEST_F(CacheTest, EvictionPolicy) { + init_size_cache(); Insert(100, 101, 1); Insert(200, 201, 1); @@ -234,6 +276,7 @@ TEST_F(CacheTest, EvictionPolicy) { } TEST_F(CacheTest, EvictionPolicyWithDurable) { + init_size_cache(); Insert(100, 101, 1); InsertDurable(200, 201, 1); Insert(300, 101, 1); @@ -250,20 +293,6 @@ TEST_F(CacheTest, EvictionPolicyWithDurable) { EXPECT_EQ(201, Lookup(200)); } -static void insert_LRUCache(LRUCache& cache, const CacheKey& key, int value, - CachePriority priority) { - uint32_t hash = key.hash(key.data(), key.size(), 0); - auto* cache_value = new CacheTest::CacheValue(EncodeValue(value)); - cache.release(cache.insert(key, hash, cache_value, value, priority)); -} - -static void insert_number_LRUCache(LRUCache& cache, const CacheKey& key, int value, int charge, - CachePriority priority) { - uint32_t hash = key.hash(key.data(), key.size(), 0); - auto* cache_value = new CacheTest::CacheValue(EncodeValue(value)); - cache.release(cache.insert(key, hash, cache_value, charge, priority)); -} - TEST_F(CacheTest, Usage) { LRUCache cache(LRUCacheType::SIZE); cache.set_capacity(1040); @@ -463,6 +492,7 @@ TEST_F(CacheTest, Number) { } TEST_F(CacheTest, HeavyEntries) { + init_size_cache(); // Add a bunch of light and heavy entries and then count the combined // size of items still in the cache, which must be approximately the // same as the total capacity. 
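The `lru_cache_test.cpp` changes above replace the fixture's eagerly constructed `CacheTestPolicy` with lazy `init_size_cache()` / `init_number_cache()` helpers, so each test picks the cache flavor it needs (SIZE vs NUMBER, shard count). A minimal GoogleTest-style sketch of that fixture pattern, using stand-in types and assuming a link against gtest_main, follows.

```
// Sketch of a fixture whose cache is created per test, not in the constructor.
#include <gtest/gtest.h>
#include <cstdint>
#include <memory>

enum class LRUCacheType { SIZE, NUMBER };

struct CachePolicyStub {
    LRUCacheType type;
    size_t capacity;
    uint32_t num_shards;
};

class CacheTestSketch : public testing::Test {
protected:
    void init_size_cache(size_t capacity) {
        _cache = std::make_unique<CachePolicyStub>(
                CachePolicyStub{LRUCacheType::SIZE, capacity, 16});
    }
    void init_number_cache(size_t capacity, uint32_t num_shards) {
        _cache = std::make_unique<CachePolicyStub>(
                CachePolicyStub{LRUCacheType::NUMBER, capacity, num_shards});
    }
    std::unique_ptr<CachePolicyStub> _cache; // built per test, not in the constructor
};

TEST_F(CacheTestSketch, SizeCache) {
    init_size_cache(16000);
    ASSERT_NE(_cache, nullptr);
    EXPECT_EQ(_cache->type, LRUCacheType::SIZE);
}

TEST_F(CacheTestSketch, NumberCacheSingleShard) {
    init_number_cache(16000, /*num_shards=*/1);
    EXPECT_EQ(_cache->type, LRUCacheType::NUMBER);
    EXPECT_EQ(_cache->num_shards, 1u);
}
```

Lazy initialization keeps a single fixture usable for both cache types, which is what allows the new `SetCapacity` test to run against a single-shard NUMBER cache while the older tests keep the sharded SIZE cache.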
@@ -494,12 +524,14 @@ TEST_F(CacheTest, HeavyEntries) { } TEST_F(CacheTest, NewId) { + init_size_cache(); uint64_t a = cache()->new_id(); uint64_t b = cache()->new_id(); EXPECT_NE(a, b); } TEST_F(CacheTest, SimpleBenchmark) { + init_size_cache(); for (int i = 0; i < kCacheSize * LOOP_LESS_OR_MORE(10, 10000); i++) { Insert(1000 + i, 2000 + i, 1); EXPECT_EQ(2000 + i, Lookup(1000 + i)); @@ -598,4 +630,78 @@ TEST(CacheHandleTest, HandleTableTest) { } } +TEST_F(CacheTest, SetCapacity) { + init_number_cache(); + for (int i = 0; i < kCacheSize; i++) { + Insert(i, 1000 + i, 1); + EXPECT_EQ(1000 + i, Lookup(i)); + } + ASSERT_EQ(kCacheSize, cache()->get_capacity()); + ASSERT_EQ(kCacheSize, cache()->get_usage()); + + int64_t prune_num = cache()->adjust_capacity_weighted(2); + ASSERT_EQ(prune_num, 0); + ASSERT_EQ(kCacheSize * 2, cache()->get_capacity()); + ASSERT_EQ(kCacheSize, cache()->get_usage()); + + prune_num = cache()->adjust_capacity_weighted(0.5); + ASSERT_EQ(prune_num, kCacheSize / 2); + ASSERT_EQ(kCacheSize / 2, cache()->get_capacity()); + ASSERT_EQ(kCacheSize / 2, cache()->get_usage()); + + std::vector handles(kCacheSize, nullptr); + for (int i = 0; i < kCacheSize; i++) { + std::string result; + CacheKey cache_key = EncodeKey(&result, kCacheSize + i); + auto* cache_value = new CacheValueWithKey(DecodeKey(cache_key), EncodeValue(i)); + handles[i] = cache()->insert(cache_key, cache_value, 1, 1); + } + ASSERT_EQ(kCacheSize / 2, cache()->get_capacity()); + ASSERT_EQ(kCacheSize, + cache()->get_usage()); // Handle not be released, so key cannot be evicted. + + for (int i = 0; i < kCacheSize; i++) { + Insert(i + kCacheSize, 2000 + i, 1); + EXPECT_EQ(-1, Lookup(i + kCacheSize)); // Cache is full, insert failed. + } + ASSERT_EQ(kCacheSize / 2, cache()->get_capacity()); + ASSERT_EQ(kCacheSize, cache()->get_usage()); + + cache()->adjust_capacity_weighted(2); + ASSERT_EQ(kCacheSize * 2, cache()->get_capacity()); + ASSERT_EQ(kCacheSize, cache()->get_usage()); + + for (int i = 0; i < kCacheSize; i++) { + Insert(i, 3000 + i, 1); + EXPECT_EQ(3000 + i, Lookup(i)); + } + ASSERT_EQ(kCacheSize * 2, cache()->get_capacity()); + ASSERT_EQ(kCacheSize * 2, cache()->get_usage()); + + cache()->adjust_capacity_weighted(0); + ASSERT_EQ(0, cache()->get_capacity()); + ASSERT_EQ(kCacheSize, cache()->get_usage()); + + for (auto it : handles) { + cache()->release(it); + } + ASSERT_EQ(0, cache()->get_capacity()); + ASSERT_EQ(0, cache()->get_usage()); + + cache()->adjust_capacity_weighted(1); + ASSERT_EQ(kCacheSize, cache()->get_capacity()); + ASSERT_EQ(0, cache()->get_usage()); + + cache()->adjust_capacity_weighted(0); + ASSERT_EQ(0, cache()->get_capacity()); + ASSERT_EQ(0, cache()->get_usage()); + + for (int i = 0; i < kCacheSize; i++) { + Insert(i, 4000 + i, 1); + EXPECT_EQ(-1, Lookup(i)); + } + ASSERT_EQ(0, cache()->get_capacity()); + ASSERT_EQ(0, cache()->get_usage()); +} + } // namespace doris diff --git a/cloud/src/meta-service/meta_service_resource.cpp b/cloud/src/meta-service/meta_service_resource.cpp index 90a88f86006643..8a25a73771ccbd 100644 --- a/cloud/src/meta-service/meta_service_resource.cpp +++ b/cloud/src/meta-service/meta_service_resource.cpp @@ -254,6 +254,8 @@ void MetaServiceImpl::get_obj_store_info(google::protobuf::RpcController* contro } } + response->set_enable_storage_vault(instance.enable_storage_vault()); + // Iterate all the resources to return to the rpc caller if (!instance.resource_ids().empty()) { std::string storage_vault_start = storage_vault_key({instance.instance_id(), ""}); diff --git 
a/docker/runtime/doris-compose/Readme.md b/docker/runtime/doris-compose/Readme.md index a83fa81e7615fa..770414f7a2bdf8 100644 --- a/docker/runtime/doris-compose/Readme.md +++ b/docker/runtime/doris-compose/Readme.md @@ -23,7 +23,16 @@ Use doris compose to create doris docker compose clusters. ## Requirements -1. The doris image should contains: +##### 1. Make sure you have docker permissions + + run: +``` +docker run hello-world +``` + +if have problem with permission denied, then [add-docker-permission](https://docs.docker.com/engine/install/linux-postinstall/). + +##### 2. The doris image should contains ``` /opt/apache-doris/{fe, be, cloud} @@ -32,16 +41,14 @@ Use doris compose to create doris docker compose clusters. if don't create cloud cluster, the image no need to contains the cloud pkg. -if build doris use `sh build.sh --fe --be --cloud`, then its output satisfy with all above, then run command in doris root +if build doris use `sh build.sh --fe --be --cloud`, then its output satisfy with all above, then run command in doris root directory + will generate such a image. ``` docker build -f docker/runtime/doris-compose/Dockerfile -t . ``` -will generate a image. - -2. Install the dependent python library in 'docker/runtime/doris-compose/requirements.txt' - +##### 3. Install the dependent python library in 'docker/runtime/doris-compose/requirements.txt' ``` python -m pip install --user -r docker/runtime/doris-compose/requirements.txt @@ -49,6 +56,20 @@ python -m pip install --user -r docker/runtime/doris-compose/requirements.txt ## Usage +### Notice + +Each cluster will have a directory in '/tmp/doris/{cluster-name}', user can set env LOCAL_DORIS_PATH to change its directory. + +For example, if user export LOCAL_DORIS_PATH=/mydoris, then the cluster's directory is '/mydoris/{cluster-name}'. + +And cluster's directory will contains all its containers's logs and data, like fe-1, fe-2, be-1, ..., etc. + +If there are multiple users run doris-compose on the same machine, suggest don't change LOCAL_DORIS_PATH or they should export the same LOCAL_DORIS_PATH. + +Because when create a new cluster, doris-compose will search the local doris path, and choose a docker network which is different with this path's clusters. + +So if multiple users use different LOCAL_DORIS_PATH, their clusters may have docker network conflict!!! + ### Create a cluster or recreate its containers ``` @@ -65,9 +86,11 @@ add fe/be nodes with the specific image, or update existing nodes with `--fe-id` For create a cloud cluster, steps are as below: + 1. Write cloud s3 store config file, its default path is '/tmp/doris/cloud.ini'. It's defined in environment variable DORIS_CLOUD_CFG_FILE, user can change this env var to change its path. A Example file is locate in 'docker/runtime/doris-compose/resource/cloud.ini.example'. + 2. Use doris compose up command with option '--cloud' to create a new cloud cluster. The simplest way to create a cloud cluster: @@ -127,7 +150,7 @@ Generate regression-conf-custom.groovy to connect to the specific docker cluster steps: -1. Create a new cluster: `python doris-compose.py up my-cluster my-image --add-fe-num 2 --add-be-num 4 --cloud` -2. Generate regression-conf-custom.groovy: `python doris-compose.py config my-cluster --connect-follow-fe` +1. Create a new cluster: `python docker/runtime/doris-compose/doris-compose.py up my-cluster my-image --add-fe-num 2 --add-be-num 4 --cloud` +2. 
Generate regression-conf-custom.groovy: `python docker/runtime/doris-compose/doris-compose.py config my-cluster --connect-follow-fe` 3. Run regression test: `bash run-regression-test.sh --run -times 1 -parallel 1 -suiteParallel 1 -d cloud/multi_cluster` diff --git a/docker/runtime/doris-compose/command.py b/docker/runtime/doris-compose/command.py index ed88dd03f4daf8..b6862bdcb000b1 100644 --- a/docker/runtime/doris-compose/command.py +++ b/docker/runtime/doris-compose/command.py @@ -826,7 +826,16 @@ def run(self, args): print("\nNo write regression custom file.") return + annotation_start = "//---------- Start auto generate by doris-compose.py---------" + annotation_end = "//---------- End auto generate by doris-compose.py---------" + + old_contents = [] + if os.path.exists(regression_conf_custom): + with open(regression_conf_custom, "r") as f: + old_contents = f.readlines() with open(regression_conf_custom, "w") as f: + # write auto gen config + f.write(annotation_start) f.write(base_conf.format(fe_ip=fe_ip)) if cluster.is_cloud: multi_cluster_bes = ",".join([ @@ -845,6 +854,23 @@ def run(self, args): multi_cluster_bes=multi_cluster_bes, fe_cloud_unique_id=cluster.get_node( CLUSTER.Node.TYPE_FE, 1).cloud_unique_id())) + f.write(annotation_end + "\n\n") + annotation_end_line_count = -1 + + # write not-auto gen config + in_annotation = False + annotation_end_line_idx = -100 + for line_idx, line in enumerate(old_contents): + line = line.rstrip() + if line == annotation_start: + in_annotation = True + elif line == annotation_end: + in_annotation = False + annotation_end_line_idx = line_idx + elif not in_annotation: + if line or line_idx != annotation_end_line_idx + 1: + f.write(line + "\n") + print("\nWrite succ: " + regression_conf_custom) diff --git a/docker/runtime/doris-compose/resource/common.sh b/docker/runtime/doris-compose/resource/common.sh index de6ba29865a948..a1c1b3ff2a5bdf 100644 --- a/docker/runtime/doris-compose/resource/common.sh +++ b/docker/runtime/doris-compose/resource/common.sh @@ -23,7 +23,7 @@ export LOG_FILE=$DORIS_HOME/log/health.out export LOCK_FILE=$DORIS_HOME/status/token health_log() { - echo "$(date +'%Y-%m-%d %H:%M:%S') $@" >>$LOG_FILE + echo "$(date +'%Y-%m-%d %H:%M:%S') $@" | tee -a $LOG_FILE } # concurrent write meta service server will failed due to fdb txn conflict. 
diff --git a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java index 1dcd062261b2b8..7a8dda5aabedef 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java +++ b/fe/fe-common/src/main/java/org/apache/doris/catalog/Type.java @@ -866,10 +866,6 @@ public static boolean canCastTo(Type sourceType, Type targetType) { return false; } for (int i = 0; i < sourceAggState.getSubTypes().size(); i++) { - // target subtype is not null but source subtype is nullable - if (!targetAggState.getSubTypeNullables().get(i) && sourceAggState.getSubTypeNullables().get(i)) { - return false; - } if (!canCastTo(sourceAggState.getSubTypes().get(i), targetAggState.getSubTypes().get(i))) { return false; } diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 360ec7ae6035c7..f12fff59c0355f 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -3052,4 +3052,8 @@ public static int metaServiceRpcRetryTimes() { @ConfField(mutable = true, description = {"表示最大锁持有时间,超过该时间会打印告警日志,单位秒", "Maximum lock hold time; logs a warning if exceeded"}) public static long max_lock_hold_threshold_seconds = 10; + + @ConfField(mutable = true, description = {"元数据同步是否开启安全模式", + "Is metadata synchronization enabled in safe mode"}) + public static boolean meta_helper_security_mode = false; } diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 index 2343f208642d6e..e3ae9788e6af14 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisLexer.g4 @@ -472,6 +472,7 @@ SEMI: 'SEMI'; SEQUENCE: 'SEQUENCE'; SERIALIZABLE: 'SERIALIZABLE'; SESSION: 'SESSION'; +SESSION_USER: 'SESSION_USER'; SET: 'SET'; SETS: 'SETS'; SET_SESSION_VARIABLE: 'SET_SESSION_VARIABLE'; diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 087af9d717de13..008425fb5a197e 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -1510,6 +1510,7 @@ primaryExpression | name=LOCALTIME #localTime | name=LOCALTIMESTAMP #localTimestamp | name=CURRENT_USER #currentUser + | name=SESSION_USER #sessionUser | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? 
END #simpleCase | name=CAST LEFT_PAREN expression AS castDataType RIGHT_PAREN #cast @@ -1577,6 +1578,7 @@ functionNameIdentifier | REGEXP | RIGHT | SCHEMA + | SESSION_USER | TRIM | USER ; @@ -2031,6 +2033,7 @@ nonReserved | SET_SESSION_VARIABLE | SEQUENCE | SESSION + | SESSION_USER | SHAPE | SKEW | SNAPSHOT diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java index bcb0864b64f2e2..86ed6e55ab5c6b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/BuiltinScalarFunctions.java @@ -360,6 +360,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsSub; +import org.apache.doris.nereids.trees.expressions.functions.scalar.SessionUser; import org.apache.doris.nereids.trees.expressions.functions.scalar.Sha1; import org.apache.doris.nereids.trees.expressions.functions.scalar.Sha2; import org.apache.doris.nereids.trees.expressions.functions.scalar.Sign; @@ -934,7 +935,8 @@ public class BuiltinScalarFunctions implements FunctionHelper { scalar(YearsAdd.class, "years_add"), scalar(YearsDiff.class, "years_diff"), scalar(YearsSub.class, "years_sub"), - scalar(MultiMatch.class, "multi_match")); + scalar(MultiMatch.class, "multi_match"), + scalar(SessionUser.class, "session_user")); public static final BuiltinScalarFunctions INSTANCE = new BuiltinScalarFunctions(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java index 15bf65c3c73728..768ae22d202dc4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchema.java @@ -21,7 +21,7 @@ import org.apache.doris.analysis.ColumnNullableType; import org.apache.doris.analysis.TypeDef; import org.apache.doris.common.UserException; -import org.apache.doris.plugin.audit.AuditLoaderPlugin; +import org.apache.doris.plugin.audit.AuditLoader; import org.apache.doris.statistics.StatisticConstants; import com.google.common.collect.Lists; @@ -168,7 +168,7 @@ public static List getCopiedSchema(String tblName) throws UserExcepti case StatisticConstants.HISTOGRAM_TBL_NAME: schema = HISTO_STATS_SCHEMA; break; - case AuditLoaderPlugin.AUDIT_LOG_TABLE: + case AuditLoader.AUDIT_LOG_TABLE: schema = AUDIT_SCHEMA; break; default: diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java index 87e8a0fc3b0ce8..c038414fc65bc6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/InternalSchemaInitializer.java @@ -38,7 +38,7 @@ import org.apache.doris.common.util.PropertyAnalyzer; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.ha.FrontendNodeType; -import org.apache.doris.plugin.audit.AuditLoaderPlugin; +import org.apache.doris.plugin.audit.AuditLoader; import org.apache.doris.statistics.StatisticConstants; import org.apache.doris.statistics.util.StatisticsUtil; @@ -98,7 +98,7 @@ public void run() { Database database = op.get(); modifyTblReplicaCount(database, 
StatisticConstants.TABLE_STATISTIC_TBL_NAME); modifyTblReplicaCount(database, StatisticConstants.PARTITION_STATISTIC_TBL_NAME); - modifyTblReplicaCount(database, AuditLoaderPlugin.AUDIT_LOG_TABLE); + modifyTblReplicaCount(database, AuditLoader.AUDIT_LOG_TABLE); } @VisibleForTesting @@ -215,7 +215,7 @@ private static CreateTableStmt buildStatisticsTblStmt(String statsTableName, Lis private static CreateTableStmt buildAuditTblStmt() throws UserException { TableName tableName = new TableName("", - FeConstants.INTERNAL_DB_NAME, AuditLoaderPlugin.AUDIT_LOG_TABLE); + FeConstants.INTERNAL_DB_NAME, AuditLoader.AUDIT_LOG_TABLE); String engineName = "olap"; ArrayList dupKeys = Lists.newArrayList("query_id", "time", "client_ip"); @@ -244,7 +244,7 @@ private static CreateTableStmt buildAuditTblStmt() throws UserException { PropertyAnalyzer.getInstance().rewriteForceProperties(properties); CreateTableStmt createTableStmt = new CreateTableStmt(true, false, - tableName, InternalSchema.getCopiedSchema(AuditLoaderPlugin.AUDIT_LOG_TABLE), + tableName, InternalSchema.getCopiedSchema(AuditLoader.AUDIT_LOG_TABLE), engineName, keysDesc, partitionDesc, distributionDesc, properties, null, "Doris internal audit table, DO NOT MODIFY IT", null); StatisticsUtil.analyze(createTableStmt); @@ -286,7 +286,7 @@ private boolean created() { } // 3. check audit table - optionalStatsTbl = db.getTable(AuditLoaderPlugin.AUDIT_LOG_TABLE); + optionalStatsTbl = db.getTable(AuditLoader.AUDIT_LOG_TABLE); return optionalStatsTbl.isPresent(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index a60da71b299328..2f5eb35ad757fb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -3147,9 +3147,10 @@ public List getPartitionColumns() { public MTMVSnapshotIf getPartitionSnapshot(String partitionName, MTMVRefreshContext context) throws AnalysisException { Map partitionVersions = context.getBaseVersions().getPartitionVersions(); + long partitionId = getPartitionOrAnalysisException(partitionName).getId(); long visibleVersion = partitionVersions.containsKey(partitionName) ? partitionVersions.get(partitionName) : getPartitionOrAnalysisException(partitionName).getVisibleVersion(); - return new MTMVVersionSnapshot(visibleVersion); + return new MTMVVersionSnapshot(visibleVersion, partitionId); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/View.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/View.java index 8285dedc0941b8..62402c7d474f3b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/View.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/View.java @@ -76,6 +76,7 @@ public class View extends Table implements GsonPostProcessable { private String inlineViewDef; // for persist + @SerializedName("sm") private long sqlMode = 0L; // View definition created by parsing inlineViewDef_ into a QueryStmt. 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java index 43f7dcbc6879f3..75ded96f4b6757 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/catalog/CloudReplica.java @@ -150,6 +150,13 @@ public long getBackendId() { } private long getBackendIdImpl(String cluster) { + // if cluster is SUSPENDED, wait + try { + cluster = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).waitForAutoStart(cluster); + } catch (DdlException e) { + // this function cant throw exception. so just log it + LOG.warn("cant resume cluster {}, exception", cluster, e); + } // check default cluster valid. if (Strings.isNullOrEmpty(cluster)) { LOG.warn("failed to get available be, clusterName: {}", cluster); @@ -163,13 +170,6 @@ private long getBackendIdImpl(String cluster) { return -1; } - // if cluster is SUSPENDED, wait - try { - ((CloudSystemInfoService) Env.getCurrentSystemInfo()).waitForAutoStart(cluster); - } catch (DdlException e) { - // this function cant throw exception. so just log it - LOG.warn("cant resume cluster {}, exception", cluster, e); - } String clusterId = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudClusterIdByName(cluster); if (isColocated()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java index 202d576e3bf0d5..03cbbfe814a8b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/system/CloudSystemInfoService.java @@ -737,20 +737,20 @@ public String getClusterNameAutoStart(final String clusterName) { return cloudClusterTypeAndName.clusterName; } - public void waitForAutoStart(String clusterName) throws DdlException { + public String waitForAutoStart(String clusterName) throws DdlException { if (Config.isNotCloudMode()) { - return; + return null; } clusterName = getClusterNameAutoStart(clusterName); if (Strings.isNullOrEmpty(clusterName)) { LOG.warn("auto start in cloud mode, but clusterName empty {}", clusterName); - return; + return null; } String clusterStatus = getCloudStatusByName(clusterName); if (Strings.isNullOrEmpty(clusterStatus)) { // for cluster rename or cluster dropped LOG.warn("cant find clusterStatus in fe, clusterName {}", clusterName); - return; + return null; } if (Cloud.ClusterStatus.valueOf(clusterStatus) == Cloud.ClusterStatus.MANUAL_SHUTDOWN) { @@ -765,7 +765,7 @@ public void waitForAutoStart(String clusterName) throws DdlException { // root ? 
see StatisticsUtil.buildConnectContext if (ConnectContext.get() != null && ConnectContext.get().getUserIdentity().isRootUser()) { LOG.warn("auto start daemon thread run in root, not resume cluster {}-{}", clusterName, clusterStatus); - return; + return null; } Cloud.AlterClusterRequest.Builder builder = Cloud.AlterClusterRequest.newBuilder(); builder.setCloudUniqueId(Config.cloud_unique_id); @@ -794,7 +794,8 @@ public void waitForAutoStart(String clusterName) throws DdlException { StopWatch stopWatch = new StopWatch(); stopWatch.start(); boolean hasAutoStart = false; - while (!String.valueOf(Cloud.ClusterStatus.NORMAL).equals(clusterStatus) + boolean existAliveBe = true; + while ((!String.valueOf(Cloud.ClusterStatus.NORMAL).equals(clusterStatus) || !existAliveBe) && retryTime < retryTimes) { hasAutoStart = true; ++retryTime; @@ -812,6 +813,8 @@ public void waitForAutoStart(String clusterName) throws DdlException { LOG.info("change cluster sleep wait InterruptedException: ", e); } clusterStatus = getCloudStatusByName(clusterName); + // Check that at least one BE node in the cluster is alive + existAliveBe = getBackendsByClusterName(clusterName).stream().anyMatch(Backend::isAlive); } if (retryTime >= retryTimes) { // auto start timeout @@ -824,5 +827,6 @@ public void waitForAutoStart(String clusterName) throws DdlException { if (hasAutoStart) { LOG.info("auto start cluster {}, start cost {} ms", clusterName, stopWatch.getTime()); } + return clusterName; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java index f51454ad269c51..f224d2929a65c1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/transaction/CloudGlobalTransactionMgr.java @@ -990,6 +990,20 @@ public void abortTransaction(Long dbId, Long transactionId, String reason, TxnCommitAttachment txnCommitAttachment, List tableList) throws UserException { LOG.info("try to abort transaction, dbId:{}, transactionId:{}", dbId, transactionId); + if (txnCommitAttachment != null) { + if (txnCommitAttachment instanceof RLTaskTxnCommitAttachment) { + RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment = (RLTaskTxnCommitAttachment) txnCommitAttachment; + TxnStateChangeCallback cb = callbackFactory.getCallback(rlTaskTxnCommitAttachment.getJobId()); + if (cb != null) { + // use a temporary transaction state to do the beforeAborted check; + // what actually takes effect is the transactionId + TransactionState tmpTxnState = new TransactionState(); + tmpTxnState.setTransactionId(transactionId); + cb.beforeAborted(tmpTxnState); + } + } + } + AbortTxnRequest.Builder builder = AbortTxnRequest.newBuilder(); builder.setDbId(dbId); builder.setTxnId(transactionId); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java b/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java index 206d1cb208959b..39d13b0e989727 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/Log4jConfig.java @@ -252,13 +252,18 @@ private static void reconfig() throws IOException { } newXmlConfTemplate = newXmlConfTemplate.replaceAll(VERBOSE_MODULE_PLACEHOLDER, sb.toString()); - if (sysLogMode.equalsIgnoreCase("NORMAL")) { + // BRIEF: async, no location + // ASYNC: async, with location + // NORMAL: sync, with location + boolean
includeLocation = !sysLogMode.equalsIgnoreCase("BRIEF"); + boolean immediateFlush = sysLogMode.equalsIgnoreCase("NORMAL"); + if (includeLocation) { newXmlConfTemplate = newXmlConfTemplate.replaceAll(RUNTIME_LOG_FORMAT_PLACEHOLDER, " [%C{1}.%M():%L] "); } else { newXmlConfTemplate = newXmlConfTemplate.replaceAll(RUNTIME_LOG_FORMAT_PLACEHOLDER, " "); - if (sysLogMode.equalsIgnoreCase("ASYNC")) { - newXmlConfTemplate = newXmlConfTemplate.replaceAll("Root", "AsyncRoot"); - } + } + if (!immediateFlush) { + newXmlConfTemplate = newXmlConfTemplate.replaceAll("Root", "AsyncRoot"); } if (Config.enable_file_logger) { @@ -298,11 +303,6 @@ private static void reconfig() throws IOException { properties.put("warn_sys_accumulated_file_size", String.valueOf(Config.warn_sys_accumulated_file_size)); properties.put("audit_sys_accumulated_file_size", String.valueOf(Config.audit_sys_accumulated_file_size)); - // BRIEF: async, no location - // ASYNC: async, with location - // NORMAL: sync, with location - boolean includeLocation = !sysLogMode.equalsIgnoreCase("BRIEF"); - boolean immediateFlush = sysLogMode.equalsIgnoreCase("NORMAL"); properties.put("include_location_flag", Boolean.toString(includeLocation)); properties.put("immediate_flush_flag", Boolean.toString(immediateFlush)); properties.put("audit_file_postfix", compressAuditLog ? ".gz" : ""); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java index d3d6826174f91d..7828a38e6eb242 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/ExecutionProfile.java @@ -182,6 +182,7 @@ private RuntimeProfile getPipelineAggregatedProfile(Map planNod } newFragmentProfile.addChild(mergedpipelineProfile); pipelineIdx++; + fragmentsProfile.rowsProducedMap.putAll(mergedpipelineProfile.rowsProducedMap); } } return fragmentsProfile; diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/Profile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/Profile.java index 76414677d0a05b..88fd317879451e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/profile/Profile.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/Profile.java @@ -22,8 +22,11 @@ import org.apache.doris.common.util.ProfileManager; import org.apache.doris.common.util.RuntimeProfile; import org.apache.doris.nereids.NereidsPlanner; +import org.apache.doris.nereids.trees.plans.AbstractPlan; +import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.distribute.DistributedPlan; import org.apache.doris.nereids.trees.plans.distribute.FragmentIdMapping; +import org.apache.doris.nereids.trees.plans.physical.PhysicalPlan; import org.apache.doris.nereids.trees.plans.physical.PhysicalRelation; import org.apache.doris.planner.Planner; @@ -45,6 +48,8 @@ import java.io.FileOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.zip.Deflater; @@ -107,6 +112,10 @@ public class Profile { // Profile size is the size of profile file private long profileSize = 0; + private PhysicalPlan physicalPlan; + public Map rowsProducedMap = new HashMap<>(); + private List physicalRelations = new ArrayList<>(); + // Need default constructor for read from storage public Profile() {} @@ -273,20 
+282,8 @@ public synchronized void updateSummary(Map summaryInfo, boolean if (planner instanceof NereidsPlanner) { NereidsPlanner nereidsPlanner = ((NereidsPlanner) planner); - StringBuilder builder = new StringBuilder(); - builder.append("\n"); - builder.append(nereidsPlanner.getPhysicalPlan() - .treeString()); - builder.append("\n"); - for (PhysicalRelation relation : nereidsPlanner.getPhysicalRelations()) { - if (relation.getStats() != null) { - builder.append(relation).append("\n") - .append(relation.getStats().printColumnStats()); - } - } - summaryInfo.put(SummaryProfile.PHYSICAL_PLAN, - builder.toString().replace("\n", "\n ")); - + physicalPlan = nereidsPlanner.getPhysicalPlan(); + physicalRelations.addAll(nereidsPlanner.getPhysicalRelations()); FragmentIdMapping distributedPlans = nereidsPlanner.getDistributedPlans(); if (distributedPlans != null) { summaryInfo.put(SummaryProfile.DISTRIBUTED_PLAN, @@ -414,15 +411,43 @@ public void getExecutionProfileContent(StringBuilder builder) { // Only generate merged profile for select, insert into select. // Not support broker load now. + RuntimeProfile mergedProfile = null; if (this.profileLevel == MergedProfileLevel && this.executionProfiles.size() == 1) { try { - builder.append("\n MergedProfile \n"); - this.executionProfiles.get(0).getAggregatedFragmentsProfile(planNodeMap).prettyPrint(builder, " "); + mergedProfile = this.executionProfiles.get(0).getAggregatedFragmentsProfile(planNodeMap); + this.rowsProducedMap.putAll(mergedProfile.rowsProducedMap); + if (physicalPlan != null) { + updateActualRowCountOnPhysicalPlan(physicalPlan); + } } catch (Throwable aggProfileException) { LOG.warn("build merged simple profile {} failed", this.id, aggProfileException); + } + } + + if (physicalPlan != null) { + builder.append("\nPhysical Plan \n"); + StringBuilder physcialPlanBuilder = new StringBuilder(); + physcialPlanBuilder.append(physicalPlan.treeString()); + physcialPlanBuilder.append("\n"); + for (PhysicalRelation relation : physicalRelations) { + if (relation.getStats() != null) { + physcialPlanBuilder.append(relation).append("\n") + .append(relation.getStats().printColumnStats()); + } + } + builder.append( + physcialPlanBuilder.toString().replace("\n", "\n ")); + } + + if (this.profileLevel == MergedProfileLevel && this.executionProfiles.size() == 1) { + builder.append("\nMergedProfile \n"); + if (mergedProfile != null) { + mergedProfile.prettyPrint(builder, " "); + } else { builder.append("build merged simple profile failed"); } } + try { // For load task, they will have multiple execution_profiles. 
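Illustrative sketch (not part of the patch): the merged profile now carries rowsProducedMap, keyed by the Nereids node id rendered as a decimal string, and the Profile code above looks up each plan node's actual row count by that key. A minimal, self-contained form of the lookup; the recursive walk over the whole tree is the updateActualRowCountOnPhysicalPlan helper that follows.

import org.apache.doris.nereids.trees.plans.AbstractPlan;

import java.util.Map;

class ActualRowsSketch {
    // rowsProduced: aggregated RowsProduced counters keyed by String.valueOf(nereids id)
    static void annotate(AbstractPlan node, Map<String, Long> rowsProduced) {
        Long rows = rowsProduced.get(String.valueOf(node.getId()));
        if (rows != null) {
            node.updateActualRowCount(rows); // method added to AbstractPlan in this patch
        }
    }
}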
for (ExecutionProfile executionProfile : executionProfiles) { @@ -646,4 +671,25 @@ public boolean shouldBeRemoveFromMemory() { return true; } + + public PhysicalPlan getPhysicalPlan() { + return physicalPlan; + } + + public void setPhysicalPlan(PhysicalPlan physicalPlan) { + this.physicalPlan = physicalPlan; + } + + private void updateActualRowCountOnPhysicalPlan(Plan plan) { + if (plan == null || rowsProducedMap.isEmpty()) { + return; + } + Long actualRowCount = rowsProducedMap.get(String.valueOf(((AbstractPlan) plan).getId())); + if (actualRowCount != null) { + ((AbstractPlan) plan).updateActualRowCount(actualRowCount); + } + for (Plan child : plan.children()) { + updateActualRowCountOnPhysicalPlan(child); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java index df4e73be0483ed..20e41b18d6955f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/profile/SummaryProfile.java @@ -66,7 +66,6 @@ public class SummaryProfile { public static final String PARALLEL_FRAGMENT_EXEC_INSTANCE = "Parallel Fragment Exec Instance Num"; public static final String TRACE_ID = "Trace ID"; public static final String WORKLOAD_GROUP = "Workload Group"; - public static final String PHYSICAL_PLAN = "Physical Plan"; public static final String DISTRIBUTED_PLAN = "Distributed Plan"; public static final String SYSTEM_MESSAGE = "System Message"; public static final String EXECUTED_BY_FRONTEND = "Executed By Frontend"; @@ -129,7 +128,6 @@ public class SummaryProfile { START_TIME, END_TIME, TOTAL_TIME, TASK_STATE, USER, DEFAULT_CATALOG, DEFAULT_DB, SQL_STATEMENT); public static final ImmutableList SUMMARY_KEYS = new ImmutableList.Builder() .addAll(SUMMARY_CAPTIONS) - .add(PHYSICAL_PLAN) .add(DISTRIBUTED_PLAN) .build(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/publish/TopicPublisherThread.java b/fe/fe-core/src/main/java/org/apache/doris/common/publish/TopicPublisherThread.java index 74cefeca4d907e..797b0893936513 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/publish/TopicPublisherThread.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/publish/TopicPublisherThread.java @@ -85,10 +85,13 @@ protected void runAfterCatalogReady() { } AckResponseHandler handler = new AckResponseHandler(nodesToPublish); for (Backend be : nodesToPublish) { - executor.submit(new TopicPublishWorker(request, be, handler)); + if (be.isAlive()) { + executor.submit(new TopicPublishWorker(request, be, handler)); + } } try { int timeoutMs = Config.publish_topic_info_interval_ms / 3 * 2; + timeoutMs = timeoutMs <= 0 ? 
3000 : timeoutMs; if (!handler.awaitAllInMs(timeoutMs)) { Backend[] backends = handler.pendingNodes(); if (backends.length > 0) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java index 60207b49172ba0..3ffc303a6db89d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/RuntimeProfile.java @@ -40,12 +40,15 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Formatter; +import java.util.HashMap; import java.util.LinkedList; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeSet; import java.util.concurrent.locks.ReentrantReadWriteLock; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * It is accessed by two kinds of thread, one is to create this RuntimeProfile @@ -100,6 +103,8 @@ public class RuntimeProfile { @SerializedName(value = "nodeid") private int nodeid = -1; + public Map rowsProducedMap = new HashMap<>(); + public RuntimeProfile() { init(); } @@ -494,6 +499,7 @@ public static void mergeProfiles(List profiles, // RuntimeProfile has at least one counter named TotalTime, should exclude it. if (newCreatedMergedChildProfile.counterMap.size() > 1) { simpleProfile.addChildWithCheck(newCreatedMergedChildProfile, planNodeMap); + simpleProfile.rowsProducedMap.putAll(newCreatedMergedChildProfile.rowsProducedMap); } } } @@ -504,6 +510,12 @@ private static void mergeCounters(String parentCounterName, List return; } RuntimeProfile templateProfile = profiles.get(0); + Pattern pattern = Pattern.compile("nereids_id=(\\d+)"); + Matcher matcher = pattern.matcher(templateProfile.getName()); + String nereidsId = null; + if (matcher.find()) { + nereidsId = matcher.group(1); + } Set childCounterSet = templateProfile.childCounterMap.get(parentCounterName); if (childCounterSet == null) { return; @@ -517,6 +529,9 @@ private static void mergeCounters(String parentCounterName, List Counter orgCounter = profile.counterMap.get(childCounterName); aggCounter.addCounter(orgCounter); } + if (nereidsId != null && childCounterName.equals("RowsProduced")) { + simpleProfile.rowsProducedMap.put(nereidsId, aggCounter.sum.getValue()); + } if (simpleProfile.counterMap.containsKey(parentCounterName)) { simpleProfile.addCounter(childCounterName, aggCounter, parentCounterName); } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java index ac610237cddfcc..6183c277c1bdf5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSTransaction.java @@ -1484,6 +1484,8 @@ public void doCommit() { runS3cleanWhenSuccess(); doAddPartitionsTask(); doUpdateStatisticsTasks(); + //delete write path + pruneAndDeleteStagingDirectories(); doNothing(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/StreamLoadRecordMgr.java b/fe/fe-core/src/main/java/org/apache/doris/load/StreamLoadRecordMgr.java index 7f1d33bd649736..3a38641036fb5f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/StreamLoadRecordMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/StreamLoadRecordMgr.java @@ -30,8 +30,8 @@ import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.mysql.privilege.PrivPredicate; import 
org.apache.doris.persist.gson.GsonUtils; -import org.apache.doris.plugin.audit.AuditEvent; -import org.apache.doris.plugin.audit.AuditEvent.EventType; +import org.apache.doris.plugin.AuditEvent; +import org.apache.doris.plugin.AuditEvent.EventType; import org.apache.doris.plugin.audit.StreamLoadAuditEvent; import org.apache.doris.qe.ConnectContext; import org.apache.doris.system.Backend; diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java index 7b78efc7a50cd5..12aa673eabf16f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/loadv2/BulkLoadJob.java @@ -46,7 +46,7 @@ import org.apache.doris.load.EtlJobType; import org.apache.doris.load.FailMsg; import org.apache.doris.persist.gson.GsonPostProcessable; -import org.apache.doris.plugin.audit.AuditEvent; +import org.apache.doris.plugin.AuditEvent; import org.apache.doris.plugin.audit.LoadAuditEvent; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.OriginStatement; diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java index 4e01f3a5058774..fbb3aab4ebdcd2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MasterImpl.java @@ -267,7 +267,7 @@ private void finishCreateReplica(AgentTask task, TFinishTaskRequest request) { // this should be called before 'countDownLatch()' Env.getCurrentSystemInfo().updateBackendReportVersion(task.getBackendId(), - request.getReportVersion(), task.getDbId(), task.getTableId()); + request.getReportVersion(), task.getDbId(), task.getTableId(), true); createReplicaTask.countDownLatch(task.getBackendId(), task.getSignature()); if (LOG.isDebugEnabled()) { @@ -383,7 +383,7 @@ private void finishRealtimePush(AgentTask task, TFinishTaskRequest request) thro // should be done before addReplicaPersistInfos and countDownLatch long reportVersion = request.getReportVersion(); Env.getCurrentSystemInfo().updateBackendReportVersion(task.getBackendId(), reportVersion, - task.getDbId(), task.getTableId()); + task.getDbId(), task.getTableId(), true); List tabletIds = finishTabletInfos.stream().map( tTabletInfo -> tTabletInfo.getTabletId()).collect(Collectors.toList()); @@ -515,7 +515,7 @@ private void finishPublishVersion(AgentTask task, TFinishTaskRequest request) { // report version is required. here we check if set, for compatibility. 
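Relating to the RuntimeProfile.mergeCounters change above: the Nereids node id is recovered from the template profile name with the regex "nereids_id=(\d+)" and used to key the aggregated RowsProduced value. A self-contained sketch of that parsing step; the operator name in the comment is only an assumed example, since the patch relies solely on the "nereids_id=<n>" substring being present.

import java.util.regex.Matcher;
import java.util.regex.Pattern;

class NereidsIdSketch {
    private static final Pattern NEREIDS_ID = Pattern.compile("nereids_id=(\\d+)");

    // e.g. "OLAP_SCAN_OPERATOR (id=0, nereids_id=42)" -> "42"; null when the marker is absent
    static String extractNereidsId(String mergedProfileName) {
        Matcher m = NEREIDS_ID.matcher(mergedProfileName);
        return m.find() ? m.group(1) : null;
    }
}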
long reportVersion = request.getReportVersion(); Env.getCurrentSystemInfo().updateBackendReportVersion( - task.getBackendId(), reportVersion, task.getDbId(), task.getTableId()); + task.getBackendId(), reportVersion, task.getDbId(), task.getTableId(), true); } PublishVersionTask publishVersionTask = (PublishVersionTask) task; @@ -545,7 +545,7 @@ private void finishClone(AgentTask task, TFinishTaskRequest request) { if (request.isSetReportVersion()) { long reportVersion = request.getReportVersion(); Env.getCurrentSystemInfo().updateBackendReportVersion( - task.getBackendId(), reportVersion, task.getDbId(), task.getTableId()); + task.getBackendId(), reportVersion, task.getDbId(), task.getTableId(), true); } Env.getCurrentEnv().getTabletScheduler().finishCloneTask(cloneTask, request); } else { @@ -628,7 +628,7 @@ private void finishAlterTask(AgentTask task, TFinishTaskRequest request) { if (request.isSetReportVersion()) { long reportVersion = request.getReportVersion(); Env.getCurrentSystemInfo().updateBackendReportVersion( - task.getBackendId(), reportVersion, task.getDbId(), task.getTableId()); + task.getBackendId(), reportVersion, task.getDbId(), task.getTableId(), true); } } catch (MetaNotFoundException e) { LOG.warn("failed to handle finish alter task: {}, {}", task.getSignature(), e.getMessage()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/MetaHelper.java b/fe/fe-core/src/main/java/org/apache/doris/master/MetaHelper.java index e4fd5cacf71262..cf63a82cd870d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/MetaHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/MetaHelper.java @@ -18,6 +18,7 @@ package org.apache.doris.master; import org.apache.doris.catalog.Env; +import org.apache.doris.common.Config; import org.apache.doris.common.io.IOUtils; import org.apache.doris.common.util.HttpURLUtil; import org.apache.doris.httpv2.entity.ResponseBody; @@ -32,7 +33,6 @@ import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStreamReader; @@ -47,6 +47,8 @@ public class MetaHelper { public static final String X_IMAGE_MD5 = "X-Image-Md5"; private static final int BUFFER_BYTES = 8 * 1024; private static final int CHECKPOINT_LIMIT_BYTES = 30 * 1024 * 1024; + private static final String VALID_FILENAME_REGEX = "^image\\.\\d+(\\.part)?$"; + public static File getMasterImageDir() { String metaDir = Env.getCurrentEnv().getImageDir(); @@ -57,24 +59,89 @@ public static int getLimit() { return CHECKPOINT_LIMIT_BYTES; } + private static void completeCheck(File dir, File file, File newFile) throws IOException { + if (!Config.meta_helper_security_mode) { + return; + } + String dirPath = dir.getCanonicalPath(); // Get the canonical path of the directory + String filePath = file.getCanonicalPath(); // Get the canonical path of the original file + String newFilePath = newFile.getCanonicalPath(); // Get the canonical path of the new file + + // Ensure both file paths are within the specified directory to prevent path traversal attacks + if (!filePath.startsWith(dirPath) || !newFilePath.startsWith(dirPath)) { + throw new SecurityException("File path traversal attempt detected."); + } + + // Ensure the original file exists and is a valid file to avoid renaming a non-existing file + if (!file.exists() || !file.isFile()) { + throw new IOException("Source file does not exist or is not a valid file."); + } + + } + // rename the 
.PART_SUFFIX file to filename public static File complete(String filename, File dir) throws IOException { - File file = new File(dir, filename + MetaHelper.PART_SUFFIX); - File newFile = new File(dir, filename); + // Validate that the filename does not contain illegal path elements + checkIsValidFileName(filename); + + File file = new File(dir, filename + MetaHelper.PART_SUFFIX); // Original file with a specific suffix + File newFile = new File(dir, filename); // Target file without the suffix + + completeCheck(dir, file, newFile); + + // Attempt to rename the file. If it fails, throw an exception if (!file.renameTo(newFile)) { - throw new IOException("Complete file" + filename + " failed"); + throw new IOException("Complete file " + filename + " failed"); } - return newFile; + + return newFile; // Return the newly renamed file } - public static OutputStream getOutputStream(String filename, File dir) - throws FileNotFoundException { + public static File getFile(String filename, File dir) throws IOException { + checkIsValidFileName(filename); File file = new File(dir, filename + MetaHelper.PART_SUFFIX); - return new FileOutputStream(file); + checkFile(dir, file); + return file; + } + + private static void checkFile(File dir, File file) throws IOException { + if (!Config.meta_helper_security_mode) { + return; + } + String dirPath = dir.getCanonicalPath(); + String filePath = file.getCanonicalPath(); + + if (!filePath.startsWith(dirPath)) { + throw new SecurityException("File path traversal attempt detected."); + } } - public static File getFile(String filename, File dir) { - return new File(dir, filename + MetaHelper.PART_SUFFIX); + + private static void checkIsValidFileName(String filename) { + if (!Config.meta_helper_security_mode) { + return; + } + if (!filename.matches(VALID_FILENAME_REGEX)) { + throw new IllegalArgumentException("Invalid filename"); + } + } + + private static void checkFile(File file) throws IOException { + if (!Config.meta_helper_security_mode) { + return; + } + if (!file.getAbsolutePath().startsWith(file.getCanonicalFile().getParent())) { + throw new IllegalArgumentException("Invalid file path"); + } + + File parentDir = file.getParentFile(); + if (!parentDir.canWrite()) { + throw new IOException("No write permission in directory: " + parentDir); + } + + if (file.exists() && !file.delete()) { + throw new IOException("Failed to delete existing file: " + file); + } + checkIsValidFileName(file.getName()); } public static ResponseBody doGet(String url, int timeout, Class clazz) throws IOException { @@ -88,6 +155,8 @@ public static ResponseBody doGet(String url, int timeout, Class clazz) th public static void getRemoteFile(String urlStr, int timeout, File file) throws IOException { HttpURLConnection conn = null; + checkFile(file); + boolean md5Matched = true; OutputStream out = new FileOutputStream(file); try { conn = HttpURLUtil.getConnectionWithNodeIdent(urlStr); @@ -117,6 +186,7 @@ public static void getRemoteFile(String urlStr, int timeout, File file) if (remoteMd5 != null) { String localMd5 = DigestUtils.md5Hex(new FileInputStream(file)); if (!remoteMd5.equals(localMd5)) { + md5Matched = false; throw new IOException("Unexpected image md5, expected: " + remoteMd5 + ", actual: " + localMd5); } } @@ -127,6 +197,9 @@ public static void getRemoteFile(String urlStr, int timeout, File file) if (out != null) { out.close(); } + if (!md5Matched && file.exists() && Config.meta_helper_security_mode) { + file.delete(); + } } } diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index bce5825c5cace6..f7702a495544d2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -178,11 +178,13 @@ public TMasterResult handleReport(TReportRequest request) throws TException { tablets = request.getTablets(); reportVersion = request.getReportVersion(); reportType = ReportType.TABLET; + Env.getCurrentSystemInfo().updateBackendReportVersion(beId, reportVersion, -1L, -1L, false); } else if (request.isSetTabletList()) { // the 'tablets' member will be deprecated in future. tablets = buildTabletMap(request.getTabletList()); reportVersion = request.getReportVersion(); reportType = ReportType.TABLET; + Env.getCurrentSystemInfo().updateBackendReportVersion(beId, reportVersion, -1L, -1L, false); } if (request.isSetPartitionsVersion()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java index 63bbfc2e037084..fa17ed766661d0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java @@ -18,6 +18,9 @@ package org.apache.doris.mtmv; import org.apache.doris.catalog.MTMV; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.Partition; +import org.apache.doris.common.AnalysisException; import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; @@ -74,6 +77,46 @@ public String toString() { } public void compatible(MTMV mtmv) { + try { + // add partitionId to old snapshots to resolve the insert overwrite problem + compatiblePartitions(mtmv); + } catch (Throwable e) { + LOG.warn("MTMV compatiblePartitions failed, mtmv: {}", mtmv.getName(), e); + } + try { + // change table id to BaseTableInfo + compatibleTables(mtmv); + } catch (Throwable e) { + LOG.warn("MTMV compatibleTables failed, mtmv: {}", mtmv.getName(), e); + } + } + + private void compatiblePartitions(MTMV mtmv) throws AnalysisException { + if (!checkHasDataWithoutPartitionId()) { + return; + } + OlapTable relatedTable = (OlapTable) mtmv.getMvPartitionInfo().getRelatedTable(); + for (Entry entry : partitions.entrySet()) { + MTMVVersionSnapshot versionSnapshot = (MTMVVersionSnapshot) entry.getValue(); + if (versionSnapshot.getId() == 0) { + Partition partition = relatedTable.getPartition(entry.getKey()); + if (partition != null) { + versionSnapshot.setId(partition.getId()); + } + } + } + } + + private boolean checkHasDataWithoutPartitionId() { + for (MTMVSnapshotIf snapshot : partitions.values()) { + if (snapshot instanceof MTMVVersionSnapshot && ((MTMVVersionSnapshot) snapshot).getId() == 0) { + return true; + } + } + return false; + } + + private void compatibleTables(MTMV mtmv) { if (tables.size() == tablesInfo.size()) { return; } @@ -87,7 +130,7 @@ public void compatible(MTMV mtmv) { if (tableInfo.isPresent()) { tablesInfo.put(tableInfo.get(), entry.getValue()); } else { - LOG.warn("MTMV compatible failed, tableId: {}, relationTables: {}", entry.getKey(), + LOG.warn("MTMV compatibleTables failed, tableId: {}, relationTables: {}", entry.getKey(), relation.getBaseTablesOneLevel()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVVersionSnapshot.java 
b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVVersionSnapshot.java index 0eb7860bc54ee0..2440649462ebf3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVVersionSnapshot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVVersionSnapshot.java @@ -24,10 +24,30 @@ public class MTMVVersionSnapshot implements MTMVSnapshotIf { @SerializedName("v") private long version; + // The partition version after insert overwrite is 1, + // which may cause the upper-level materialized view to be unaware of changes to the data at the bottom level. + // However, the partition ID changes after an overwrite, so the partition ID is recorded as well. + // This id is only set for partitions; for a table snapshot it is always 0 + @SerializedName("id") + private long id; + public MTMVVersionSnapshot(long version) { this.version = version; } + public MTMVVersionSnapshot(long version, long id) { + this.version = version; + this.id = id; + } + + public long getId() { + return id; + } + + public void setId(long id) { + this.id = id; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -37,18 +57,19 @@ public boolean equals(Object o) { return false; } MTMVVersionSnapshot that = (MTMVVersionSnapshot) o; - return version == that.version; + return version == that.version && id == that.id; } @Override public int hashCode() { - return Objects.hashCode(version); + return Objects.hashCode(version, id); } @Override public String toString() { return "MTMVVersionSnapshot{" + "version=" + version + + ", id=" + id + '}'; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index a304fe36062c80..bd74c5835e287f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -564,9 +564,10 @@ public String getExplainString(ExplainOptions explainOptions) { String plan = ""; String mvSummary = ""; if (this.getPhysicalPlan() != null && cascadesContext != null) { - mvSummary = "\n\n========== MATERIALIZATIONS ==========\n" - + MaterializationContext.toSummaryString(cascadesContext.getMaterializationContexts(), - this.getPhysicalPlan()); + mvSummary = cascadesContext.getMaterializationContexts().isEmpty() ? 
"" : + "\n\n========== MATERIALIZATIONS ==========\n" + + MaterializationContext.toSummaryString(cascadesContext.getMaterializationContexts(), + this.getPhysicalPlan()); } switch (explainLevel) { case PARSED_PLAN: @@ -625,9 +626,10 @@ public String getExplainString(ExplainOptions explainOptions) { default: plan = super.getExplainString(explainOptions); plan += mvSummary; + plan += "\n\n\n========== STATISTICS ==========\n"; if (statementContext != null) { if (statementContext.isHasUnknownColStats()) { - plan += "\n\nStatistics\n planed with unknown column statistics\n"; + plan += "planed with unknown column statistics\n"; } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java index 2a34bc3ca91dd2..28456041f7d3bb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/glue/translator/PhysicalPlanTranslator.java @@ -720,7 +720,6 @@ public PlanFragment visitPhysicalJdbcScan(PhysicalJdbcScan jdbcScan, PlanTransla JdbcScanNode jdbcScanNode = new JdbcScanNode(context.nextPlanNodeId(), tupleDescriptor, table instanceof JdbcExternalTable); jdbcScanNode.setNereidsId(jdbcScan.getId()); - jdbcScanNode.addConjuncts(translateToLegacyConjuncts(jdbcScan.getConjuncts())); Utils.execWithUncheckedException(jdbcScanNode::init); context.addScanNode(jdbcScanNode, jdbcScan); context.getRuntimeTranslator().ifPresent( @@ -744,7 +743,6 @@ public PlanFragment visitPhysicalOdbcScan(PhysicalOdbcScan odbcScan, PlanTransla OdbcScanNode odbcScanNode = new OdbcScanNode(context.nextPlanNodeId(), tupleDescriptor, (OdbcTable) table); odbcScanNode.setNereidsId(odbcScan.getId()); - odbcScanNode.addConjuncts(translateToLegacyConjuncts(odbcScan.getConjuncts())); Utils.execWithUncheckedException(odbcScanNode::init); context.addScanNode(odbcScanNode, odbcScan); context.getRuntimeTranslator().ifPresent( @@ -1258,6 +1256,12 @@ public PlanFragment visitPhysicalFilter(PhysicalFilter filter, P MultiCastDataSink multiCastDataSink = (MultiCastDataSink) inputFragment.getSink(); DataStreamSink dataStreamSink = multiCastDataSink.getDataStreamSinks().get( multiCastDataSink.getDataStreamSinks().size() - 1); + if (CollectionUtils.isNotEmpty(dataStreamSink.getConjuncts()) + || CollectionUtils.isNotEmpty(dataStreamSink.getProjections())) { + String errMsg = "generate invalid plan \n" + filter.treeString(); + LOG.warn(errMsg); + throw new AnalysisException(errMsg); + } filter.getConjuncts().stream() .map(e -> ExpressionTranslator.translate(e, context)) .forEach(dataStreamSink::addConjunct); @@ -1265,24 +1269,28 @@ public PlanFragment visitPhysicalFilter(PhysicalFilter filter, P } PlanNode planNode = inputFragment.getPlanRoot(); - Plan child = filter.child(); - while (child instanceof PhysicalLimit) { - child = ((PhysicalLimit) child).child(); - } - if (planNode instanceof ExchangeNode || planNode instanceof SortNode || planNode instanceof UnionNode - // this means we have filter->limit->project, need a SelectNode - || child instanceof PhysicalProject) { - // the three nodes don't support conjuncts, need create a SelectNode to filter data + // the three nodes don't support conjuncts, need create a SelectNode to filter data + if (planNode instanceof ExchangeNode || planNode instanceof SortNode || planNode instanceof UnionNode) { SelectNode selectNode = new 
SelectNode(context.nextPlanNodeId(), planNode); selectNode.setNereidsId(filter.getId()); addConjunctsToPlanNode(filter, selectNode, context); addPlanRoot(inputFragment, selectNode, filter); } else { if (!(filter.child(0) instanceof AbstractPhysicalJoin)) { + // already have filter on this node, we should not override it, so need a new node + if (!planNode.getConjuncts().isEmpty() + // already have project on this node, filter need execute after project, so need a new node + || CollectionUtils.isNotEmpty(planNode.getProjectList()) + // already have limit on this node, filter need execute after limit, so need a new node + || planNode.hasLimit()) { + planNode = new SelectNode(context.nextPlanNodeId(), planNode); + planNode.setNereidsId(filter.getId()); + addPlanRoot(inputFragment, planNode, filter); + } addConjunctsToPlanNode(filter, planNode, context); - updateLegacyPlanIdToPhysicalPlan(inputFragment.getPlanRoot(), filter); } } + updateLegacyPlanIdToPhysicalPlan(inputFragment.getPlanRoot(), filter); // in ut, filter.stats may be null if (filter.getStats() != null) { inputFragment.getPlanRoot().setCardinalityAfterFilter((long) filter.getStats().getRowCount()); @@ -1866,8 +1874,15 @@ public PlanFragment visitPhysicalProject(PhysicalProject project } PlanFragment inputFragment = project.child(0).accept(this, context); - PlanNode inputPlanNode = inputFragment.getPlanRoot(); + // this means already have project on this node, filter need execute after project, so need a new node + if (CollectionUtils.isNotEmpty(inputPlanNode.getProjectList())) { + SelectNode selectNode = new SelectNode(context.nextPlanNodeId(), inputPlanNode); + selectNode.setNereidsId(project.getId()); + addPlanRoot(inputFragment, selectNode, project); + inputPlanNode = selectNode; + } + List projectionExprs = null; List allProjectionExprs = Lists.newArrayList(); List slots = null; @@ -1905,6 +1920,11 @@ public PlanFragment visitPhysicalProject(PhysicalProject project MultiCastDataSink multiCastDataSink = (MultiCastDataSink) inputFragment.getSink(); DataStreamSink dataStreamSink = multiCastDataSink.getDataStreamSinks().get( multiCastDataSink.getDataStreamSinks().size() - 1); + if (CollectionUtils.isNotEmpty(dataStreamSink.getProjections())) { + String errMsg = "generate invalid plan \n" + project.treeString(); + LOG.warn(errMsg); + throw new AnalysisException(errMsg); + } TupleDescriptor projectionTuple = generateTupleDesc(slots, null, context); dataStreamSink.setProjections(projectionExprs); dataStreamSink.setOutputTupleDesc(projectionTuple); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 324ab808226930..732b41acc91f1e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -327,6 +327,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsSub; +import org.apache.doris.nereids.trees.expressions.functions.scalar.SessionUser; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeekCeil; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeekFloor; import org.apache.doris.nereids.trees.expressions.functions.scalar.WeeksAdd; @@ -593,7 +594,7 @@ public LogicalPlan 
visitInsertTable(InsertTableContext ctx) { isAutoDetect, isOverwrite, ConnectContext.get().getSessionVariable().isEnableUniqueKeyPartialUpdate(), - DMLCommandType.INSERT, + ctx.tableId == null ? DMLCommandType.INSERT : DMLCommandType.GROUP_COMMIT, plan); Optional cte = Optional.empty(); if (ctx.cte() != null) { @@ -2049,6 +2050,11 @@ public Expression visitCurrentUser(DorisParser.CurrentUserContext ctx) { return new CurrentUser().alias("CURRENT_USER"); } + @Override + public Expression visitSessionUser(DorisParser.SessionUserContext ctx) { + return new SessionUser().alias("SESSION_USER"); + } + @Override public Expression visitDoublePipes(DorisParser.DoublePipesContext ctx) { return ParserUtils.withOrigin(ctx, () -> { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java index c9e7f07f5d08e3..6d8ad94242b53c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java @@ -68,6 +68,7 @@ import org.apache.doris.nereids.types.DataType; import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.types.coercion.CharacterType; +import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.RelationUtil; import org.apache.doris.nereids.util.TypeCoercionUtils; @@ -126,7 +127,8 @@ private Plan bindOlapTableSink(MatchingContext> ctx) { && table.getSequenceMapCol() != null && sink.getColNames().contains(table.getSequenceMapCol()); Pair, Integer> bindColumnsResult = - bindTargetColumns(table, sink.getColNames(), childHasSeqCol, needExtraSeqCol); + bindTargetColumns(table, sink.getColNames(), childHasSeqCol, needExtraSeqCol, + sink.getDMLCommandType() == DMLCommandType.GROUP_COMMIT); List bindColumns = bindColumnsResult.first; int extraColumnsNum = bindColumnsResult.second; @@ -176,8 +178,12 @@ private Plan bindOlapTableSink(MatchingContext> ctx) { .filter(col -> col.getName().equalsIgnoreCase(table.getSequenceMapCol())) .findFirst(); } else { - if (!sink.getColNames().isEmpty()) { - if (sink.getColNames().stream() + // ATTN: must use bindColumns here, because an insert from the group_commit tvf submitted by a BE + // does not follow any column list of the target table, yet the sink's child contains data for all + // columns, including invisible ones. This differs from other insert actions, which by default only carry visible column data. + if (!bindColumns.isEmpty()) { + if (bindColumns.stream() + .map(Column::getName) .anyMatch(c -> c.equalsIgnoreCase(Column.SEQUENCE_COL))) { haveInputSeqCol = true; // case2.a } // else case2.b @@ -205,7 +211,8 @@ private Plan bindOlapTableSink(MatchingContext> ctx) { Map columnToOutput = getColumnToOutput( ctx, table, isPartialUpdate, boundSink, child); - LogicalProject fullOutputProject = getOutputProjectByCoercion(table.getFullSchema(), child, columnToOutput); + LogicalProject fullOutputProject = getOutputProjectByCoercion( + table.getFullSchema(), child, columnToOutput); return boundSink.withChildAndUpdateOutput(fullOutputProject); } @@ -267,15 +274,14 @@ private static Map getColumnToOutput( // we need to insert all the columns of the target table // although some columns are not mentions. // so we add a projects to supply the default value. 
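The getColumnToOutput hunk that follows applies one pattern repeatedly: whatever expression feeds a target column (the child output, a default value, or a NULL literal) is cast to the column's catalog type, wrapped in an Alias, and the alias slot is recorded so generated-column and materialized-view expressions can later be rewritten to reference it. A condensed sketch of that pattern, assuming the same Doris classes used in the hunk; the three map parameters mirror columnToOutput, columnToReplaced and replaceMap in the patch.

import org.apache.doris.catalog.Column;
import org.apache.doris.nereids.trees.expressions.Alias;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.NamedExpression;
import org.apache.doris.nereids.trees.expressions.Slot;
import org.apache.doris.nereids.types.DataType;
import org.apache.doris.nereids.util.TypeCoercionUtils;

import java.util.Map;

class BindSinkSketch {
    static void bindColumn(Column column, Expression value,
            Map<String, NamedExpression> columnToOutput,
            Map<String, Slot> columnToReplaced,
            Map<Slot, Expression> replaceMap) {
        // cast the supplied value to the column's catalog type and name it after the column
        Alias output = new Alias(
                TypeCoercionUtils.castIfNotSameType(value, DataType.fromCatalogType(column.getType())),
                column.getName());
        columnToOutput.put(column.getName(), output);
        // remember the alias slot so later expressions can be rewritten to point at it
        columnToReplaced.put(column.getName(), output.toSlot());
        replaceMap.put(output.toSlot(), output.child());
    }
}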
- Map columnToChildOutput = Maps.newHashMap(); for (int i = 0; i < child.getOutput().size(); ++i) { columnToChildOutput.put(boundSink.getCols().get(i), child.getOutput().get(i)); } - Map columnToOutput = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + Map columnToReplaced = Maps.newTreeMap(String.CASE_INSENSITIVE_ORDER); + Map replaceMap = Maps.newHashMap(); NereidsParser expressionParser = new NereidsParser(); - List generatedColumns = Lists.newArrayList(); List materializedViewColumn = Lists.newArrayList(); // generate slots not mentioned in sql, mv slots and shaded slots. @@ -291,7 +297,12 @@ private static Map getColumnToOutput( // do not process explicitly use DEFAULT value here: // insert into table t values(DEFAULT) && !(columnToChildOutput.get(column) instanceof DefaultValueSlot)) { - columnToOutput.put(column.getName(), columnToChildOutput.get(column)); + Alias output = new Alias(TypeCoercionUtils.castIfNotSameType( + columnToChildOutput.get(column), DataType.fromCatalogType(column.getType())), + column.getName()); + columnToOutput.put(column.getName(), output); + columnToReplaced.put(column.getName(), output.toSlot()); + replaceMap.put(output.toSlot(), output.child()); } else { if (table instanceof OlapTable && ((OlapTable) table).hasSequenceCol() && column.getName().equals(Column.SEQUENCE_COL) @@ -312,6 +323,8 @@ private static Map getColumnToOutput( seqColumn = new Alias(seqColumn, column.getName()); } columnToOutput.put(column.getName(), seqColumn); + columnToReplaced.put(column.getName(), seqColumn.toSlot()); + replaceMap.put(seqColumn.toSlot(), seqColumn.child(0)); } } else if (isPartialUpdate) { // If the current load is a partial update, the values of unmentioned @@ -328,9 +341,12 @@ private static Map getColumnToOutput( Expression defualtValueExpression = ExpressionAnalyzer.analyzeFunction( boundSink, ctx.cascadesContext, unboundFunctionDefaultValue ); - columnToOutput.put(column.getName(), - new Alias(defualtValueExpression, column.getName()) - ); + Alias output = new Alias(TypeCoercionUtils.castIfNotSameType( + defualtValueExpression, DataType.fromCatalogType(column.getType())), + column.getName()); + columnToOutput.put(column.getName(), output); + columnToReplaced.put(column.getName(), output.toSlot()); + replaceMap.put(output.toSlot(), output.child()); } else { continue; } @@ -343,10 +359,11 @@ private static Map getColumnToOutput( } // Otherwise, the unmentioned columns should be filled with default values // or null values - columnToOutput.put(column.getName(), new Alias( - new NullLiteral(DataType.fromCatalogType(column.getType())), - column.getName() - )); + Alias output = new Alias(new NullLiteral(DataType.fromCatalogType(column.getType())), + column.getName()); + columnToOutput.put(column.getName(), output); + columnToReplaced.put(column.getName(), output.toSlot()); + replaceMap.put(output.toSlot(), output.child()); } else { try { // it comes from the original planner, if default value expression is @@ -365,8 +382,12 @@ private static Map getColumnToOutput( if (defualtValueExpression instanceof Alias) { defualtValueExpression = ((Alias) defualtValueExpression).child(); } - columnToOutput.put(column.getName(), - new Alias(defualtValueExpression, column.getName())); + Alias output = new Alias((TypeCoercionUtils.castIfNotSameType( + defualtValueExpression, DataType.fromCatalogType(column.getType()))), + column.getName()); + columnToOutput.put(column.getName(), output); + columnToReplaced.put(column.getName(), output.toSlot()); + replaceMap.put(output.toSlot(), 
output.child()); } } catch (Exception e) { throw new AnalysisException(e.getMessage(), e.getCause()); @@ -380,13 +401,16 @@ private static Map getColumnToOutput( for (Column column : generatedColumns) { GeneratedColumnInfo info = column.getGeneratedColumnInfo(); Expression parsedExpression = new NereidsParser().parseExpression(info.getExpr().toSqlWithoutTbl()); - Expression boundExpression = new CustomExpressionAnalyzer(boundSink, ctx.cascadesContext, columnToOutput) + Expression boundExpression = new CustomExpressionAnalyzer(boundSink, ctx.cascadesContext, columnToReplaced) .analyze(parsedExpression); if (boundExpression instanceof Alias) { boundExpression = ((Alias) boundExpression).child(); } - NamedExpression slot = new Alias(boundExpression, info.getExprSql()); - columnToOutput.put(column.getName(), slot); + boundExpression = ExpressionUtils.replace(boundExpression, replaceMap); + Alias output = new Alias(boundExpression, info.getExprSql()); + columnToOutput.put(column.getName(), output); + columnToReplaced.put(column.getName(), output.toSlot()); + replaceMap.put(output.toSlot(), output.child()); } for (Column column : materializedViewColumn) { if (column.isMaterializedViewColumn()) { @@ -400,12 +424,15 @@ private static Map getColumnToOutput( // may not be bound, we have to bind it again. // for example: to_bitmap. Expression boundExpression = new CustomExpressionAnalyzer( - boundSink, ctx.cascadesContext, columnToOutput).analyze(parsedExpression); + boundSink, ctx.cascadesContext, columnToReplaced).analyze(parsedExpression); if (boundExpression instanceof Alias) { boundExpression = ((Alias) boundExpression).child(); } - NamedExpression slot = new Alias(boundExpression, column.getDefineExpr().toSqlWithoutTbl()); - columnToOutput.put(column.getName(), slot); + boundExpression = ExpressionUtils.replace(boundExpression, replaceMap); + boundExpression = TypeCoercionUtils.castIfNotSameType(boundExpression, + DataType.fromCatalogType(column.getType())); + Alias output = new Alias(boundExpression, column.getDefineExpr().toSqlWithoutTbl()); + columnToOutput.put(column.getName(), output); } } return columnToOutput; @@ -554,12 +581,14 @@ private List bindPartitionIds(OlapTable table, List partitions, bo } private Pair, Integer> bindTargetColumns(OlapTable table, List colsName, - boolean childHasSeqCol, boolean needExtraSeqCol) { + boolean childHasSeqCol, boolean needExtraSeqCol, boolean isGroupCommit) { // if the table set sequence column in stream load phase, the sequence map column is null, we query it. if (colsName.isEmpty()) { + // ATTN: group commit without column list should return all base index column + // because it already prepares data for these columns. return Pair.of(table.getBaseSchema(true).stream() - .filter(c -> validColumn(c, childHasSeqCol)) - .collect(ImmutableList.toImmutableList()), 0); + .filter(c -> isGroupCommit || validColumn(c, childHasSeqCol)) + .collect(ImmutableList.toImmutableList()), 0); } else { int extraColumnsNum = (needExtraSeqCol ? 
1 : 0); List processedColsName = Lists.newArrayList(colsName); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java index e193c5fc4938de..df8ec64fc2e1ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java @@ -24,6 +24,7 @@ import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.ExprId; import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.Match; import org.apache.doris.nereids.trees.expressions.NamedExpression; import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.expressions.SlotNotFromChildren; @@ -38,6 +39,9 @@ import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.algebra.Generate; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; +import org.apache.doris.nereids.trees.plans.logical.LogicalDeferMaterializeOlapScan; +import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalSort; import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; import org.apache.doris.nereids.trees.plans.logical.LogicalWindow; @@ -60,6 +64,7 @@ public Rule build() { checkAllSlotReferenceFromChildren(plan); checkUnexpectedExpression(plan); checkMetricTypeIsUsedCorrectly(plan); + checkMatchIsUsedCorrectly(plan); return null; }).toRule(RuleType.CHECK_ANALYSIS); } @@ -176,4 +181,19 @@ private void checkMetricTypeIsUsedCorrectly(Plan plan) { }); } } + + private void checkMatchIsUsedCorrectly(Plan plan) { + for (Expression expression : plan.getExpressions()) { + if (expression instanceof Match) { + if (plan instanceof LogicalFilter && (plan.child(0) instanceof LogicalOlapScan + || plan.child(0) instanceof LogicalDeferMaterializeOlapScan)) { + return; + } else { + throw new AnalysisException(String.format( + "Not support match in %s in plan: %s, only support in olapScan filter", + plan.child(0), plan)); + } + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializationContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializationContext.java index 7913c47b36a8dd..609125280ded4b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializationContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializationContext.java @@ -408,7 +408,7 @@ public Void visitPhysicalRelation(PhysicalRelation physicalRelation, Void contex } private static String generateIdentifierName(List qualifiers) { - return String.join("#", qualifiers); + return String.join(".", qualifiers); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnFE.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnFE.java index fdd3b02e6fd483..1b830c7d11de41 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnFE.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/FoldConstantRuleOnFE.java @@ 
-63,6 +63,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.EncryptKeyRef; import org.apache.doris.nereids.trees.expressions.functions.scalar.If; import org.apache.doris.nereids.trees.expressions.functions.scalar.Password; +import org.apache.doris.nereids.trees.expressions.functions.scalar.SessionUser; import org.apache.doris.nereids.trees.expressions.functions.scalar.User; import org.apache.doris.nereids.trees.expressions.functions.scalar.Version; import org.apache.doris.nereids.trees.expressions.literal.ArrayLiteral; @@ -164,7 +165,8 @@ public List> buildRules() { matches(Password.class, this::visitPassword), matches(Array.class, this::visitArray), matches(Date.class, this::visitDate), - matches(Version.class, this::visitVersion) + matches(Version.class, this::visitVersion), + matches(SessionUser.class, this::visitSessionUser) ); } @@ -326,6 +328,12 @@ public Expression visitUser(User user, ExpressionRewriteContext context) { return new VarcharLiteral(res); } + @Override + public Expression visitSessionUser(SessionUser user, ExpressionRewriteContext context) { + String res = context.cascadesContext.getConnectContext().getUserIdentity().toString(); + return new VarcharLiteral(res); + } + @Override public Expression visitConnectionId(ConnectionId connectionId, ExpressionRewriteContext context) { return new BigIntLiteral(context.cascadesContext.getConnectContext().getConnectionId()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java index 9ea5811502293a..5946192a27eff9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/stats/StatsCalculator.java @@ -364,19 +364,28 @@ private void checkIfUnknownStatsUsedAsKey(StatisticsBuilder builder) { } } - private Statistics computeOlapScan(OlapScan olapScan) { + private double getOlapTableRowCount(OlapScan olapScan) { OlapTable olapTable = olapScan.getTable(); - double tableRowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId(), true); - if (tableRowCount <= 0) { - AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); - TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); - if (tableMeta != null) { - // create-view after analyzing, we may get -1 for this view row count - tableRowCount = Math.max(1, tableMeta.getRowCount(olapScan.getSelectedIndexId())); - } else { - tableRowCount = 1; + AnalysisManager analysisManager = Env.getCurrentEnv().getAnalysisManager(); + TableStatsMeta tableMeta = analysisManager.findTableStatsStatus(olapScan.getTable().getId()); + double rowCount = -1; + if (tableMeta != null && tableMeta.userInjected) { + rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId()); + } else { + rowCount = olapTable.getRowCountForIndex(olapScan.getSelectedIndexId(), true); + if (rowCount == -1) { + if (tableMeta != null) { + rowCount = tableMeta.getRowCount(olapScan.getSelectedIndexId()); + } } } + return rowCount; + } + + private Statistics computeOlapScan(OlapScan olapScan) { + OlapTable olapTable = olapScan.getTable(); + double tableRowCount = getOlapTableRowCount(olapScan); + tableRowCount = Math.max(1, tableRowCount); if (olapScan.getSelectedIndexId() != olapScan.getTable().getBaseIndexId() || olapTable instanceof MTMV) { // mv is selected, return its estimated stats @@ -441,10 +450,13 @@ private Statistics 
computeOlapScan(OlapScan olapScan) { } } + boolean useTableLevelStats = true; if (olapScan.getSelectedPartitionIds().size() < olapScan.getTable().getPartitionNum()) { // partition pruned + // try to use selected partition stats, if failed, fall back to table stats double selectedPartitionsRowCount = getSelectedPartitionRowCount(olapScan); - if (selectedPartitionsRowCount > 0) { + if (selectedPartitionsRowCount >= 0) { + useTableLevelStats = false; List selectedPartitionNames = new ArrayList<>(olapScan.getSelectedPartitionIds().size()); olapScan.getSelectedPartitionIds().forEach(id -> { selectedPartitionNames.add(olapScan.getTable().getPartition(id).getName()); @@ -458,19 +470,11 @@ private Statistics computeOlapScan(OlapScan olapScan) { } checkIfUnknownStatsUsedAsKey(builder); builder.setRowCount(selectedPartitionsRowCount + deltaRowCount); - } else { - // if partition row count is invalid (-1), fallback to table stats - for (SlotReference slot : visibleOutputSlots) { - ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); - ColumnStatisticBuilder colStatsBuilder = new ColumnStatisticBuilder(cache); - colStatsBuilder.setCount(tableRowCount); - colStatsBuilder.normalizeAvgSizeByte(slot); - builder.putColumnStatistics(slot, colStatsBuilder.build()); - } - checkIfUnknownStatsUsedAsKey(builder); - builder.setRowCount(tableRowCount + deltaRowCount); } - } else { + } + // 1. no partition is pruned, or + // 2. fall back to table stats + if (useTableLevelStats) { // get table level stats for (SlotReference slot : visibleOutputSlots) { ColumnStatistic cache = getColumnStatsFromTableCache((CatalogRelation) olapScan, slot); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SessionUser.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SessionUser.java new file mode 100644 index 00000000000000..b91e2c30942a43 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/SessionUser.java @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.expressions.functions.scalar; + +import org.apache.doris.catalog.FunctionSignature; +import org.apache.doris.nereids.trees.expressions.functions.AlwaysNotNullable; +import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature; +import org.apache.doris.nereids.trees.expressions.shape.LeafExpression; +import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; +import org.apache.doris.nereids.types.VarcharType; + +import com.google.common.collect.ImmutableList; + +import java.util.List; + +/** + * ScalarFunction 'SessionUser'. 
+ */ +public class SessionUser extends ScalarFunction + implements LeafExpression, ExplicitlyCastableSignature, AlwaysNotNullable { + + public static final List SIGNATURES = ImmutableList.of( + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args() + ); + + public SessionUser() { + super("session_user", ImmutableList.of()); + } + + @Override + public List getSignatures() { + return SIGNATURES; + } + + @Override + public R accept(ExpressionVisitor visitor, C context) { + return visitor.visitSessionUser(this, context); + } + + @Override + public boolean isDeterministic() { + return false; + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/HttpStream.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/HttpStream.java index de052b078db43e..8e35e25240e6ca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/HttpStream.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/table/HttpStream.java @@ -19,12 +19,17 @@ import org.apache.doris.catalog.FunctionSignature; import org.apache.doris.nereids.exceptions.AnalysisException; +import org.apache.doris.nereids.properties.DistributionSpecHash; +import org.apache.doris.nereids.properties.DistributionSpecHash.ShuffleType; +import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.trees.expressions.Properties; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.coercion.AnyDataType; import org.apache.doris.tablefunction.HttpStreamTableValuedFunction; import org.apache.doris.tablefunction.TableValuedFunctionIf; +import com.google.common.collect.ImmutableList; + import java.util.Map; /** http_stream */ @@ -49,6 +54,12 @@ protected TableValuedFunctionIf toCatalogFunction() { } } + @Override + public PhysicalProperties getPhysicalProperties() { + return PhysicalProperties.createHash(new DistributionSpecHash(ImmutableList.of(), + ShuffleType.EXECUTION_BUCKETED)); + } + @Override public R accept(ExpressionVisitor visitor, C context) { return visitor.visitHttpStream(this, context); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java index 20772ae716be2f..8741da5c7d63ec 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/visitor/ScalarFunctionVisitor.java @@ -358,6 +358,7 @@ import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsAdd; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsDiff; import org.apache.doris.nereids.trees.expressions.functions.scalar.SecondsSub; +import org.apache.doris.nereids.trees.expressions.functions.scalar.SessionUser; import org.apache.doris.nereids.trees.expressions.functions.scalar.Sha1; import org.apache.doris.nereids.trees.expressions.functions.scalar.Sha2; import org.apache.doris.nereids.trees.expressions.functions.scalar.Sign; @@ -2084,6 +2085,10 @@ default R visitUser(User user, C context) { return visitScalarFunction(user, context); } + default R visitSessionUser(SessionUser user, C context) { + return visitScalarFunction(user, context); + } + default R visitUtcTimestamp(UtcTimestamp utcTimestamp, C context) { 
return visitScalarFunction(utcTimestamp, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java index 9dfca3195d691e..eb65048050fda1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/AbstractPlan.java @@ -226,4 +226,8 @@ public List getAncestors() { } return ancestors; } + + public void updateActualRowCount(long actualRowCount) { + statistics.setActualRowCount(actualRowCount); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DMLCommandType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DMLCommandType.java index 18d8179abe4d0e..aa97f26df18c58 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DMLCommandType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DMLCommandType.java @@ -27,6 +27,8 @@ public enum DMLCommandType { NONE, // for INSERT INTO or INSERT INTO SELECT INSERT, + // for group_commit tvf + GROUP_COMMIT, // for UPDATE UPDATE, // for DELETE diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/AbstractInsertExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/AbstractInsertExecutor.java index defcd6c6e997fa..cdf74f5e9aca3a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/AbstractInsertExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/AbstractInsertExecutor.java @@ -94,6 +94,8 @@ public String getLabelName() { return labelName; } + public abstract long getTxnId(); + /** * begin transaction if necessary */ @@ -192,14 +194,19 @@ public void executeSingleInsert(StmtExecutor executor, long jobId) throws Except execImpl(executor, jobId); checkStrictModeAndFilterRatio(); int retryTimes = 0; - while (retryTimes < Config.mow_insert_into_commit_retry_times) { + while (true) { try { onComplete(); break; } catch (UserException e) { - LOG.warn("failed to commit txn", e); + LOG.warn("failed to commit txn, txnId={}, jobId={}, retryTimes={}", + getTxnId(), jobId, retryTimes, e); if (e.getErrorCode() == InternalErrorCode.DELETE_BITMAP_LOCK_ERR) { retryTimes++; + if (retryTimes >= Config.mow_insert_into_commit_retry_times) { + // should throw exception after running out of retry times + throw e; + } } else { throw e; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/BaseExternalTableInsertExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/BaseExternalTableInsertExecutor.java index 1c22b9bf56a846..a3aa33f96ab02c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/BaseExternalTableInsertExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/BaseExternalTableInsertExecutor.java @@ -70,6 +70,7 @@ public BaseExternalTableInsertExecutor(ConnectContext ctx, ExternalTable table, } } + @Override public long getTxnId() { return txnId; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java index 
1262829aa481da..b57ac3834958d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/OlapInsertExecutor.java @@ -85,6 +85,7 @@ public OlapInsertExecutor(ConnectContext ctx, Table table, super(ctx, table, labelName, planner, insertCtx, emptyInsert); } + @Override public long getTxnId() { return txnId; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java index 56c18908ad69da..a68da1a5b3d12d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/AbstractPhysicalJoin.java @@ -267,8 +267,9 @@ public Set getConditionSlot() { @Override public String toString() { - List args = Lists.newArrayList("type", joinType, + List args = Lists.newArrayList( "stats", statistics, + "type", joinType, "hashCondition", hashJoinConjuncts, "otherCondition", otherJoinConjuncts, "markCondition", markJoinConjuncts); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCTEProducer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCTEProducer.java index 53ff3e3025742d..568b8e6660ab39 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCTEProducer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalCTEProducer.java @@ -89,6 +89,7 @@ public int hashCode() { @Override public String toString() { return Utils.toSqlString("PhysicalCTEProducer[" + id.asInt() + "]", + "stats", statistics, "cteId", cteId); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java index fb3087e260869c..c8187727da47f2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalHashAggregate.java @@ -193,9 +193,9 @@ public String toString() { TopnPushInfo topnPushInfo = (TopnPushInfo) getMutableState( MutableState.KEY_PUSH_TOPN_TO_AGG).orElseGet(() -> null); return Utils.toSqlString("PhysicalHashAggregate[" + id.asInt() + "]" + getGroupIdWithPrefix(), + "stats", statistics, "aggPhase", aggregateParam.aggPhase, "aggMode", aggregateParam.aggMode, - "stats", statistics, "maybeUseStreaming", maybeUsingStream, "groupByExpr", groupByExpressions, "outputExpr", outputExpressions, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalQuickSort.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalQuickSort.java index c1973668c7d919..0e377b46d238ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalQuickSort.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalQuickSort.java @@ -107,8 +107,8 @@ public String shapeInfo() { @Override public String toString() { return Utils.toSqlString("PhysicalQuickSort[" + id.asInt() + "]" + getGroupIdWithPrefix(), - "orderKeys", orderKeys, - "phase", phase.toString(), "stats", statistics + "stats", statistics, 
"orderKeys", orderKeys, + "phase", phase.toString() ); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTopN.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTopN.java index 96dc709bbde8b6..c387a58dd0c993 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTopN.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalTopN.java @@ -143,6 +143,7 @@ public String shapeInfo() { @Override public String toString() { return Utils.toSqlString("PhysicalTopN[" + id.asInt() + "]" + getGroupIdWithPrefix(), + "stats", statistics, "limit", limit, "offset", offset, "orderKeys", orderKeys, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalUnion.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalUnion.java index ba20c9267059f1..2a81698812a3c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalUnion.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalUnion.java @@ -89,11 +89,11 @@ public R accept(PlanVisitor visitor, C context) { @Override public String toString() { return Utils.toSqlString("PhysicalUnion" + "[" + id.asInt() + "]" + getGroupIdWithPrefix(), + "stats", statistics, "qualifier", qualifier, "outputs", outputs, "regularChildrenOutputs", regularChildrenOutputs, - "constantExprsList", constantExprsList, - "stats", statistics); + "constantExprsList", constantExprsList); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalWindow.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalWindow.java index b1703f47496706..7e6fd48f02da6d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalWindow.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/physical/PhysicalWindow.java @@ -105,8 +105,9 @@ public List getExpressions() { @Override public String toString() { return Utils.toSqlString("PhysicalWindow[" + id.asInt() + "]" + getGroupIdWithPrefix(), + "stats", statistics, "windowFrameGroup", windowFrameGroup, - "requiredProperties", requireProperties, "stats", statistics + "requiredProperties", requireProperties ); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditEvent.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/AuditEvent.java similarity index 99% rename from fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditEvent.java rename to fe/fe-core/src/main/java/org/apache/doris/plugin/AuditEvent.java index 0b64a748a10471..55a8b00d2e8d13 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditEvent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/AuditEvent.java @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-package org.apache.doris.plugin.audit; +package org.apache.doris.plugin; import java.lang.annotation.Retention; diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/AuditPlugin.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/AuditPlugin.java index 55962a3dd10c85..d9c9ec84697ca4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/AuditPlugin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/AuditPlugin.java @@ -17,8 +17,6 @@ package org.apache.doris.plugin; -import org.apache.doris.plugin.audit.AuditEvent; - /** * Audit plugin interface describe. */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/PluginMgr.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/PluginMgr.java index 7fddf54e1ee7d2..ea69b247e66427 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/PluginMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/PluginMgr.java @@ -27,7 +27,7 @@ import org.apache.doris.nereids.parser.Dialect; import org.apache.doris.plugin.PluginInfo.PluginType; import org.apache.doris.plugin.PluginLoader.PluginStatus; -import org.apache.doris.plugin.audit.AuditLoaderPlugin; +import org.apache.doris.plugin.audit.AuditLoader; import org.apache.doris.plugin.audit.AuditLogBuilder; import org.apache.doris.plugin.dialect.HttpDialectConverterPlugin; @@ -113,7 +113,7 @@ private void initBuiltinPlugins() { } // AuditLoader: log audit log to internal table - AuditLoaderPlugin auditLoaderPlugin = new AuditLoaderPlugin(); + AuditLoader auditLoaderPlugin = new AuditLoader(); if (!registerBuiltinPlugin(auditLoaderPlugin.getPluginInfo(), auditLoaderPlugin)) { LOG.warn("failed to register audit log builder"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLoaderPlugin.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLoader.java similarity index 98% rename from fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLoaderPlugin.java rename to fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLoader.java index 4503b5b8802790..27193856937d87 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLoaderPlugin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLoader.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.common.util.DigitalVersion; import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.plugin.AuditEvent; import org.apache.doris.plugin.AuditPlugin; import org.apache.doris.plugin.Plugin; import org.apache.doris.plugin.PluginContext; @@ -42,8 +43,8 @@ /* * This plugin will load audit log to specified doris table at specified interval */ -public class AuditLoaderPlugin extends Plugin implements AuditPlugin { - private static final Logger LOG = LogManager.getLogger(AuditLoaderPlugin.class); +public class AuditLoader extends Plugin implements AuditPlugin { + private static final Logger LOG = LogManager.getLogger(AuditLoader.class); public static final String AUDIT_LOG_TABLE = "audit_log"; @@ -65,10 +66,10 @@ public class AuditLoaderPlugin extends Plugin implements AuditPlugin { private final PluginInfo pluginInfo; - public AuditLoaderPlugin() { + public AuditLoader() { pluginInfo = new PluginInfo(PluginMgr.BUILTIN_PLUGIN_PREFIX + "AuditLoader", PluginType.AUDIT, "builtin audit loader, to load audit log to internal table", DigitalVersion.fromString("2.1.0"), - DigitalVersion.fromString("1.8.31"), AuditLoaderPlugin.class.getName(), null, null); + DigitalVersion.fromString("1.8.31"), 
AuditLoader.class.getName(), null, null); } public PluginInfo getPluginInfo() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLogBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLogBuilder.java index 210081b101cd93..8d9e2c9d96efbc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLogBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditLogBuilder.java @@ -20,13 +20,14 @@ import org.apache.doris.common.AuditLog; import org.apache.doris.common.Config; import org.apache.doris.common.util.DigitalVersion; +import org.apache.doris.plugin.AuditEvent; +import org.apache.doris.plugin.AuditEvent.AuditField; +import org.apache.doris.plugin.AuditEvent.EventType; import org.apache.doris.plugin.AuditPlugin; import org.apache.doris.plugin.Plugin; import org.apache.doris.plugin.PluginInfo; import org.apache.doris.plugin.PluginInfo.PluginType; import org.apache.doris.plugin.PluginMgr; -import org.apache.doris.plugin.audit.AuditEvent.AuditField; -import org.apache.doris.plugin.audit.AuditEvent.EventType; import com.google.common.collect.Maps; import com.google.common.collect.Sets; diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditStreamLoader.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditStreamLoader.java index 3765872810d413..0b70e9591d509d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditStreamLoader.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/AuditStreamLoader.java @@ -46,7 +46,7 @@ public class AuditStreamLoader { public AuditStreamLoader() { this.hostPort = "127.0.0.1:" + Config.http_port; this.db = FeConstants.INTERNAL_DB_NAME; - this.auditLogTbl = AuditLoaderPlugin.AUDIT_LOG_TABLE; + this.auditLogTbl = AuditLoader.AUDIT_LOG_TABLE; this.auditLogLoadUrlStr = String.format(loadUrlPattern, hostPort, db, auditLogTbl); // currently, FE identity is FE's IP, so we replace the "." 
in IP to make it suitable for label this.feIdentity = hostPort.replaceAll("\\.", "_").replaceAll(":", "_"); diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/LoadAuditEvent.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/LoadAuditEvent.java index eb3e098bf416d8..e9e948df43fdcd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/LoadAuditEvent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/LoadAuditEvent.java @@ -17,6 +17,8 @@ package org.apache.doris.plugin.audit; +import org.apache.doris.plugin.AuditEvent; + public class LoadAuditEvent extends AuditEvent { @AuditField(value = "JobId") diff --git a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/StreamLoadAuditEvent.java b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/StreamLoadAuditEvent.java index 8733a59656c228..4a20901673a452 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/StreamLoadAuditEvent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/plugin/audit/StreamLoadAuditEvent.java @@ -17,6 +17,8 @@ package org.apache.doris.plugin.audit; +import org.apache.doris.plugin.AuditEvent; + public class StreamLoadAuditEvent extends AuditEvent { @AuditField(value = "Label") diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/AuditEventProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/AuditEventProcessor.java index 12e174ab5d0f18..5cb826dc86c990 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/AuditEventProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/AuditEventProcessor.java @@ -18,11 +18,11 @@ package org.apache.doris.qe; import org.apache.doris.common.Config; +import org.apache.doris.plugin.AuditEvent; import org.apache.doris.plugin.AuditPlugin; import org.apache.doris.plugin.Plugin; import org.apache.doris.plugin.PluginInfo.PluginType; import org.apache.doris.plugin.PluginMgr; -import org.apache.doris.plugin.audit.AuditEvent; import com.google.common.base.Strings; import com.google.common.collect.Queues; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/AuditLogHelper.java b/fe/fe-core/src/main/java/org/apache/doris/qe/AuditLogHelper.java index 7d14586bbeaf3a..904910822b8a9f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/AuditLogHelper.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/AuditLogHelper.java @@ -39,8 +39,8 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalInlineTable; import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; -import org.apache.doris.plugin.audit.AuditEvent.AuditEventBuilder; -import org.apache.doris.plugin.audit.AuditEvent.EventType; +import org.apache.doris.plugin.AuditEvent.AuditEventBuilder; +import org.apache.doris.plugin.AuditEvent.EventType; import org.apache.doris.qe.QueryState.MysqlStateType; import org.apache.doris.service.FrontendOptions; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java index 1b70c5b318bd10..fa81825d370bc0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java @@ -58,7 +58,7 @@ import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.plsql.Exec; import org.apache.doris.plsql.executor.PlSqlOperation; -import org.apache.doris.plugin.audit.AuditEvent.AuditEventBuilder; +import 
org.apache.doris.plugin.AuditEvent.AuditEventBuilder; import org.apache.doris.resource.Tag; import org.apache.doris.service.arrowflight.results.FlightSqlChannel; import org.apache.doris.statistics.ColumnStatistic; @@ -1249,7 +1249,7 @@ public String getCloudCluster(boolean updateErr) { String choseWay = null; if (!Strings.isNullOrEmpty(this.cloudCluster)) { cluster = this.cloudCluster; - choseWay = "use @cluster"; + choseWay = "use context cluster"; LOG.debug("finally set context cluster name {} for user {} with chose way '{}'", cloudCluster, getCurrentUserIdentity(), choseWay); return cluster; @@ -1260,9 +1260,9 @@ public String getCloudCluster(boolean updateErr) { cluster = defaultCluster; choseWay = "default cluster"; } else { - String authorizedCluster = getAuthorizedCloudCluster(); - if (!Strings.isNullOrEmpty(authorizedCluster)) { - cluster = authorizedCluster; + CloudClusterResult cloudClusterTypeAndName = getCloudClusterByPolicy(); + if (cloudClusterTypeAndName != null && !Strings.isNullOrEmpty(cloudClusterTypeAndName.clusterName)) { + cluster = cloudClusterTypeAndName.clusterName; choseWay = "authorized cluster"; } } @@ -1293,35 +1293,6 @@ public String getDefaultCloudCluster() { return null; } - public String getAuthorizedCloudCluster() { - List cloudClusterNames = ((CloudSystemInfoService) Env.getCurrentSystemInfo()).getCloudClusterNames(); - // get all available cluster of the user - for (String cloudClusterName : cloudClusterNames) { - if (!Env.getCurrentEnv().getAuth().checkCloudPriv(getCurrentUserIdentity(), - cloudClusterName, PrivPredicate.USAGE, ResourceTypeEnum.CLUSTER)) { - continue; - } - // find a cluster has more than one alive be - List bes = ((CloudSystemInfoService) Env.getCurrentSystemInfo()) - .getBackendsByClusterName(cloudClusterName); - AtomicBoolean hasAliveBe = new AtomicBoolean(false); - bes.stream().filter(Backend::isAlive).findAny().ifPresent(backend -> { - if (LOG.isDebugEnabled()) { - LOG.debug("get a clusterName {}, it's has more than one alive be {}", cloudClusterName, backend); - } - hasAliveBe.set(true); - }); - if (hasAliveBe.get()) { - if (LOG.isDebugEnabled()) { - LOG.debug("set context cluster name {}", cloudClusterName); - } - return cloudClusterName; - } - } - - return null; - } - public StatsErrorEstimator getStatsErrorEstimator() { return statsErrorEstimator; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 0fff8e9f23c09f..5e3a59d9a54d96 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -2166,9 +2166,10 @@ protected void computeScanRangeAssignment() throws Exception { FragmentScanRangeAssignment assignment = fragmentExecParamsMap.get(scanNode.getFragmentId()).scanRangeAssignment; boolean fragmentContainsColocateJoin = isColocateFragment(scanNode.getFragment(), - scanNode.getFragment().getPlanRoot()); + scanNode.getFragment().getPlanRoot()) && (scanNode instanceof OlapScanNode); boolean fragmentContainsBucketShuffleJoin = bucketShuffleJoinController - .isBucketShuffleJoin(scanNode.getFragmentId().asInt(), scanNode.getFragment().getPlanRoot()); + .isBucketShuffleJoin(scanNode.getFragmentId().asInt(), scanNode.getFragment().getPlanRoot()) + && (scanNode instanceof OlapScanNode); // A fragment may contain both colocate join and bucket shuffle join // on need both compute scanRange to init basic data for query coordinator diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 71614770afb70f..6bf09f2229bd8d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -1211,6 +1211,10 @@ public void updateProfile(boolean isFinished) { // failed, the insert stmt should be success try { profile.updateSummary(getSummaryInfo(isFinished), isFinished, this.planner); + if (planner instanceof NereidsPlanner) { + NereidsPlanner nereidsPlanner = ((NereidsPlanner) planner); + profile.setPhysicalPlan(nereidsPlanner.getPhysicalPlan()); + } } catch (Throwable t) { LOG.warn("failed to update profile, ignore this error", t); } @@ -3440,8 +3444,16 @@ private HttpStreamParams generateHttpStreamNereidsPlan(TUniqueId queryId) { httpStreamParams.setLabel(insertExecutor.getLabelName()); PlanNode planRoot = planner.getFragments().get(0).getPlanRoot(); - Preconditions.checkState(planRoot instanceof TVFScanNode || planRoot instanceof GroupCommitScanNode, - "Nereids' planNode cannot be converted to " + planRoot.getClass().getName()); + boolean isValidPlan = !planner.getScanNodes().isEmpty(); + for (ScanNode scanNode : planner.getScanNodes()) { + if (!(scanNode instanceof TVFScanNode || planRoot instanceof GroupCommitScanNode)) { + isValidPlan = false; + break; + } + } + if (!isValidPlan) { + throw new AnalysisException("plan is invalid: " + planRoot.getExplainString()); + } } catch (QueryStateException e) { LOG.debug("Command(" + originStmt.originStmt + ") process failed.", e); context.setState(e.getQueryState()); @@ -3512,11 +3524,8 @@ public HttpStreamParams generateHttpStreamPlan(TUniqueId queryId) throws Excepti LOG.warn("Analyze failed. {}", context.getQueryIdentifier(), e); throw ((NereidsException) e).getException(); } - boolean isInsertIntoCommand = parsedStmt != null && parsedStmt instanceof LogicalPlanAdapter - && ((LogicalPlanAdapter) parsedStmt).getLogicalPlan() instanceof InsertIntoTableCommand; if (e instanceof NereidsException - && !context.getSessionVariable().enableFallbackToOriginalPlanner - && !isInsertIntoCommand) { + && !context.getSessionVariable().enableFallbackToOriginalPlanner) { LOG.warn("Analyze failed. 
{}", context.getQueryIdentifier(), e); throw ((NereidsException) e).getException(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/resource/workloadschedpolicy/WorkloadRuntimeStatusMgr.java b/fe/fe-core/src/main/java/org/apache/doris/resource/workloadschedpolicy/WorkloadRuntimeStatusMgr.java index b2de010b9e418f..695bf983dc6b2f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/resource/workloadschedpolicy/WorkloadRuntimeStatusMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/resource/workloadschedpolicy/WorkloadRuntimeStatusMgr.java @@ -21,7 +21,7 @@ import org.apache.doris.common.Config; import org.apache.doris.common.Pair; import org.apache.doris.common.util.MasterDaemon; -import org.apache.doris.plugin.audit.AuditEvent; +import org.apache.doris.plugin.AuditEvent; import org.apache.doris.thrift.TQueryStatistics; import org.apache.doris.thrift.TReportWorkloadRuntimeStatusParams; diff --git a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java index 162dab5d13601c..6883eb0b54208a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java +++ b/fe/fe-core/src/main/java/org/apache/doris/statistics/Statistics.java @@ -46,6 +46,8 @@ public class Statistics { private double deltaRowCount = 0.0; + private long actualRowCount = -1L; + public Statistics(Statistics another) { this.rowCount = another.rowCount; this.widthInJoinCluster = another.widthInJoinCluster; @@ -193,21 +195,24 @@ public double dataSizeFactor(List slots) { @Override public String toString() { + StringBuilder builder = new StringBuilder(); if (Double.isNaN(rowCount)) { - return "NaN"; - } - if (Double.POSITIVE_INFINITY == rowCount) { - return "Infinite"; - } - if (Double.NEGATIVE_INFINITY == rowCount) { - return "-Infinite"; + builder.append("NaN"); + } else if (Double.POSITIVE_INFINITY == rowCount) { + builder.append("Infinite"); + } else if (Double.NEGATIVE_INFINITY == rowCount) { + builder.append("-Infinite"); + } else { + DecimalFormat format = new DecimalFormat("#,###.##"); + builder.append(format.format(rowCount)); } - DecimalFormat format = new DecimalFormat("#,###.##"); - String rows = format.format(rowCount); if (deltaRowCount > 0) { - rows = rows + "(" + format.format(deltaRowCount) + ")"; + builder.append("(").append((long) deltaRowCount).append(")"); + } + if (actualRowCount != -1) { + builder.append(" actualRows=").append(actualRowCount); } - return rows; + return builder.toString(); } public String printColumnStats() { @@ -292,4 +297,12 @@ public double getDeltaRowCount() { public void setDeltaRowCount(double deltaRowCount) { this.deltaRowCount = deltaRowCount; } + + public long getActualRowCount() { + return actualRowCount; + } + + public void setActualRowCount(long actualRowCount) { + this.actualRowCount = actualRowCount; + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java index f81d8b4d7b02b6..76140d2ef26d9e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java @@ -19,7 +19,6 @@ import org.apache.doris.analysis.ModifyBackendClause; import org.apache.doris.analysis.ModifyBackendHostNameClause; -import org.apache.doris.catalog.Database; import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.ReplicaAllocation; @@ 
-642,18 +641,31 @@ public long getBackendReportVersion(long backendId) { } } - public void updateBackendReportVersion(long backendId, long newReportVersion, long dbId, long tableId) { - AtomicLong atomicLong; - if ((atomicLong = idToReportVersionRef.get(backendId)) != null) { - Database db = (Database) Env.getCurrentInternalCatalog().getDbNullable(dbId); - if (db == null) { - LOG.warn("failed to update backend report version, db {} does not exist", dbId); - return; + public void updateBackendReportVersion(long backendId, long newReportVersion, long dbId, long tableId, + boolean checkDbExist) { + AtomicLong atomicLong = idToReportVersionRef.get(backendId); + if (atomicLong == null) { + return; + } + if (checkDbExist && Env.getCurrentInternalCatalog().getDbNullable(dbId) == null) { + LOG.warn("failed to update backend report version, db {} does not exist", dbId); + return; + } + while (true) { + long curReportVersion = atomicLong.get(); + if (curReportVersion >= newReportVersion) { + if (LOG.isDebugEnabled()) { + LOG.debug("skip update backend {} report version: {}, current version: {}, db: {}, table: {}", + backendId, newReportVersion, curReportVersion, dbId, tableId); + } + break; } - atomicLong.set(newReportVersion); - if (LOG.isDebugEnabled()) { - LOG.debug("update backend {} report version: {}, db: {}, table: {}", - backendId, newReportVersion, dbId, tableId); + if (atomicLong.compareAndSet(curReportVersion, newReportVersion)) { + if (LOG.isDebugEnabled()) { + LOG.debug("update backend {} report version: {}, db: {}, table: {}", + backendId, newReportVersion, dbId, tableId); + } + break; } } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/InternalSchemaAlterTest.java b/fe/fe-core/src/test/java/org/apache/doris/alter/InternalSchemaAlterTest.java index cf9d31b1ccaf3c..122014f0e8b2c2 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/alter/InternalSchemaAlterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/alter/InternalSchemaAlterTest.java @@ -28,7 +28,7 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; -import org.apache.doris.plugin.audit.AuditLoaderPlugin; +import org.apache.doris.plugin.audit.AuditLoader; import org.apache.doris.statistics.StatisticConstants; import org.apache.doris.utframe.TestWithFeService; @@ -54,11 +54,12 @@ protected void runBeforeAll() throws Exception { public void testModifyTblReplicaCount() throws AnalysisException { Database db = Env.getCurrentEnv().getCatalogMgr() .getInternalCatalog().getDbNullable(FeConstants.INTERNAL_DB_NAME); + InternalSchemaInitializer.modifyTblReplicaCount(db, StatisticConstants.TABLE_STATISTIC_TBL_NAME); - InternalSchemaInitializer.modifyTblReplicaCount(db, AuditLoaderPlugin.AUDIT_LOG_TABLE); + InternalSchemaInitializer.modifyTblReplicaCount(db, AuditLoader.AUDIT_LOG_TABLE); checkReplicationNum(db, StatisticConstants.TABLE_STATISTIC_TBL_NAME); - checkReplicationNum(db, AuditLoaderPlugin.AUDIT_LOG_TABLE); + checkReplicationNum(db, AuditLoader.AUDIT_LOG_TABLE); } private void checkReplicationNum(Database db, String tblName) throws AnalysisException { @@ -77,7 +78,7 @@ public void testCheckAuditLogTable() throws AnalysisException { Database db = Env.getCurrentEnv().getCatalogMgr() .getInternalCatalog().getDbNullable(FeConstants.INTERNAL_DB_NAME); Assertions.assertNotNull(db); - OlapTable table = db.getOlapTableOrAnalysisException(AuditLoaderPlugin.AUDIT_LOG_TABLE); + OlapTable table = 
db.getOlapTableOrAnalysisException(AuditLoader.AUDIT_LOG_TABLE); Assertions.assertNotNull(table); for (ColumnDef def : InternalSchema.AUDIT_SCHEMA) { Assertions.assertNotNull(table.getColumn(def.getName())); diff --git a/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java b/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java index c48ba030e77234..8a9216aecc0f6e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/cluster/SystemInfoServiceTest.java @@ -233,7 +233,7 @@ public void addBackendTest() throws UserException { Assert.assertTrue(Env.getCurrentSystemInfo().getBackendReportVersion(backendId) == 0L); - Env.getCurrentSystemInfo().updateBackendReportVersion(backendId, 2L, 20000L, 30000L); + Env.getCurrentSystemInfo().updateBackendReportVersion(backendId, 2L, 20000L, 30000L, true); Assert.assertTrue(Env.getCurrentSystemInfo().getBackendReportVersion(backendId) == 2L); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java b/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java index 7e99667b73124b..395a063fbc8a24 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/datasource/hive/HmsCommitTest.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.hive; +import org.apache.doris.backup.Status; import org.apache.doris.catalog.Column; import org.apache.doris.catalog.PrimitiveType; import org.apache.doris.common.info.SimpleTableInfo; @@ -169,15 +170,21 @@ public void testNewPartitionForUnPartitionedTable() throws IOException { @Test public void testAppendPartitionForUnPartitionedTable() throws IOException { genQueryID(); - System.out.println(DebugUtil.printId(connectContext.queryId())); List pus = new ArrayList<>(); pus.add(createRandomAppend(null)); pus.add(createRandomAppend(null)); pus.add(createRandomAppend(null)); + new MockUp(HMSTransaction.HmsCommitter.class) { + @Mock + private void doNothing() { + Assert.assertEquals(Status.ErrCode.NOT_FOUND, fs.exists(getWritePath()).getErrCode()); + } + }; commit(dbName, tbWithoutPartition, pus); Table table = hmsClient.getTable(dbName, tbWithoutPartition); assertNumRows(3, table); + genQueryID(); List pus2 = new ArrayList<>(); pus2.add(createRandomAppend(null)); @@ -204,6 +211,12 @@ public void testOverwritePartitionForUnPartitionedTable() throws IOException { @Test public void testNewPartitionForPartitionedTable() throws IOException { + new MockUp(HMSTransaction.HmsCommitter.class) { + @Mock + private void doNothing() { + Assert.assertEquals(Status.ErrCode.NOT_FOUND, fs.exists(getWritePath()).getErrCode()); + } + }; genQueryID(); List pus = new ArrayList<>(); pus.add(createRandomNew("a")); @@ -377,6 +390,11 @@ public THivePartitionUpdate createRandomOverwrite(String partition) throws IOExc genOnePartitionUpdate("c3=" + partition, TUpdateMode.OVERWRITE); } + private String getWritePath() { + String queryId = DebugUtil.printId(ConnectContext.get().queryId()); + return writeLocation + queryId + "/"; + } + public void commit(String dbName, String tableName, List hivePUs) { @@ -385,7 +403,7 @@ public void commit(String dbName, HiveInsertCommandContext ctx = new HiveInsertCommandContext(); String queryId = DebugUtil.printId(ConnectContext.get().queryId()); ctx.setQueryId(queryId); - ctx.setWritePath(writeLocation + queryId + "/"); + ctx.setWritePath(getWritePath()); 
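+        // use the shared getWritePath() helper here so the HmsCommitter.doNothing() mock above
+        // can assert the staging write path has already been cleaned up (NOT_FOUND) by the time
+        // the commit pipeline reaches that hook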
hmsTransaction.beginInsertTable(ctx); hmsTransaction.finishInsertTable(new SimpleTableInfo(dbName, tableName)); hmsTransaction.commit(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/master/MetaHelperTest.java b/fe/fe-core/src/test/java/org/apache/doris/master/MetaHelperTest.java index 070979494bfd6c..40083abf956aa4 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/master/MetaHelperTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/master/MetaHelperTest.java @@ -17,6 +17,7 @@ package org.apache.doris.master; +import org.apache.doris.common.Config; import org.apache.doris.httpv2.entity.ResponseBody; import org.apache.doris.httpv2.rest.RestApiStatusCode; import org.apache.doris.persist.StorageInfo; @@ -25,6 +26,11 @@ import com.fasterxml.jackson.databind.ObjectMapper; import org.junit.Assert; import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; + +import java.io.File; +import java.io.IOException; public class MetaHelperTest { @@ -49,4 +55,45 @@ private ResponseBody buildResponseBody() { bodyBefore.setMsg("msg"); return bodyBefore; } + + File tempDir = new File(System.getProperty("java.io.tmpdir"), "tempDir"); + + @BeforeEach + void setUp() { + + if (tempDir.exists()) { + tempDir.delete(); + } + tempDir.mkdir(); + } + + @Test + public void testFile() throws IOException { + + String errorFilename = "testfile."; + File errorFileWithSuffix = new File(tempDir, errorFilename); + String rightFilename = "image.1"; + File rightFileWithSuffix = new File(tempDir, rightFilename); + + Config.meta_helper_security_mode = true; + + if (errorFileWithSuffix.exists()) { + errorFileWithSuffix.delete(); + } + Assert.assertThrows(IllegalArgumentException.class, () -> MetaHelper.complete(errorFilename, tempDir)); + Assert.assertThrows(IllegalArgumentException.class, () -> MetaHelper.getFile(errorFilename, tempDir)); + if (rightFileWithSuffix.exists()) { + rightFileWithSuffix.delete(); + } + Assert.assertEquals(rightFileWithSuffix.getName() + ".part", MetaHelper.getFile(rightFilename, tempDir).getName()); + + } + + @AfterEach + public void tearDown() { + if (tempDir.exists()) { + tempDir.delete(); + } + } + } diff --git a/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java b/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java index 6c9f54080a049e..f80b485609c5b9 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/qe/AuditEventProcessorTest.java @@ -19,9 +19,9 @@ import org.apache.doris.catalog.Env; import org.apache.doris.common.util.DigitalVersion; +import org.apache.doris.plugin.AuditEvent; +import org.apache.doris.plugin.AuditEvent.EventType; import org.apache.doris.plugin.PluginInfo; -import org.apache.doris.plugin.audit.AuditEvent; -import org.apache.doris.plugin.audit.AuditEvent.EventType; import org.apache.doris.plugin.audit.AuditLogBuilder; import org.apache.doris.utframe.UtFrameUtils; diff --git a/gensrc/proto/cloud.proto b/gensrc/proto/cloud.proto index b4c2d0d0968ae9..268744a0088f61 100644 --- a/gensrc/proto/cloud.proto +++ b/gensrc/proto/cloud.proto @@ -895,6 +895,7 @@ message GetObjStoreInfoResponse { repeated StorageVaultPB storage_vault = 3; optional string default_storage_vault_id = 4; optional string default_storage_vault_name = 5; + optional bool enable_storage_vault = 6; }; message CreateTabletsRequest { diff --git a/regression-test/conf/regression-conf.groovy 
b/regression-test/conf/regression-conf.groovy index ac66e01f90626c..d3d3ee264cfad7 100644 --- a/regression-test/conf/regression-conf.groovy +++ b/regression-test/conf/regression-conf.groovy @@ -207,7 +207,7 @@ txYunSk="***********" //arrow flight sql test config extArrowFlightSqlHost = "127.0.0.1" -extArrowFlightSqlPort = 8080 +extArrowFlightSqlPort = 8081 extArrowFlightSqlUser = "root" extArrowFlightSqlPassword= "" diff --git a/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_insert_timeout.out b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_insert_timeout.out new file mode 100644 index 00000000000000..ee71e1e449d57d --- /dev/null +++ b/regression-test/data/fault_injection_p0/cloud/test_cloud_mow_insert_timeout.out @@ -0,0 +1,11 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +1 1 1 +2 2 2 +3 3 3 + +-- !sql -- +1 1 1 +2 2 2 +3 3 3 + diff --git a/regression-test/data/load_p0/http_stream/test_group_commit_http_stream.out b/regression-test/data/load_p0/http_stream/test_group_commit_http_stream.out index 57c2525815ad78..e4f297347cc196 100644 --- a/regression-test/data/load_p0/http_stream/test_group_commit_http_stream.out +++ b/regression-test/data/load_p0/http_stream/test_group_commit_http_stream.out @@ -21,5 +21,5 @@ 8 f 80 -- !sql -- -2402288 +1201144 diff --git a/regression-test/data/load_p0/stream_load/test_group_commit_stream_load.out b/regression-test/data/load_p0/stream_load/test_group_commit_stream_load.out index 246be06453bd16..1f1afae813f336 100644 --- a/regression-test/data/load_p0/stream_load/test_group_commit_stream_load.out +++ b/regression-test/data/load_p0/stream_load/test_group_commit_stream_load.out @@ -23,5 +23,5 @@ 11 a 11 -- !sql -- -2402288 +1201144 diff --git a/regression-test/data/mtmv_p0/test_multi_level_mtmv.out b/regression-test/data/mtmv_p0/test_multi_level_mtmv.out index 7543b21ffa7bbc..7d44e381cc8fc2 100644 --- a/regression-test/data/mtmv_p0/test_multi_level_mtmv.out +++ b/regression-test/data/mtmv_p0/test_multi_level_mtmv.out @@ -11,6 +11,17 @@ -- !mv2_should_one_partition -- ["p_2"] +-- !mv1_should_one_partition_again -- +["p_2"] + +-- !mv2_should_one_partition_again -- +["p_2"] + +-- !mv2_again -- +1 1 +2 2 +2 3 + -- !status1 -- multi_level_mtmv1 SCHEMA_CHANGE SUCCESS diff --git a/regression-test/data/mtmv_up_down_olap_p0/test_upgrade_downgrade_olap_mtmv.out b/regression-test/data/mtmv_up_down_olap_p0/test_upgrade_downgrade_olap_mtmv.out new file mode 100644 index 00000000000000..760e94479a82b0 --- /dev/null +++ b/regression-test/data/mtmv_up_down_olap_p0/test_upgrade_downgrade_olap_mtmv.out @@ -0,0 +1,9 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !refresh_init -- +1 2017-01-15 1 +2 2017-02-15 2 +3 2017-03-15 3 + +-- !mtmv_sync -- +true + diff --git a/regression-test/data/nereids_p0/insert_into_table/insert_use_table_id.out b/regression-test/data/nereids_p0/insert_into_table/insert_use_table_id.out deleted file mode 100644 index d0020443bf67df..00000000000000 --- a/regression-test/data/nereids_p0/insert_into_table/insert_use_table_id.out +++ /dev/null @@ -1,48 +0,0 @@ --- This file is automatically generated. 
You should know what you did if you want to edit this --- !sql_cross_join -- -1 10 1 1 1.0 2000-01-01 1 10 10 10.0 2000-01-10 1 -1 10 1 1 1.0 2000-01-01 1 10 10 10.0 2000-01-10 4 -1 10 1 1 1.0 2000-01-01 1 10 10 10.0 2000-01-10 5 -1 10 1 1 1.0 2000-01-01 2 20 20 20.0 2000-01-20 1 -1 10 1 1 1.0 2000-01-01 2 20 20 20.0 2000-01-20 4 -1 10 1 1 1.0 2000-01-01 2 20 20 20.0 2000-01-20 5 -1 10 1 1 1.0 2000-01-01 3 30 30 30.0 2000-01-30 1 -1 10 1 1 1.0 2000-01-01 3 30 30 30.0 2000-01-30 4 -1 10 1 1 1.0 2000-01-01 3 30 30 30.0 2000-01-30 5 -1 10 1 1 1.0 2000-01-01 4 4 4 4.0 2000-01-04 1 -1 10 1 1 1.0 2000-01-01 4 4 4 4.0 2000-01-04 4 -1 10 1 1 1.0 2000-01-01 4 4 4 4.0 2000-01-04 5 -1 10 1 1 1.0 2000-01-01 5 5 5 5.0 2000-01-05 1 -1 10 1 1 1.0 2000-01-01 5 5 5 5.0 2000-01-05 4 -1 10 1 1 1.0 2000-01-01 5 5 5 5.0 2000-01-05 5 -2 20 2 2 2.0 2000-01-02 1 10 10 10.0 2000-01-10 1 -2 20 2 2 2.0 2000-01-02 1 10 10 10.0 2000-01-10 4 -2 20 2 2 2.0 2000-01-02 1 10 10 10.0 2000-01-10 5 -2 20 2 2 2.0 2000-01-02 2 20 20 20.0 2000-01-20 1 -2 20 2 2 2.0 2000-01-02 2 20 20 20.0 2000-01-20 4 -2 20 2 2 2.0 2000-01-02 2 20 20 20.0 2000-01-20 5 -2 20 2 2 2.0 2000-01-02 3 30 30 30.0 2000-01-30 1 -2 20 2 2 2.0 2000-01-02 3 30 30 30.0 2000-01-30 4 -2 20 2 2 2.0 2000-01-02 3 30 30 30.0 2000-01-30 5 -2 20 2 2 2.0 2000-01-02 4 4 4 4.0 2000-01-04 1 -2 20 2 2 2.0 2000-01-02 4 4 4 4.0 2000-01-04 4 -2 20 2 2 2.0 2000-01-02 4 4 4 4.0 2000-01-04 5 -2 20 2 2 2.0 2000-01-02 5 5 5 5.0 2000-01-05 1 -2 20 2 2 2.0 2000-01-02 5 5 5 5.0 2000-01-05 4 -2 20 2 2 2.0 2000-01-02 5 5 5 5.0 2000-01-05 5 -3 30 3 3 3.0 2000-01-03 1 10 10 10.0 2000-01-10 1 -3 30 3 3 3.0 2000-01-03 1 10 10 10.0 2000-01-10 4 -3 30 3 3 3.0 2000-01-03 1 10 10 10.0 2000-01-10 5 -3 30 3 3 3.0 2000-01-03 2 20 20 20.0 2000-01-20 1 -3 30 3 3 3.0 2000-01-03 2 20 20 20.0 2000-01-20 4 -3 30 3 3 3.0 2000-01-03 2 20 20 20.0 2000-01-20 5 -3 30 3 3 3.0 2000-01-03 3 30 30 30.0 2000-01-30 1 -3 30 3 3 3.0 2000-01-03 3 30 30 30.0 2000-01-30 4 -3 30 3 3 3.0 2000-01-03 3 30 30 30.0 2000-01-30 5 -3 30 3 3 3.0 2000-01-03 4 4 4 4.0 2000-01-04 1 -3 30 3 3 3.0 2000-01-03 4 4 4 4.0 2000-01-04 4 -3 30 3 3 3.0 2000-01-03 4 4 4 4.0 2000-01-04 5 -3 30 3 3 3.0 2000-01-03 5 5 5 5.0 2000-01-05 1 -3 30 3 3 3.0 2000-01-03 5 5 5 5.0 2000-01-05 4 -3 30 3 3 3.0 2000-01-03 5 5 5 5.0 2000-01-05 5 - diff --git a/regression-test/data/query_p0/sql_functions/encryption_digest/test_encryption_function.out b/regression-test/data/query_p0/sql_functions/encryption_digest/test_encryption_function.out index 721412dc0364c1..c652b3074558cc 100644 --- a/regression-test/data/query_p0/sql_functions/encryption_digest/test_encryption_function.out +++ b/regression-test/data/query_p0/sql_functions/encryption_digest/test_encryption_function.out @@ -80,3 +80,195 @@ text -- !sql -- aaaaaa +-- !sql1 -- +aaaaaa + +-- !sql2 -- +aaaaaa + +-- !sql3 -- +zhang + +-- !sql4 -- +zhang + +-- !sql5 -- +aaaaaa + +-- !sql6 -- +aaaaaa + +-- !sql7 -- +zhang + +-- !sql8 -- +zhang + +-- !sql9 -- +aaaaaa + +-- !sql10 -- +aaaaaa + +-- !sql11 -- +zhang + +-- !sql12 -- +zhang + +-- !sql9 -- +aaaaaa + +-- !sql10 -- +aaaaaa + +-- !sql11 -- +zhang + +-- !sql12 -- +zhang + +-- !sql13 -- +aaaaaa + +-- !sql14 -- +aaaaaa + +-- !sql15 -- +zhang + +-- !sql16 -- +zhang + +-- !sql17 -- +aaaaaa + +-- !sql18 -- +aaaaaa + +-- !sql19 -- +zhang + +-- !sql20 -- +zhang + +-- !sql21 -- +aaaaaa + +-- !sql22 -- +aaaaaa + +-- !sql23 -- +zhang + +-- !sql24 -- +zhang + +-- !sql25 -- +aaaaaa + +-- !sql26 -- +aaaaaa + +-- !sql27 -- +zhang + +-- !sql28 -- +zhang + +-- !sql29 -- 
+aaaaaa + +-- !sql30 -- +aaaaaa + +-- !sql31 -- +zhang + +-- !sql32 -- +zhang + +-- !sql29 -- +aaaaaa + +-- !sql30 -- +aaaaaa + +-- !sql31 -- +zhang + +-- !sql32 -- +zhang + +-- !sql33 -- +aaaaaa + +-- !sql34 -- +aaaaaa + +-- !sql35 -- +zhang + +-- !sql36 -- +zhang + +-- !sql37 -- +aaaaaa + +-- !sql38 -- +aaaaaa + +-- !sql39 -- +zhang + +-- !sql40 -- +zhang + +-- !sql41 -- +aaaaaa + +-- !sql42 -- +aaaaaa + +-- !sql43 -- +zhang + +-- !sql44 -- +zhang + +-- !sql45 -- +aaaaaa + +-- !sql46 -- +aaaaaa + +-- !sql47 -- +zhang + +-- !sql48 -- +zhang + +-- !sql49 -- +aaaaaa + +-- !sql50 -- +aaaaaa + +-- !sql51 -- +zhang + +-- !sql52 -- +zhang + +-- !sql53 -- +aaaaaa + +-- !sql54 -- +aaaaaa + +-- !sql55 -- +zhang + +-- !sql56 -- +zhang + diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy index 53fd6b0415faef..0042aa69a0aded 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy @@ -117,6 +117,7 @@ class Config { public InetSocketAddress recycleServiceHttpInetSocketAddress public Integer parallel public Integer suiteParallel + public Integer dockerSuiteParallel public Integer actionParallel public Integer times public boolean withOutLoadData @@ -467,6 +468,7 @@ class Config { config.forceGenerateOutputFile = cmd.hasOption(forceGenOutOpt) config.parallel = Integer.parseInt(cmd.getOptionValue(parallelOpt, "10")) config.suiteParallel = Integer.parseInt(cmd.getOptionValue(suiteParallelOpt, "10")) + config.dockerSuiteParallel = Integer.parseInt(cmd.getOptionValue(dockerSuiteParallelOpt, "1")) config.actionParallel = Integer.parseInt(cmd.getOptionValue(actionParallelOpt, "10")) config.times = Integer.parseInt(cmd.getOptionValue(timesOpt, "1")) config.randomOrder = cmd.hasOption(randomOrderOpt) @@ -888,6 +890,11 @@ class Config { log.info("Set suiteParallel to 1 because not specify.".toString()) } + if (config.dockerSuiteParallel == null) { + config.dockerSuiteParallel = 1 + log.info("Set dockerSuiteParallel to 1 because not specify.".toString()) + } + if (config.actionParallel == null) { config.actionParallel = 10 log.info("Set actionParallel to 10 because not specify.".toString()) diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/ConfigOptions.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/ConfigOptions.groovy index b1a782da94b656..67322287d07aa5 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/ConfigOptions.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/ConfigOptions.groovy @@ -67,6 +67,7 @@ class ConfigOptions { static Option forceGenOutOpt static Option parallelOpt static Option suiteParallelOpt + static Option dockerSuiteParallelOpt static Option actionParallelOpt static Option randomOrderOpt static Option stopWhenFailOpt @@ -425,6 +426,14 @@ class ConfigOptions { .longOpt("suiteParallel") .desc("the num of threads running for suites") .build() + dockerSuiteParallelOpt = Option.builder("dockerSuiteParallel") + .argName("dockerSuiteParallel") + .required(false) + .hasArg(true) + .type(String.class) + .longOpt("dockerSuiteParallel") + .desc("the num of threads running for docker suites") + .build() actionParallelOpt = Option.builder("actionParallel") .argName("parallel") .required(false) @@ -607,6 +616,7 @@ 
class ConfigOptions { .addOption(forceGenOutOpt) .addOption(parallelOpt) .addOption(suiteParallelOpt) + .addOption(dockerSuiteParallelOpt) .addOption(actionParallelOpt) .addOption(randomOrderOpt) .addOption(stopWhenFailOpt) diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/RegressionTest.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/RegressionTest.groovy index 92e92a9b736c32..a0cc8ba2ea12c4 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/RegressionTest.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/RegressionTest.groovy @@ -52,16 +52,22 @@ import java.util.function.Predicate @CompileStatic class RegressionTest { + static enum GroupExecType { + NORMAL, + SINGLE, // group contains nonConcurrent + DOCKER, // group contains docker + } + static ClassLoader classloader static CompilerConfiguration compileConfig static GroovyShell shell static ExecutorService scriptExecutors - static ExecutorService suiteExecutors - static ExecutorService singleSuiteExecutors static ExecutorService actionExecutors + static Map suiteExecutors static ThreadLocal threadLoadedClassNum = new ThreadLocal<>() static final int cleanLoadedClassesThreshold = 20 static String nonConcurrentTestGroup = "nonConcurrent" + static String dockerTestGroup = "docker" static { ch.qos.logback.classic.Logger loggerOfSuite = @@ -113,8 +119,9 @@ class RegressionTest { } } actionExecutors.shutdown() - suiteExecutors.shutdown() - singleSuiteExecutors.shutdown() + for (ExecutorService suiteExecutor : suiteExecutors.values()) { + suiteExecutor.shutdown() + } scriptExecutors.shutdown() log.info("Test finished") if (!success) { @@ -135,17 +142,24 @@ class RegressionTest { .build(); scriptExecutors = Executors.newFixedThreadPool(config.parallel, scriptFactory) + suiteExecutors = [:] BasicThreadFactory suiteFactory = new BasicThreadFactory.Builder() .namingPattern("suite-thread-%d") .priority(Thread.MAX_PRIORITY) .build(); - suiteExecutors = Executors.newFixedThreadPool(config.suiteParallel, suiteFactory) + suiteExecutors[GroupExecType.NORMAL] = Executors.newFixedThreadPool(config.suiteParallel, suiteFactory) BasicThreadFactory singleSuiteFactory = new BasicThreadFactory.Builder() .namingPattern("non-concurrent-thread-%d") .priority(Thread.MAX_PRIORITY) .build(); - singleSuiteExecutors = Executors.newFixedThreadPool(1, singleSuiteFactory) + suiteExecutors[GroupExecType.SINGLE] = Executors.newFixedThreadPool(1, singleSuiteFactory) + + BasicThreadFactory dockerSuiteFactory = new BasicThreadFactory.Builder() + .namingPattern("docker-suite-thread-%d") + .priority(Thread.MAX_PRIORITY) + .build(); + suiteExecutors[GroupExecType.DOCKER] = Executors.newFixedThreadPool(config.dockerSuiteParallel, dockerSuiteFactory) BasicThreadFactory actionFactory = new BasicThreadFactory.Builder() .namingPattern("action-thread-%d") @@ -198,9 +212,9 @@ class RegressionTest { return sources } - static void runScript(Config config, ScriptSource source, Recorder recorder, boolean isSingleThreadScript) { + static void runScript(Config config, ScriptSource source, Recorder recorder, GroupExecType grpExecType) { def suiteFilter = { String suiteName, String groupName -> - canRun(config, suiteName, groupName, isSingleThreadScript) + canRun(config, suiteName, groupName, grpExecType) } def file = source.getFile() int failureLimit = Integer.valueOf(config.otherConfigs.getOrDefault("max_failure_num", "-1").toString()); @@ -211,12 +225,7 @@ class 
RegressionTest { return; } def eventListeners = getEventListeners(config, recorder) - ExecutorService executors = null - if (isSingleThreadScript) { - executors = singleSuiteExecutors - } else { - executors = suiteExecutors - } + ExecutorService executors = suiteExecutors[grpExecType] new ScriptContext(file, executors, actionExecutors, config, eventListeners, suiteFilter).start { scriptContext -> @@ -242,11 +251,20 @@ class RegressionTest { scriptSources.eachWithIndex { source, i -> // log.info("Prepare scripts [${i + 1}/${totalFile}]".toString()) def future = scriptExecutors.submit { - runScript(config, source, recorder, false) + runScript(config, source, recorder, GroupExecType.NORMAL) } futures.add(future) } + List dockerFutures = Lists.newArrayList() + scriptSources.eachWithIndex { source, i -> +// log.info("Prepare scripts [${i + 1}/${totalFile}]".toString()) + def future = scriptExecutors.submit { + runScript(config, source, recorder, GroupExecType.DOCKER) + } + dockerFutures.add(future) + } + // wait all scripts for (Future future : futures) { try { @@ -261,12 +279,20 @@ class RegressionTest { scriptSources.eachWithIndex { source, i -> // log.info("Prepare scripts [${i + 1}/${totalFile}]".toString()) def future = scriptExecutors.submit { - runScript(config, source, recorder, true) + runScript(config, source, recorder, GroupExecType.SINGLE) } futures.add(future) } // wait all scripts + for (Future future : dockerFutures) { + try { + future.get() + } catch (Throwable t) { + // do nothing, because already save to Recorder + } + } + for (Future future : futures) { try { future.get() @@ -323,19 +349,19 @@ class RegressionTest { return true } - static boolean canRun(Config config, String suiteName, String group, boolean isSingleThreadScript) { + static boolean canRun(Config config, String suiteName, String group, GroupExecType grpExecType) { + return getGroupExecType(group) == grpExecType && filterGroups(config, group) && filterSuites(config, suiteName) + } + + static GroupExecType getGroupExecType(String group) { Set suiteGroups = group.split(',').collect { g -> g.trim() }.toSet(); - if (isSingleThreadScript) { - if (!suiteGroups.contains(nonConcurrentTestGroup)) { - return false - } + if (suiteGroups.contains(nonConcurrentTestGroup)) { + return GroupExecType.SINGLE + } else if (suiteGroups.contains(dockerTestGroup)) { + return GroupExecType.DOCKER } else { - if (suiteGroups.contains(nonConcurrentTestGroup)) { - return false - } + return GroupExecType.NORMAL } - - return filterGroups(config, group) && filterSuites(config, suiteName) } static List getEventListeners(Config config, Recorder recorder) { @@ -421,7 +447,7 @@ class RegressionTest { } pluginPath.eachFileRecurse({ it -> if (it.name.endsWith(".groovy")) { - ScriptContext context = new ScriptContext(it, suiteExecutors, actionExecutors, + ScriptContext context = new ScriptContext(it, suiteExecutors[GroupExecType.NORMAL], actionExecutors, config, [], { name -> true }) File pluginFile = it context.start({ @@ -454,7 +480,7 @@ class RegressionTest { + "output: ${sout.toString()}, error: ${serr.toString()}") } - def pipList = 'pip list'.execute().text + def pipList = 'python -m pip list'.execute().text log.info("python library: ${pipList}") } diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index 713a30e98ca076..eb816ecb73f997 100644 --- 
a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -29,6 +29,7 @@ import groovy.json.JsonSlurper import com.google.common.collect.ImmutableList import org.apache.commons.lang3.ObjectUtils import org.apache.doris.regression.Config +import org.apache.doris.regression.RegressionTest import org.apache.doris.regression.action.BenchmarkAction import org.apache.doris.regression.action.ProfileAction import org.apache.doris.regression.action.WaitForAction @@ -276,6 +277,11 @@ class Suite implements GroovyInterceptable { return } + if (RegressionTest.getGroupExecType(group) != RegressionTest.GroupExecType.DOCKER) { + throw new Exception("Need to add 'docker' to docker suite's belong groups, " + + "see example demo_p0/docker_action.groovy") + } + boolean pipelineIsCloud = isCloudMode() boolean dockerIsCloud = false if (options.cloudMode == null) { diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy index 44220500d1b5bd..a2f99868bd739b 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/SuiteCluster.groovy @@ -40,6 +40,7 @@ class ClusterOptions { ] List beConfigs = [ + 'max_sys_mem_available_low_water_mark_bytes=0', //no check mem available memory 'report_disk_state_interval_seconds=2', 'report_random_wait=false', ] diff --git a/regression-test/suites/backup_restore/test_backup_restore_with_view.groovy b/regression-test/suites/backup_restore/test_backup_restore_with_view.groovy index eee4a70c745ed5..be7769953230db 100644 --- a/regression-test/suites/backup_restore/test_backup_restore_with_view.groovy +++ b/regression-test/suites/backup_restore/test_backup_restore_with_view.groovy @@ -92,6 +92,22 @@ suite("test_backup_restore_with_view", "backup_restore") { assertTrue(show_view.contains("${dbName1}")) assertTrue(show_view.contains("${tableName}")) + // restore to db, test the view signature. 
+ sql """ + RESTORE SNAPSHOT ${dbName}.${snapshotName} + FROM `${repoName}` + PROPERTIES + ( + "backup_timestamp" = "${snapshot}", + "reserve_replica" = "true" + ) + """ + + syncer.waitAllRestoreFinish(dbName) + def restore_result = sql_return_maparray """ SHOW RESTORE FROM ${dbName} WHERE Label ="${snapshotName}" """ + restore_result.last() + logger.info("show restore result: ${restore_result}") + assertTrue(restore_result.last().State == "FINISHED") sql "DROP TABLE ${dbName}.${tableName} FORCE" sql "DROP VIEW ${dbName}.${viewName}" diff --git a/regression-test/suites/clone_p0/test_clone_missing_version.groovy b/regression-test/suites/clone_p0/test_clone_missing_version.groovy index 2981cf3c5e3638..aa119158dfa8ac 100644 --- a/regression-test/suites/clone_p0/test_clone_missing_version.groovy +++ b/regression-test/suites/clone_p0/test_clone_missing_version.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite('test_clone_missing_version') { +suite('test_clone_missing_version', 'docker') { def options = new ClusterOptions() options.feConfigs += [ 'disable_tablet_scheduler=true', diff --git a/regression-test/suites/clone_p0/test_clone_no_missing_version.groovy b/regression-test/suites/clone_p0/test_clone_no_missing_version.groovy index 75eb3866ec8302..b19521441dd20e 100644 --- a/regression-test/suites/clone_p0/test_clone_no_missing_version.groovy +++ b/regression-test/suites/clone_p0/test_clone_no_missing_version.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite('test_clone_no_missing_version') { +suite('test_clone_no_missing_version', 'docker') { def tbl = 'tbl_test_clone_no_missing_version' def options = new ClusterOptions() options.feConfigs += [ diff --git a/regression-test/suites/clone_p0/test_decommission_mtmv.groovy b/regression-test/suites/clone_p0/test_decommission_mtmv.groovy index 24853aa718c574..b29d5c13c94447 100644 --- a/regression-test/suites/clone_p0/test_decommission_mtmv.groovy +++ b/regression-test/suites/clone_p0/test_decommission_mtmv.groovy @@ -17,7 +17,7 @@ import org.apache.doris.regression.suite.ClusterOptions -suite('test_decommission_mtmv') { +suite('test_decommission_mtmv', 'docker') { def options = new ClusterOptions() options.feConfigs += [ 'disable_balance=true', diff --git a/regression-test/suites/clone_p0/test_drop_clone_tablet_path_race.groovy b/regression-test/suites/clone_p0/test_drop_clone_tablet_path_race.groovy index ebf1259a72f2c8..f9c72f3b71c23b 100644 --- a/regression-test/suites/clone_p0/test_drop_clone_tablet_path_race.groovy +++ b/regression-test/suites/clone_p0/test_drop_clone_tablet_path_race.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.junit.Assert -suite('test_drop_clone_tablet_path_race') { +suite('test_drop_clone_tablet_path_race', 'docker') { if (isCloudMode()) { return } diff --git a/regression-test/suites/cloud_p0/multi_cluster/test_auto_start.groovy b/regression-test/suites/cloud_p0/multi_cluster/test_auto_start.groovy new file mode 100644 index 00000000000000..2ce9a9d8f4b531 --- /dev/null +++ b/regression-test/suites/cloud_p0/multi_cluster/test_auto_start.groovy @@ -0,0 +1,172 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.apache.doris.regression.suite.ClusterOptions +import groovy.json.JsonSlurper +import groovy.json.JsonOutput +import org.awaitility.Awaitility; +import org.apache.doris.regression.util.Http +import static java.util.concurrent.TimeUnit.SECONDS; + +suite('test_auto_start_in_cloud', 'multi_cluster') { + if (!isCloudMode()) { + return; + } + def options = new ClusterOptions() + options.feConfigs += [ + 'cloud_cluster_check_interval_second=1', + 'cloud_pre_heating_time_limit_sec=1', + 'sys_log_verbose_modules=org', + 'heartbeat_interval_second=1' + ] + options.setFeNum(3) + options.setBeNum(3) + options.cloudMode = true + options.connectToFollower = true + + def getClusterFragementStatus = { def fe -> + def (feHost, feHttpPort) = fe.getHttpAddress() + // curl -X GET -u root: '128.1.1.1:8030/rest/v2/manager/cluster/cluster_info/cloud_cluster_status' + def url = 'http://' + feHost + ':' + feHttpPort + '/rest/v2/manager/cluster/cluster_info/cloud_cluster_status' + def result = Http.GET(url, true) + result + } + + + def set_cluster_status = { String unique_id , String cluster_id, String status, def ms -> + def jsonOutput = new JsonOutput() + def reqBody = [ + cloud_unique_id: unique_id, + cluster : [ + cluster_id : cluster_id, + cluster_status : status + ] + ] + def js = jsonOutput.toJson(reqBody) + log.info("drop cluster req: ${js} ".toString()) + + def set_cluster_status_api = { request_body, check_func -> + httpTest { + endpoint ms.host+':'+ms.httpPort + uri "/MetaService/http/set_cluster_status?token=greedisgood9999" + body request_body + check check_func + } + } + + set_cluster_status_api.call(js) { + respCode, body -> + log.info("set cluster status resp: ${body} ${respCode}".toString()) + def json = parseJson(body) + assertTrue(json.code.equalsIgnoreCase("OK")) + } + } + + docker(options) { + sql """ + CREATE TABLE table1 ( + class INT, + id INT, + score INT SUM + ) + AGGREGATE KEY(class, id) + DISTRIBUTED BY HASH(class) BUCKETS 48 + """ + + sql """INSERT INTO table1 VALUES (1, 1, 100)""" + // master + def fe1 = cluster.getFeByIndex(1) + // ms + def ms = cluster.getAllMetaservices().get(0) + + def result = sql_return_maparray """SHOW CLUSTERS""" + String clusterName = result[0].cluster + def tag = getCloudBeTagByName(clusterName) + logger.info("tag = {}", tag) + + def jsonSlurper = new JsonSlurper() + def jsonObject = jsonSlurper.parseText(tag) + String cloudClusterId = jsonObject.cloud_cluster_id + String uniqueId = jsonObject.cloud_unique_id + + sleep(5 * 1000) + + Map fragmentUpdateTimeMap = [:] + + // no read,write,sc, 20s suspend cluster + boolean clusterCanSuspend = true + for (int i = 0; i < 20; i++) { + result = getClusterFragementStatus(fe1) + result.data.compute_cluster_id.each { + if (fragmentUpdateTimeMap[it.host] == null) { + fragmentUpdateTimeMap[it.host] = it.lastFragmentUpdateTime + } else if (fragmentUpdateTimeMap[it.host] != it.lastFragmentUpdateTime) { + log.info("fragment update 
time changed be: {} old time: {} new time: {}", it.host, fragmentUpdateTimeMap[it.host], it.lastFragmentUpdateTime) + clusterCanSuspend = false + } + } + sleep(1 * 1000) + } + assertTrue(clusterCanSuspend) + + // cloud control set cluster status SUSPENDED + set_cluster_status(uniqueId, cloudClusterId, "SUSPENDED", ms) + + dockerAwaitUntil(5) { + tag = getCloudBeTagByName(clusterName) + logger.info("tag = {}", tag) + jsonObject = jsonSlurper.parseText(tag) + String cluster_status = jsonObject.cloud_cluster_status + cluster_status == "SUSPENDED" + } + + cluster.stopBackends(1,2,3) + + // select + future1 = thread { + def begin = System.currentTimeMillis(); + // root cant resume, due to deamon thread use root + def connInfo = context.threadLocalConn.get() + result = connect(user = 'admin', password = '', url = connInfo.conn.getMetaData().getURL()) { + sql 'SELECT * FROM table1' + } + def cost = System.currentTimeMillis() - begin; + log.info("result {} time cost: {}", result, cost) + assertTrue(cost > 5000) + assertEquals(1, result.size()) + } + // insert + + // cloud control + future2 = thread { + // check cluster "TO_RESUME" + dockerAwaitUntil(5) { + tag = getCloudBeTagByName(clusterName) + logger.info("tag = {}", tag) + jsonObject = jsonSlurper.parseText(tag) + String cluster_status = jsonObject.cloud_cluster_status + cluster_status == "TO_RESUME" + } + sleep(5 * 1000) + cluster.startBackends(1,2,3) + set_cluster_status(uniqueId, cloudClusterId, "NORMAL", ms) + } + + future1.get() + future2.get() + } +} diff --git a/regression-test/suites/cloud_p0/multi_cluster/test_rebalance.groovy b/regression-test/suites/cloud_p0/multi_cluster/test_rebalance.groovy index 0aa2e83ccc2bfd..542f9a969f5d6a 100644 --- a/regression-test/suites/cloud_p0/multi_cluster/test_rebalance.groovy +++ b/regression-test/suites/cloud_p0/multi_cluster/test_rebalance.groovy @@ -20,7 +20,7 @@ import groovy.json.JsonSlurper import org.awaitility.Awaitility; import static java.util.concurrent.TimeUnit.SECONDS; -suite('test_rebalance_in_cloud', 'multi_cluster') { +suite('test_rebalance_in_cloud', 'multi_cluster,docker') { if (!isCloudMode()) { return; } diff --git a/regression-test/suites/cloud_p0/multi_cluster/test_tvf.groovy b/regression-test/suites/cloud_p0/multi_cluster/test_tvf.groovy index 13af1209e99db4..90fd6656b8ffbb 100644 --- a/regression-test/suites/cloud_p0/multi_cluster/test_tvf.groovy +++ b/regression-test/suites/cloud_p0/multi_cluster/test_tvf.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import groovy.json.JsonSlurper -suite('test_tvf_in_cloud', 'multi_cluster') { +suite('test_tvf_in_cloud', 'multi_cluster,docker') { if (!isCloudMode()) { return; } @@ -83,4 +83,4 @@ suite('test_tvf_in_cloud', 'multi_cluster') { sql """use @${currentCluster.cluster}""" testCase.call() } -} \ No newline at end of file +} diff --git a/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy b/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy index 2d8ca3f529674d..88ec8e8861d6f4 100644 --- a/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy +++ b/regression-test/suites/cloud_p0/query_retry/test_retry_e-230.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType import org.apache.doris.regression.suite.SuiteCluster -suite("test_retry_e-230") { +suite("test_retry_e-230", 'docker') { if (!isCloudMode()) { return } diff --git 
a/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy b/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy index b393979d44218a..ea5e818c2ee06b 100644 --- a/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy +++ b/regression-test/suites/cloud_p0/schema_change/compaction10/test_schema_change_with_compaction10.groovy @@ -20,7 +20,7 @@ import org.apache.http.NoHttpResponseException import org.apache.doris.regression.util.DebugPoint import org.apache.doris.regression.util.NodeType -suite('test_schema_change_with_compaction10') { +suite('test_schema_change_with_compaction10', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() @@ -260,4 +260,4 @@ suite('test_schema_change_with_compaction10') { assertTrue(out.contains("[8-16]")) } } -} \ No newline at end of file +} diff --git a/regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy b/regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy index fd257fcb7ea950..d49d8646d3fd51 100644 --- a/regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy +++ b/regression-test/suites/cloud_p0/schema_change/compaction11/test_schema_change_with_compaction11.groovy @@ -20,7 +20,7 @@ import org.apache.http.NoHttpResponseException import org.apache.doris.regression.util.DebugPoint import org.apache.doris.regression.util.NodeType -suite('test_schema_change_with_compaction11') { +suite('test_schema_change_with_compaction11', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() diff --git a/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy b/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy index f5028ff9e818c3..fd6267b85bcbc9 100644 --- a/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy +++ b/regression-test/suites/cloud_p0/schema_change/compaction5/test_schema_change_with_compaction5.groovy @@ -20,7 +20,7 @@ import org.apache.http.NoHttpResponseException import org.apache.doris.regression.util.DebugPoint import org.apache.doris.regression.util.NodeType -suite('test_schema_change_with_compaction5', 'nonConcurrent') { +suite('test_schema_change_with_compaction5', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() @@ -257,4 +257,4 @@ suite('test_schema_change_with_compaction5', 'nonConcurrent') { assertTrue(out.contains("[8-16]")) } } -} \ No newline at end of file +} diff --git a/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy b/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy index 951535433d1362..d77db4eb2df541 100644 --- a/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy +++ b/regression-test/suites/cloud_p0/schema_change/compaction6/test_schema_change_with_compaction6.groovy @@ -20,7 +20,7 @@ import org.apache.http.NoHttpResponseException import org.apache.doris.regression.util.DebugPoint import org.apache.doris.regression.util.NodeType -suite('test_schema_change_with_compaction6', 'nonConcurrent') { 
+suite('test_schema_change_with_compaction6', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() @@ -260,4 +260,4 @@ suite('test_schema_change_with_compaction6', 'nonConcurrent') { assertTrue(out.contains("[8-16]")) } } -} \ No newline at end of file +} diff --git a/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy b/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy index 83c549eefc5abd..3797a89f565997 100644 --- a/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy +++ b/regression-test/suites/cloud_p0/schema_change/compaction9/test_schema_change_with_compaction9.groovy @@ -20,7 +20,7 @@ import org.apache.http.NoHttpResponseException import org.apache.doris.regression.util.DebugPoint import org.apache.doris.regression.util.NodeType -suite('test_schema_change_with_compaction9') { +suite('test_schema_change_with_compaction9', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() @@ -257,4 +257,4 @@ suite('test_schema_change_with_compaction9') { assertTrue(out.contains("[8-16]")) } } -} \ No newline at end of file +} diff --git a/regression-test/suites/compaction/test_compaction_score_action.groovy b/regression-test/suites/compaction/test_compaction_score_action.groovy new file mode 100644 index 00000000000000..9ab8743778fb10 --- /dev/null +++ b/regression-test/suites/compaction/test_compaction_score_action.groovy @@ -0,0 +1,53 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
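
The schema-change suites above move from the nonConcurrent group to docker, so cluster-spinning tests no longer serialize behind the single-threaded pool. For reference, the minimal shape such a suite takes; the suite name, node counts, table, and assertion below are illustrative, not taken from this patch:

```groovy
import org.apache.doris.regression.suite.ClusterOptions

// 'docker' in the group list routes the suite to the docker executor pool;
// the docker(options) block provisions a throw-away FE/BE cluster for the body.
suite('demo_docker_skeleton', 'docker') {
    def options = new ClusterOptions()
    options.feNum = 1
    options.beNum = 3
    options.enableDebugPoints()

    docker(options) {
        sql 'CREATE TABLE t1 (k INT) DISTRIBUTED BY HASH(k) BUCKETS 1 PROPERTIES ("replication_num" = "1")'
        sql 'INSERT INTO t1 VALUES (1), (2)'
        def rows = sql 'SELECT count(*) FROM t1'
        assertEquals(2, rows[0][0] as int)
    }
}
```
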
+ +suite("test_compaction_score_action") { + def tableName = "test_compaction_score_action"; + + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE IF NOT EXISTS ${tableName} ( + id INT NOT NULL, + name STRING NOT NULL + ) DUPLICATE KEY (`id`) + PROPERTIES ("replication_num" = "1", "disable_auto_compaction" = "true"); + """ + for (i in 0..<30) { + sql """ INSERT INTO ${tableName} VALUES(1, "Vedal") """ + sql """ INSERT INTO ${tableName} VALUES(2, "Neuro") """ + sql """ INSERT INTO ${tableName} VALUES(3, "Evil") """ + } + + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + + for (int i=0;i= 90) + } else { + def (code, text, err) = curl("GET",beHttpAddress+"/api/compaction_score?top_n=1") + def score_str = parseJson(text).get(0).get("compaction_score") + def score = Integer.parseInt(score_str) + assertTrue(score >= 90) + } + } +} diff --git a/regression-test/suites/compaction/test_compaction_with_visible_version.groovy b/regression-test/suites/compaction/test_compaction_with_visible_version.groovy index 4a6ee4c847a5db..e9b60774efa22b 100644 --- a/regression-test/suites/compaction/test_compaction_with_visible_version.groovy +++ b/regression-test/suites/compaction/test_compaction_with_visible_version.groovy @@ -19,7 +19,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.Http import org.apache.doris.regression.util.NodeType -suite('test_compaction_with_visible_version') { +suite('test_compaction_with_visible_version', 'docker') { def options = new ClusterOptions() def compaction_keep_invisible_version_min_count = 50L options.feConfigs += [ diff --git a/regression-test/suites/control_p0/test_report_version_missing.groovy b/regression-test/suites/control_p0/test_report_version_missing.groovy deleted file mode 100644 index 675ac52362e6fb..00000000000000 --- a/regression-test/suites/control_p0/test_report_version_missing.groovy +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -import org.apache.doris.regression.suite.ClusterOptions -import org.apache.doris.regression.util.NodeType - -suite('test_report_version_missing', "nonConcurrent") { - if (isCloudMode()) { - return - } - def tableName = "test_set_replica_status_table_in_docker" - try { - setFeConfig('disable_tablet_scheduler', true) - Thread.sleep(2000) - - sql "DROP TABLE IF EXISTS ${tableName}" - sql """ - CREATE TABLE ${tableName} ( - `id` LARGEINT NOT NULL, - `count` LARGEINT SUM DEFAULT "0") - AGGREGATE KEY(`id`) - DISTRIBUTED BY HASH(`id`) BUCKETS 1 - PROPERTIES - ( - "replication_num" = "1" - ) - """ - List values = [] - for (int i = 1; i <= 10; ++i) { - values.add("(${i}, ${i})") - } - sql """INSERT INTO ${tableName} VALUES ${values.join(",")}""" - - def result = sql_return_maparray """show tablets from ${tableName}""" - assertNotNull(result) - def tabletId = null - for (def res : result) { - tabletId = res.TabletId - break - } - - GetDebugPoint().enableDebugPointForAllBEs("Tablet.build_tablet_report_info.version_miss", [tablet_id:"${tabletId}",version_miss:true]) - boolean succ = false - - for (int i = 0; i < 3; ++i) { - result = sql_return_maparray """show tablets from ${tableName}""" - logger.info("show tablets from ${result}, has after ${i} * 60 s") - assertNotNull(result) - // LstFailedVersion > 0, version missing - for (def res : result) { - if (res.TabletId.toLong() == tabletId.toLong() && res.LstFailedVersion.toLong() > 0) { - succ = true - break - } - } - if (succ) { - break - } - Thread.sleep(60000) - } - assertTrue(succ) - - } finally { - setFeConfig('disable_tablet_scheduler', false) - GetDebugPoint().disableDebugPointForAllBEs("Tablet.build_tablet_report_info.version_miss") - sql "DROP TABLE IF EXISTS ${tableName}" - } -} diff --git a/regression-test/suites/control_p1/test_report_version_missing.groovy b/regression-test/suites/control_p1/test_report_version_missing.groovy new file mode 100644 index 00000000000000..63ccd4ed6dd6ea --- /dev/null +++ b/regression-test/suites/control_p1/test_report_version_missing.groovy @@ -0,0 +1,85 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.apache.doris.regression.suite.ClusterOptions +import org.apache.doris.regression.util.NodeType + +import org.awaitility.Awaitility +import static java.util.concurrent.TimeUnit.SECONDS + +suite('test_report_version_missing', 'nonConcurrent,p1') { + if (isCloudMode()) { + return + } + def tableName = 'test_set_replica_status_table_in_docker' + try { + setFeConfig('disable_tablet_scheduler', true) + Thread.sleep(2000) + + sql "DROP TABLE IF EXISTS ${tableName}" + sql """ + CREATE TABLE ${tableName} ( + `id` LARGEINT NOT NULL, + `count` LARGEINT SUM DEFAULT "0") + AGGREGATE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES + ( + "replication_num" = "1" + ) + """ + List values = [] + for (int i = 1; i <= 10; ++i) { + values.add("(${i}, ${i})") + } + sql """INSERT INTO ${tableName} VALUES ${values.join(',')}""" + + def result = sql_return_maparray """show tablets from ${tableName}""" + assertNotNull(result) + def tabletId = null + for (def res : result) { + tabletId = res.TabletId + break + } + + GetDebugPoint().enableDebugPointForAllBEs('Tablet.build_tablet_report_info.version_miss', [tablet_id:"${tabletId}", version_miss:true]) + boolean succ = false + + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort) + + backendId_to_backendIP.each { beId, beIp -> + def port = backendId_to_backendHttpPort.get(beId) as int + be_report_tablet(beIp, port) + } + + Awaitility.await().atMost(180, SECONDS).pollInterval(1, SECONDS).await().until({ + def tablets = sql_return_maparray """show tablets from ${tableName}""" + logger.info("show tablets from ${tablets}") + assertNotNull(tablets) + succ = tablets.any { it.TabletId.toLong() == tabletId.toLong() && it.LstFailedVersion.toLong() > 0 } + return succ + }) + + assertTrue(succ) + } finally { + setFeConfig('disable_tablet_scheduler', false) + GetDebugPoint().disableDebugPointForAllBEs('Tablet.build_tablet_report_info.version_miss') + sql "DROP TABLE IF EXISTS ${tableName}" + } +} diff --git a/regression-test/suites/datatype_p0/agg_state/max/test_agg_state_max.groovy b/regression-test/suites/datatype_p0/agg_state/max/test_agg_state_max.groovy index 983f51beed1f3c..a71da554afb5df 100644 --- a/regression-test/suites/datatype_p0/agg_state/max/test_agg_state_max.groovy +++ b/regression-test/suites/datatype_p0/agg_state/max/test_agg_state_max.groovy @@ -30,7 +30,7 @@ suite("test_agg_state_max") { test { sql "insert into a_table values(100,max_state(null));" - exception "can not cast from origin type agg_state" + exception "illegal for non_nullable" } sql """insert into a_table diff --git a/regression-test/suites/demo_p0/docker_action.groovy b/regression-test/suites/demo_p0/docker_action.groovy index 6d62d6ea7bea8d..bfe9c0039e2761 100644 --- a/regression-test/suites/demo_p0/docker_action.groovy +++ b/regression-test/suites/demo_p0/docker_action.groovy @@ -17,7 +17,15 @@ import org.apache.doris.regression.suite.ClusterOptions -suite('docker_action') { +// run docker suite steps: +// 1. Read 'docker/runtime/doris-compose/Readme.md', make sure you can setup a doris docker cluster; +// 2. 
update regression-conf-custom.groovy with config: +// image = "xxxx" // your doris docker image +// excludeDockerTest = false // do run docker suite, default is false +// dockerEndDeleteFiles = false // after run docker suite, whether delete contains's log and data in directory '/tmp/doris/' + +// need add 'docker' to suite's group, and don't add 'nonConcurrent' to it +suite('docker_action', 'docker') { // run a new docker docker { sql '''create table tb1 (k int) DISTRIBUTED BY HASH(k) BUCKETS 10''' diff --git a/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_insert_timeout.groovy b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_insert_timeout.groovy new file mode 100644 index 00000000000000..23d92f31e5ad8e --- /dev/null +++ b/regression-test/suites/fault_injection_p0/cloud/test_cloud_mow_insert_timeout.groovy @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import org.junit.Assert +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility + +suite("test_cloud_mow_insert_timeout", "nonConcurrent") { + if (!isCloudMode()) { + return + } + + GetDebugPoint().clearDebugPointsForAllFEs() + GetDebugPoint().clearDebugPointsForAllBEs() + + def table1 = "test_cloud_mow_insert_timeout" + sql "DROP TABLE IF EXISTS ${table1} FORCE;" + sql """ CREATE TABLE IF NOT EXISTS ${table1} ( + `k1` int NOT NULL, + `c1` int, + `c2` int + )UNIQUE KEY(k1) + DISTRIBUTED BY HASH(k1) BUCKETS 1 + PROPERTIES ( + "enable_mow_light_delete" = "false", + "enable_unique_key_merge_on_write" = "true", + "disable_auto_compaction" = "true", + "replication_num" = "1"); """ + + sql "insert into ${table1} values(1,1,1);" + sql "insert into ${table1} values(2,2,2);" + sql "insert into ${table1} values(3,3,3);" + sql "sync;" + order_qt_sql "select * from ${table1};" + + def customFeConfig = [ + delete_bitmap_lock_expiration_seconds : 5, + calculate_delete_bitmap_task_timeout_seconds : 2, + mow_insert_into_commit_retry_times : 2 + ] + + setFeConfigTemporary(customFeConfig) { + try { + explain { + sql "delete from ${table1} where k1=2;" + contains "IS_PARTIAL_UPDATE: true" + } + + // block the calculation of delete bitmap on BE + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.enable_spin_wait", [token: "token1"]) + GetDebugPoint().enableDebugPointForAllBEs("BaseTablet::update_delete_bitmap.block", [wait_token: "token1"]) + + // should return error after running out of try times + test { + sql "delete from ${table1} where k1=2;" + exception "Failed to calculate delete bitmap. Timeout." + } + + test { + sql "insert into ${table1} values(4,4,4)" + exception "Failed to calculate delete bitmap. Timeout." 
+ } + + order_qt_sql "select * from ${table1};" + + } catch(Exception e) { + logger.info(e.getMessage()) + throw e + } finally { + GetDebugPoint().clearDebugPointsForAllBEs() + } + sql "DROP TABLE IF EXISTS ${table1};" + } +} diff --git a/regression-test/suites/fault_injection_p0/test_group_commit_async_wal_msg_fault_injection.groovy b/regression-test/suites/fault_injection_p0/test_group_commit_async_wal_msg_fault_injection.groovy index c9e22504b1b132..2d0b91a01ed5fc 100644 --- a/regression-test/suites/fault_injection_p0/test_group_commit_async_wal_msg_fault_injection.groovy +++ b/regression-test/suites/fault_injection_p0/test_group_commit_async_wal_msg_fault_injection.groovy @@ -42,7 +42,7 @@ suite("test_group_commit_async_wal_msg_fault_injection","nonConcurrent") { ) engine=olap DISTRIBUTED BY HASH(`k`) BUCKETS 5 - properties("replication_num" = "1") + properties("replication_num" = "1", "group_commit_interval_ms" = "10") """ GetDebugPoint().clearDebugPointsForAllBEs() @@ -79,7 +79,7 @@ suite("test_group_commit_async_wal_msg_fault_injection","nonConcurrent") { ) engine=olap DISTRIBUTED BY HASH(`k`) BUCKETS 5 - properties("replication_num" = "1") + properties("replication_num" = "1", "group_commit_interval_ms" = "10") """ GetDebugPoint().clearDebugPointsForAllBEs() @@ -118,7 +118,7 @@ suite("test_group_commit_async_wal_msg_fault_injection","nonConcurrent") { ) engine=olap DISTRIBUTED BY HASH(`k`) BUCKETS 5 - properties("replication_num" = "1") + properties("replication_num" = "1", "group_commit_interval_ms" = "10") """ GetDebugPoint().clearDebugPointsForAllBEs() diff --git a/regression-test/suites/insert_p0/group_commit/replay_wal_restart_fe.groovy b/regression-test/suites/insert_p0/group_commit/replay_wal_restart_fe.groovy index d39bdd9d4a954d..8347950ca6bbe4 100644 --- a/regression-test/suites/insert_p0/group_commit/replay_wal_restart_fe.groovy +++ b/regression-test/suites/insert_p0/group_commit/replay_wal_restart_fe.groovy @@ -21,7 +21,7 @@ import org.apache.doris.regression.suite.ClusterOptions -suite("replay_wal_restart_fe") { +suite("replay_wal_restart_fe", 'docker') { def check_schema_change = { state -> for (int i = 0; i < 30; i++) { def jobs = sql_return_maparray "SHOW ALTER TABLE COLUMN WHERE TableName = 'tbl_2' order by CreateTime desc;" diff --git a/regression-test/suites/insert_p0/insert_group_commit_into.groovy b/regression-test/suites/insert_p0/insert_group_commit_into.groovy index dbf2bd2e18ef12..7af61dfc25fa6d 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_into.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_into.groovy @@ -116,7 +116,8 @@ suite("insert_group_commit_into") { ) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( - "replication_num" = "1" + "replication_num" = "1", + "group_commit_interval_ms" = "200" ); """ @@ -333,7 +334,7 @@ suite("insert_group_commit_into") { ) UNIQUE key (`teamID`,`service_id`, `start_time`) DISTRIBUTED BY hash(`start_time`) BUCKETS 1 - PROPERTIES ("replication_allocation" = "tag.location.default: 1") + PROPERTIES ("replication_allocation" = "tag.location.default: 1", "group_commit_interval_ms" = "200") """ connect(user = context.config.jdbcUser, password = context.config.jdbcPassword, url = context.config.jdbcUrl) { @@ -384,7 +385,8 @@ suite("insert_group_commit_into") { COMMENT 'OLAP' DISTRIBUTED BY HASH(`ordernum`) BUCKETS 3 PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" + "replication_allocation" = "tag.location.default: 1", + "group_commit_interval_ms" = "200" );""" sql 
"""drop table if exists ${table_tmp};""" sql """CREATE TABLE ${table_tmp} ( @@ -403,7 +405,8 @@ suite("insert_group_commit_into") { COMMENT 'OLAP' DISTRIBUTED BY HASH(`ordernum`) BUCKETS 1 PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" + "replication_allocation" = "tag.location.default: 1", + "group_commit_interval_ms" = "200" ); """ sql """DROP MATERIALIZED VIEW IF EXISTS ods_zn_dnt_max1 ON ${table};""" createMV("""create materialized view ods_zn_dnt_max1 as @@ -508,7 +511,8 @@ suite("insert_group_commit_into") { DUPLICATE KEY(`k1`) DISTRIBUTED BY HASH(`k1`) BUCKETS 1 PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" + "replication_allocation" = "tag.location.default: 1", + "group_commit_interval_ms" = "200" ); """ diff --git a/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy b/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy index 0624f1bcf37e7d..64ae30f8f8a63f 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy @@ -41,19 +41,6 @@ suite("insert_group_commit_into_max_filter_ratio") { } } - def normal_insert = { sql, expected_row_count -> - def stmt = prepareStatement """ ${sql} """ - def result = stmt.executeUpdate() - logger.info("insert result: " + result) - def serverInfo = (((StatementImpl) stmt).results).getServerInfo() - logger.info("result server info: " + serverInfo) - if (result != expected_row_count) { - logger.warn("insert result: " + result + ", expected_row_count: " + expected_row_count + ", sql: " + sql) - } - assertTrue(serverInfo.contains("'status':'VISIBLE'")) - assertTrue(serverInfo.contains("'label':'label")) - } - def group_commit_insert = { sql, expected_row_count -> def stmt = prepareStatement """ ${sql} """ def result = stmt.executeUpdate() @@ -172,7 +159,7 @@ suite("insert_group_commit_into_max_filter_ratio") { DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_num" = "1", - "group_commit_interval_ms" = "1000" + "group_commit_interval_ms" = "200" ); """ diff --git a/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy b/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy index ca280cd17d83eb..8ae0d41565d488 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_into_unique.groovy @@ -86,7 +86,8 @@ suite("insert_group_commit_into_unique") { UNIQUE KEY(`id`, `name`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( - "replication_num" = "1" + "replication_num" = "1", + "group_commit_interval_ms" = "100" ); """ @@ -171,7 +172,8 @@ suite("insert_group_commit_into_unique") { DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_num" = "1", - "function_column.sequence_col" = "score" + "function_column.sequence_col" = "score", + "group_commit_interval_ms" = "100" ); """ @@ -257,7 +259,8 @@ suite("insert_group_commit_into_unique") { DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_num" = "1", - "function_column.sequence_type" = "int" + "function_column.sequence_type" = "int", + "group_commit_interval_ms" = "100" ); """ diff --git a/regression-test/suites/insert_p0/insert_group_commit_into_unique_sync_mode.groovy b/regression-test/suites/insert_p0/insert_group_commit_into_unique_sync_mode.groovy index c3a1e79cba517c..f58b306ab4ef66 100644 --- 
a/regression-test/suites/insert_p0/insert_group_commit_into_unique_sync_mode.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_into_unique_sync_mode.groovy @@ -124,7 +124,7 @@ suite("insert_group_commit_into_unique_sync_mode") { DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( "replication_num" = "1", - "group_commit_interval_ms" = "1000" + "group_commit_interval_ms" = "10" ); """ @@ -211,7 +211,7 @@ suite("insert_group_commit_into_unique_sync_mode") { PROPERTIES ( "replication_num" = "1", "function_column.sequence_col" = "score", - "group_commit_interval_ms" = "1000" + "group_commit_interval_ms" = "10" ); """ @@ -301,7 +301,7 @@ suite("insert_group_commit_into_unique_sync_mode") { PROPERTIES ( "replication_num" = "1", "function_column.sequence_type" = "int", - "group_commit_interval_ms" = "1000" + "group_commit_interval_ms" = "10" ); """ diff --git a/regression-test/suites/insert_p0/insert_group_commit_with_exception.groovy b/regression-test/suites/insert_p0/insert_group_commit_with_exception.groovy index 1081064d9fe3a3..f59c9bb8b00c69 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_with_exception.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_with_exception.groovy @@ -20,7 +20,7 @@ import java.sql.DriverManager import java.sql.Statement import java.sql.PreparedStatement -suite("insert_group_commit_with_exception", "nonConcurrent") { +suite("insert_group_commit_with_exception") { def table = "insert_group_commit_with_exception" def getRowCount = { expectedRowCount -> def retry = 0 @@ -57,6 +57,7 @@ suite("insert_group_commit_with_exception", "nonConcurrent") { DUPLICATE KEY(`id`, `name`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( + "group_commit_interval_ms" = "200", "replication_num" = "1" ); """ @@ -65,10 +66,10 @@ suite("insert_group_commit_with_exception", "nonConcurrent") { if (item == "nereids") { sql """ set enable_nereids_planner=true; """ sql """ set enable_fallback_to_original_planner=false; """ - sql "set global enable_server_side_prepared_statement = true" + sql "set enable_server_side_prepared_statement = true" } else { sql """ set enable_nereids_planner = false; """ - sql "set global enable_server_side_prepared_statement = false" + sql "set enable_server_side_prepared_statement = false" } // insert into without column @@ -128,10 +129,10 @@ suite("insert_group_commit_with_exception", "nonConcurrent") { if (item == "nereids") { statement.execute("set enable_nereids_planner=true;"); statement.execute("set enable_fallback_to_original_planner=false;"); - sql "set global enable_server_side_prepared_statement = true" + sql "set enable_server_side_prepared_statement = true" } else { statement.execute("set enable_nereids_planner = false;") - sql "set global enable_server_side_prepared_statement = false" + sql "set enable_server_side_prepared_statement = false" } // without column try (PreparedStatement ps = connection.prepareStatement("insert into ${table} values(?, ?, ?, ?)")) { @@ -291,5 +292,4 @@ suite("insert_group_commit_with_exception", "nonConcurrent") { // try_sql("DROP TABLE ${table}") } } - sql "set global enable_server_side_prepared_statement = true" } diff --git a/regression-test/suites/insert_p0/insert_group_commit_with_large_data.groovy b/regression-test/suites/insert_p0/insert_group_commit_with_large_data.groovy index 2af290ffc188e1..b66130c9e29627 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_with_large_data.groovy +++ 
b/regression-test/suites/insert_p0/insert_group_commit_with_large_data.groovy @@ -62,6 +62,7 @@ suite("insert_group_commit_with_large_data") { DUPLICATE KEY(`id`, `name`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( + "group_commit_interval_ms" = "40", "replication_num" = "1" ); """ diff --git a/regression-test/suites/insert_p0/insert_group_commit_with_prepare_stmt.groovy b/regression-test/suites/insert_p0/insert_group_commit_with_prepare_stmt.groovy index 6e05513a8d648b..7f2919f8118d10 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_with_prepare_stmt.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_with_prepare_stmt.groovy @@ -144,6 +144,7 @@ suite("insert_group_commit_with_prepare_stmt") { UNIQUE KEY(`id`, `name`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( + "group_commit_interval_ms" = "40", "replication_num" = "1" ); """ @@ -206,6 +207,7 @@ suite("insert_group_commit_with_prepare_stmt") { DUPLICATE KEY(`id`, `name`) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( + "group_commit_interval_ms" = "40", "replication_num" = "1" ); """ diff --git a/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe.groovy b/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe.groovy index 9d1e372224e80e..bd478bc2359959 100644 --- a/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe.groovy +++ b/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe.groovy @@ -22,7 +22,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite("txn_insert_restart_fe") { +suite("txn_insert_restart_fe", 'docker') { def get_observer_fe_url = { def fes = sql_return_maparray "show frontends" logger.info("frontends: ${fes}") diff --git a/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe_with_schema_change.groovy b/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe_with_schema_change.groovy index fec2fc4378dff2..d2537bfe8c6a55 100644 --- a/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe_with_schema_change.groovy +++ b/regression-test/suites/insert_p0/transaction/txn_insert_restart_fe_with_schema_change.groovy @@ -22,7 +22,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite("txn_insert_restart_fe_with_schema_change") { +suite("txn_insert_restart_fe_with_schema_change", 'docker') { def getAlterTableState = { dbName, show_sql -> def retry = 0 sql "use ${dbName};" diff --git a/regression-test/suites/inverted_index_p0/index_change/test_build_index_with_clone_by_docker.groovy b/regression-test/suites/inverted_index_p0/index_change/test_build_index_with_clone_by_docker.groovy index f8478c3ea61ea0..999b58350ff1ab 100644 --- a/regression-test/suites/inverted_index_p0/index_change/test_build_index_with_clone_by_docker.groovy +++ b/regression-test/suites/inverted_index_p0/index_change/test_build_index_with_clone_by_docker.groovy @@ -19,7 +19,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType import org.apache.doris.regression.suite.SuiteCluster -suite("test_build_index_with_clone_by_docker"){ +suite("test_build_index_with_clone_by_docker", 'docker'){ if (isCloudMode()) { return } diff --git a/regression-test/suites/load/insert/test_min_load_replica_num_complicate.groovy b/regression-test/suites/load/insert/test_min_load_replica_num_complicate.groovy index 10869ed9bf2d3b..d6c9c71539b406 100644 --- 
a/regression-test/suites/load/insert/test_min_load_replica_num_complicate.groovy +++ b/regression-test/suites/load/insert/test_min_load_replica_num_complicate.groovy @@ -34,7 +34,7 @@ class InjectCase { } -suite('test_min_load_replica_num_complicate') { +suite('test_min_load_replica_num_complicate', 'docker') { def beCloneCostMs = 3000 def random = new Random() diff --git a/regression-test/suites/load/insert/test_publish_one_succ.groovy b/regression-test/suites/load/insert/test_publish_one_succ.groovy index 22f78c64300222..f58bca5c4581c9 100644 --- a/regression-test/suites/load/insert/test_publish_one_succ.groovy +++ b/regression-test/suites/load/insert/test_publish_one_succ.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite('test_publish_one_succ') { +suite('test_publish_one_succ', 'docker') { def options = new ClusterOptions() options.enableDebugPoints() docker(options) { diff --git a/regression-test/suites/load/insert/test_publish_slow_not_wait.groovy b/regression-test/suites/load/insert/test_publish_slow_not_wait.groovy index 8d3cddc42187b8..d9cd077eab01c6 100644 --- a/regression-test/suites/load/insert/test_publish_slow_not_wait.groovy +++ b/regression-test/suites/load/insert/test_publish_slow_not_wait.groovy @@ -17,7 +17,7 @@ import org.apache.doris.regression.suite.ClusterOptions -suite('test_publish_slow_not_wait') { +suite('test_publish_slow_not_wait', 'docker') { def options = new ClusterOptions() options.beNum = 3 options.feConfigs.add('disable_tablet_scheduler=true') diff --git a/regression-test/suites/load_p0/broker_load/test_etl_failed.groovy b/regression-test/suites/load_p0/broker_load/test_etl_failed.groovy index 4049fdadb1f1f8..70d2a42166dac0 100644 --- a/regression-test/suites/load_p0/broker_load/test_etl_failed.groovy +++ b/regression-test/suites/load_p0/broker_load/test_etl_failed.groovy @@ -67,7 +67,7 @@ suite("test_etl_failed", "load_p0") { assertTrue(1 == 2, "etl should be failed") break; } - if (result[0][2].equals("CANCELLED") && result[0][13].contains("_load_error_log")) { + if (result[0][2].equals("CANCELLED") && result[0][13].contains("error_log")) { break; } Thread.sleep(1000) diff --git a/regression-test/suites/load_p0/http_stream/test_group_commit_http_stream.groovy b/regression-test/suites/load_p0/http_stream/test_group_commit_http_stream.groovy index 5f4906662d3453..cb17cc82655fa7 100644 --- a/regression-test/suites/load_p0/http_stream/test_group_commit_http_stream.groovy +++ b/regression-test/suites/load_p0/http_stream/test_group_commit_http_stream.groovy @@ -88,6 +88,7 @@ suite("test_group_commit_http_stream") { ) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( + "group_commit_interval_ms" = "200", "replication_num" = "1" ); """ @@ -285,6 +286,7 @@ suite("test_group_commit_http_stream") { PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 4 PROPERTIES ( + "group_commit_interval_ms" = "200", "replication_num" = "1" ); """ @@ -307,7 +309,7 @@ suite("test_group_commit_http_stream") { sql """ alter table ${tableName} order by (${new_columns}); """ }).start();*/ - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 2; i++) { streamLoad { set 'version', '1' @@ -334,7 +336,7 @@ suite("test_group_commit_http_stream") { } } - getRowCount(2402288) + getRowCount(600572 * 2) qt_sql """ select count(*) from ${tableName} """ // assertTrue(getAlterTableState()) diff --git 
a/regression-test/suites/load_p0/insert/test_min_load_replica_num_simple.groovy b/regression-test/suites/load_p0/insert/test_min_load_replica_num_simple.groovy index 75d7155c3d44a6..88bcbfd248e2b7 100644 --- a/regression-test/suites/load_p0/insert/test_min_load_replica_num_simple.groovy +++ b/regression-test/suites/load_p0/insert/test_min_load_replica_num_simple.groovy @@ -17,7 +17,7 @@ import org.apache.doris.regression.suite.ClusterOptions -suite('test_min_load_replica_num_simple') { +suite('test_min_load_replica_num_simple', 'docker') { def options = new ClusterOptions() options.feConfigs.add('tablet_checker_interval_ms=1000') docker(options) { diff --git a/regression-test/suites/load_p0/routine_load/test_routine_load_restart_fe.groovy b/regression-test/suites/load_p0/routine_load/test_routine_load_restart_fe.groovy index d60fbf265fd9e5..d8ea6f911799a2 100644 --- a/regression-test/suites/load_p0/routine_load/test_routine_load_restart_fe.groovy +++ b/regression-test/suites/load_p0/routine_load/test_routine_load_restart_fe.groovy @@ -22,7 +22,7 @@ import org.apache.kafka.clients.producer.KafkaProducer import org.apache.kafka.clients.producer.ProducerRecord import org.apache.kafka.clients.producer.ProducerConfig -suite("test_routine_load_restart_fe", "p0, nonConcurrent") { +suite("test_routine_load_restart_fe", "docker") { def kafkaCsvTpoics = [ "test_out_of_range", ] diff --git a/regression-test/suites/load_p0/stream_load/test_coordidator_be_restart.groovy b/regression-test/suites/load_p0/stream_load/test_coordidator_be_restart.groovy index bb6b0c18a0daf7..e728335003a4f7 100644 --- a/regression-test/suites/load_p0/stream_load/test_coordidator_be_restart.groovy +++ b/regression-test/suites/load_p0/stream_load/test_coordidator_be_restart.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.http.NoHttpResponseException -suite('test_coordidator_be_restart') { +suite('test_coordidator_be_restart', 'docker') { def options = new ClusterOptions() options.cloudMode = false options.enableDebugPoints() diff --git a/regression-test/suites/load_p0/stream_load/test_group_commit_and_wal_back_pressure.groovy b/regression-test/suites/load_p0/stream_load/test_group_commit_and_wal_back_pressure.groovy index bd443b6eab7509..622ade420a2c91 100644 --- a/regression-test/suites/load_p0/stream_load/test_group_commit_and_wal_back_pressure.groovy +++ b/regression-test/suites/load_p0/stream_load/test_group_commit_and_wal_back_pressure.groovy @@ -41,6 +41,7 @@ suite("test_group_commit_and_wal_back_pressure") { UNIQUE KEY(k) DISTRIBUTED BY HASH (k) BUCKETS 32 PROPERTIES( + "group_commit_interval_ms" = "100", "replication_num" = "1" ); """ diff --git a/regression-test/suites/load_p0/stream_load/test_group_commit_stream_load.groovy b/regression-test/suites/load_p0/stream_load/test_group_commit_stream_load.groovy index fea930f59125ef..be483c4dd484fc 100644 --- a/regression-test/suites/load_p0/stream_load/test_group_commit_stream_load.groovy +++ b/regression-test/suites/load_p0/stream_load/test_group_commit_stream_load.groovy @@ -76,7 +76,8 @@ suite("test_group_commit_stream_load") { ) DISTRIBUTED BY HASH(`id`) BUCKETS 1 PROPERTIES ( - "replication_num" = "1" + "replication_num" = "1", + "group_commit_interval_ms" = "200" ); """ @@ -250,7 +251,8 @@ suite("test_group_commit_stream_load") { PARTITION p1998 VALUES [("19980101"), ("19990101"))) DISTRIBUTED BY HASH(`lo_orderkey`) BUCKETS 4 PROPERTIES ( - "replication_num" = "1" + "replication_num" = "1", + 
"group_commit_interval_ms" = "200" ); """ // load data @@ -272,7 +274,7 @@ suite("test_group_commit_stream_load") { sql """ alter table ${tableName} order by (${new_columns}); """ }).start();*/ - for (int i = 0; i < 4; i++) { + for (int i = 0; i < 2; i++) { streamLoad { table tableName @@ -297,7 +299,7 @@ suite("test_group_commit_stream_load") { } } - getRowCount(2402288) + getRowCount(600572 * 2) qt_sql """ select count(*) from ${tableName} """ // assertTrue(getAlterTableState()) diff --git a/regression-test/suites/migrate_p0/test_migrate_disk_with_publish_version.groovy b/regression-test/suites/migrate_p0/test_migrate_disk_with_publish_version.groovy index fb80d4272ddbaa..61dac3e4141822 100644 --- a/regression-test/suites/migrate_p0/test_migrate_disk_with_publish_version.groovy +++ b/regression-test/suites/migrate_p0/test_migrate_disk_with_publish_version.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite('test_migrate_disk_with_publish_version') { +suite('test_migrate_disk_with_publish_version', 'docker') { if (isCloudMode()) { return } diff --git a/regression-test/suites/mtmv_p0/test_multi_level_mtmv.groovy b/regression-test/suites/mtmv_p0/test_multi_level_mtmv.groovy index 55689b741489eb..33a876c46d4ef2 100644 --- a/regression-test/suites/mtmv_p0/test_multi_level_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_multi_level_mtmv.groovy @@ -87,6 +87,22 @@ suite("test_multi_level_mtmv") { waitingMTMVTaskFinishedByMvName(mv2) order_qt_mv2_should_one_partition "select NeedRefreshPartitions from tasks('type'='mv') where MvName = '${mv2}' order by CreateTime desc limit 1" + // insert into p2 again, check partition version if change + sql """ + INSERT INTO ${tableName} VALUES(2,3); + """ + sql """ + REFRESH MATERIALIZED VIEW ${mv1} AUTO + """ + waitingMTMVTaskFinishedByMvName(mv1) + order_qt_mv1_should_one_partition_again "select NeedRefreshPartitions from tasks('type'='mv') where MvName = '${mv1}' order by CreateTime desc limit 1" + sql """ + REFRESH MATERIALIZED VIEW ${mv2} AUTO + """ + waitingMTMVTaskFinishedByMvName(mv2) + order_qt_mv2_should_one_partition_again "select NeedRefreshPartitions from tasks('type'='mv') where MvName = '${mv2}' order by CreateTime desc limit 1" + order_qt_mv2_again "select * from ${mv2}" + // drop table sql """ drop table ${tableName} diff --git a/regression-test/suites/mtmv_up_down_olap_p0/load.groovy b/regression-test/suites/mtmv_up_down_olap_p0/load.groovy new file mode 100644 index 00000000000000..f909b33064d457 --- /dev/null +++ b/regression-test/suites/mtmv_up_down_olap_p0/load.groovy @@ -0,0 +1,71 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_upgrade_downgrade_prepare_olap_mtmv","p0,mtmv,restart_fe") { + String suiteName = "mtmv_up_down_olap" + String mvName = "${suiteName}_mtmv" + String tableName = "${suiteName}_table" + String tableName2 = "${suiteName}_table2" + + sql """drop materialized view if exists ${mvName};""" + sql """drop table if exists `${tableName}`""" + sql """drop table if exists `${tableName2}`""" + + sql """ + CREATE TABLE `${tableName}` ( + `user_id` LARGEINT NOT NULL COMMENT '\"用户id\"', + `date` DATE NOT NULL COMMENT '\"数据灌入日期时间\"', + `num` SMALLINT NOT NULL COMMENT '\"数量\"' + ) ENGINE=OLAP + DUPLICATE KEY(`user_id`, `date`, `num`) + COMMENT 'OLAP' + PARTITION BY RANGE(`date`) + (PARTITION p201701_1000 VALUES [('0000-01-01'), ('2017-02-01')), + PARTITION p201702_2000 VALUES [('2017-02-01'), ('2017-03-01')), + PARTITION p201703_all VALUES [('2017-03-01'), ('2017-04-01'))) + DISTRIBUTED BY HASH(`user_id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1') ; + """ + sql """ + insert into ${tableName} values(1,"2017-01-15",1),(2,"2017-02-15",2),(3,"2017-03-15",3); + """ + + sql """ + CREATE TABLE `${tableName2}` ( + `user_id` LARGEINT NOT NULL COMMENT '\"用户id\"', + `age` SMALLINT NOT NULL COMMENT '\"年龄\"' + ) ENGINE=OLAP + DUPLICATE KEY(`user_id`, `age`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`user_id`) BUCKETS 2 + PROPERTIES ('replication_num' = '1') ; + """ + sql """ + insert into ${tableName2} values(1,1),(2,2),(3,3); + """ + + sql """ + CREATE MATERIALIZED VIEW ${mvName} + REFRESH AUTO ON MANUAL + partition by(`date`) + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT a.* FROM ${tableName} a inner join ${tableName2} b on a.user_id=b.user_id; + """ + waitingMTMVTaskFinishedByMvName(mvName) +} diff --git a/regression-test/suites/mtmv_up_down_olap_p0/test_upgrade_downgrade_olap_mtmv.groovy b/regression-test/suites/mtmv_up_down_olap_p0/test_upgrade_downgrade_olap_mtmv.groovy new file mode 100644 index 00000000000000..253908ff4ae8ce --- /dev/null +++ b/regression-test/suites/mtmv_up_down_olap_p0/test_upgrade_downgrade_olap_mtmv.groovy @@ -0,0 +1,32 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_upgrade_downgrade_olap_mtmv","p0,mtmv,restart_fe") { + String suiteName = "mtmv_up_down_olap" + String dbName = context.config.getDbNameByFile(context.file) + String mvName = "${suiteName}_mtmv" + String tableName = "${suiteName}_table" + // test data is normal + order_qt_refresh_init "SELECT * FROM ${mvName}" + // test is sync + order_qt_mtmv_sync "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" + sql """ + REFRESH MATERIALIZED VIEW ${mvName} complete + """ + // test can refresh success + waitingMTMVTaskFinishedByMvName(mvName) +} diff --git a/regression-test/suites/nereids_p0/insert_into_table/insert_use_table_id.groovy b/regression-test/suites/nereids_p0/insert_into_table/insert_use_table_id.groovy deleted file mode 100644 index 930fe35b60ed65..00000000000000 --- a/regression-test/suites/nereids_p0/insert_into_table/insert_use_table_id.groovy +++ /dev/null @@ -1,107 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - - - - -suite('nereids_insert_use_table_id') { - sql 'set enable_nereids_planner=true' - sql 'set enable_fallback_to_original_planner=false' - sql 'set enable_nereids_dml=true' - sql 'set enable_strict_consistency_dml=true' - - // sql 'CREATE DATABASE IF NOT EXISTS dnereids_insert_use_table_id_test' - // sql 'use nereids_insert_use_table_id_test' - - def t1 = 'table_id_value_t1' - def t2 = 'table_id_value_t2' - def t3 = 'table_id_value_t3' - - sql "drop table if exists ${t1}" - sql "drop table if exists ${t2}" - sql "drop table if exists ${t3}" - - sql """ - create table ${t1} ( - id int, - id1 int, - c1 bigint, - c2 string, - c3 double, - c4 date - ) unique key (id, id1) - distributed by hash(id, id1) buckets 13 - properties( - 'replication_num'='1', - "function_column.sequence_col" = "c4" - ); - """ - - sql """ - create table ${t2} ( - id int, - c1 bigint, - c2 string, - c3 double, - c4 date - ) unique key (id) - distributed by hash(id) buckets 13 - properties( - 'replication_num'='1' - ); - """ - - sql """ - create table ${t3} ( - id int - ) distributed by hash(id) buckets 13 - properties( - 'replication_num'='1' - ); - """ - - - sql """ - INSERT INTO DORIS_INTERNAL_TABLE_ID(${getTableId(t1)}) VALUES - (1, (1 + 9) * (10 - 9), 1, '1', 1.0, '2000-01-01'), - (2, 20, 2, '2', 2.0, days_add('2000-01-01', 1)), - (3, 30, 3, '3', 3.0, makedate(2000, 3)); - """ - - sql """ - INSERT INTO DORIS_INTERNAL_TABLE_ID(${getTableId(t2)}) VALUES - (1, 10, '10', 10.0, '2000-01-10'), - (2, 20, '20', 20.0, '2000-01-20'), - (3, 30, '30', 30.0, '2000-01-30'), - (4, 4, '4', 4.0, '2000-01-04'), - (5, 5, '5', 5.0, '2000-01-05'); - """ - - sql """ - INSERT INTO DORIS_INTERNAL_TABLE_ID(${getTableId(t3)}) VALUES - (1), - (4), - (5); - """ - - sql "sync" - qt_sql_cross_join "select * from ${t1}, 
${t2}, ${t3} order by ${t1}.id, ${t1}.id1, ${t2}.id, ${t3}.id" - - -} - - diff --git a/regression-test/suites/nereids_rules_p0/mv/same_name/sync_async_same_name.groovy b/regression-test/suites/nereids_rules_p0/mv/same_name/sync_async_same_name.groovy index e8350d487a88ef..20dbc0a083158f 100644 --- a/regression-test/suites/nereids_rules_p0/mv/same_name/sync_async_same_name.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/same_name/sync_async_same_name.groovy @@ -165,7 +165,7 @@ suite("sync_async_same_name") { check {result -> def splitResult = result.split("MaterializedViewRewriteFail") splitResult.length == 2 ? splitResult[0].contains(common_mv_name) - && splitResult[0].contains("orders#${common_mv_name}") : false + && splitResult[0].contains("orders.${common_mv_name}") : false } } diff --git a/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_mod_distribution_key.groovy b/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_mod_distribution_key.groovy index db44f59216bbdd..c52e5897aa0880 100644 --- a/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_mod_distribution_key.groovy +++ b/regression-test/suites/partition_p0/dynamic_partition/test_dynamic_partition_mod_distribution_key.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions -suite("test_dynamic_partition_mod_distribution_key") { +suite("test_dynamic_partition_mod_distribution_key", "docker") { def options = new ClusterOptions() options.setFeNum(2) docker(options) { @@ -72,4 +72,4 @@ suite("test_dynamic_partition_mod_distribution_key") { assertEquals(9, result.size()) } } -} \ No newline at end of file +} diff --git a/regression-test/suites/partition_p0/test_create_table_exception.groovy b/regression-test/suites/partition_p0/test_create_table_exception.groovy index 96f097c76705f2..7c96e4b59da16e 100644 --- a/regression-test/suites/partition_p0/test_create_table_exception.groovy +++ b/regression-test/suites/partition_p0/test_create_table_exception.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType import org.apache.doris.regression.suite.SuiteCluster -suite("test_create_table_exception") { +suite("test_create_table_exception", "docker") { def options = new ClusterOptions() options.enableDebugPoints() options.setFeNum(3) diff --git a/regression-test/suites/partition_p0/test_partition_create_tablet_rr.groovy b/regression-test/suites/partition_p0/test_partition_create_tablet_rr.groovy index f7e77f06f38c23..836dff938f8c96 100644 --- a/regression-test/suites/partition_p0/test_partition_create_tablet_rr.groovy +++ b/regression-test/suites/partition_p0/test_partition_create_tablet_rr.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType import org.apache.doris.regression.suite.SuiteCluster -suite("test_partition_create_tablet_rr") { +suite("test_partition_create_tablet_rr", "docker") { def options = new ClusterOptions() options.beNum = 1 options.feConfigs.add('disable_balance=true') diff --git a/regression-test/suites/query_p0/sql_functions/encryption_digest/test_encryption_function.groovy b/regression-test/suites/query_p0/sql_functions/encryption_digest/test_encryption_function.groovy index f4a67b052c3750..25095f46917c96 100644 --- a/regression-test/suites/query_p0/sql_functions/encryption_digest/test_encryption_function.groovy +++ 
b/regression-test/suites/query_p0/sql_functions/encryption_digest/test_encryption_function.groovy @@ -91,4 +91,139 @@ suite("test_encryption_function") { """ sql""" insert into quantile_table values(1,"aaaaaa");""" qt_sql """ select sm4_decrypt(sm4_encrypt(k,"doris","0123456789abcdef"),"doris","0123456789abcdef") from quantile_table; """ + + // sm4_encrypt sm4_decrypt + // aes_encrypt aes_decrypt + //two arg (column/const) + sql "set enable_fold_constant_by_be = false;" + sql """ set block_encryption_mode=""; """ // SM4_128_ECB + qt_sql1 """ select sm4_decrypt(sm4_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql2 """ select sm4_decrypt(sm4_encrypt(k,k),k) from quantile_table; """ + qt_sql3 """ select sm4_decrypt(sm4_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql4 """ select sm4_decrypt(sm4_encrypt("zhang",k),k) from quantile_table; """ + + sql """ set block_encryption_mode="SM4_128_CBC"; """ + qt_sql5 """ select sm4_decrypt(sm4_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql6 """ select sm4_decrypt(sm4_encrypt(k,k),k) from quantile_table; """ + qt_sql7 """ select sm4_decrypt(sm4_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql8 """ select sm4_decrypt(sm4_encrypt("zhang",k),k) from quantile_table; """ + + sql """ set block_encryption_mode="SM4_128_OFB"; """ + qt_sql9 """ select sm4_decrypt(sm4_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql10 """ select sm4_decrypt(sm4_encrypt(k,k),k) from quantile_table; """ + qt_sql11 """ select sm4_decrypt(sm4_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql12 """ select sm4_decrypt(sm4_encrypt("zhang",k),k) from quantile_table; """ + + sql """ set block_encryption_mode="SM4_128_CTR"; """ + qt_sql9 """ select sm4_decrypt(sm4_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql10 """ select sm4_decrypt(sm4_encrypt(k,k),k) from quantile_table; """ + qt_sql11 """ select sm4_decrypt(sm4_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql12 """ select sm4_decrypt(sm4_encrypt("zhang",k),k) from quantile_table; """ + + sql """ set block_encryption_mode=""; """ // AES_128_ECB + qt_sql13 """ select aes_decrypt(aes_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql14 """ select aes_decrypt(aes_encrypt(k,k),k) from quantile_table; """ + qt_sql15 """ select aes_decrypt(aes_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql16 """ select aes_decrypt(aes_encrypt("zhang",k),k) from quantile_table; """ + + sql """ set block_encryption_mode="AES_256_CBC"; """ + qt_sql17 """ select aes_decrypt(aes_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql18 """ select aes_decrypt(aes_encrypt(k,k),k) from quantile_table; """ + qt_sql19 """ select aes_decrypt(aes_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql20 """ select aes_decrypt(aes_encrypt("zhang",k),k) from quantile_table; """ + + + sql """ set block_encryption_mode="AES_128_CTR"; """ + qt_sql21 """ select aes_decrypt(aes_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql22 """ select aes_decrypt(aes_encrypt(k,k),k) from quantile_table; """ + qt_sql23 """ select aes_decrypt(aes_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql24 """ select aes_decrypt(aes_encrypt("zhang",k),k) from quantile_table; """ + + + sql """ set block_encryption_mode="AES_256_OFB"; """ + qt_sql25 """ select aes_decrypt(aes_encrypt(k,"doris"),"doris") from quantile_table; """ + qt_sql26 """ select aes_decrypt(aes_encrypt(k,k),k) from 
quantile_table; """ + qt_sql27 """ select aes_decrypt(aes_encrypt("zhang","doris"),"doris") from quantile_table; """ + qt_sql28 """ select aes_decrypt(aes_encrypt("zhang",k),k) from quantile_table; """ + + sql """ set block_encryption_mode=""; """ + + sql """ select to_base64(aes_encrypt(k,"doris")) from quantile_table;""" // 3A7GoWeuMNEBWzJx+YefZw== + qt_sql29 """ select aes_decrypt(FROM_BASE64("3A7GoWeuMNEBWzJx+YefZw=="),"doris") from quantile_table; """ + + sql """ select to_base64(aes_encrypt(k,k)) from quantile_table;""" //ADnRqPtFBjreZu06UTD64g== + qt_sql30 """ select aes_decrypt(FROM_BASE64("ADnRqPtFBjreZu06UTD64g=="),k) from quantile_table; """ + + sql """ select to_base64(aes_encrypt("zhang","doris")) from quantile_table;""" //fLhlYvn/yZhqd2LTRHImrw== + qt_sql31 """ select aes_decrypt(FROM_BASE64("fLhlYvn/yZhqd2LTRHImrw=="),"doris") from quantile_table; """ + + sql """ select to_base64(aes_encrypt("zhang",k)) from quantile_table;""" //2C8acACKfoRwHZS5B4juNw== + qt_sql32 """ select aes_decrypt(FROM_BASE64("2C8acACKfoRwHZS5B4juNw=="),k) from quantile_table; """ + + + + sql """ select to_base64(sm4_encrypt(k,"doris")) from quantile_table;""" // 7vSaqYqMl9no8trrzbdAEw== + qt_sql29 """ select sm4_decrypt(FROM_BASE64("7vSaqYqMl9no8trrzbdAEw=="),"doris") from quantile_table; """ + + sql """ select to_base64(sm4_encrypt(k,k)) from quantile_table;""" // PcPR18T6lhMuFTqQtymb8w== + qt_sql30 """ select sm4_decrypt(FROM_BASE64("PcPR18T6lhMuFTqQtymb8w=="),k) from quantile_table; """ + + sql """ select to_base64(sm4_encrypt("zhang","doris")) from quantile_table;""" // WY+4o1/cZwAFQ0F6dlyEqQ== + qt_sql31 """ select sm4_decrypt(FROM_BASE64("WY+4o1/cZwAFQ0F6dlyEqQ=="),"doris") from quantile_table; """ + + sql """ select to_base64(sm4_encrypt("zhang",k)) from quantile_table;""" // lhDiiEnRn3PvY6v4sHES0A== + qt_sql32 """ select sm4_decrypt(FROM_BASE64("lhDiiEnRn3PvY6v4sHES0A=="),k) from quantile_table; """ + + + sql "DROP TABLE IF EXISTS quantile_table2" + sql""" + CREATE TABLE quantile_table2 + ( + id int, + k string, + k1 string, + k2 string + ) + ENGINE=OLAP + UNIQUE KEY(id) + DISTRIBUTED BY HASH(id) BUCKETS 4 + PROPERTIES ( + "enable_unique_key_merge_on_write" = "true", + "replication_num" = "1" + ); + """ + sql""" insert into quantile_table2 values(1,"aaaaaa", "key_word", "init_word");""" + + //four arg (column/const) + sql """ set block_encryption_mode=""; """ // SM4_128_ECB + qt_sql33 """ select sm4_decrypt(sm4_encrypt(k,"doris","abcdefghij", "SM4_128_CBC"),"doris","abcdefghij","SM4_128_CBC") from quantile_table2; """ + qt_sql34 """ select sm4_decrypt(sm4_encrypt(k,k,"abcdefghij", "SM4_128_CBC"),k,"abcdefghij", "SM4_128_CBC") from quantile_table2; """ + qt_sql35 """ select sm4_decrypt(sm4_encrypt("zhang","doris","abcdefghij", "SM4_128_CBC"),"doris","abcdefghij", "SM4_128_CBC") from quantile_table2; """ + qt_sql36 """ select sm4_decrypt(sm4_encrypt("zhang",k,"abcdefghij", "SM4_128_CBC"),k,"abcdefghij", "SM4_128_CBC") from quantile_table2; """ + + qt_sql37 """ select sm4_decrypt(sm4_encrypt(k,"doris",k2, "SM4_128_CBC"),"doris",k2,"SM4_128_CBC") from quantile_table2; """ + qt_sql38 """ select sm4_decrypt(sm4_encrypt(k,k,k2, "SM4_128_CBC"),k,k2, "SM4_128_CBC") from quantile_table2; """ + qt_sql39 """ select sm4_decrypt(sm4_encrypt("zhang","doris",k2, "SM4_128_CBC"),"doris",k2, "SM4_128_CBC") from quantile_table2; """ + qt_sql40 """ select sm4_decrypt(sm4_encrypt("zhang",k,k2, "SM4_128_CBC"),k,k2, "SM4_128_CBC") from quantile_table2; """ + + qt_sql41 """ select sm4_decrypt(sm4_encrypt(k,k1,k2, 
"SM4_128_CBC"),k1,k2,"SM4_128_CBC") from quantile_table2; """ + qt_sql42 """ select sm4_decrypt(sm4_encrypt(k,k1,k2, "SM4_128_CBC"),k1,k2, "SM4_128_CBC") from quantile_table2; """ + qt_sql43 """ select sm4_decrypt(sm4_encrypt("zhang",k1,k2, "SM4_128_CBC"),k1,k2, "SM4_128_CBC") from quantile_table2; """ + qt_sql44 """ select sm4_decrypt(sm4_encrypt("zhang",k1,k2, "SM4_128_CBC"),k1,k2, "SM4_128_CBC") from quantile_table2; """ + + + qt_sql45 """ select aes_decrypt(aes_encrypt(k,"doris","abcdefghij", "AES_256_CFB"),"doris","abcdefghij","AES_256_CFB") from quantile_table2; """ + qt_sql46 """ select aes_decrypt(aes_encrypt(k,k,"abcdefghij", "AES_256_CFB"),k,"abcdefghij", "AES_256_CFB") from quantile_table2; """ + qt_sql47 """ select aes_decrypt(aes_encrypt("zhang","doris","abcdefghij", "AES_256_CFB"),"doris","abcdefghij", "AES_256_CFB") from quantile_table2; """ + qt_sql48 """ select aes_decrypt(aes_encrypt("zhang",k,"abcdefghij", "AES_256_CFB"),k,"abcdefghij", "AES_256_CFB") from quantile_table2; """ + + qt_sql49 """ select aes_decrypt(aes_encrypt(k,"doris",k2, "AES_256_CFB"),"doris",k2,"AES_256_CFB") from quantile_table2; """ + qt_sql50 """ select aes_decrypt(aes_encrypt(k,k,k2, "AES_256_CFB"),k,k2, "AES_256_CFB") from quantile_table2; """ + qt_sql51 """ select aes_decrypt(aes_encrypt("zhang","doris",k2, "AES_256_CFB"),"doris",k2, "AES_256_CFB") from quantile_table2; """ + qt_sql52 """ select aes_decrypt(aes_encrypt("zhang",k,k2, "AES_256_CFB"),k,k2, "AES_256_CFB") from quantile_table2; """ + + qt_sql53 """ select aes_decrypt(aes_encrypt(k,k1,k2, "AES_256_CFB"),k1,k2,"AES_256_CFB") from quantile_table2; """ + qt_sql54 """ select aes_decrypt(aes_encrypt(k,k1,k2, "AES_256_CFB"),k1,k2, "AES_256_CFB") from quantile_table2; """ + qt_sql55 """ select aes_decrypt(aes_encrypt("zhang",k1,k2, "AES_256_CFB"),k1,k2, "AES_256_CFB") from quantile_table2; """ + qt_sql56 """ select aes_decrypt(aes_encrypt("zhang",k1,k2, "AES_256_CFB"),k1,k2, "AES_256_CFB") from quantile_table2; """ } diff --git a/regression-test/suites/query_p0/system/test_query_sys.groovy b/regression-test/suites/query_p0/system/test_query_sys.groovy index b17500a0ccba0d..dd7998b010f9c8 100644 --- a/regression-test/suites/query_p0/system/test_query_sys.groovy +++ b/regression-test/suites/query_p0/system/test_query_sys.groovy @@ -35,6 +35,7 @@ suite("test_query_sys", "query,p0") { } sql "SELECT CONNECTION_ID();" sql "SELECT CURRENT_USER();" + sql "SELECT SESSION_USER();" sql "SELECT CURRENT_CATALOG();" // sql "select now();" sql "select localtime();" diff --git a/regression-test/suites/query_p0/test_forward_qeury.groovy b/regression-test/suites/query_p0/test_forward_qeury.groovy index 8dbef459d2dd75..28295e4ec895bb 100644 --- a/regression-test/suites/query_p0/test_forward_qeury.groovy +++ b/regression-test/suites/query_p0/test_forward_qeury.groovy @@ -19,7 +19,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite("test_forward_query") { +suite("test_forward_query", 'docker') { def options = new ClusterOptions() options.enableDebugPoints() options.setFeNum(2) @@ -49,4 +49,4 @@ suite("test_forward_query") { assertTrue(false) } } -} \ No newline at end of file +} diff --git a/regression-test/suites/query_profile/adaptive_pipeline_task_serial_read_on_limit.groovy b/regression-test/suites/query_profile/adaptive_pipeline_task_serial_read_on_limit.groovy index 15481fe7c9a8dc..46ff11b7845c91 100644 --- 
a/regression-test/suites/query_profile/adaptive_pipeline_task_serial_read_on_limit.groovy +++ b/regression-test/suites/query_profile/adaptive_pipeline_task_serial_read_on_limit.groovy @@ -116,6 +116,8 @@ suite('adaptive_pipeline_task_serial_read_on_limit') { sql "set enable_profile=false" + Thread.sleep(5) + def wholeString = getProfileList() List profileData = new JsonSlurper().parseText(wholeString).data.rows String queryIdNoLimit1 = ""; @@ -149,27 +151,22 @@ suite('adaptive_pipeline_task_serial_read_on_limit') { } } - logger.info("queryIdNoLimit1_${uuidString}: {}", queryIdNoLimit1) logger.info("queryIdWithLimit1_${uuidString}: {}", queryIdWithLimit1) - logger.info("queryIdWithLimit2_${uuidString}: {}", queryIdWithLimit2) - logger.info("queryIDNotEnableLimit_${uuidString}: {}", queryIDNotEnableLimit) logger.info("queryIdModifyTo20_${uuidString}: {}", queryIdModifyTo20) - assertTrue(queryIdNoLimit1 != "") assertTrue(queryIdWithLimit1 != "") - assertTrue(queryIdWithLimit2 != "") - assertTrue(queryIDNotEnableLimit != "") assertTrue(queryIdModifyTo20 != "") - def String profileNoLimit1 = getProfile(queryIdNoLimit1).toString() def String profileWithLimit1 = getProfile(queryIdWithLimit1).toString() - def String profileWithLimit2 = getProfile(queryIdWithLimit2).toString() - def String profileNotEnableLimit = getProfile(queryIDNotEnableLimit).toString() def String profileModifyTo20 = getProfile(queryIdModifyTo20).toString() - assertTrue(profileNoLimit1.contains("- MaxScannerThreadNum: 10")) + if (!profileWithLimit1.contains("- MaxScannerThreadNum: 1")) { + logger.info("profileWithLimit1:\n{}", profileWithLimit1) + } assertTrue(profileWithLimit1.contains("- MaxScannerThreadNum: 1")) - assertTrue(profileWithLimit2.contains("- MaxScannerThreadNum: 10")) - assertTrue(profileNotEnableLimit.contains("- MaxScannerThreadNum: 10")) + + if (!profileModifyTo20.contains("- MaxScannerThreadNum: 1")) { + logger.info("profileModifyTo20:\n{}", profileModifyTo20) + } assertTrue(profileModifyTo20.contains("- MaxScannerThreadNum: 1")) } \ No newline at end of file diff --git a/regression-test/suites/query_profile/scanner_profile.groovy b/regression-test/suites/query_profile/scanner_profile.groovy index 38216d211e65ea..75ae6a5ab65a9b 100644 --- a/regression-test/suites/query_profile/scanner_profile.groovy +++ b/regression-test/suites/query_profile/scanner_profile.groovy @@ -98,6 +98,10 @@ suite('scanner_profile') { logger.info("queryIdWithLimit1_${uuidString}: {}", queryIdWithLimit1) assertTrue(queryIdWithLimit1 != "") + + // Sleep 5 seconds to make sure profile collection is done + Thread.sleep(5000) + def String profileWithLimit1 = getProfile(queryIdWithLimit1).toString() logger.info("query profile {}", profileWithLimit1) assertTrue(profileWithLimit1.contains("- PeakRunningScanner: 1")) diff --git a/regression-test/suites/schema_change/test_schema_change_concurrent_with_txn.groovy b/regression-test/suites/schema_change/test_schema_change_concurrent_with_txn.groovy index d250a000c0123f..3a63e306ae15de 100644 --- a/regression-test/suites/schema_change/test_schema_change_concurrent_with_txn.groovy +++ b/regression-test/suites/schema_change/test_schema_change_concurrent_with_txn.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType -suite('test_schema_change_concurrent_with_txn') { +suite('test_schema_change_concurrent_with_txn', 'docker') { def options = new ClusterOptions() options.enableDebugPoints() 
options.feConfigs.add('publish_wait_time_second=-1') diff --git a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud1.groovy b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud1.groovy index 03f77917731c64..f2d0b767eb89fe 100644 --- a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud1.groovy +++ b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud1.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.http.NoHttpResponseException -suite('test_abort_txn_by_be_cloud1') { +suite('test_abort_txn_by_be_cloud1', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() diff --git a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud2.groovy b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud2.groovy index 7a2d382f3abfda..7264ac7f90a9f4 100644 --- a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud2.groovy +++ b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_cloud2.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.http.NoHttpResponseException -suite('test_abort_txn_by_be_cloud2') { +suite('test_abort_txn_by_be_cloud2', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() diff --git a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local5.groovy b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local5.groovy index df4fb5d637e566..0df8254ff25844 100644 --- a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local5.groovy +++ b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local5.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.http.NoHttpResponseException -suite('test_abort_txn_by_be_local5') { +suite('test_abort_txn_by_be_local5', 'docker') { def options = new ClusterOptions() options.cloudMode = false options.skipRunWhenPipelineDiff = false diff --git a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local6.groovy b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local6.groovy index 1f6e6df4417212..a95d335579b046 100644 --- a/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local6.groovy +++ b/regression-test/suites/schema_change_p0/test_abort_txn_by_be_local6.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.http.NoHttpResponseException -suite('test_abort_txn_by_be_local6') { +suite('test_abort_txn_by_be_local6', 'docker') { def options = new ClusterOptions() options.cloudMode = false options.skipRunWhenPipelineDiff = true diff --git a/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_cloud4.groovy b/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_cloud4.groovy index bd12d57fd34ed1..80b61e16efd3b0 100644 --- a/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_cloud4.groovy +++ b/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_cloud4.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.http.NoHttpResponseException -suite('test_abort_txn_by_fe_cloud4') { +suite('test_abort_txn_by_fe_cloud4', 'docker') { def options = new ClusterOptions() options.cloudMode = true options.enableDebugPoints() diff --git a/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_local3.groovy 
b/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_local3.groovy index 37667abe9506d7..355dab0587917c 100644 --- a/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_local3.groovy +++ b/regression-test/suites/schema_change_p0/test_abort_txn_by_fe_local3.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.http.NoHttpResponseException -suite('test_abort_txn_by_fe_local3') { +suite('test_abort_txn_by_fe_local3', 'docker') { def options = new ClusterOptions() options.cloudMode = false options.skipRunWhenPipelineDiff = false diff --git a/regression-test/suites/storage_medium_p0/test_partition_default_medium.groovy b/regression-test/suites/storage_medium_p0/test_partition_default_medium.groovy index 3543ce64ab1ae7..163761833c21b7 100644 --- a/regression-test/suites/storage_medium_p0/test_partition_default_medium.groovy +++ b/regression-test/suites/storage_medium_p0/test_partition_default_medium.groovy @@ -18,7 +18,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.apache.doris.regression.util.NodeType import org.apache.doris.regression.suite.SuiteCluster -suite("test_partition_default_medium") { +suite("test_partition_default_medium", 'docker') { def options = new ClusterOptions() options.feConfigs += [ 'default_storage_medium=HDD', diff --git a/regression-test/suites/storage_medium_p0/test_storage_medium_has_disk.groovy b/regression-test/suites/storage_medium_p0/test_storage_medium_has_disk.groovy index bd06680d2b2aeb..c252e10b130333 100644 --- a/regression-test/suites/storage_medium_p0/test_storage_medium_has_disk.groovy +++ b/regression-test/suites/storage_medium_p0/test_storage_medium_has_disk.groovy @@ -17,7 +17,7 @@ import org.apache.doris.regression.suite.ClusterOptions -suite('test_storage_medium_has_disk') { +suite('test_storage_medium_has_disk', 'docker') { if (isCloudMode()) { return } diff --git a/regression-test/suites/trash_p0/clean_trash.groovy b/regression-test/suites/trash_p0/clean_trash.groovy index 71438b5b1b5af7..fdfafc4887fdcd 100644 --- a/regression-test/suites/trash_p0/clean_trash.groovy +++ b/regression-test/suites/trash_p0/clean_trash.groovy @@ -17,7 +17,7 @@ import org.apache.doris.regression.suite.ClusterOptions import org.junit.Assert -suite("test_clean_trash", "p0") { +suite("test_clean_trash", "docker") { if (isCloudMode()) { return } @@ -77,4 +77,4 @@ suite("test_clean_trash", "p0") { sql """admin clean trash""" checkFunc(true) } -} \ No newline at end of file +} diff --git a/regression-test/suites/unique_with_mow_c_p0/test_mow_full_clone_exception.groovy b/regression-test/suites/unique_with_mow_c_p0/test_mow_full_clone_exception.groovy index 516cff9d4f58e3..6ab872239cd945 100644 --- a/regression-test/suites/unique_with_mow_c_p0/test_mow_full_clone_exception.groovy +++ b/regression-test/suites/unique_with_mow_c_p0/test_mow_full_clone_exception.groovy @@ -29,7 +29,7 @@ import org.apache.doris.regression.util.NodeType // // the bug is fixed in #37001 -suite('test_full_clone_exception') { +suite('test_full_clone_exception', 'docker') { def options = new ClusterOptions() options.feConfigs += [ 'disable_tablet_scheduler=true', diff --git a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_conflict_be_restart.groovy b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_conflict_be_restart.groovy index bc2a44425b30c8..642363f909866d 100644 --- 
a/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_conflict_be_restart.groovy +++ b/regression-test/suites/unique_with_mow_p0/partial_update/test_partial_update_conflict_be_restart.groovy @@ -35,7 +35,7 @@ import org.apache.http.client.methods.CloseableHttpResponse import org.apache.http.util.EntityUtils import org.apache.doris.regression.suite.ClusterOptions -suite("test_partial_update_conflict_be_restart") { +suite("test_partial_update_conflict_be_restart", 'docker') { def dbName = context.config.getDbNameByFile(context.file) def options = new ClusterOptions() diff --git a/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy b/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy index c3fb567f258f8d..42befff4e54cce 100644 --- a/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy +++ b/regression-test/suites/unique_with_mow_p0/test_mow_full_clone_exception.groovy @@ -29,7 +29,7 @@ import org.apache.doris.regression.util.NodeType // // the bug is fixed in #37001 -suite('test_full_clone_exception') { +suite('test_full_clone_exception', 'docker') { def options = new ClusterOptions() options.feConfigs += [ 'disable_tablet_scheduler=true', diff --git a/run-regression-test.sh b/run-regression-test.sh index ea7ced8be9498b..6357f4111a7f5d 100755 --- a/run-regression-test.sh +++ b/run-regression-test.sh @@ -43,6 +43,7 @@ Usage: $0 -genOut generate .out file if not exist -forceGenOut delete and generate .out file -parallel run tests using specified threads + -dockerSuiteParallel run docker tests using specified threads -randomOrder run tests in a random order -noKillDocker don't kill container when finish docker suites -times rum tests {times} times diff --git a/tools/fdb/fdb_ctl.sh b/tools/fdb/fdb_ctl.sh new file mode 100755 index 00000000000000..9c809abd5d4a50 --- /dev/null +++ b/tools/fdb/fdb_ctl.sh @@ -0,0 +1,418 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# +# 1. Run fdb_ctl.sh deploy on each machine to deploy FoundationDB. +# This will create the necessary directories and configuration files. +# +# 2. Run fdb_ctl.sh start on each machine to start the fdb cluster +# and get the cluster connection string. +# + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")" &>/dev/null && pwd)" + +if [[ -f "${ROOT_DIR}/fdb_vars.sh" ]]; then + source "${ROOT_DIR}/fdb_vars.sh" +else + echo "Please create fdb_vars.sh first" + exit 1 +fi + +if [[ ! -d "${FDB_HOME}" ]]; then + echo "Please set and create FDB_HOME first" + exit 1 +fi + +if [[ ! "${FDB_HOME}" = /* ]]; then + echo "${FDB_HOME} is not an absolute path."
+ exit 1 +fi + +if [[ -z ${FDB_CLUSTER_ID} ]]; then + echo "Please set FDB_CLUSTER_ID first" + exit 1 +fi + +# TODO verify config + +FDB_CLUSTER_DESC=${FDB_CLUSTER_DESC:-"doris-fdb"} + +# A dir to provide FDB binary pkgs +FDB_PKG_DIR=${ROOT_DIR}/pkgs/${FDB_VERSION} + +FDB_PORT=${FDB_PORT:-4500} + +LOG_DIR=${LOG_DIR:-${FDB_HOME}/log} + +mkdir -p "${LOG_DIR}" +mkdir -p "${FDB_HOME}"/conf +mkdir -p "${FDB_HOME}"/log + +function ensure_port_is_listenable() { + local component="$1" + local port="$2" + + if lsof -nP -iTCP:"${port}" -sTCP:LISTEN >/dev/null; then + echo "The port ${port} of ${component} is occupied" + exit 1 + fi +} + +function download_fdb() { + if [[ -d "${FDB_PKG_DIR}" ]]; then + echo "FDB ${FDB_VERSION} already exists" + return + fi + + local URL="https://github.com/apple/foundationdb/releases/download/${FDB_VERSION}/" + local TMP="${FDB_PKG_DIR}-tmp" + + rm -rf "${TMP}" + mkdir -p "${TMP}" + + wget "${URL}/fdbbackup.x86_64" -O "${TMP}/fdbbackup" + wget "${URL}/fdbserver.x86_64" -O "${TMP}/fdbserver" + wget "${URL}/fdbcli.x86_64" -O "${TMP}/fdbcli" + wget "${URL}/fdbmonitor.x86_64" -O "${TMP}/fdbmonitor" + wget "${URL}/libfdb_c.x86_64.so" -O "${TMP}/libfdb_c.x86_64.so" + chmod +x "${TMP}"/fdb* + + mv "${TMP}" "${FDB_PKG_DIR}" + echo "Download fdb binary pkgs success" +} + +# Function to configure coordinators +get_coordinators() { + local num_nodes + local num_coordinators + + num_nodes=$(echo "${FDB_CLUSTER_IPS}" | tr ',' '\n' | wc -l) + + if [[ ${num_nodes} -le 2 ]]; then + num_coordinators=1 + elif [[ ${num_nodes} -le 4 ]]; then + num_coordinators=3 + else + num_coordinators=5 + fi + + echo "${FDB_CLUSTER_IPS}" | cut -d',' -f1-"${num_coordinators}" | tr ',' '\n' | sed "s/$/:${FDB_PORT}/" | paste -sd ',' +} + +get_fdb_mode() { + # Initialize a new database + local num_nodes + local fdb_mode + + num_nodes=$(echo "${FDB_CLUSTER_IPS}" | tr ',' '\n' | wc -l) + if [[ ${num_nodes} -eq 1 ]]; then + fdb_mode="single" + elif [[ ${num_nodes} -le 4 ]]; then + fdb_mode="double" + else + fdb_mode="triple" + fi + + echo "${fdb_mode}" +} + +# Function to calculate number of processes +calculate_process_numbers() { + # local memory_gb=$1 + local cpu_cores=$2 + + local min_processes=1 + local data_dir_count + + # Convert comma-separated DATA_DIRS into an array + IFS=',' read -r -a DATA_DIR_ARRAY <<<"${DATA_DIRS}" + data_dir_count=${#DATA_DIR_ARRAY[@]} + + # Stateless processes (at least 1, up to 1/4 of CPU cores) + local stateless_processes=$((cpu_cores / 4)) + [[ ${stateless_processes} -lt ${min_processes} ]] && stateless_processes=${min_processes} + + # Storage processes (must be a multiple of the number of data directories) + local storage_processes=$((cpu_cores / 4)) + [[ ${storage_processes} -lt ${data_dir_count} ]] && storage_processes=${data_dir_count} + storage_processes=$(((storage_processes / data_dir_count) * data_dir_count)) + + # Transaction processes (must be a multiple of the number of data directories) + local transaction_processes=$((cpu_cores / 8)) + [[ ${transaction_processes} -lt ${min_processes} ]] && transaction_processes=${min_processes} + [[ ${transaction_processes} -lt ${data_dir_count} ]] && transaction_processes=${data_dir_count} + transaction_processes=$(((transaction_processes / data_dir_count) * data_dir_count)) + + # Return the values + echo "${stateless_processes} ${storage_processes} ${transaction_processes}" +} + +function deploy_fdb() { + download_fdb + + ln -sf "${FDB_PKG_DIR}/fdbserver" "${FDB_HOME}/fdbserver" + ln -sf "${FDB_PKG_DIR}/fdbmonitor" 
"${FDB_HOME}/fdbmonitor" + ln -sf "${FDB_PKG_DIR}/fdbbackup" "${FDB_HOME}/backup_agent" + ln -sf "${FDB_PKG_DIR}/fdbcli" "${FDB_HOME}/fdbcli" + + CLUSTER_DESC="${FDB_CLUSTER_DESC:-${FDB_CLUSTER_ID}}" + + # Convert comma-separated DATA_DIRS into an array + IFS=',' read -r -a DATA_DIR_ARRAY <<<"${DATA_DIRS}" + for DIR in "${DATA_DIR_ARRAY[@]}"; do + mkdir -p "${DIR}" || handle_error "Failed to create data directory ${DIR}" + done + + echo -e "\tCreate fdb.cluster, coordinator: $(get_coordinators)" + echo -e "\tfdb.cluster content is: ${CLUSTER_DESC}:${FDB_CLUSTER_ID}@$(get_coordinators)" + cat >"${FDB_HOME}/conf/fdb.cluster" <"${FDB_HOME}/conf/fdb.conf" <>"${FDB_HOME}/conf/fdb.conf" + done + + FDB_PORT=$((FDB_PORT + stateless_processes)) + + # Add storage processes + STORAGE_DIR_COUNT=${#DATA_DIR_ARRAY[@]} + for ((i = 0; i < storage_processes; i++)); do + PORT=$((FDB_PORT + i)) + DIR_INDEX=$((i % STORAGE_DIR_COUNT)) + echo "[fdbserver.${PORT}] +class = storage +datadir = ${DATA_DIR_ARRAY[${DIR_INDEX}]}/${PORT}" | tee -a "${FDB_HOME}/conf/fdb.conf" >/dev/null + done + + FDB_PORT=$((FDB_PORT + storage_processes)) + + # Add transaction processes + for ((i = 0; i < transaction_processes; i++)); do + PORT=$((FDB_PORT + i)) + DIR_INDEX=$((i % STORAGE_DIR_COUNT)) + echo "[fdbserver.${PORT}] +class = transaction +datadir = ${DATA_DIR_ARRAY[${DIR_INDEX}]}/${PORT}" | tee -a "${FDB_HOME}/conf/fdb.conf" >/dev/null + done + + echo "[backup_agent] +command = ${FDB_HOME}/backup_agent +logdir = ${LOG_DIR}" >>"${FDB_HOME}/conf/fdb.conf" + + echo "Deploy FDB to: ${FDB_HOME}" +} + +function start_fdb() { + if [[ ! -f "${FDB_HOME}/fdbmonitor" ]]; then + echo 'Please run setup before start fdb server' + exit 1 + fi + + ensure_port_is_listenable "fdbserver" "${FDB_PORT}" + + echo "Run FDB monitor ..." + "${FDB_HOME}/fdbmonitor" \ + --conffile "${FDB_HOME}/conf/fdb.conf" \ + --lockfile "${FDB_HOME}/fdbmonitor.pid" \ + --daemonize +} + +function stop_fdb() { + if [[ -f "${FDB_HOME}/fdbmonitor.pid" ]]; then + local fdb_pid + fdb_pid=$(cat "${FDB_HOME}/fdbmonitor.pid") + if ps -p "${fdb_pid}" >/dev/null; then + echo "Stop fdbmonitor with pid ${fdb_pid}" + kill -9 "${fdb_pid}" + fi + fi +} + +function clean_fdb() { + if [[ -f "${FDB_HOME}/fdbmonitor.pid" ]]; then + local fdb_pid + + fdb_pid=$(cat "${FDB_HOME}/fdbmonitor.pid") + if ps -p "${fdb_pid}" >/dev/null; then + echo "fdbmonitor with pid ${fdb_pid} is running, stop it first." + exit 1 + fi + fi + + sleep 1 + + # Check if FDB_HOME is set and not root + if [[ -z "${FDB_HOME}" || "${FDB_HOME}" == "/" ]]; then + echo "Error: FDB_HOME is not set or is set to root directory. Aborting cleanup." + exit 1 + fi + + # Check if FDB_HOME is empty + if [[ -z "$(ls -A "${FDB_HOME}")" ]]; then + echo "Error: FDB_HOME is empty. Nothing to clean." + exit 1 + fi + + # Remove all directories and files under ${FDB_HOME} + echo "Removing all directories and files under ${FDB_HOME}" + rm -rf "${FDB_HOME:?}"/* +} + +function deploy() { + local job="$1" + local skip_pkg="$2" + local skip_config="$3" + + if [[ ${job} =~ ^(all|fdb)$ ]]; then + deploy_fdb + fi +} + +function start() { + local job="$1" + local init="$2" + + if [[ ${job} =~ ^(all|fdb)$ ]]; then + start_fdb + fi + + if [[ ${init} =~ ^(all|fdb)$ ]]; then + echo "Try create database ..." 
+ local fdb_mode + + fdb_mode=$(get_fdb_mode) + "${FDB_HOME}/fdbcli" -C "${FDB_HOME}/conf/fdb.cluster" \ + --exec "configure new ${fdb_mode} ssd" || true + fi + + echo "Started fdb successfully, and the cluster file is:" + cat "${FDB_HOME}/conf/fdb.cluster" +} + +function stop() { + local job="$1" + + if [[ ${job} =~ ^(all|fdb)$ ]]; then + stop_fdb & + fi + wait +} + +function clean() { + local job="$1" + + if [[ ${job} =~ ^(all|fdb)$ ]]; then + clean_fdb & + fi + wait +} + +function status() { + pgrep -f "${FDB_CLUSTER_DESC}" +} + +function usage() { + echo "Usage: $0 <cmd> [--skip-pkg] [--skip-config]" + echo -e "\t deploy \t setup fdb env (dir, binary, conf ...)" + echo -e "\t clean \t clean fdb data" + echo -e "\t start \t start fdb" + echo -e "\t stop \t stop fdb" + echo -e "" + echo -e "" + echo -e "Args:" + echo -e "\t --skip-pkg \t skip updating binary pkgs during deploy" + echo -e "\t --skip-config \t skip updating config during deploy" + echo -e "" + exit 1 +} + +function unknown_cmd() { + local cmd="$1" + + printf "Unknown cmd: %s \n" "${cmd}" + usage +} + +if [[ $# -lt 1 ]]; then + usage +fi + +cmd="$1" +shift + +job="fdb" + +init="fdb" +skip_pkg="false" +skip_config="false" + +case ${cmd} in +deploy) + deploy "${job}" "${skip_pkg}" "${skip_config}" + ;; +start) + start "${job}" "${init}" + ;; +stop) + stop "${job}" + ;; +clean) + clean "${job}" + ;; +fdbcli) + "${FDB_HOME}/fdbcli" -C "${FDB_HOME}/conf/fdb.cluster" "$@" + ;; +config) + generate_regression_config true + ;; +*) + unknown_cmd "${cmd}" + ;; +esac diff --git a/tools/fdb/fdb_vars.sh b/tools/fdb/fdb_vars.sh new file mode 100644 index 00000000000000..c0bbadabdd6cd1 --- /dev/null +++ b/tools/fdb/fdb_vars.sh @@ -0,0 +1,72 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Description: Variables for FoundationDB + +#======================= REQUIRED CUSTOMIZATION ==================================== +# Data directories for FoundationDB storage +# Make sure to create these directories before running the script; they have to be absolute paths. +# For simplicity, you can use one directory. For production, you should use SSDs. +# shellcheck disable=2034 +DATA_DIRS="/mnt/foundationdb/data1,/mnt/foundationdb/data2,/mnt/foundationdb/data3" + +# Define the cluster IPs (comma-separated list of IP addresses) +# You should have at least 3 IP addresses for a production cluster +# The first IP addresses will be used as coordinators; +# the number of coordinators depends on the number of nodes, see the function get_coordinators. +# For high availability, machines should be in different racks. +# shellcheck disable=2034 +FDB_CLUSTER_IPS="172.200.0.2,172.200.0.3,172.200.0.4" + +# Define the FoundationDB home directory, which contains the fdb binaries and logs. +# The default is /fdbhome and it has to be an absolute path.
+# shellcheck disable=2034 +FDB_HOME="/fdbhome" + +# Define the cluster id; it should be generated randomly (e.g. with mktemp -u XXXXXXXX) +# and has to be different for each cluster. +# shellcheck disable=2034 +FDB_CLUSTER_ID=$(mktemp -u XXXXXXXX) + +# Define the cluster description; you should change it. +# shellcheck disable=2034 +FDB_CLUSTER_DESC="mycluster" + +#======================= OPTIONAL CUSTOMIZATION ============================ +# Define resource limits +# Memory limit in gigabytes +# shellcheck disable=2034 +MEMORY_LIMIT_GB=16 + +# CPU cores limit +# shellcheck disable=2034 +CPU_CORES_LIMIT=8 + +#=========================================================================== +# Define starting port for the servers +# This is the base port number for the fdbserver processes; it usually does not need to be changed +# shellcheck disable=2034 +FDB_PORT=4500 + +# Define the FoundationDB version +# shellcheck disable=2034 +FDB_VERSION="7.1.38" + +# User who runs the fdb processes; the default is the current user +# shellcheck disable=2034 +USER=$(whoami)
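+
+# Example workflow (illustrative): customize the variables above on every node, then run
+# "./fdb_ctl.sh deploy" on each node, followed by "./fdb_ctl.sh start" on each node; on
+# success the cluster connection string from ${FDB_HOME}/conf/fdb.cluster is printed.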