Merge branch 'master' into quantile_state
zhangstar333 authored Oct 13, 2023
2 parents d72172b + a30d30e commit 3c46f70
Showing 363 changed files with 11,260 additions and 4,487 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/clang-format.yml
@@ -27,11 +27,19 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
if: ${{ github.event_name != 'pull_request_target' }}
uses: actions/checkout@v3
with:
persist-credentials: false
submodules: recursive

- name: Checkout ${{ github.ref }} ( ${{ github.event.pull_request.head.sha }} )
if: ${{ github.event_name == 'pull_request_target' }}
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
submodules: recursive

- name: Paths filter
uses: ./.github/actions/paths-filter
id: filter
13 changes: 12 additions & 1 deletion .github/workflows/license-eyes.yml
@@ -28,7 +28,18 @@ jobs:
runs-on: ubuntu-latest
steps:
- name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )"
uses: actions/checkout@v2
if: ${{ github.event_name != 'pull_request_target' }}
uses: actions/checkout@v3
with:
submodules: recursive

- name: Checkout ${{ github.ref }} ( ${{ github.event.pull_request.head.sha }} )
if: ${{ github.event_name == 'pull_request_target' }}
uses: actions/checkout@v3
with:
ref: ${{ github.event.pull_request.head.sha }}
submodules: recursive

- name: Check License
uses: apache/[email protected]
env:
1 change: 1 addition & 0 deletions .licenserc.yaml
@@ -18,6 +18,7 @@ header:
- ".gitmodules"
- ".licenserc.yaml"
- ".rat-excludes"
- ".github/**"
- "be/src/common/status.cpp"
- "be/src/common/status.h"
- "be/src/env/env.h"
4 changes: 3 additions & 1 deletion README.md
@@ -35,7 +35,9 @@ Apache Doris is an easy-to-use, high-performance and real-time analytical databa

All this makes Apache Doris an ideal tool for scenarios including report analysis, ad-hoc query, unified data warehouse, and data lake query acceleration. On Apache Doris, users can build various applications, such as user behavior analysis, AB test platform, log retrieval analysis, user portrait analysis, and order analysis.

🎉 Version 2.0.1 is released now. The 2.0.1 release achieves over 10x performance improvement on standard benchmarks, comprehensive enhancements for log analysis and lakehouse scenarios, more efficient and stable data updates and writes, support for more complete multi-tenancy and resource isolation mechanisms, and a further step toward resource elasticity and storage-compute separation. It also adds a series of usability features for enterprise users. We welcome all users who need the new features of the 2.0 release to deploy and upgrade. Check out the 🔗[Release Notes](https://github.com/apache/doris/issues/23640) here.
Doris Summit Asia 2023 is coming, and we warmly invite you to join! Click now: 🔗[doris-summit.org.cn](https://doris-summit.org.cn/?utm_source=website&utm_medium=readme&utm_campaign=2023&utm_id=2023)

🎉 Version 2.0.2 is released now. The 2.0.2 release achieves over 10x performance improvement on standard benchmarks, comprehensive enhancements for log analysis and lakehouse scenarios, more efficient and stable data updates and writes, support for more complete multi-tenancy and resource isolation mechanisms, and a further step toward resource elasticity and storage-compute separation. It also adds a series of usability features for enterprise users. We welcome all users who need the new features of the 2.0 release to deploy and upgrade. Check out the 🔗[Release Notes](https://github.com/apache/doris/issues/25011) here.

🎉 Version 1.2.7 is released now! It is a fully evolved release, and all users are encouraged to upgrade. Check out the 🔗[Release Notes](https://doris.apache.org/docs/dev/releasenotes/release-1.2.7) here.

21 changes: 20 additions & 1 deletion be/src/agent/task_worker_pool.cpp
@@ -20,7 +20,6 @@
#include <fmt/format.h>
#include <gen_cpp/AgentService_types.h>
#include <gen_cpp/HeartbeatService_types.h>
#include <gen_cpp/MasterService_types.h>
#include <gen_cpp/Status_types.h>
#include <gen_cpp/Types_types.h>
#include <unistd.h>
@@ -55,6 +54,7 @@
#include "olap/olap_common.h"
#include "olap/rowset/rowset_meta.h"
#include "olap/snapshot_manager.h"
#include "olap/special_dir.h"
#include "olap/storage_engine.h"
#include "olap/storage_policy.h"
#include "olap/tablet.h"
@@ -677,8 +677,13 @@ void TaskWorkerPool::_report_disk_state_worker_thread_callback() {
disk.__set_disk_available_capacity(root_path_info.available);
disk.__set_trash_used_capacity(root_path_info.trash_used_capacity);
disk.__set_used(root_path_info.is_used);
disk.__set_dir_type(TDiskType::STORAGE);
request.disks[root_path_info.path] = disk;
}

_set_disk_infos(request, TDiskType::LOG);
_set_disk_infos(request, TDiskType::DEPLOY);

request.__set_num_cores(CpuInfo::num_cores());
request.__set_pipeline_executor_size(config::pipeline_executor_size > 0
? config::pipeline_executor_size
@@ -1096,6 +1101,20 @@ void TaskWorkerPool::_handle_report(const TReportRequest& request, ReportType ty
}
}

void TaskWorkerPool::_set_disk_infos(TReportRequest& request, TDiskType::type type) {
SpecialDirInfo dir_info;
StorageEngine::instance()->get_special_dir_info(&dir_info, type);

TDisk special_disk;
special_disk.__set_root_path(dir_info.path);
special_disk.__set_data_used_capacity(0);
special_disk.__set_disk_total_capacity(dir_info.capacity);
special_disk.__set_disk_available_capacity(dir_info.available);
special_disk.__set_used(dir_info.is_used);
special_disk.__set_dir_type(type);
request.disks[dir_info.path] = special_disk;
}

void TaskWorkerPool::_random_sleep(int second) {
Random rnd(UnixMillis());
sleep(rnd.Uniform(second) + 1);
2 changes: 2 additions & 0 deletions be/src/agent/task_worker_pool.h
@@ -19,6 +19,7 @@

#include <butil/macros.h>
#include <gen_cpp/AgentService_types.h>
#include <gen_cpp/MasterService_types.h>
#include <gen_cpp/Types_types.h>
#include <stdint.h>

@@ -211,6 +212,7 @@ class TaskWorkerPool {
TFinishTaskRequest* finish_task_request);

void _handle_report(const TReportRequest& request, ReportType type);
void _set_disk_infos(TReportRequest& request, TDiskType::type type);

Status _get_tablet_info(const TTabletId tablet_id, const TSchemaHash schema_hash,
int64_t signature, TTabletInfo* tablet_info);
7 changes: 2 additions & 5 deletions be/src/common/config.cpp
@@ -133,7 +133,7 @@ DEFINE_mBool(disable_memory_gc, "false");

DEFINE_mInt64(large_memory_check_bytes, "2147483648");

// The maximum time a thread waits for a full GC. Currently only query will wait for full gc.
// The maximum time a thread waits for full GC. Currently only query will wait for full gc.
DEFINE_mInt32(thread_wait_gc_max_milliseconds, "1000");

DEFINE_mInt64(pre_serialize_keys_limit_bytes, "16777216");
@@ -1071,10 +1071,6 @@ DEFINE_mInt64(lookup_connection_cache_bytes_limit, "4294967296");
// level of compression when using LZ4_HC, whose default value is LZ4HC_CLEVEL_DEFAULT
DEFINE_mInt64(LZ4_HC_compression_level, "9");

DEFINE_Bool(enable_hdfs_hedged_read, "false");
DEFINE_Int32(hdfs_hedged_read_thread_num, "128");
DEFINE_Int32(hdfs_hedged_read_threshold_time, "500");

DEFINE_mBool(enable_merge_on_write_correctness_check, "true");

// The secure path with user files, used in the `local` table function.
@@ -1096,6 +1092,7 @@ DEFINE_Int32(group_commit_sync_wal_batch, "10");

// the number of threads used for group commit insert
DEFINE_Int32(group_commit_insert_threads, "10");
DEFINE_mInt32(group_commit_interval_seconds, "10");

DEFINE_mInt32(scan_thread_nice_value, "0");
DEFINE_mInt32(tablet_schema_cache_recycle_interval, "86400");
11 changes: 1 addition & 10 deletions be/src/common/config.h
@@ -1127,16 +1127,6 @@ DECLARE_mInt64(lookup_connection_cache_bytes_limit);
// level of compression when using LZ4_HC, whose default value is LZ4HC_CLEVEL_DEFAULT
DECLARE_mInt64(LZ4_HC_compression_level);

// whether to enable hdfs hedged read.
// If set to true, it will be enabled even if the user did not enable it when creating the catalog
DECLARE_Bool(enable_hdfs_hedged_read);
// hdfs hedged read thread pool size, for "dfs.client.hedged.read.threadpool.size"
// May be overwritten by the value specified when creating the catalog
DECLARE_Int32(hdfs_hedged_read_thread_num);
// the threshold for doing hedged read, for "dfs.client.hedged.read.threshold.millis"
// May be overwritten by the value specified when creating the catalog
DECLARE_Int32(hdfs_hedged_read_threshold_time);

DECLARE_mBool(enable_merge_on_write_correctness_check);

// The secure path with user files, used in the `local` table function.
@@ -1166,6 +1156,7 @@ DECLARE_Int32(group_commit_sync_wal_batch);

// This config can be set to limit thread number in group commit insert thread pool.
DECLARE_mInt32(group_commit_insert_threads);
DECLARE_mInt32(group_commit_interval_seconds);

// The configuration item is used to lower the priority of the scanner thread,
// typically employed to ensure CPU scheduling for write operations.
6 changes: 3 additions & 3 deletions be/src/common/daemon.cpp
@@ -229,7 +229,7 @@ void Daemon::memory_gc_thread() {
auto proc_mem_no_allocator_cache = doris::MemInfo::proc_mem_no_allocator_cache();

// GC excess memory for resource groups that not enable overcommit
auto tg_free_mem = doris::MemInfo::tg_hard_memory_limit_gc();
auto tg_free_mem = doris::MemInfo::tg_not_enable_overcommit_group_gc();
sys_mem_available += tg_free_mem;
proc_mem_no_allocator_cache -= tg_free_mem;

@@ -239,7 +239,7 @@
// No longer full gc and minor gc during sleep.
memory_full_gc_sleep_time_ms = memory_gc_sleep_time_ms;
memory_minor_gc_sleep_time_ms = memory_gc_sleep_time_ms;
LOG(INFO) << fmt::format("Start Full GC, {}.",
LOG(INFO) << fmt::format("[MemoryGC] start full GC, {}.",
MemTrackerLimiter::process_limit_exceeded_errmsg_str());
doris::MemTrackerLimiter::print_log_process_usage();
if (doris::MemInfo::process_full_gc()) {
@@ -251,7 +251,7 @@
proc_mem_no_allocator_cache >= doris::MemInfo::soft_mem_limit())) {
// No minor gc during sleep, but full gc is possible.
memory_minor_gc_sleep_time_ms = memory_gc_sleep_time_ms;
LOG(INFO) << fmt::format("Start Minor GC, {}.",
LOG(INFO) << fmt::format("[MemoryGC] start minor GC, {}.",
MemTrackerLimiter::process_soft_limit_exceeded_errmsg_str());
doris::MemTrackerLimiter::print_log_process_usage();
if (doris::MemInfo::process_minor_gc()) {
1 change: 1 addition & 0 deletions be/src/common/status.h
@@ -280,6 +280,7 @@ E(ENTRY_NOT_FOUND, -6011);
constexpr bool capture_stacktrace(int code) {
return code != ErrorCode::OK
&& code != ErrorCode::END_OF_FILE
&& code != ErrorCode::DATA_QUALITY_ERROR
&& code != ErrorCode::MEM_LIMIT_EXCEEDED
&& code != ErrorCode::TRY_LOCK_FAILED
&& code != ErrorCode::TOO_MANY_SEGMENTS
3 changes: 2 additions & 1 deletion be/src/exec/es/es_scan_reader.cpp
@@ -124,6 +124,7 @@ Status ESScanReader::open() {
}
_network_client.set_basic_auth(_user_name, _passwd);
_network_client.set_content_type("application/json");
_network_client.set_timeout_ms(_http_timeout_ms);
if (_use_ssl_client) {
_network_client.use_untrusted_ssl();
}
@@ -214,7 +215,7 @@ Status ESScanReader::close() {
_network_client.set_basic_auth(_user_name, _passwd);
_network_client.set_method(DELETE);
_network_client.set_content_type("application/json");
_network_client.set_timeout_ms(5 * 1000);
_network_client.set_timeout_ms(_http_timeout_ms);
if (_use_ssl_client) {
_network_client.use_untrusted_ssl();
}
25 changes: 13 additions & 12 deletions be/src/exprs/json_functions.cpp
@@ -254,18 +254,19 @@ Status JsonFunctions::extract_from_object(simdjson::ondemand::object& obj,
simdjson::ondemand::value* value) noexcept {
// Return DataQualityError when the json is malformed.
// Otherwise the path was not found, due to an out-of-bounds array index or a nonexistent field
#define HANDLE_SIMDJSON_ERROR(err, msg) \
do { \
const simdjson::error_code& _err = err; \
const std::string& _msg = msg; \
if (UNLIKELY(_err)) { \
if (_err == simdjson::NO_SUCH_FIELD || _err == simdjson::INDEX_OUT_OF_BOUNDS) { \
return Status::NotFound( \
fmt::format("err: {}, msg: {}", simdjson::error_message(_err), _msg)); \
} \
return Status::DataQualityError( \
fmt::format("err: {}, msg: {}", simdjson::error_message(_err), _msg)); \
} \
#define HANDLE_SIMDJSON_ERROR(err, msg) \
do { \
const simdjson::error_code& _err = err; \
const std::string& _msg = msg; \
if (UNLIKELY(_err)) { \
if (_err == simdjson::NO_SUCH_FIELD || _err == simdjson::INDEX_OUT_OF_BOUNDS) { \
return Status::DataQualityError( \
fmt::format("Not found target filed, err: {}, msg: {}", \
simdjson::error_message(_err), _msg)); \
} \
return Status::DataQualityError( \
fmt::format("err: {}, msg: {}", simdjson::error_message(_err), _msg)); \
} \
} while (false);

if (jsonpath.size() <= 1) {