Skip to content

Commit

Permalink
[improvement](create tablet) be choose disk tolerate with little skew (
Browse files Browse the repository at this point in the history
  • Loading branch information
yujun777 authored Jan 25, 2024
1 parent 2f2f954 commit 53760a5
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 14 deletions.
3 changes: 3 additions & 0 deletions be/src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1164,6 +1164,9 @@ DEFINE_mInt32(report_query_statistics_interval_ms, "3000");
// 30s
DEFINE_mInt32(query_statistics_reserve_timeout_ms, "30000");

// Treat two high-usage disks as being at the same available level if their usages differ by less than this value.
DEFINE_mDouble(high_disk_avail_level_diff_usages, "0.15");

// create tablet in partition random robin idx lru size, default 10000
DEFINE_Int32(partition_disk_index_lru_size, "10000");

Expand Down
3 changes: 3 additions & 0 deletions be/src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1240,6 +1240,9 @@ DECLARE_Int32(ignore_invalid_partition_id_rowset_num);
DECLARE_mInt32(report_query_statistics_interval_ms);
DECLARE_mInt32(query_statistics_reserve_timeout_ms);

// Treat two high-usage disks as being at the same available level if their usages differ by less than this value.
DECLARE_mDouble(high_disk_avail_level_diff_usages);

// create tablet in partition random robin idx lru size, default 10000
DECLARE_Int32(partition_disk_index_lru_size);

Expand Down
53 changes: 42 additions & 11 deletions be/src/olap/storage_engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -456,16 +456,6 @@ Status StorageEngine::set_cluster_id(int32_t cluster_id) {
return Status::OK();
}

// Maps a disk usage ratio to a coarse level bucket.
//   usage <  0.7  -> LOW
//   usage <  0.85 -> MID
//   otherwise     -> HIGH
// The ratio is asserted to be at most 1 (checked only when assertions are enabled).
StorageEngine::DiskRemainingLevel get_available_level(double disk_usage_percent) {
    using Level = StorageEngine::DiskRemainingLevel;
    assert(disk_usage_percent <= 1);
    // Thresholds are tested in ascending order; anything that is not below
    // 0.85 falls through to HIGH.
    return disk_usage_percent < 0.7
                   ? Level::LOW
                   : (disk_usage_percent < 0.85 ? Level::MID : Level::HIGH);
}

int StorageEngine::_get_and_set_next_disk_index(int64 partition_id,
TStorageMedium::type storage_medium) {
auto key = CreateTabletIdxCache::get_key(partition_id, storage_medium);
Expand All @@ -481,6 +471,7 @@ int StorageEngine::_get_and_set_next_disk_index(int64 partition_id,

void StorageEngine::_get_candidate_stores(TStorageMedium::type storage_medium,
std::vector<DirInfo>& dir_infos) {
std::vector<double> usages;
for (auto& it : _store_map) {
DataDir* data_dir = it.second.get();
if (data_dir->is_used()) {
Expand All @@ -489,11 +480,51 @@ void StorageEngine::_get_candidate_stores(TStorageMedium::type storage_medium,
!data_dir->reach_capacity_limit(0)) {
DirInfo dir_info;
dir_info.data_dir = data_dir;
dir_info.available_level = get_available_level(data_dir->get_usage(0));
dir_info.available_level = 0;
usages.push_back(data_dir->get_usage(0));
dir_infos.push_back(dir_info);
}
}
}

if (dir_infos.size() <= 1) {
return;
}

std::sort(usages.begin(), usages.end());
if (usages.back() < 0.7) {
return;
}

std::vector<double> level_min_usages;
level_min_usages.push_back(usages[0]);
for (auto usage : usages) {
// usage < 0.7 consider as one level, give a small skew
if (usage < 0.7 - (config::high_disk_avail_level_diff_usages / 2.0)) {
continue;
}

// at high usages, default 15% is one level
// for example: three disk usages are: 0.66, 0.72, 0.83
// then level_min_usages = [0.66, 0.83], divide disks into 2 levels: [0.66, 0.72], [0.83]
if (usage >= level_min_usages.back() + config::high_disk_avail_level_diff_usages) {
level_min_usages.push_back(usage);
}
}
for (auto& dir_info : dir_infos) {
double usage = dir_info.data_dir->get_usage(0);
for (size_t i = 1; i < level_min_usages.size() && usage >= level_min_usages[i]; i++) {
dir_info.available_level++;
}

// when usage is too high, balance no longer matters:
// bump the disk to a higher level so that it is chosen less often.
// for example, two disks and usages are: 0.85 and 0.92, then let tablets fall on the first disk.
// by default, storage_flood_stage_usage_percent = 90
if (usage > config::storage_flood_stage_usage_percent / 100.0) {
dir_info.available_level++;
}
}
}

std::vector<DataDir*> StorageEngine::get_stores_for_create_tablet(
Expand Down
4 changes: 1 addition & 3 deletions be/src/olap/storage_engine.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,6 @@ class StorageEngine final : public BaseStorageEngine {
StorageEngine(const EngineOptions& options);
~StorageEngine() override;

enum class DiskRemainingLevel { LOW, MID, HIGH };

Status open() override;

Status create_tablet(const TCreateTabletReq& request, RuntimeProfile* profile);
Expand Down Expand Up @@ -541,7 +539,7 @@ class CreateTabletIdxCache : public LRUCachePolicy {
struct DirInfo {
DataDir* data_dir;

StorageEngine::DiskRemainingLevel available_level;
int available_level = 0;

bool operator<(const DirInfo& other) const {
if (available_level != other.available_level) {
Expand Down

0 comments on commit 53760a5

Please sign in to comment.