Skip to content

Commit

Permalink
Merge pull request ClickHouse#64694 from ClickHouse/fix_ttl_incompati…
Browse files Browse the repository at this point in the history
…bility

Fix backward incompatibility in TTL execution
  • Loading branch information
alesapin authored Jun 3, 2024
2 parents c53fb9b + 1d1eafc commit c45ebaf
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 1 deletion.
31 changes: 30 additions & 1 deletion src/Storages/MergeTree/MergeTreeDataPartTTLInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
#include <IO/WriteHelpers.h>
#include <Common/quoteString.h>
#include <algorithm>
#include <Parsers/ExpressionListParsers.h>
#include <Parsers/parseQuery.h>

#include <base/JSON.h>

Expand Down Expand Up @@ -272,6 +274,29 @@ bool MergeTreeDataPartTTLInfos::hasAnyNonFinishedTTLs() const
return false;
}

namespace
{
/// We had backward incompatibility in representation of serialized expressions, example:
///
/// `expired + toIntervalSecond(20)` vs `plus(expired, toIntervalSecond(20))`
/// Since they are stored as strings we cannot compare them directly as strings
/// To avoid backward incompatibility we parse them and check AST hashes.
/// This O(N^2), but amount of TTLs should be small, so it should be Ok.
auto tryToFindTTLExpressionInMapByASTMatching(const TTLInfoMap & ttl_info_map, const std::string & result_column)
{
ParserExpression parser;
auto ast_needle = parseQuery(parser, result_column.data(), result_column.data() + result_column.size(), "", 0, 0, 0);
for (auto it = ttl_info_map.begin(); it != ttl_info_map.end(); ++it)
{
const std::string & stored_expression = it->first;
auto ast_candidate = parseQuery(parser, stored_expression.data(), stored_expression.data() + stored_expression.size(), "", 0, 0, 0);
if (ast_candidate->getTreeHash(false) == ast_needle->getTreeHash(false))
return it;
}
return ttl_info_map.end();
}
}

std::optional<TTLDescription> selectTTLDescriptionForTTLInfos(const TTLDescriptions & descriptions, const TTLInfoMap & ttl_info_map, time_t current_time, bool use_max)
{
time_t best_ttl_time = 0;
Expand All @@ -281,7 +306,11 @@ std::optional<TTLDescription> selectTTLDescriptionForTTLInfos(const TTLDescripti
auto ttl_info_it = ttl_info_map.find(ttl_entry_it->result_column);

if (ttl_info_it == ttl_info_map.end())
continue;
{
ttl_info_it = tryToFindTTLExpressionInMapByASTMatching(ttl_info_map, ttl_entry_it->result_column);
if (ttl_info_it == ttl_info_map.end())
continue;
}

time_t ttl_time;

Expand Down
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
<clickhouse>
<logger>
<level>test</level>
</logger>

<storage_configuration>
<disks>
<s3>
<type>s3</type>
<endpoint>http://minio1:9001/root/data/</endpoint>
<access_key_id>minio</access_key_id>
<secret_access_key>minio123</secret_access_key>
</s3>
</disks>
<policies>
<default>
<default>
<disk>default</disk>
</default>
</default>
<s3>
<volumes>
<default>
<disk>default</disk>
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
</default>
<main>
<disk>s3</disk>
<perform_ttl_move_on_insert>False</perform_ttl_move_on_insert>
</main>
</volumes>
<move_factor>0.0</move_factor>
</s3>
</policies>
</storage_configuration>
</clickhouse>
105 changes: 105 additions & 0 deletions tests/integration/test_move_ttl_broken_compatibility/test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env python3

import logging
import random
import string
import time

import pytest
from helpers.cluster import ClickHouseCluster
import minio


cluster = ClickHouseCluster(__file__)


@pytest.fixture(scope="module")
def started_cluster():
try:
cluster.add_instance(
"node1",
main_configs=["configs/storage_conf.xml"],
image="clickhouse/clickhouse-server",
with_minio=True,
tag="24.1",
stay_alive=True,
with_installed_binary=True,
)
cluster.start()

yield cluster
finally:
cluster.shutdown()


def test_bc_compatibility(started_cluster):
node1 = cluster.instances["node1"]
node1.query(
"""
CREATE TABLE test_ttl_table (
generation UInt64,
date_key DateTime,
number UInt64,
text String,
expired DateTime DEFAULT now()
)
ENGINE=MergeTree
ORDER BY (generation, date_key)
PARTITION BY toMonth(date_key)
TTL expired + INTERVAL 20 SECONDS TO DISK 's3'
SETTINGS storage_policy = 's3';
"""
)

node1.query(
"""
INSERT INTO test_ttl_table (
generation,
date_key,
number,
text
)
SELECT
1,
toDateTime('2000-01-01 00:00:00') + rand(number) % 365 * 86400,
number,
toString(number)
FROM numbers(10000);
"""
)

disks = (
node1.query(
"""
SELECT distinct disk_name
FROM system.parts
WHERE table = 'test_ttl_table'
"""
)
.strip()
.split("\n")
)
print("Disks before", disks)

assert len(disks) == 1
assert disks[0] == "default"

node1.restart_with_latest_version()

for _ in range(60):
disks = (
node1.query(
"""
SELECT distinct disk_name
FROM system.parts
WHERE table = 'test_ttl_table'
"""
)
.strip()
.split("\n")
)
print("Disks after", disks)
if "s3" in disks:
break
time.sleep(1)
assert "s3" in disks

0 comments on commit c45ebaf

Please sign in to comment.