Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable verification - add ORDER BY * #17

Merged
merged 21 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from 19 commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
3996f3e
new member enable_verification added to SQLSmithOptions and FuzzyDuck…
hmeriann Jun 12, 2024
7540d69
checking if enable_verification set to true on BeginFuzzing
hmeriann Jun 12, 2024
f31cdbd
Merge branch 'main' into enable-verification
hmeriann Jun 18, 2024
e6e5389
Change to hmeriann for duckdb repo
hmeriann Jun 19, 2024
0d660a6
Move from duckdb/duckdb to hmeriann/duckdb
hmeriann Jun 19, 2024
c346250
Changed the duckdb version and the branch name in the workflow file
hmeriann Jun 19, 2024
4bbd75f
added generated_statement to FuzzyDuck::GenerateQuery to debug
hmeriann Jun 20, 2024
0b19f19
added verification_enabled bool to the StatementGenerator, added VOLA…
hmeriann Jun 20, 2024
f1b122f
FunctionStability::VOLATILE check is added to the SCALAR_FUNCTION_ENT…
hmeriann Jun 20, 2024
7ba7862
trying to catch the sig abort
hmeriann Jun 24, 2024
13387af
Fixed duckdb version on MainDistributionPipeline.yml to last and main
hmeriann Jun 25, 2024
a8d5ef9
fix indentation
hmeriann Jul 16, 2024
2114479
Merge remote-tracking branch 'upstream/main' into enable-verification
hmeriann Jul 16, 2024
3d62f09
Add Generate Order by *, when verification is enabled. TODO: Find the…
hmeriann Jul 16, 2024
6ac497d
when StatementGenerator object created from (*this), its verification…
hmeriann Jul 17, 2024
5eb7801
fix GenerateStarexpression to return only star expression, when verif…
hmeriann Jul 17, 2024
a396f51
add verification_enabled in a member initializer list of the Statemen…
hmeriann Jul 23, 2024
920b908
remove ORDER BY * from the window function
hmeriann Jul 23, 2024
5ef9905
remove commented out code
hmeriann Jul 24, 2024
2945d89
fix init list for StatementGenerator()
hmeriann Jul 29, 2024
2a86d1a
Merge branch 'main' into enable-verification
hmeriann Aug 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -5,4 +5,4 @@
[submodule "extension-ci-tools"]
path = extension-ci-tools
url = https://github.com/duckdb/extension-ci-tools
branch = main
hmeriann marked this conversation as resolved.
Show resolved Hide resolved
branch = main
11 changes: 8 additions & 3 deletions src/fuzzyduck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@ void FuzzyDuck::BeginFuzzing() {
auto &fs = FileSystem::GetFileSystem(context);
TryRemoveFile(complete_log);
complete_log_handle =
fs.OpenFile(complete_log, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
fs.OpenFile(complete_log, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
}
if (enable_verification) {
RunQuery("PRAGMA enable_verification");
}
}

Expand Down Expand Up @@ -64,15 +67,16 @@ void FuzzyDuck::FuzzAllFunctions() {
}

string FuzzyDuck::GenerateQuery() {
// generate the statement
// generate statement
StatementGenerator generator(context);
generator.verification_enabled = enable_verification;
// accumulate statement(s)
auto statement = string("");
if (generator.RandomPercentage(10)) {
// multi statement
idx_t number_of_statements = generator.RandomValue(1000);
LogTask("Generating Multi-Statement query of " + to_string(number_of_statements) + " statements with seed " +
to_string(seed));
to_string(seed));
for (idx_t i = 0; i < number_of_statements; i++) {
statement += generator.GenerateStatement()->ToString() + "; ";
}
Expand Down Expand Up @@ -157,6 +161,7 @@ void FuzzyDuck::LogToCurrent(const string &message) {
file->Sync();
file->Close();
}

void FuzzyDuck::LogToComplete(const string &message) {
if (!complete_log_handle) {
return;
Expand Down
1 change: 1 addition & 0 deletions src/include/fuzzyduck.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ class FuzzyDuck {
string complete_log;
string log;
bool verbose_output = false;
bool enable_verification = false;
idx_t timeout = 30;

public:
Expand Down
6 changes: 5 additions & 1 deletion src/include/statement_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ class StatementGenerator {
public:
StatementGenerator(ClientContext &context);
StatementGenerator(StatementGenerator &parent);
StatementGenerator(StatementGenerator &parent, bool verification);
~StatementGenerator();

public:
Expand All @@ -53,6 +54,7 @@ class StatementGenerator {

//! Returns true with a percentage chance (0-100)
bool RandomPercentage(idx_t percentage);
bool verification_enabled = false;
idx_t RandomValue(idx_t max);
string GetRandomAttachedDataBase();
unique_ptr<SQLStatement> GenerateStatement(StatementType type); // came from private
Expand All @@ -63,9 +65,9 @@ class StatementGenerator {
unique_ptr<DeleteStatement> GenerateDelete();
unique_ptr<DetachStatement> GenerateDetach();
unique_ptr<MultiStatement> GenerateAttachUse();
unique_ptr<SelectStatement> GenerateSelect();
unique_ptr<SetStatement> GenerateSet();

unique_ptr<SelectStatement> GenerateSelect();
unique_ptr<QueryNode> GenerateQueryNode();

unique_ptr<AttachInfo> GenerateAttachInfo();
Expand Down Expand Up @@ -99,6 +101,8 @@ class StatementGenerator {
unique_ptr<ParsedExpression> GenerateCase();

unique_ptr<OrderModifier> GenerateOrderBy();
unique_ptr<OrderModifier> GenerateOrderByAll();


LogicalType GenerateLogicalType();

Expand Down
5 changes: 5 additions & 0 deletions src/sqlsmith_extension.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ struct SQLSmithFunctionData : public TableFunctionData {
bool dump_all_queries = false;
bool dump_all_graphs = false;
bool verbose_output = false;
bool enable_verification = false;
string complete_log;
string log;
bool finished = false;
Expand Down Expand Up @@ -67,6 +68,7 @@ static void SQLSmithFunction(ClientContext &context, TableFunctionInput &data_p,
options.dump_all_queries = data.dump_all_queries;
options.dump_all_graphs = data.dump_all_graphs;
options.verbose_output = data.verbose_output;
options.enable_verification = data.enable_verification;
options.complete_log = data.complete_log;
options.log = data.log;
duckdb_sqlsmith::run_sqlsmith(DatabaseInstance::GetDatabase(context), options);
Expand Down Expand Up @@ -139,6 +141,8 @@ static duckdb::unique_ptr<FunctionData> FuzzyDuckBind(ClientContext &context, Ta
result->fuzzer.log = StringValue::Get(kv.second);
} else if (kv.first == "verbose_output") {
result->fuzzer.verbose_output = BooleanValue::Get(kv.second);
} else if (kv.first == "enable_verification") {
result->fuzzer.enable_verification = BooleanValue::Get(kv.second);
}
}
return_types.emplace_back(LogicalType::BOOLEAN);
Expand Down Expand Up @@ -186,6 +190,7 @@ void SqlsmithExtension::Load(DuckDB &db) {
fuzzy_duck_fun.named_parameters["log"] = LogicalType::VARCHAR;
fuzzy_duck_fun.named_parameters["complete_log"] = LogicalType::VARCHAR;
fuzzy_duck_fun.named_parameters["verbose_output"] = LogicalType::BOOLEAN;
fuzzy_duck_fun.named_parameters["enable_verification"] = LogicalType::BOOLEAN;
ExtensionUtil::RegisterFunction(db_instance, fuzzy_duck_fun);

TableFunction fuzz_all_functions("fuzz_all_functions", {}, FuzzAllFunctions, FuzzyDuckBind);
Expand Down
80 changes: 49 additions & 31 deletions src/statement_generator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@
}
}

//! Constructs a child generator one nesting level below parent_p.
//! context and generator_context are initialized from the parent, while the
//! verification flag is taken from the explicit verify argument.
//! Throws InternalException when the resulting depth exceeds MAX_DEPTH.
StatementGenerator::StatementGenerator(StatementGenerator &parent_p, bool verify)
    : verification_enabled(verify), context(parent_p.context), parent(&parent_p),
      generator_context(parent_p.generator_context), depth(parent_p.depth + 1) {
	// refuse to nest any deeper once the limit has been passed
	const bool nested_too_deep = depth > MAX_DEPTH;
	if (nested_too_deep) {
		throw InternalException("depth too high");
	}
}

//! Destructor with an empty body: no explicit cleanup is performed here.
StatementGenerator::~StatementGenerator() {
}

Expand Down Expand Up @@ -393,10 +401,12 @@
if (is_distinct) {
result->modifiers.push_back(make_uniq<DistinctModifier>());
}
if (RandomPercentage(20)) {
result->modifiers.push_back(GenerateOrderBy());
}
if (RandomPercentage(20)) {
if (verification_enabled) {
result->modifiers.push_back(GenerateOrderByAll());
} else if (!verification_enabled) {
hmeriann marked this conversation as resolved.
Show resolved Hide resolved
if (RandomPercentage(5)) {
result->modifiers.push_back(GenerateOrderBy());
}
if (RandomPercentage(50)) {
auto limit_percent_modifier = make_uniq<LimitPercentModifier>();
if (RandomPercentage(30)) {
Expand Down Expand Up @@ -531,7 +541,7 @@
}
unique_ptr<SelectStatement> subquery;
{
StatementGenerator child_generator(*this);
StatementGenerator child_generator(*this, verification_enabled);
subquery = unique_ptr_cast<SQLStatement, SelectStatement>(child_generator.GenerateSelect());
for (auto &col : child_generator.current_column_names) {
current_column_names.push_back(std::move(col));
Expand Down Expand Up @@ -753,8 +763,8 @@
switch (function.type) {
case CatalogType::SCALAR_FUNCTION_ENTRY: {
auto &scalar_entry = function.Cast<ScalarFunctionCatalogEntry>();
auto actual_function = scalar_entry.functions.GetFunctionByOffset(RandomValue(scalar_entry.functions.Size()));

auto offset = RandomValue(scalar_entry.functions.Size());
auto actual_function = scalar_entry.functions.GetFunctionByOffset(offset);
hmeriann marked this conversation as resolved.
Show resolved Hide resolved
name = scalar_entry.name;
arguments = actual_function.arguments;
min_parameters = actual_function.arguments.size();
Expand All @@ -767,7 +777,7 @@
case CatalogType::AGGREGATE_FUNCTION_ENTRY: {
auto &aggregate_entry = function.Cast<AggregateFunctionCatalogEntry>();
auto actual_function =
aggregate_entry.functions.GetFunctionByOffset(RandomValue(aggregate_entry.functions.Size()));
aggregate_entry.functions.GetFunctionByOffset(RandomValue(aggregate_entry.functions.Size()));

name = aggregate_entry.name;
min_parameters = actual_function.arguments.size();
Expand Down Expand Up @@ -796,7 +806,7 @@
case CatalogType::MACRO_ENTRY: {
auto &macro_entry = function.Cast<MacroCatalogEntry>();
name = macro_entry.name;
min_parameters = macro_entry.function->parameters.size();

Check failure on line 809 in src/statement_generator.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Linux (linux_amd64, ubuntu:18.04, x64-linux)

'class duckdb::MacroCatalogEntry' has no member named 'function'

Check failure on line 809 in src/statement_generator.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / Linux (linux_amd64, ubuntu:18.04, x64-linux)

'class duckdb::MacroCatalogEntry' has no member named 'function'

Check failure on line 809 in src/statement_generator.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / MacOS (osx_arm64, arm64, arm64-osx)

no member named 'function' in 'duckdb::MacroCatalogEntry'

Check failure on line 809 in src/statement_generator.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_mvp, wasm32-emscripten)

no member named 'function' in 'duckdb::MacroCatalogEntry'

Check failure on line 809 in src/statement_generator.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_mvp, wasm32-emscripten)

no member named 'function' in 'duckdb::MacroCatalogEntry'

Check failure on line 809 in src/statement_generator.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_threads, wasm32-emscripten)

no member named 'function' in 'duckdb::MacroCatalogEntry'

Check failure on line 809 in src/statement_generator.cpp

View workflow job for this annotation

GitHub Actions / Build extension binaries / DuckDB-Wasm (wasm_threads, wasm32-emscripten)

no member named 'function' in 'duckdb::MacroCatalogEntry'
max_parameters = min_parameters;
break;
}
Expand All @@ -815,19 +825,27 @@
distinct);
}

//! Builds an ORDER BY modifier containing exactly one order entry whose
//! expression is produced by GenerateStar(), with a randomly chosen sort
//! direction and NULL placement.
unique_ptr<OrderModifier> StatementGenerator::GenerateOrderByAll() {
	// draw the direction first and the NULL placement second
	const auto direction = Choose<OrderType>({OrderType::ASCENDING, OrderType::DESCENDING, OrderType::ORDER_DEFAULT});
	const auto null_placement = Choose<OrderByNullType>(
	    {OrderByNullType::NULLS_FIRST, OrderByNullType::NULLS_LAST, OrderByNullType::ORDER_DEFAULT});

	auto order_by = make_uniq<OrderModifier>();
	// a single entry only
	order_by->orders.emplace_back(direction, null_placement, GenerateStar());
	return order_by;
}

//! Builds an ORDER BY modifier holding one or more randomly generated order entries.
//! Every entry gets a random sort direction, a random NULL placement and an
//! expression obtained from GenerateExpression().
unique_ptr<OrderModifier> StatementGenerator::GenerateOrderBy() {
	auto result = make_uniq<OrderModifier>();
	while (true) {
		auto order_type = Choose<OrderType>({OrderType::ASCENDING, OrderType::DESCENDING, OrderType::ORDER_DEFAULT});
		auto null_type = Choose<OrderByNullType>(
		    {OrderByNullType::NULLS_FIRST, OrderByNullType::NULLS_LAST, OrderByNullType::ORDER_DEFAULT});
		result->orders.emplace_back(order_type, null_type, GenerateExpression());
		// stop after this entry with a 50% chance; otherwise append another one
		if (RandomPercentage(50)) {
			break;
		}
	}
	return result;
}

unique_ptr<ParsedExpression> StatementGenerator::GenerateOperator() {
Expand Down Expand Up @@ -1043,25 +1061,26 @@
result->relation_name = GenerateRelationName();
}
}

while (RandomPercentage(20)) {
auto column_name = GenerateColumnName();
if (column_name.empty()) {
break;
if (!verification_enabled) {
while (RandomPercentage(20)) {
auto column_name = GenerateColumnName();
if (column_name.empty()) {
break;
}
result->exclude_list.insert(column_name);
}
result->exclude_list.insert(column_name);
}
while (RandomPercentage(20)) {
auto column_name = GenerateColumnName();
if (column_name.empty()) {
break;
while (RandomPercentage(20)) {
auto column_name = GenerateColumnName();
if (column_name.empty()) {
break;
}
result->replace_list.insert(make_pair(column_name, GenerateExpression()));
}
result->replace_list.insert(make_pair(column_name, GenerateExpression()));
}
if (RandomPercentage(50) || expression_depth > 0) {
result->columns = true;
if (RandomPercentage(50)) {
result->expr = GenerateLambda();
if (RandomPercentage(50) || expression_depth > 0) {
result->columns = true;
if (RandomPercentage(50)) {
result->expr = GenerateLambda();
}
}
}
return std::move(result);
Expand Down Expand Up @@ -1120,9 +1139,8 @@
return GenerateConstant();
}
auto subquery = make_uniq<SubqueryExpression>();

{
StatementGenerator child_generator(*this);
StatementGenerator child_generator(*this, verification_enabled);
hmeriann marked this conversation as resolved.
Show resolved Hide resolved
subquery->subquery = unique_ptr_cast<SQLStatement, SelectStatement>(child_generator.GenerateSelect());
}
subquery->subquery_type =
Expand Down
1 change: 1 addition & 0 deletions src/third_party/sqlsmith/include/sqlsmith.hh
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ struct SQLSmithOptions {
bool dump_all_queries = false;
bool dump_all_graphs = false;
bool verbose_output = false;
bool enable_verification = false;
std::string complete_log;
std::string log;
};
Expand Down
Loading