Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace hardcoded percentages with values from a file #27

Open
wants to merge 39 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
922b9af
add RandomConfigs class with default values and applied to the Genera…
hmeriann Aug 22, 2024
88949bc
add a class for random numbers configuration
hmeriann Sep 3, 2024
1632858
change statement generator to work with the RandomsConfig
hmeriann Sep 3, 2024
aaac78c
rename randoms config
hmeriann Sep 4, 2024
c1506c5
mention random_nums_config files in the CMakes
hmeriann Sep 4, 2024
f602008
a json string gets parsed
hmeriann Sep 11, 2024
e08843b
parseJSONMap and initialise RandomNumsConfig with the custom values f…
hmeriann Sep 12, 2024
f629ec1
Merge remote-tracking branch 'upstream/main' into random-values-from-…
hmeriann Sep 12, 2024
fa89b25
fix paths to the reusable workflows, since they were moved to the .wo…
hmeriann Sep 12, 2024
6751d00
Merge branch 'main' into random-values-from-file
hmeriann Sep 13, 2024
11c5910
add randoms_config_handle to fuzzyduck
hmeriann Sep 13, 2024
566aff3
add config file
hmeriann Sep 13, 2024
87d6a03
FileSystem::ReadFile reads the file with all the new line symbol…
hmeriann Sep 13, 2024
de5cada
pass config file path as randoms_config_filepath parameter
hmeriann Sep 13, 2024
ce877c4
turn the RandomNumsConfig into an unordered_map<percentages_enum, …
hmeriann Sep 24, 2024
38757a5
rename to RandomPercentagesEnum
hmeriann Sep 24, 2024
26ba5ec
fixed missing values
hmeriann Sep 24, 2024
7275d9a
GetDefaultConfig() instead of throwing an exception when the file wit…
hmeriann Sep 24, 2024
ad51bdd
do not declare a variable for config again
hmeriann Sep 25, 2024
1430522
remove file handling, because it's being handled by yyjson_read_file
hmeriann Sep 25, 2024
3d395fd
add randoms_config_filepath to scripts/run_fuzzer.py
hmeriann Sep 25, 2024
4930d43
naive way to parse nested config.json to a map
hmeriann Sep 26, 2024
fb32a9d
config_nested.json file
hmeriann Sep 26, 2024
73b1cf1
parseJson recursively
hmeriann Sep 26, 2024
58d4908
a test file
hmeriann Sep 26, 2024
2b9ae4d
clean up
hmeriann Sep 26, 2024
f8aea67
set values of missing in the config file statement types to 0
hmeriann Sep 26, 2024
5e5e7ad
add missing quote
hmeriann Sep 27, 2024
0ddd49c
update the config file with almost all randoms
hmeriann Sep 27, 2024
7ada4b3
update src/include/random_nums_config.hpp to align with the config
hmeriann Sep 27, 2024
4cfe8c4
update StringToRandomPercentagesEnum in the src/random_nums_config.cpp
hmeriann Sep 27, 2024
ea50779
config_nested.json with all the default values
hmeriann Sep 30, 2024
c3c7725
src/include/random_nums_config.hpp corrected names
hmeriann Sep 30, 2024
ebe03d5
pass config to the statement generator
hmeriann Sep 30, 2024
5b8f652
rename some values
hmeriann Sep 30, 2024
628f2d8
remove duplicating line
hmeriann Sep 30, 2024
1a1ff8a
make RandomNumsConfig class
hmeriann Oct 3, 2024
ef48353
limit count of generated set operations
hmeriann Oct 10, 2024
3cd1bfb
set percentage to setop
hmeriann Oct 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
set(EXTENSION_SOURCES src/sqlsmith_extension.cpp
src/statement_generator.cpp
src/statement_simplifier.cpp
src/random_nums_config.cpp
src/fuzzyduck.cpp ${EXTENSION_OBJECT_FILES})

build_static_extension(${TARGET_NAME} ${EXTENSION_SOURCES})
Expand Down
8 changes: 8 additions & 0 deletions config.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"select_percentage": "70",
"attach_percentage": "20",
"attach_use_percentage": "80",
"detach_percentage": "15",
"set_percentage": "5",
"delete_percentage": "25"
}
100 changes: 100 additions & 0 deletions config_nested.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
{
"attach": {
"attach_percentage": "20",
"attach_use_percentage": "80",
"attach_read_only": "30"
},
"detach": {
"detach_percentage": "15",
"detach_random_name": "20"
},
"set": {
"set_percentage": "5",
"set_attached_db": "90"
},
"delete_percentage": "25",
"select": {
"select_percentage": "70",
"select_node": {
"select_node_perc": "5",
"select_node_is_distinct_perc": "30",
"select_node_from_table_perc": "95",
"select_node_where_perc": "60",
"select_node_having_perc": "25",
"select_node_groups": {
"select_node_groups_perc": "30",
"select_node_group_by_perc": "70"
},
"select_node_qualify_perc": "10",
"select_node_aggregate_perc": "10",
"select_node_sample": {
"select_node_sample_perc": "10",
"select_node_sample_is_perc": "50",
"select_node_sample_size": "100"
}
},
"setop": "40",
"result_modifiers": "5",
"limit_percent_modifier": "50",
"limit_percent_modifier_limit": "30",
"limit_percent_modifier_offset": "30",
"limit_modifier_limit": "30",
"limit_modifier_offset": "30"
},
"create": {
"create_percentage": "0",
"create_generate_select": "50",
"create_num_cols": "1000"
},
"table_ref": {
"table_ref_base_table_ref_perc": "60",
"table_ref_expression_list_ref": "20",
"table_ref_join_ref": "40"
},
"join_ref": {
"join_ref_cross": "10",
"join_ref_asof": "10",
"join_ref_natural": "10",
"join_ref_positional": "10",
"join_ref_general_expression": "70"
},
"expression": {
"expression_column_ref": "50",
"expression_constant": "30",
"expression_subquery": "3"
},
"constant_value": {
"constant_value_bigint": "50",
"constant_value_to_string": "30"
},
"function": {
"function_aggregate": {
"function_aggregate_window_function": "10",
"function_aggregate_order_by": "10",
"function_aggregate_random_expression": "10",
"function_aggregate_distinct": "10"
}
},
"window_function": {
"window_function_partitions": "50",
"window_function_orders": "30",
"window_function_ignore_nulls": "30",
"window_function_result_offset": "30",
"window_function_result_default": "30"
},
"star": {
"star_relation_name": "10",
"star_column_name_exclude_list": "20",
"star_column_name": "20",
"star_columns": {
"star_columns_true": "50",
"star_columns_true_lambda": "50"
}
},
"relational_name": {
"relational_name_choose_current": "80"
},
"column_names": {
"column_names_choose_current": "80"
}
}
6 changes: 5 additions & 1 deletion scripts/run_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@
perform_checks = False
elif param.startswith('--enable_verification'):
verification = param.replace('--enable_verification=', '').lower() == 'true'
elif param.startswith('--randoms_config_filepath'):
randoms_config_filepath = param.replace('--randoms_config_filepath=', '')
elif param.startswith('--shell='):
shell = param.replace('--shell=', '')
elif param.startswith('--seed='):
Expand Down Expand Up @@ -76,7 +78,8 @@ def run_fuzzer_script(fuzzer):
if fuzzer == 'sqlsmith':
return "call sqlsmith(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');"
elif fuzzer == 'duckfuzz':
return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}', enable_verification='${ENABLE_VERIFICATION}');"
return "call fuzzyduck(max_queries=${MAX_QUERIES}, seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}', \
enable_verification='${ENABLE_VERIFICATION}', randoms_config_filepath='${RANDOMS_CONFIG_FILEPATH}');"
elif fuzzer == 'duckfuzz_functions':
return "call fuzz_all_functions(seed=${SEED}, verbose_output=1, log='${LAST_LOG_FILE}', complete_log='${COMPLETE_LOG_FILE}');"
else:
Expand Down Expand Up @@ -132,6 +135,7 @@ def run_shell_command(cmd):
.replace('${COMPLETE_LOG_FILE}', complete_log_file)
.replace('${SEED}', str(seed))
.replace('${ENABLE_VERIFICATION}', str(verification))
.replace('${RANDOMS_CONFIG_FILEPATH}', randoms_config_filepath)
)

print(load_script)
Expand Down
2 changes: 1 addition & 1 deletion src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ include_directories(third_party/sqlsmith/include)
add_subdirectory(third_party)

set(SQLSMITH_SOURCES
sqlsmith_extension.cpp statement_generator.cpp statement_simplifier.cpp
sqlsmith_extension.cpp statement_generator.cpp statement_simplifier.cpp random_nums_config.cpp
fuzzyduck.cpp ${SQLSMITH_OBJECT_FILES})

build_static_extension(sqlsmith ${SQLSMITH_SOURCES})
Expand Down
8 changes: 7 additions & 1 deletion src/fuzzyduck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,15 +22,20 @@ void FuzzyDuck::BeginFuzzing() {
if (max_queries == 0) {
throw BinderException("Provide a max_queries argument greater than 0");
}
auto &fs = FileSystem::GetFileSystem(context);
if (!complete_log.empty()) {
auto &fs = FileSystem::GetFileSystem(context);
TryRemoveFile(complete_log);
complete_log_handle =
fs.OpenFile(complete_log, FileFlags::FILE_FLAGS_WRITE | FileFlags::FILE_FLAGS_FILE_CREATE_NEW);
}
if (enable_verification) {
RunQuery("PRAGMA enable_verification");
}
if (!randoms_config_filepath.empty()) {
config = RandomNumsConfig().GetConfigFromFile(randoms_config_filepath.c_str());
} else {
config = RandomNumsConfig().GetDefaultConfig();
}
}

void FuzzyDuck::EndFuzzing() {
Expand Down Expand Up @@ -70,6 +75,7 @@ string FuzzyDuck::GenerateQuery() {
// generate statement
StatementGenerator generator(context);
generator.verification_enabled = enable_verification;
generator.config = config;
// accumulate statement(s)
auto statement = string("");
if (generator.RandomPercentage(10)) {
Expand Down
6 changes: 6 additions & 0 deletions src/include/fuzzyduck.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,10 @@

#include "duckdb.hpp"
#include "duckdb/parser/query_node.hpp"
#include "random_nums_config.hpp"

namespace duckdb {

struct FileHandle;

class FuzzyDuck {
Expand All @@ -27,6 +29,9 @@ class FuzzyDuck {
bool verbose_output = false;
bool enable_verification = false;
idx_t timeout = 30;
string randoms_config_filepath;
// RandomNumsConfig config;
unordered_map<RandomPercentagesEnum, idx_t> config;

public:
void Fuzz();
Expand All @@ -50,6 +55,7 @@ class FuzzyDuck {

private:
unique_ptr<FileHandle> complete_log_handle;
unique_ptr<FileHandle> randoms_config_handle;
};

} // namespace duckdb
145 changes: 145 additions & 0 deletions src/include/random_nums_config.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
//===----------------------------------------------------------------------===//
//                         DuckDB
//
// random_nums_config.hpp
//
//
//===----------------------------------------------------------------------===//

#pragma once

#include "duckdb.hpp"
#include "yyjson.hpp"

namespace duckdb {

// Identifies every tunable random setting of the fuzzer.
//
// Each enumerator is used as the key of one entry in the
// unordered_map<RandomPercentagesEnum, idx_t> config that FuzzyDuck hands to
// the StatementGenerator.
//
// The numeric values are explicit and contiguous starting at 0, so the
// trailing COUNT enumerator equals the number of settings. When adding a new
// setting, append it before COUNT and keep the numbering contiguous.
//
// NOTE(review): most entries look like percentages, but a few (e.g.
// SELECT_NODE_SAMPLE_SIZE, CREATE_NUM_COLS) appear to be sizes/limits rather
// than percentages - confirm against the generator before relying on a
// 0-100 range.
enum class RandomPercentagesEnum : idx_t {
	// ----------------------------------
	// Generate Attach Percentages
	// ----------------------------------
	ATTACH = 0,
	ATTACH_USE = 1,
	ATTACH_READ_ONLY = 2,

	// ----------------------------------
	// Generate Detach Percentages
	// ----------------------------------
	DETACH = 3,
	DETACH_RANDOM_NAME = 4,

	// ----------------------------------
	// Generate Set Percentages
	// ----------------------------------
	SET = 5,
	SET_ATTACHED_DB = 6,

	// ----------------------------------
	// Generate Delete Percentages
	// ----------------------------------
	DELETE = 7,

	// ----------------------------------
	// Generate Select Percentages
	// ----------------------------------
	SELECT = 8,
	SELECT_NODE = 9,
	SELECT_NODE_IS_DISTINCT = 10,
	SELECT_NODE_FROM_TABLE = 11,
	SELECT_NODE_WHERE = 12,
	SELECT_NODE_HAVING = 13,
	SELECT_NODE_GROUPS = 14,
	SELECT_NODE_GROUP_BY = 15,
	SELECT_NODE_QUALIFY = 16,
	SELECT_NODE_AGGREGATE = 17,
	SELECT_NODE_SAMPLE = 18,
	SELECT_NODE_SAMPLE_IS_PERC = 19,
	SELECT_NODE_SAMPLE_SIZE = 20,
	SETOP = 21,
	RESULT_MODIFIERS = 22,
	LIMIT_PERCENT_MODIFIER = 23,
	LIMIT_PERCENT_MODIFIER_LIMIT = 24,
	LIMIT_PERCENT_MODIFIER_OFFSET = 25,
	LIMIT_MODIFIER_LIMIT = 26,
	LIMIT_MODIFIER_OFFSET = 27,

	// ----------------------------------
	// Generate Create Percentages
	// ----------------------------------
	CREATE = 28,
	CREATE_GENERATE_SELECT = 29,
	CREATE_NUM_COLS = 30,

	// -----------------------------------
	// Generate Table Ref Percentages
	// -----------------------------------
	TABLE_REF_BASE_TABLE_REF_PERC = 31,
	TABLE_REF_EXPRESSION_LIST_REF = 32,
	TABLE_REF_JOIN_REF = 33,

	// -----------------------------------
	// Generate Join Ref Percentages
	// -----------------------------------
	JOIN_REF_CROSS = 34,
	JOIN_REF_ASOF = 35,
	JOIN_REF_NATURAL = 36,
	JOIN_REF_POSITIONAL = 37,
	JOIN_REF_GENERAL_EXPRESSION = 38,

	// -----------------------------------
	// Generate Expression Percentages
	// -----------------------------------
	EXPRESSION_COLUMN_REF = 39,
	EXPRESSION_CONSTANT = 40,
	EXPRESSION_SUBQUERY = 41,

	// -----------------------------------
	// Generate Constant Value Percentages
	// -----------------------------------
	CONSTANT_VALUE_BIGINT = 42,
	CONSTANT_VALUE_TO_STRING = 43,

	// -----------------------------------
	// Generate Function Percentages
	// -----------------------------------
	FUNCTION_AGGREGATE_WINDOW_FUNCTION = 44,
	FUNCTION_AGGREGATE_ORDER_BY = 45,
	FUNCTION_AGGREGATE_DISTINCT = 46,
	FUNCTION_AGGREGATE_RANDOM_EXPRESSION = 47,

	// -----------------------------------
	// Generate Window Function Percentages
	// -----------------------------------
	WINDOW_FUNCTION_PARTITIONS = 48,
	WINDOW_FUNCTION_ORDERS = 49,
	WINDOW_FUNCTION_IGNORE_NULLS = 50,
	WINDOW_FUNCTION_RESULT_OFFSET = 51,
	WINDOW_FUNCTION_RESULT_DEFAULT = 52,

	// -----------------------------------
	// Generate Star Percentages
	// -----------------------------------
	STAR_RELATION_NAME = 53,
	STAR_COLUMN_NAME = 54,
	STAR_COLUMN_NAME_EXCLUDE_LIST = 55,
	STAR_COLUMNS_TRUE = 56,
	STAR_COLUMNS_TRUE_LAMBDA = 57,

	// -----------------------------------
	// Generate Relation / Column Name Percentages
	// -----------------------------------
	RELATIONAL_NAME_CHOOSE_CURRENT = 58,
	COLUMN_NAMES_CHOOSE_CURRENT = 59,

	// Sentinel: number of settings above (not a setting itself); must stay last.
	COUNT
};

// Produces the random-settings configuration of the fuzzer as a map from
// RandomPercentagesEnum to idx_t, either from defaults or from a config file.
// Only the declarations are visible here; the definitions live in
// random_nums_config.cpp.
class RandomNumsConfig {
public:
// Default constructor. FuzzyDuck::BeginFuzzing creates a temporary with it
// just to call GetConfigFromFile / GetDefaultConfig.
RandomNumsConfig();
// Constructs from a config file path.
// NOTE(review): single-argument constructor is not `explicit`, so a
// `const char *` converts implicitly to RandomNumsConfig - confirm intended.
RandomNumsConfig(const char *config_file_path);
~RandomNumsConfig();

// NOTE(review): no in-class initializer; confirm the constructors set this
// before it is read.
RandomPercentagesEnum percentages_selector;
// unordered_map<RandomPercentagesEnum, idx_t> config;

// Returns the built-in configuration; used by FuzzyDuck when no config file
// path is supplied.
unordered_map<RandomPercentagesEnum, idx_t> GetDefaultConfig();
// Returns the configuration read from a file.
// NOTE(review): the parameter is named `json_string`, but the caller in
// fuzzyduck.cpp passes a file path (randoms_config_filepath.c_str()), not
// JSON text - the name is misleading and should probably be a path name.
// Behaviour for a missing/invalid file is not visible here - TODO confirm.
unordered_map<RandomPercentagesEnum, idx_t> GetConfigFromFile(const char *json_string);
// Returns a string for the given enumerator (presumably its name - confirm
// against the implementation).
string RandomPercentagesEnumToString(RandomPercentagesEnum type);

};

}// namespace duckdb
4 changes: 4 additions & 0 deletions src/include/statement_generator.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "duckdb.hpp"
#include "duckdb/parser/parsed_data/detach_info.hpp"
#include "duckdb/parser/query_node.hpp"
#include "random_nums_config.hpp"

#define TESTING_DIRECTORY_NAME "duckdb_unittest_tempdir"

Expand Down Expand Up @@ -40,6 +41,7 @@ class StatementGenerator {
friend class ExpressionDepthChecker;
friend class AggregateChecker;
friend class WindowChecker;
unordered_map<RandomPercentagesEnum, idx_t> config;

public:
StatementGenerator(ClientContext &context);
Expand All @@ -55,6 +57,8 @@ class StatementGenerator {
bool RandomPercentage(idx_t percentage);
bool verification_enabled = false;
idx_t RandomValue(idx_t max);


string GetRandomAttachedDataBase();
unique_ptr<SQLStatement> GenerateStatement(StatementType type); // came from private

Expand Down
Loading
Loading