From 015c757e47d5b4a5b1acf307cecd39234bb5085a Mon Sep 17 00:00:00 2001 From: DuckDB Labs GitHub Bot Date: Thu, 12 Sep 2024 00:31:34 +0000 Subject: [PATCH] Update vendored DuckDB sources to 4adc14b --- src/duckdb/src/common/cgroups.cpp | 39 +++++------- .../src/common/operator/cast_operators.cpp | 62 +------------------ .../types/column/column_data_allocator.cpp | 3 +- .../common/types/row/tuple_data_allocator.cpp | 3 +- .../src/execution/index/art/iterator.cpp | 32 +++++----- src/duckdb/src/execution/index/art/prefix.cpp | 43 +++---------- .../src/execution/index/fixed_size_buffer.cpp | 7 ++- .../csv_scanner/buffer_manager/csv_buffer.cpp | 3 +- .../scanner/string_value_scanner.cpp | 19 ++++-- .../csv_scanner/sniffer/type_detection.cpp | 8 ++- .../state_machine/csv_state_machine_cache.cpp | 8 +-- src/duckdb/src/function/table/sniff_csv.cpp | 5 +- .../function/table/version/pragma_version.cpp | 6 +- .../duckdb/common/operator/cast_operators.hpp | 60 ++++++++++++++++++ .../common/types/row/row_data_collection.hpp | 3 +- .../operator/csv_scanner/csv_sniffer.hpp | 2 +- src/duckdb/src/include/duckdb/main/config.hpp | 2 +- .../include/duckdb/storage/block_manager.hpp | 4 +- .../duckdb/storage/buffer/block_handle.hpp | 11 ++-- .../include/duckdb/storage/buffer_manager.hpp | 8 +-- .../storage/standard_buffer_manager.hpp | 7 +-- src/duckdb/src/main/capi/duckdb_value-c.cpp | 6 +- src/duckdb/src/main/config.cpp | 35 +++++++---- .../src/storage/buffer/block_handle.cpp | 39 ++++++------ .../src/storage/buffer/block_manager.cpp | 16 +++-- src/duckdb/src/storage/buffer_manager.cpp | 5 +- .../compression/string_uncompressed.cpp | 3 +- .../src/storage/metadata/metadata_manager.cpp | 15 ++--- .../src/storage/standard_buffer_manager.cpp | 39 ++++++------ .../src/storage/table/column_segment.cpp | 4 +- src/duckdb/src/storage/wal_replay.cpp | 5 +- 31 files changed, 260 insertions(+), 242 deletions(-) diff --git a/src/duckdb/src/common/cgroups.cpp b/src/duckdb/src/common/cgroups.cpp 
index b9d2b820..eb240b20 100644 --- a/src/duckdb/src/common/cgroups.cpp +++ b/src/duckdb/src/common/cgroups.cpp @@ -22,9 +22,7 @@ optional_idx CGroups::GetMemoryLimit(FileSystem &fs) { } optional_idx CGroups::GetCGroupV2MemoryLimit(FileSystem &fs) { -#ifdef DUCKDB_WASM - return optional_idx(); -#else +#if defined(__linux__) && !defined(DUCKDB_WASM) const char *cgroup_self = "/proc/self/cgroup"; const char *memory_max = "/sys/fs/cgroup/%s/memory.max"; @@ -45,13 +43,13 @@ optional_idx CGroups::GetCGroupV2MemoryLimit(FileSystem &fs) { } return ReadCGroupValue(fs, memory_max_path); +#else + return optional_idx(); #endif } optional_idx CGroups::GetCGroupV1MemoryLimit(FileSystem &fs) { -#ifdef DUCKDB_WASM - return optional_idx(); -#else +#if defined(__linux__) && !defined(DUCKDB_WASM) const char *cgroup_self = "/proc/self/cgroup"; const char *memory_limit = "/sys/fs/cgroup/memory/%s/memory.limit_in_bytes"; @@ -72,13 +70,13 @@ optional_idx CGroups::GetCGroupV1MemoryLimit(FileSystem &fs) { } return ReadCGroupValue(fs, memory_limit_path); +#else + return optional_idx(); #endif } string CGroups::ReadCGroupPath(FileSystem &fs, const char *cgroup_file) { -#ifdef DUCKDB_WASM - return ""; -#else +#if defined(__linux__) && !defined(DUCKDB_WASM) auto handle = fs.OpenFile(cgroup_file, FileFlags::FILE_FLAGS_READ); char buffer[1024]; auto bytes_read = fs.Read(*handle, buffer, sizeof(buffer) - 1); @@ -90,15 +88,12 @@ string CGroups::ReadCGroupPath(FileSystem &fs, const char *cgroup_file) { if (pos != string::npos) { return content.substr(pos + 2); } - - return ""; #endif + return ""; } string CGroups::ReadMemoryCGroupPath(FileSystem &fs, const char *cgroup_file) { -#ifdef DUCKDB_WASM - return ""; -#else +#if defined(__linux__) && !defined(DUCKDB_WASM) auto handle = fs.OpenFile(cgroup_file, FileFlags::FILE_FLAGS_READ); char buffer[1024]; auto bytes_read = fs.Read(*handle, buffer, sizeof(buffer) - 1); @@ -115,15 +110,12 @@ string CGroups::ReadMemoryCGroupPath(FileSystem &fs, const char 
*cgroup_file) { } content.erase(0, pos + 1); } - - return ""; #endif + return ""; } optional_idx CGroups::ReadCGroupValue(FileSystem &fs, const char *file_path) { -#ifdef DUCKDB_WASM - return optional_idx(); -#else +#if defined(__linux__) && !defined(DUCKDB_WASM) auto handle = fs.OpenFile(file_path, FileFlags::FILE_FLAGS_READ); char buffer[100]; auto bytes_read = fs.Read(*handle, buffer, 99); @@ -133,15 +125,12 @@ optional_idx CGroups::ReadCGroupValue(FileSystem &fs, const char *file_path) { if (TryCast::Operation(string_t(buffer), value)) { return optional_idx(value); } - return optional_idx(); #endif + return optional_idx(); } idx_t CGroups::GetCPULimit(FileSystem &fs, idx_t physical_cores) { -#ifdef DUCKDB_WASM - return physical_cores; -#else - +#if defined(__linux__) && !defined(DUCKDB_WASM) static constexpr const char *cpu_max = "/sys/fs/cgroup/cpu.max"; static constexpr const char *cfs_quota = "/sys/fs/cgroup/cpu/cpu.cfs_quota_us"; static constexpr const char *cfs_period = "/sys/fs/cgroup/cpu/cpu.cfs_period_us"; @@ -183,6 +172,8 @@ idx_t CGroups::GetCPULimit(FileSystem &fs, idx_t physical_cores) { } else { return physical_cores; } +#else + return physical_cores; #endif } diff --git a/src/duckdb/src/common/operator/cast_operators.cpp b/src/duckdb/src/common/operator/cast_operators.cpp index bcc0b7c0..87adfba4 100644 --- a/src/duckdb/src/common/operator/cast_operators.cpp +++ b/src/duckdb/src/common/operator/cast_operators.cpp @@ -920,68 +920,12 @@ bool TryCast::Operation(double input, double &result, bool strict) { //===--------------------------------------------------------------------===// // Cast String -> Numeric //===--------------------------------------------------------------------===// + template <> bool TryCast::Operation(string_t input, bool &result, bool strict) { - auto input_data = reinterpret_cast(input.GetData()); + auto input_data = reinterpret_cast(input.GetData()); auto input_size = input.GetSize(); - - switch (input_size) { - case 1: { - 
unsigned char c = UnsafeNumericCast(std::tolower(*input_data)); - if (c == 't' || (!strict && c == 'y') || (!strict && c == '1')) { - result = true; - return true; - } else if (c == 'f' || (!strict && c == 'n') || (!strict && c == '0')) { - result = false; - return true; - } - return false; - } - case 2: { - unsigned char n = UnsafeNumericCast(std::tolower(input_data[0])); - unsigned char o = UnsafeNumericCast(std::tolower(input_data[1])); - if (n == 'n' && o == 'o') { - result = false; - return true; - } - return false; - } - case 3: { - unsigned char y = UnsafeNumericCast(std::tolower(input_data[0])); - unsigned char e = UnsafeNumericCast(std::tolower(input_data[1])); - unsigned char s = UnsafeNumericCast(std::tolower(input_data[2])); - if (y == 'y' && e == 'e' && s == 's') { - result = true; - return true; - } - return false; - } - case 4: { - unsigned char t = UnsafeNumericCast(std::tolower(input_data[0])); - unsigned char r = UnsafeNumericCast(std::tolower(input_data[1])); - unsigned char u = UnsafeNumericCast(std::tolower(input_data[2])); - unsigned char e = UnsafeNumericCast(std::tolower(input_data[3])); - if (t == 't' && r == 'r' && u == 'u' && e == 'e') { - result = true; - return true; - } - return false; - } - case 5: { - unsigned char f = UnsafeNumericCast(std::tolower(input_data[0])); - unsigned char a = UnsafeNumericCast(std::tolower(input_data[1])); - unsigned char l = UnsafeNumericCast(std::tolower(input_data[2])); - unsigned char s = UnsafeNumericCast(std::tolower(input_data[3])); - unsigned char e = UnsafeNumericCast(std::tolower(input_data[4])); - if (f == 'f' && a == 'a' && l == 'l' && s == 's' && e == 'e') { - result = false; - return true; - } - return false; - } - default: - return false; - } + return TryCastStringBool(input_data, input_size, result, strict); } template <> bool TryCast::Operation(string_t input, int8_t &result, bool strict) { diff --git a/src/duckdb/src/common/types/column/column_data_allocator.cpp 
b/src/duckdb/src/common/types/column/column_data_allocator.cpp index 0081a0e4..0f2c6384 100644 --- a/src/duckdb/src/common/types/column/column_data_allocator.cpp +++ b/src/duckdb/src/common/types/column/column_data_allocator.cpp @@ -65,7 +65,8 @@ BufferHandle ColumnDataAllocator::AllocateBlock(idx_t size) { BlockMetaData data; data.size = 0; data.capacity = NumericCast(max_size); - auto pin = alloc.buffer_manager->Allocate(MemoryTag::COLUMN_DATA, max_size, false, &data.handle); + auto pin = alloc.buffer_manager->Allocate(MemoryTag::COLUMN_DATA, max_size, false); + data.handle = pin.GetBlockHandle(); blocks.push_back(std::move(data)); allocated_size += max_size; return pin; diff --git a/src/duckdb/src/common/types/row/tuple_data_allocator.cpp b/src/duckdb/src/common/types/row/tuple_data_allocator.cpp index f00c10fa..8f391c49 100644 --- a/src/duckdb/src/common/types/row/tuple_data_allocator.cpp +++ b/src/duckdb/src/common/types/row/tuple_data_allocator.cpp @@ -11,7 +11,8 @@ namespace duckdb { using ValidityBytes = TupleDataLayout::ValidityBytes; TupleDataBlock::TupleDataBlock(BufferManager &buffer_manager, idx_t capacity_p) : capacity(capacity_p), size(0) { - buffer_manager.Allocate(MemoryTag::HASH_TABLE, capacity, false, &handle); + auto buffer_handle = buffer_manager.Allocate(MemoryTag::HASH_TABLE, capacity, false); + handle = buffer_handle.GetBlockHandle(); } TupleDataBlock::TupleDataBlock(TupleDataBlock &&other) noexcept : capacity(0), size(0) { diff --git a/src/duckdb/src/execution/index/art/iterator.cpp b/src/duckdb/src/execution/index/art/iterator.cpp index 3f1f1f4f..689029a0 100644 --- a/src/duckdb/src/execution/index/art/iterator.cpp +++ b/src/duckdb/src/execution/index/art/iterator.cpp @@ -251,11 +251,7 @@ bool Iterator::Next() { } void Iterator::PopNode() { - // We are popping a gate node. 
- if (nodes.top().node.GetGateStatus() == GateStatus::GATE_SET) { - D_ASSERT(status == GateStatus::GATE_SET); - status = GateStatus::GATE_NOT_SET; - } + auto gate_status = nodes.top().node.GetGateStatus(); // Pop the byte and the node. if (nodes.top().node.GetType() != NType::PREFIX) { @@ -264,19 +260,25 @@ void Iterator::PopNode() { nested_depth--; D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE); } - nodes.pop(); - return; - } - // Pop all prefix bytes and the node. - Prefix prefix(art, nodes.top().node); - auto prefix_byte_count = prefix.data[Prefix::Count(art)]; - current_key.Pop(prefix_byte_count); - if (status == GateStatus::GATE_SET) { - nested_depth -= prefix_byte_count; - D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE); + } else { + // Pop all prefix bytes and the node. + Prefix prefix(art, nodes.top().node); + auto prefix_byte_count = prefix.data[Prefix::Count(art)]; + current_key.Pop(prefix_byte_count); + + if (status == GateStatus::GATE_SET) { + nested_depth -= prefix_byte_count; + D_ASSERT(nested_depth < Prefix::ROW_ID_SIZE); + } } nodes.pop(); + + // We are popping a gate node. 
+ if (gate_status == GateStatus::GATE_SET) { + D_ASSERT(status == GateStatus::GATE_SET); + status = GateStatus::GATE_NOT_SET; + } } } // namespace duckdb diff --git a/src/duckdb/src/execution/index/art/prefix.cpp b/src/duckdb/src/execution/index/art/prefix.cpp index 66904696..cfdf8985 100644 --- a/src/duckdb/src/execution/index/art/prefix.cpp +++ b/src/duckdb/src/execution/index/art/prefix.cpp @@ -400,42 +400,15 @@ void Prefix::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptr ref(node); - while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { - Prefix prefix(art, ref, true, true); - if (!prefix.in_memory) { - return; - } - - Node new_node; - new_node = allocator->New(); - new_node.SetMetadata(static_cast(PREFIX)); - - Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT); - new_prefix.data[DEPRECATED_COUNT] = prefix.data[Count(art)]; - memcpy(new_prefix.data, prefix.data, new_prefix.data[DEPRECATED_COUNT]); - *new_prefix.ptr = *prefix.ptr; - - prefix.ptr->Clear(); - Node::Free(art, ref); - ref.get() = new_node; - ref = *new_prefix.ptr; - } - - return Node::TransformToDeprecated(art, ref, allocator); - } - - // Else, we need to create a new prefix chain. + // We need to create a new prefix (chain). 
Node new_node; new_node = allocator->New(); new_node.SetMetadata(static_cast(PREFIX)); Prefix new_prefix(allocator, new_node, DEPRECATED_COUNT); - reference ref(node); - while (ref.get().GetType() == PREFIX && ref.get().GetGateStatus() == GateStatus::GATE_NOT_SET) { - Prefix prefix(art, ref, true, true); + Node current_node = node; + while (current_node.GetType() == PREFIX && current_node.GetGateStatus() == GateStatus::GATE_NOT_SET) { + Prefix prefix(art, current_node, true, true); if (!prefix.in_memory) { return; } @@ -445,11 +418,13 @@ void Prefix::TransformToDeprecated(ART &art, Node &node, unsafe_unique_ptrClear(); + Node::Free(art, current_node); + current_node = *new_prefix.ptr; } - return Node::TransformToDeprecated(art, ref, allocator); + node = new_node; + return Node::TransformToDeprecated(art, *new_prefix.ptr, allocator); } Prefix Prefix::Append(ART &art, const uint8_t byte) { diff --git a/src/duckdb/src/execution/index/fixed_size_buffer.cpp b/src/duckdb/src/execution/index/fixed_size_buffer.cpp index 29bb40f7..7647a453 100644 --- a/src/duckdb/src/execution/index/fixed_size_buffer.cpp +++ b/src/duckdb/src/execution/index/fixed_size_buffer.cpp @@ -40,7 +40,8 @@ FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager) block_handle(nullptr) { auto &buffer_manager = block_manager.buffer_manager; - buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false, &block_handle); + buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false); + block_handle = buffer_handle.GetBlockHandle(); } FixedSizeBuffer::FixedSizeBuffer(BlockManager &block_manager, const idx_t segment_count, const idx_t allocation_size, @@ -137,8 +138,8 @@ void FixedSizeBuffer::Pin() { // Copy the (partial) data into a new (not yet disk-backed) buffer handle. 
shared_ptr new_block_handle; - auto new_buffer_handle = - buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false, &new_block_handle); + auto new_buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager.GetBlockSize(), false); + new_block_handle = new_buffer_handle.GetBlockHandle(); memcpy(new_buffer_handle.Ptr(), buffer_handle.Ptr() + block_pointer.offset, allocation_size); buffer_handle = std::move(new_buffer_handle); diff --git a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp index bf0d7aeb..3677e0a7 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/buffer_manager/csv_buffer.cpp @@ -54,7 +54,8 @@ void CSVBuffer::AllocateBuffer(idx_t buffer_size) { auto &buffer_manager = BufferManager::GetBufferManager(context); bool can_destroy = !is_pipe; handle = buffer_manager.Allocate(MemoryTag::CSV_READER, MaxValue(buffer_manager.GetBlockSize(), buffer_size), - can_destroy, &block); + can_destroy); + block = handle.GetBlockHandle(); } idx_t CSVBuffer::GetBufferSize() { diff --git a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp index 5f95446e..b58f3649 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/scanner/string_value_scanner.cpp @@ -264,6 +264,10 @@ void StringValueResult::AddValueToVector(const char *value_ptr, const idx_t size } bool success = true; switch (parse_types[chunk_col_id].type_id) { + case LogicalTypeId::BOOLEAN: + success = + TryCastStringBool(value_ptr, size, static_cast(vector_ptr[chunk_col_id])[number_of_rows], false); + break; case LogicalTypeId::TINYINT: success = TrySimpleIntegerCast(value_ptr, size, 
static_cast(vector_ptr[chunk_col_id])[number_of_rows], false); @@ -644,9 +648,15 @@ bool LineError::HandleErrors(StringValueResult &result) { result.error_handler.Error(csv_error); } if (is_error_in_line) { - result.borked_rows.insert(result.number_of_rows); - result.cur_col_id = 0; - result.chunk_col_id = 0; + if (result.sniffing) { + // If we are sniffing we just remove the line + result.RemoveLastLine(); + } else { + // Otherwise, we add it to the borked rows to remove it later and just cleanup the column variables. + result.borked_rows.insert(result.number_of_rows); + result.cur_col_id = 0; + result.chunk_col_id = 0; + } Reset(); return true; } @@ -1437,6 +1447,7 @@ bool StringValueScanner::CanDirectlyCast(const LogicalType &type, bool icu_loade case LogicalTypeId::TIME: case LogicalTypeId::DECIMAL: case LogicalType::VARCHAR: + case LogicalType::BOOLEAN: return true; case LogicalType::TIMESTAMP_TZ: // We only try to do direct cast of timestamp tz if the ICU extension is not loaded, otherwise, it needs to go @@ -1493,7 +1504,7 @@ void StringValueScanner::SetStart() { } if (iterator.pos.buffer_pos == cur_buffer_handle->actual_size || scan_finder->iterator.GetBufferIdx() > iterator.GetBufferIdx()) { - // If things go terribly wrong, we never loop indefinetly. + // If things go terribly wrong, we never loop indefinitely. 
iterator.pos.buffer_idx = scan_finder->iterator.pos.buffer_idx; iterator.pos.buffer_pos = scan_finder->iterator.pos.buffer_pos; result.last_position = {iterator.pos.buffer_idx, iterator.pos.buffer_pos, result.buffer_size}; diff --git a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp index 11d79c40..25178108 100644 --- a/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/sniffer/type_detection.cpp @@ -103,6 +103,10 @@ bool CSVSniffer::CanYouCastIt(ClientContext &context, const string_t value, cons auto value_ptr = value.GetData(); auto value_size = value.GetSize(); switch (type.id()) { + case LogicalTypeId::BOOLEAN: { + bool dummy_value; + return TryCastStringBool(value_ptr, value_size, dummy_value, true); + } case LogicalTypeId::TINYINT: { int8_t dummy_value; return TrySimpleIntegerCast(value_ptr, value_size, dummy_value, false); @@ -382,7 +386,7 @@ void CSVSniffer::SniffTypes(DataChunk &data_chunk, CSVStateMachine &state_machin } // If we have a predefined date/timestamp format we set it -void CSVSniffer::SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) { +void CSVSniffer::SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) const { const vector data_time_formats {LogicalTypeId::DATE, LogicalTypeId::TIMESTAMP}; for (auto &date_time_format : data_time_formats) { auto &user_option = options.dialect_options.date_format.at(date_time_format); @@ -423,7 +427,7 @@ void CSVSniffer::DetectTypes() { } } } - if (break_loop) { + if (break_loop && !candidate->state_machine->options.ignore_errors.GetValue()) { continue; } } diff --git a/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp b/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp index fc9d7f47..6c93cc93 100644 --- 
a/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp +++ b/src/duckdb/src/execution/operator/csv_scanner/state_machine/csv_state_machine_cache.cpp @@ -22,7 +22,7 @@ void CSVStateMachineCache::Insert(const CSVStateMachineOptions &state_machine_op auto &transition_array = state_machine_cache[state_machine_options]; for (uint32_t i = 0; i < StateMachine::NUM_STATES; i++) { - CSVState cur_state = CSVState(i); + CSVState cur_state = static_cast(i); switch (cur_state) { case CSVState::QUOTED: case CSVState::QUOTED_NEW_LINE: @@ -234,11 +234,11 @@ CSVStateMachineCache::CSVStateMachineCache() { auto default_delimiter = DialectCandidates::GetDefaultDelimiter(); auto default_comment = DialectCandidates::GetDefaultComment(); - for (auto quoterule : default_quote_rule) { - const auto &quote_candidates = default_quote[static_cast(quoterule)]; + for (auto quote_rule : default_quote_rule) { + const auto &quote_candidates = default_quote[static_cast(quote_rule)]; for (const auto &quote : quote_candidates) { for (const auto &delimiter : default_delimiter) { - const auto &escape_candidates = default_escape[static_cast(quoterule)]; + const auto &escape_candidates = default_escape[static_cast(quote_rule)]; for (const auto &escape : escape_candidates) { for (const auto &comment : default_comment) { Insert({delimiter, quote, escape, comment, NewLineIdentifier::SINGLE_N}); diff --git a/src/duckdb/src/function/table/sniff_csv.cpp b/src/duckdb/src/function/table/sniff_csv.cpp index 11e5cca8..2a1d1b98 100644 --- a/src/duckdb/src/function/table/sniff_csv.cpp +++ b/src/duckdb/src/function/table/sniff_csv.cpp @@ -96,6 +96,9 @@ string FormatOptions(char opt) { if (opt == '\'') { return "''"; } + if (opt == '\0') { + return ""; + } string result; result += opt; return result; @@ -214,7 +217,7 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p, << "'" << separator; } // 11.2. 
Quote - if (!sniffer_options.dialect_options.header.IsSetByUser()) { + if (!sniffer_options.dialect_options.state_machine_options.quote.IsSetByUser()) { csv_read << "quote=" << "'" << FormatOptions(sniffer_options.dialect_options.state_machine_options.quote.GetValue()) << "'" << separator; diff --git a/src/duckdb/src/function/table/version/pragma_version.cpp b/src/duckdb/src/function/table/version/pragma_version.cpp index a947648e..083c1863 100644 --- a/src/duckdb/src/function/table/version/pragma_version.cpp +++ b/src/duckdb/src/function/table/version/pragma_version.cpp @@ -1,5 +1,5 @@ #ifndef DUCKDB_PATCH_VERSION -#define DUCKDB_PATCH_VERSION "1-dev25" +#define DUCKDB_PATCH_VERSION "1-dev64" #endif #ifndef DUCKDB_MINOR_VERSION #define DUCKDB_MINOR_VERSION 1 @@ -8,10 +8,10 @@ #define DUCKDB_MAJOR_VERSION 1 #endif #ifndef DUCKDB_VERSION -#define DUCKDB_VERSION "v1.1.1-dev25" +#define DUCKDB_VERSION "v1.1.1-dev64" #endif #ifndef DUCKDB_SOURCE_ID -#define DUCKDB_SOURCE_ID "d9e89b5cc1" +#define DUCKDB_SOURCE_ID "e2b177b759" #endif #include "duckdb/function/table/system_functions.hpp" #include "duckdb/main/database.hpp" diff --git a/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp b/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp index 1055b24f..3e4cb35e 100644 --- a/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp +++ b/src/duckdb/src/include/duckdb/common/operator/cast_operators.hpp @@ -483,6 +483,66 @@ DUCKDB_API bool TryCast::Operation(double input, double &result, bool strict); //===--------------------------------------------------------------------===// // String -> Numeric Casts //===--------------------------------------------------------------------===// +static inline bool TryCastStringBool(const char *input_data, idx_t input_size, bool &result, bool strict) { + switch (input_size) { + case 1: { + unsigned char c = static_cast(std::tolower(*input_data)); + if (c == 't' || (!strict && c == 'y') || (!strict && c 
== '1')) { + result = true; + return true; + } else if (c == 'f' || (!strict && c == 'n') || (!strict && c == '0')) { + result = false; + return true; + } + return false; + } + case 2: { + unsigned char n = static_cast(std::tolower(input_data[0])); + unsigned char o = static_cast(std::tolower(input_data[1])); + if (n == 'n' && o == 'o') { + result = false; + return true; + } + return false; + } + case 3: { + unsigned char y = static_cast(std::tolower(input_data[0])); + unsigned char e = static_cast(std::tolower(input_data[1])); + unsigned char s = static_cast(std::tolower(input_data[2])); + if (y == 'y' && e == 'e' && s == 's') { + result = true; + return true; + } + return false; + } + case 4: { + unsigned char t = static_cast(std::tolower(input_data[0])); + unsigned char r = static_cast(std::tolower(input_data[1])); + unsigned char u = static_cast(std::tolower(input_data[2])); + unsigned char e = static_cast(std::tolower(input_data[3])); + if (t == 't' && r == 'r' && u == 'u' && e == 'e') { + result = true; + return true; + } + return false; + } + case 5: { + unsigned char f = static_cast(std::tolower(input_data[0])); + unsigned char a = static_cast(std::tolower(input_data[1])); + unsigned char l = static_cast(std::tolower(input_data[2])); + unsigned char s = static_cast(std::tolower(input_data[3])); + unsigned char e = static_cast(std::tolower(input_data[4])); + if (f == 'f' && a == 'a' && l == 'l' && s == 's' && e == 'e') { + result = false; + return true; + } + return false; + } + default: + return false; + } +} + template <> DUCKDB_API bool TryCast::Operation(string_t input, bool &result, bool strict); template <> diff --git a/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp b/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp index 06045e56..d49abf2c 100644 --- a/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp +++ b/src/duckdb/src/include/duckdb/common/types/row/row_data_collection.hpp @@ -20,7 
+20,8 @@ struct RowDataBlock { RowDataBlock(MemoryTag tag, BufferManager &buffer_manager, idx_t capacity, idx_t entry_size) : capacity(capacity), entry_size(entry_size), count(0), byte_offset(0) { auto size = MaxValue(buffer_manager.GetBlockSize(), capacity * entry_size); - buffer_manager.Allocate(tag, size, false, &block); + auto buffer_handle = buffer_manager.Allocate(tag, size, false); + block = buffer_handle.GetBlockHandle(); D_ASSERT(BufferManager::GetAllocSize(size) == block->GetMemoryUsage()); } diff --git a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp index 3fc2f2f8..56640e2a 100644 --- a/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp +++ b/src/duckdb/src/include/duckdb/execution/operator/csv_scanner/csv_sniffer.hpp @@ -198,7 +198,7 @@ class CSVSniffer { void InitializeDateAndTimeStampDetection(CSVStateMachine &candidate, const string &separator, const LogicalType &sql_type); //! Sets user defined date and time formats (if any) - void SetUserDefinedDateTimeFormat(CSVStateMachine &candidate); + void SetUserDefinedDateTimeFormat(CSVStateMachine &candidate) const; //! 
Functions that performs detection for date and timestamp formats void DetectDateAndTimeStampFormats(CSVStateMachine &candidate, const LogicalType &sql_type, const string &separator, const string_t &dummy_val); diff --git a/src/duckdb/src/include/duckdb/main/config.hpp b/src/duckdb/src/include/duckdb/main/config.hpp index 400f3977..15ba0286 100644 --- a/src/duckdb/src/include/duckdb/main/config.hpp +++ b/src/duckdb/src/include/duckdb/main/config.hpp @@ -365,7 +365,7 @@ struct DBConfig { DUCKDB_API IndexTypeSet &GetIndexTypes(); static idx_t GetSystemMaxThreads(FileSystem &fs); static idx_t GetSystemAvailableMemory(FileSystem &fs); - static idx_t ParseMemoryLimitSlurm(const string &arg); + static optional_idx ParseMemoryLimitSlurm(const string &arg); void SetDefaultMaxMemory(); void SetDefaultTempDirectory(); diff --git a/src/duckdb/src/include/duckdb/storage/block_manager.hpp b/src/duckdb/src/include/duckdb/storage/block_manager.hpp index e0f5ccee..7e73f037 100644 --- a/src/duckdb/src/include/duckdb/storage/block_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/block_manager.hpp @@ -88,7 +88,9 @@ class BlockManager { //! Convert an existing in-memory buffer into a persistent disk-backed block shared_ptr ConvertToPersistent(block_id_t block_id, shared_ptr old_block); - void UnregisterBlock(block_id_t block_id); + void UnregisterBlock(BlockHandle &block); + //! UnregisterBlock, only accepts non-temporary block ids + void UnregisterBlock(block_id_t id); //! Returns a reference to the metadata manager of this block manager. 
MetadataManager &GetMetadataManager(); diff --git a/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp b/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp index ad37fe6c..be1128fa 100644 --- a/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp +++ b/src/duckdb/src/include/duckdb/storage/buffer/block_handle.hpp @@ -54,7 +54,7 @@ struct TempBufferPoolReservation : BufferPoolReservation { } }; -class BlockHandle { +class BlockHandle : public enable_shared_from_this { friend class BlockManager; friend struct BufferEvictionNode; friend class BufferHandle; @@ -96,6 +96,10 @@ class BlockHandle { unswizzled = unswizzler; } + MemoryTag GetMemoryTag() const { + return tag; + } + inline void SetDestroyBufferUpon(DestroyBufferUpon destroy_buffer_upon_p) { lock_guard guard(lock); destroy_buffer_upon = destroy_buffer_upon_p; @@ -117,9 +121,8 @@ class BlockHandle { } private: - static BufferHandle Load(shared_ptr &handle, unique_ptr buffer = nullptr); - static BufferHandle LoadFromBuffer(shared_ptr &handle, data_ptr_t data, - unique_ptr reusable_buffer); + BufferHandle Load(unique_ptr buffer = nullptr); + BufferHandle LoadFromBuffer(data_ptr_t data, unique_ptr reusable_buffer); unique_ptr UnloadAndTakeBlock(); void Unload(); bool CanUnload(); diff --git a/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp b/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp index 5011f9a9..e2a3b95e 100644 --- a/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/buffer_manager.hpp @@ -34,8 +34,7 @@ class BufferManager { } public: - virtual BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true, - shared_ptr *block = nullptr) = 0; + virtual BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true) = 0; //! Reallocate an in-memory buffer that is pinned. 
virtual void ReAllocate(shared_ptr &handle, idx_t block_size) = 0; virtual BufferHandle Pin(shared_ptr &handle) = 0; @@ -102,8 +101,9 @@ class BufferManager { virtual void PurgeQueue(FileBufferType type) = 0; virtual void AddToEvictionQueue(shared_ptr &handle); virtual void WriteTemporaryBuffer(MemoryTag tag, block_id_t block_id, FileBuffer &buffer); - virtual unique_ptr ReadTemporaryBuffer(MemoryTag tag, block_id_t id, unique_ptr buffer); - virtual void DeleteTemporaryFile(block_id_t id); + virtual unique_ptr ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block, + unique_ptr buffer); + virtual void DeleteTemporaryFile(BlockHandle &block); }; } // namespace duckdb diff --git a/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp b/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp index b978535c..e4de9608 100644 --- a/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp +++ b/src/duckdb/src/include/duckdb/storage/standard_buffer_manager.hpp @@ -62,8 +62,7 @@ class StandardBufferManager : public BufferManager { //! Allocate an in-memory buffer with a single pin. //! The allocated memory is released when the buffer handle is destroyed. - DUCKDB_API BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true, - shared_ptr *block = nullptr) final; + DUCKDB_API BufferHandle Allocate(MemoryTag tag, idx_t block_size, bool can_destroy = true) final; //! Reallocate an in-memory buffer that is pinned. void ReAllocate(shared_ptr &handle, idx_t block_size) final; @@ -129,12 +128,12 @@ class StandardBufferManager : public BufferManager { //! Write a temporary buffer to disk void WriteTemporaryBuffer(MemoryTag tag, block_id_t block_id, FileBuffer &buffer) final; //! Read a temporary buffer from disk - unique_ptr ReadTemporaryBuffer(MemoryTag tag, block_id_t id, + unique_ptr ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block, unique_ptr buffer = nullptr) final; //! 
Get the path of the temporary buffer string GetTemporaryPath(block_id_t id); - void DeleteTemporaryFile(block_id_t id) final; + void DeleteTemporaryFile(BlockHandle &block) final; void RequireTemporaryDirectory(); diff --git a/src/duckdb/src/main/capi/duckdb_value-c.cpp b/src/duckdb/src/main/capi/duckdb_value-c.cpp index b231f919..defdf6a3 100644 --- a/src/duckdb/src/main/capi/duckdb_value-c.cpp +++ b/src/duckdb/src/main/capi/duckdb_value-c.cpp @@ -282,7 +282,7 @@ idx_t duckdb_get_map_size(duckdb_value value) { } auto val = UnwrapValue(value); - if (val.type() != LogicalTypeId::MAP) { + if (val.type().id() != LogicalTypeId::MAP) { return 0; } @@ -296,7 +296,7 @@ duckdb_value duckdb_get_map_key(duckdb_value value, idx_t index) { } auto val = UnwrapValue(value); - if (val.type() != LogicalTypeId::MAP) { + if (val.type().id() != LogicalTypeId::MAP) { return nullptr; } @@ -316,7 +316,7 @@ duckdb_value duckdb_get_map_value(duckdb_value value, idx_t index) { } auto val = UnwrapValue(value); - if (val.type() != LogicalTypeId::MAP) { + if (val.type().id() != LogicalTypeId::MAP) { return nullptr; } diff --git a/src/duckdb/src/main/config.cpp b/src/duckdb/src/main/config.cpp index 25af4eee..6921cb49 100644 --- a/src/duckdb/src/main/config.cpp +++ b/src/duckdb/src/main/config.cpp @@ -363,16 +363,22 @@ idx_t DBConfig::GetSystemMaxThreads(FileSystem &fs) { } idx_t DBConfig::GetSystemAvailableMemory(FileSystem &fs) { +#ifdef __linux__ // Check SLURM environment variables first const char *slurm_mem_per_node = getenv("SLURM_MEM_PER_NODE"); const char *slurm_mem_per_cpu = getenv("SLURM_MEM_PER_CPU"); if (slurm_mem_per_node) { - return ParseMemoryLimitSlurm(slurm_mem_per_node); + auto limit = ParseMemoryLimitSlurm(slurm_mem_per_node); + if (limit.IsValid()) { + return limit.GetIndex(); + } } else if (slurm_mem_per_cpu) { - idx_t mem_per_cpu = ParseMemoryLimitSlurm(slurm_mem_per_cpu); - idx_t num_threads = GetSystemMaxThreads(fs); - return mem_per_cpu * num_threads; + auto 
mem_per_cpu = ParseMemoryLimitSlurm(slurm_mem_per_cpu); + if (mem_per_cpu.IsValid()) { + idx_t num_threads = GetSystemMaxThreads(fs); + return mem_per_cpu.GetIndex() * num_threads; + } } // Check cgroup memory limit @@ -380,8 +386,9 @@ idx_t DBConfig::GetSystemAvailableMemory(FileSystem &fs) { if (cgroup_memory_limit.IsValid()) { return cgroup_memory_limit.GetIndex(); } +#endif - // Fall back to system memory detection + // System memory detection auto memory = FileSystem::GetAvailableMemory(); if (!memory.IsValid()) { return DBConfigOptions().maximum_memory; @@ -451,9 +458,9 @@ idx_t DBConfig::ParseMemoryLimit(const string &arg) { return LossyNumericCast(static_cast(multiplier) * limit); } -idx_t DBConfig::ParseMemoryLimitSlurm(const string &arg) { +optional_idx DBConfig::ParseMemoryLimitSlurm(const string &arg) { if (arg.empty()) { - return 0; + return optional_idx(); } string number_str = arg; @@ -475,13 +482,19 @@ idx_t DBConfig::ParseMemoryLimitSlurm(const string &arg) { } // Parse the number - double limit = Cast::Operation(string_t(number_str)); + double limit; + if (!TryCast::Operation(string_t(number_str), limit)) { + return optional_idx(); + } if (limit < 0) { - return NumericLimits::Maximum(); + return static_cast(NumericLimits::Maximum()); } - - return LossyNumericCast(static_cast(multiplier) * limit); + idx_t actual_limit = LossyNumericCast(static_cast(multiplier) * limit); + if (actual_limit == NumericLimits::Maximum()) { + return static_cast(NumericLimits::Maximum()); + } + return actual_limit; } // Right now we only really care about access mode when comparing DBConfigs diff --git a/src/duckdb/src/storage/buffer/block_handle.cpp b/src/duckdb/src/storage/buffer/block_handle.cpp index e2442c34..9523b296 100644 --- a/src/duckdb/src/storage/buffer/block_handle.cpp +++ b/src/duckdb/src/storage/buffer/block_handle.cpp @@ -49,7 +49,7 @@ BlockHandle::~BlockHandle() { // NOLINT: allow internal exceptions D_ASSERT(memory_charge.size == 0); } - 
block_manager.UnregisterBlock(block_id); + block_manager.UnregisterBlock(*this); } unique_ptr AllocateBlock(BlockManager &block_manager, unique_ptr reusable_buffer, @@ -71,39 +71,36 @@ unique_ptr AllocateBlock(BlockManager &block_manager, unique_ptr &handle, data_ptr_t data, - unique_ptr reusable_buffer) { - D_ASSERT(handle->state != BlockState::BLOCK_LOADED); +BufferHandle BlockHandle::LoadFromBuffer(data_ptr_t data, unique_ptr reusable_buffer) { + D_ASSERT(state != BlockState::BLOCK_LOADED); // copy over the data into the block from the file buffer - auto block = AllocateBlock(handle->block_manager, std::move(reusable_buffer), handle->block_id); + auto block = AllocateBlock(block_manager, std::move(reusable_buffer), block_id); memcpy(block->InternalBuffer(), data, block->AllocSize()); - handle->buffer = std::move(block); - handle->state = BlockState::BLOCK_LOADED; - return BufferHandle(handle); + buffer = std::move(block); + state = BlockState::BLOCK_LOADED; + return BufferHandle(shared_from_this()); } -BufferHandle BlockHandle::Load(shared_ptr &handle, unique_ptr reusable_buffer) { - if (handle->state == BlockState::BLOCK_LOADED) { +BufferHandle BlockHandle::Load(unique_ptr reusable_buffer) { + if (state == BlockState::BLOCK_LOADED) { // already loaded - D_ASSERT(handle->buffer); - return BufferHandle(handle); + D_ASSERT(buffer); + return BufferHandle(shared_from_this()); } - auto &block_manager = handle->block_manager; - if (handle->block_id < MAXIMUM_BLOCK) { - auto block = AllocateBlock(block_manager, std::move(reusable_buffer), handle->block_id); + if (block_id < MAXIMUM_BLOCK) { + auto block = AllocateBlock(block_manager, std::move(reusable_buffer), block_id); block_manager.Read(*block); - handle->buffer = std::move(block); + buffer = std::move(block); } else { - if (handle->MustWriteToTemporaryFile()) { - handle->buffer = block_manager.buffer_manager.ReadTemporaryBuffer(handle->tag, handle->block_id, - std::move(reusable_buffer)); + if 
(MustWriteToTemporaryFile()) { + buffer = block_manager.buffer_manager.ReadTemporaryBuffer(tag, *this, std::move(reusable_buffer)); } else { return BufferHandle(); // Destroyed upon unpin/evict, so there is no temp buffer to read } } - handle->state = BlockState::BLOCK_LOADED; - return BufferHandle(handle); + state = BlockState::BLOCK_LOADED; + return BufferHandle(shared_from_this()); } unique_ptr BlockHandle::UnloadAndTakeBlock() { diff --git a/src/duckdb/src/storage/buffer/block_manager.cpp b/src/duckdb/src/storage/buffer/block_manager.cpp index 32935a63..22cb54d1 100644 --- a/src/duckdb/src/storage/buffer/block_manager.cpp +++ b/src/duckdb/src/storage/buffer/block_manager.cpp @@ -70,14 +70,22 @@ shared_ptr BlockManager::ConvertToPersistent(block_id_t block_id, s return new_block; } -void BlockManager::UnregisterBlock(block_id_t block_id) { - if (block_id >= MAXIMUM_BLOCK) { +void BlockManager::UnregisterBlock(block_id_t id) { + D_ASSERT(id < MAXIMUM_BLOCK); + lock_guard lock(blocks_lock); + // on-disk block: erase from list of blocks in manager + blocks.erase(id); +} + +void BlockManager::UnregisterBlock(BlockHandle &block) { + auto id = block.BlockId(); + if (id >= MAXIMUM_BLOCK) { // in-memory buffer: buffer could have been offloaded to disk: remove the file - buffer_manager.DeleteTemporaryFile(block_id); + buffer_manager.DeleteTemporaryFile(block); } else { lock_guard lock(blocks_lock); // on-disk block: erase from list of blocks in manager - blocks.erase(block_id); + blocks.erase(id); } } diff --git a/src/duckdb/src/storage/buffer_manager.cpp b/src/duckdb/src/storage/buffer_manager.cpp index 4ee16fc7..c758fae8 100644 --- a/src/duckdb/src/storage/buffer_manager.cpp +++ b/src/duckdb/src/storage/buffer_manager.cpp @@ -79,11 +79,12 @@ void BufferManager::WriteTemporaryBuffer(MemoryTag tag, block_id_t block_id, Fil throw NotImplementedException("This type of BufferManager does not support 'WriteTemporaryBuffer"); } -unique_ptr 
BufferManager::ReadTemporaryBuffer(MemoryTag tag, block_id_t id, unique_ptr buffer) { +unique_ptr BufferManager::ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block, + unique_ptr buffer) { throw NotImplementedException("This type of BufferManager does not support 'ReadTemporaryBuffer"); } -void BufferManager::DeleteTemporaryFile(block_id_t id) { +void BufferManager::DeleteTemporaryFile(BlockHandle &block) { throw NotImplementedException("This type of BufferManager does not support 'DeleteTemporaryFile"); } diff --git a/src/duckdb/src/storage/compression/string_uncompressed.cpp b/src/duckdb/src/storage/compression/string_uncompressed.cpp index 6986a1aa..aa340f78 100644 --- a/src/duckdb/src/storage/compression/string_uncompressed.cpp +++ b/src/duckdb/src/storage/compression/string_uncompressed.cpp @@ -315,7 +315,8 @@ void UncompressedStringStorage::WriteStringMemory(ColumnSegment &segment, string new_block->offset = 0; new_block->size = alloc_size; // allocate an in-memory buffer for it - handle = buffer_manager.Allocate(MemoryTag::OVERFLOW_STRINGS, alloc_size, false, &block); + handle = buffer_manager.Allocate(MemoryTag::OVERFLOW_STRINGS, alloc_size, false); + block = handle.GetBlockHandle(); state.overflow_blocks.insert(make_pair(block->BlockId(), reference(*new_block))); new_block->block = std::move(block); new_block->next = std::move(state.head); diff --git a/src/duckdb/src/storage/metadata/metadata_manager.cpp b/src/duckdb/src/storage/metadata/metadata_manager.cpp index 2634a2d5..f29b61df 100644 --- a/src/duckdb/src/storage/metadata/metadata_manager.cpp +++ b/src/duckdb/src/storage/metadata/metadata_manager.cpp @@ -61,27 +61,28 @@ MetadataHandle MetadataManager::Pin(MetadataPointer pointer) { return handle; } -void MetadataManager::ConvertToTransient(MetadataBlock &block) { +void MetadataManager::ConvertToTransient(MetadataBlock &metadata_block) { // pin the old block - auto old_buffer = buffer_manager.Pin(block.block); + auto old_buffer = 
buffer_manager.Pin(metadata_block.block); // allocate a new transient block to replace it - shared_ptr new_block; - auto new_buffer = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false, &new_block); + auto new_buffer = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false); + auto new_block = new_buffer.GetBlockHandle(); // copy the data to the transient block memcpy(new_buffer.Ptr(), old_buffer.Ptr(), block_manager.GetBlockSize()); - block.block = std::move(new_block); + metadata_block.block = std::move(new_block); // unregister the old block - block_manager.UnregisterBlock(block.block_id); + block_manager.UnregisterBlock(metadata_block.block_id); } block_id_t MetadataManager::AllocateNewBlock() { auto new_block_id = GetNextBlockId(); MetadataBlock new_block; - auto handle = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false, &new_block.block); + auto handle = buffer_manager.Allocate(MemoryTag::METADATA, block_manager.GetBlockSize(), false); + new_block.block = handle.GetBlockHandle(); new_block.block_id = new_block_id; for (idx_t i = 0; i < METADATA_BLOCK_COUNT; i++) { new_block.free_blocks.push_back(NumericCast(METADATA_BLOCK_COUNT - i - 1)); diff --git a/src/duckdb/src/storage/standard_buffer_manager.cpp b/src/duckdb/src/storage/standard_buffer_manager.cpp index 41fb28ea..da10afce 100644 --- a/src/duckdb/src/storage/standard_buffer_manager.cpp +++ b/src/duckdb/src/storage/standard_buffer_manager.cpp @@ -126,9 +126,8 @@ shared_ptr StandardBufferManager::RegisterTransientMemory(const idx return RegisterSmallMemory(size); } - shared_ptr block; - Allocate(MemoryTag::IN_MEMORY_TABLE, size, false, &block); - return block; + auto buffer_handle = Allocate(MemoryTag::IN_MEMORY_TABLE, size, false); + return buffer_handle.GetBlockHandle(); } shared_ptr StandardBufferManager::RegisterSmallMemory(const idx_t size) { @@ -164,17 +163,14 @@ shared_ptr 
StandardBufferManager::RegisterMemory(MemoryTag tag, idx destroy_buffer_upon, alloc_size, std::move(res)); } -BufferHandle StandardBufferManager::Allocate(MemoryTag tag, idx_t block_size, bool can_destroy, - shared_ptr *block) { - shared_ptr local_block; - auto block_ptr = block ? block : &local_block; - *block_ptr = RegisterMemory(tag, block_size, can_destroy); +BufferHandle StandardBufferManager::Allocate(MemoryTag tag, idx_t block_size, bool can_destroy) { + auto block = RegisterMemory(tag, block_size, can_destroy); #ifdef DUCKDB_DEBUG_DESTROY_BLOCKS // Initialize the memory with garbage data - WriteGarbageIntoBuffer(*(*block_ptr)->buffer); + WriteGarbageIntoBuffer(*block->buffer); #endif - return Pin(*block_ptr); + return Pin(block); } void StandardBufferManager::ReAllocate(shared_ptr &handle, idx_t block_size) { @@ -253,7 +249,7 @@ void StandardBufferManager::BatchRead(vector> &handles, } auto block_ptr = intermediate_buffer.GetFileBuffer().InternalBuffer() + block_idx * block_manager.GetBlockAllocSize(); - buf = BlockHandle::LoadFromBuffer(handle, block_ptr, std::move(reusable_buffer)); + buf = handle->LoadFromBuffer(block_ptr, std::move(reusable_buffer)); handle->readers = 1; handle->memory_charge = std::move(reservation); } @@ -314,7 +310,7 @@ BufferHandle StandardBufferManager::Pin(shared_ptr &handle) { if (handle->state == BlockState::BLOCK_LOADED) { // the block is loaded, increment the reader count and set the BufferHandle handle->readers++; - buf = handle->Load(handle); + buf = handle->Load(); } required_memory = handle->memory_usage; } @@ -335,11 +331,11 @@ BufferHandle StandardBufferManager::Pin(shared_ptr &handle) { // the block is loaded, increment the reader count and return a pointer to the handle handle->readers++; reservation.Resize(0); - buf = handle->Load(handle); + buf = handle->Load(); } else { // now we can actually load the current block D_ASSERT(handle->readers == 0); - buf = handle->Load(handle, std::move(reusable_buffer)); + buf = 
handle->Load(std::move(reusable_buffer)); handle->readers = 1; handle->memory_charge = std::move(reservation); // in the case of a variable sized block, the buffer may be smaller than a full block. @@ -480,33 +476,34 @@ void StandardBufferManager::WriteTemporaryBuffer(MemoryTag tag, block_id_t block buffer.Write(*handle, sizeof(idx_t)); } -unique_ptr StandardBufferManager::ReadTemporaryBuffer(MemoryTag tag, block_id_t id, +unique_ptr StandardBufferManager::ReadTemporaryBuffer(MemoryTag tag, BlockHandle &block, unique_ptr reusable_buffer) { D_ASSERT(!temporary_directory.path.empty()); D_ASSERT(temporary_directory.handle.get()); + auto id = block.BlockId(); if (temporary_directory.handle->GetTempFile().HasTemporaryBuffer(id)) { - evicted_data_per_tag[uint8_t(tag)] -= GetBlockSize(); + // This is a block that was offloaded to a regular .tmp file, the file contains blocks of a fixed size return temporary_directory.handle->GetTempFile().ReadTemporaryBuffer(id, std::move(reusable_buffer)); } - // Open the temporary file and read its size. + // This block contains data of variable size so we need to open it and read it to get its size. idx_t block_size; auto path = GetTemporaryPath(id); auto &fs = FileSystem::GetFileSystem(db); auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ); handle->Read(&block_size, sizeof(idx_t), 0); - evicted_data_per_tag[uint8_t(tag)] -= block_size; // Allocate a buffer of the file's size and read the data into that buffer. auto buffer = ReadTemporaryBufferInternal(*this, *handle, sizeof(idx_t), block_size, std::move(reusable_buffer)); handle.reset(); // Delete the file and return the buffer. 
- DeleteTemporaryFile(id); + DeleteTemporaryFile(block); return buffer; } -void StandardBufferManager::DeleteTemporaryFile(block_id_t id) { +void StandardBufferManager::DeleteTemporaryFile(BlockHandle &block) { + auto id = block.BlockId(); if (temporary_directory.path.empty()) { // no temporary directory specified: nothing to delete return; @@ -520,6 +517,7 @@ void StandardBufferManager::DeleteTemporaryFile(block_id_t id) { } // check if we should delete the file from the shared pool of files, or from the general file system if (temporary_directory.handle->GetTempFile().HasTemporaryBuffer(id)) { + evicted_data_per_tag[uint8_t(block.GetMemoryTag())] -= GetBlockSize(); temporary_directory.handle->GetTempFile().DeleteTemporaryBuffer(id); return; } @@ -528,6 +526,7 @@ void StandardBufferManager::DeleteTemporaryFile(block_id_t id) { auto &fs = FileSystem::GetFileSystem(db); auto path = GetTemporaryPath(id); if (fs.FileExists(path)) { + evicted_data_per_tag[uint8_t(block.GetMemoryTag())] -= block.GetMemoryUsage(); auto handle = fs.OpenFile(path, FileFlags::FILE_FLAGS_READ); auto content_size = handle->GetFileSize(); handle.reset(); diff --git a/src/duckdb/src/storage/table/column_segment.cpp b/src/duckdb/src/storage/table/column_segment.cpp index c26d895f..ae73eef9 100644 --- a/src/duckdb/src/storage/table/column_segment.cpp +++ b/src/duckdb/src/storage/table/column_segment.cpp @@ -161,8 +161,8 @@ void ColumnSegment::Resize(idx_t new_size) { auto &buffer_manager = BufferManager::GetBufferManager(db); auto old_handle = buffer_manager.Pin(block); - shared_ptr new_block; - auto new_handle = buffer_manager.Allocate(MemoryTag::IN_MEMORY_TABLE, new_size, false, &new_block); + auto new_handle = buffer_manager.Allocate(MemoryTag::IN_MEMORY_TABLE, new_size); + auto new_block = new_handle.GetBlockHandle(); memcpy(new_handle.Ptr(), old_handle.Ptr(), segment_size); this->block_id = new_block->BlockId(); diff --git a/src/duckdb/src/storage/wal_replay.cpp 
b/src/duckdb/src/storage/wal_replay.cpp index 890a5a61..6d77b071 100644 --- a/src/duckdb/src/storage/wal_replay.cpp +++ b/src/duckdb/src/storage/wal_replay.cpp @@ -554,9 +554,8 @@ void WriteAheadLogDeserializer::ReplayCreateIndex() { for (idx_t j = 0; j < data_info.allocation_sizes.size(); j++) { // read the data into a buffer handle - shared_ptr block_handle; - buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager->GetBlockSize(), false, &block_handle); - auto buffer_handle = buffer_manager.Pin(block_handle); + auto buffer_handle = buffer_manager.Allocate(MemoryTag::ART_INDEX, block_manager->GetBlockSize(), false); + auto block_handle = buffer_handle.GetBlockHandle(); auto data_ptr = buffer_handle.Ptr(); list.ReadElement(data_ptr, data_info.allocation_sizes[j]);