Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@b33069b (#247)
Browse files Browse the repository at this point in the history
  • Loading branch information
github-actions[bot] authored Sep 6, 2024
1 parent e904df3 commit 6b61306
Show file tree
Hide file tree
Showing 10 changed files with 111 additions and 75 deletions.
18 changes: 16 additions & 2 deletions src/duckdb/src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
#include "duckdb/common/types/hash.hpp"
#include "duckdb/function/cast/cast_function_set.hpp"
#include "duckdb/main/error_manager.hpp"

#include "duckdb/common/types/varint.hpp"
#include "duckdb/common/serializer/serializer.hpp"
#include "duckdb/common/serializer/deserializer.hpp"

Expand Down Expand Up @@ -275,6 +275,11 @@ Value Value::MinimumValue(const LogicalType &type) {
}
case LogicalTypeId::ENUM:
return Value::ENUM(0, type);
case LogicalTypeId::VARINT:
return Value::VARINT(Varint::VarcharToVarInt(
"-179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540"
"4589535143824642343213268894641827684675467035375169860499105765512820762454900903893289440758685084551339"
"42304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368"));
default:
throw InvalidTypeException(type, "MinimumValue requires numeric type");
}
Expand Down Expand Up @@ -355,6 +360,11 @@ Value Value::MaximumValue(const LogicalType &type) {
auto enum_size = EnumType::GetSize(type);
return Value::ENUM(enum_size - (enum_size ? 1 : 0), type);
}
case LogicalTypeId::VARINT:
return Value::VARINT(Varint::VarcharToVarInt(
"1797693134862315708145274237317043567980705675258449965989174768031572607800285387605895586327668781715404"
"5895351438246423432132688946418276846754670353751698604991057655128207624549009038932894407586850845513394"
"2304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368"));
default:
throw InvalidTypeException(type, "MaximumValue requires numeric type");
}
Expand Down Expand Up @@ -849,9 +859,13 @@ Value Value::BLOB(const_data_ptr_t data, idx_t len) {
}

// Builds a VARINT value from a raw byte buffer: the first `len` bytes at `data` are copied
// into a string, which is then handed to the string-based VARINT overload.
Value Value::VARINT(const_data_ptr_t data, idx_t len) {
	const string blob(const_char_ptr_cast(data), len);
	return VARINT(blob);
}

// Builds a non-NULL VARINT value whose payload is the given blob string.
// `data` is stored as-is in a StringValueInfo; no validation of the blob is performed here.
Value Value::VARINT(const string &data) {
	Value result(LogicalType::VARINT);
	result.is_null = false;
	// Only the string-based assignment belongs in this overload: the previous
	// pointer/length construction referenced `len`, which is not a parameter here.
	result.value_info_ = make_shared_ptr<StringValueInfo>(data);
	return result;
}

Expand Down
76 changes: 75 additions & 1 deletion src/duckdb/src/common/types/varint.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#include "duckdb/common/types/varint.hpp"
#include "duckdb/common/exception/conversion_exception.hpp"
#include <cmath>

namespace duckdb {
Expand Down Expand Up @@ -69,6 +70,14 @@ string_t Varint::InitializeVarintZero(Vector &result) {
return blob;
}

// Returns a standalone VARINT blob representing the value 0:
// a header (written by SetHeader for one data byte, non-negative) followed by a single zero data byte.
string Varint::InitializeVarintZero() {
	uint32_t blob_size = 1 + VARINT_HEADER_SIZE;
	string result(blob_size, '0');
	SetHeader(&result[0], 1, false);
	// The single data byte sits directly after the header; index it via the header size
	// constant rather than a hard-coded offset so both stay in sync.
	result[VARINT_HEADER_SIZE] = 0;
	return result;
}

// Maps a character to its offset from the character '0' (e.g. '7' -> 7).
// No range check is performed on the input.
int Varint::CharToDigit(char c) {
	const int zero_char = '0';
	const int digit = static_cast<int>(c) - zero_char;
	return digit;
}
Expand All @@ -79,7 +88,8 @@ char Varint::DigitToChar(int digit) {
return static_cast<char>(digit + '0');
}

bool Varint::VarcharFormatting(string_t &value, idx_t &start_pos, idx_t &end_pos, bool &is_negative, bool &is_zero) {
bool Varint::VarcharFormatting(const string_t &value, idx_t &start_pos, idx_t &end_pos, bool &is_negative,
bool &is_zero) {
// If it's empty we error
if (value.Empty()) {
return false;
Expand Down Expand Up @@ -188,6 +198,70 @@ string Varint::VarIntToVarchar(const string_t &blob) {
return decimal_string;
}

// Converts a decimal string into a VARINT blob (header followed by the magnitude bytes,
// most significant byte first; for negative values every data byte is stored bit-inverted).
//
// value: the textual number; it is validated/trimmed through VarcharFormatting.
// Returns the blob as a string, including the header.
// Throws ConversionException if VarcharFormatting rejects the input.
string Varint::VarcharToVarInt(const string_t &value) {
	idx_t start_pos, end_pos;
	bool is_negative, is_zero;
	if (!VarcharFormatting(value, start_pos, end_pos, is_negative, is_zero)) {
		throw ConversionException("Could not convert string \'%s\' to Varint", value.GetString());
	}
	if (is_zero) {
		// Return Value 0
		return InitializeVarintZero();
	}
	const char *int_value_char = value.GetData();
	const idx_t actual_size = end_pos - start_pos;

	// we initialize result with space for our header
	string result(VARINT_HEADER_SIZE, '0');
	unsafe_vector<uint64_t> digits;

	// The decimal string is split into chunks of at most 16 digits, i.e. base-10^16 limbs.
	// During the division below a limb receives a carry of at most 255 * 10^16, so the
	// largest intermediate value is below 256 * 10^16 (~2.56 * 10^18), which still fits in
	// a uint64_t (max ~1.84 * 10^19). One more digit per chunk would overflow.
	constexpr idx_t max_digits = 16;
	constexpr uint64_t chunk_base = 10000000000000000ULL; // 10^max_digits
	// Integer ceiling division: number of chunks needed to cover all digits
	const idx_t number_of_digits = (actual_size + max_digits - 1) / max_digits;

	// lets convert the string to a uint64_t vector, least significant chunk first
	idx_t cur_end = end_pos;
	for (idx_t i = 0; i < number_of_digits; i++) {
		// The most significant chunk may be shorter than max_digits
		const idx_t remaining = cur_end - start_pos;
		const idx_t chunk_len = remaining < max_digits ? remaining : max_digits;
		const idx_t cur_start = cur_end - chunk_len;
		std::string current_number(int_value_char + cur_start, chunk_len);
		digits.push_back(std::stoull(current_number));
		// move cur_end to the next (more significant) chunk
		cur_end = cur_start;
	}

	// Now that we have our uint64_t vector, repeatedly divide the whole number by 256;
	// each pass yields the next byte of the result (least significant byte first).
	while (!digits.empty()) {
		uint8_t remainder = 0;
		// Long division from the most significant limb down to the least significant one
		for (idx_t digit_idx = digits.size(); digit_idx-- > 0;) {
			digits[digit_idx] += static_cast<uint64_t>(remainder) * chunk_base;
			remainder = static_cast<uint8_t>(digits[digit_idx] % 256);
			digits[digit_idx] /= 256;
			if (digits[digit_idx] == 0 && digit_idx == digits.size() - 1) {
				// the leading limb became zero, so we can drop it
				digits.pop_back();
			}
		}
		if (is_negative) {
			result.push_back(static_cast<char>(~remainder));
		} else {
			result.push_back(static_cast<char>(remainder));
		}
	}
	// Bytes were produced least significant first; flip them so the most significant comes first
	std::reverse(result.begin() + VARINT_HEADER_SIZE, result.end());
	// Set header after we know the size of the varint
	SetHeader(&result[0], result.size() - VARINT_HEADER_SIZE, is_negative);
	return result;
}

bool Varint::VarintToDouble(string_t &blob, double &result, bool &strict) {
result = 0;
bool is_negative;
Expand Down
4 changes: 4 additions & 0 deletions src/duckdb/src/core_functions/scalar/string/hex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,8 @@ ScalarFunctionSet HexFun::GetFunctions() {
ScalarFunctionSet to_hex;
to_hex.AddFunction(
ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToHexFunction<string_t, HexStrOperator>));
to_hex.AddFunction(
ScalarFunction({LogicalType::VARINT}, LogicalType::VARCHAR, ToHexFunction<string_t, HexStrOperator>));
to_hex.AddFunction(
ScalarFunction({LogicalType::BLOB}, LogicalType::VARCHAR, ToHexFunction<string_t, HexStrOperator>));
to_hex.AddFunction(
Expand All @@ -414,6 +416,8 @@ ScalarFunctionSet BinFun::GetFunctions() {

to_binary.AddFunction(
ScalarFunction({LogicalType::VARCHAR}, LogicalType::VARCHAR, ToBinaryFunction<string_t, BinaryStrOperator>));
to_binary.AddFunction(
ScalarFunction({LogicalType::VARINT}, LogicalType::VARCHAR, ToBinaryFunction<string_t, BinaryStrOperator>));
to_binary.AddFunction(ScalarFunction({LogicalType::UBIGINT}, LogicalType::VARCHAR,
ToBinaryFunction<uint64_t, BinaryIntegralOperator>));
to_binary.AddFunction(
Expand Down
68 changes: 4 additions & 64 deletions src/duckdb/src/function/cast/varint_casts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,75 +164,15 @@ string_t HugeintCastToVarInt::Operation(hugeint_t int_value, Vector &result) {
template <>
bool TryCastToVarInt::Operation(string_t input_value, string_t &result_value, Vector &result,
CastParameters &parameters) {
idx_t start_pos, end_pos;
bool is_negative, is_zero;
if (!Varint::VarcharFormatting(input_value, start_pos, end_pos, is_negative, is_zero)) {
return false;
}
if (is_zero) {
// Return Value 0
result_value = Varint::InitializeVarintZero(result);
return true;
}
auto int_value_char = input_value.GetData();
idx_t actual_size = end_pos - start_pos;
// convert the string to a byte array
string blob_string;

unsafe_vector<uint64_t> digits;

// The max number a uint64_t can represent is 18.446.744.073.709.551.615
// That has 20 digits
// In the worst case a remainder of a division will be 255, which is 3 digits
// Since the max value is 184, we need to take one more digit out
// Hence we end up with a max of 16 digits supported.
constexpr uint8_t max_digits = 16;
const idx_t number_of_digits = static_cast<idx_t>(std::ceil(static_cast<double>(actual_size) / max_digits));

// lets convert the string to a uint64_t vector
idx_t cur_end = end_pos;
for (idx_t i = 0; i < number_of_digits; i++) {
idx_t cur_start = static_cast<int64_t>(start_pos) > static_cast<int64_t>(cur_end - max_digits)
? start_pos
: cur_end - max_digits;
std::string current_number(int_value_char + cur_start, cur_end - cur_start);
digits.push_back(std::stoull(current_number));
// move cur_end to more digits down the road
cur_end = cur_end - max_digits;
}
auto blob_string = Varint::VarcharToVarInt(input_value);

// Now that we have our uint64_t vector, lets start our division process to figure out the new number and remainder
while (!digits.empty()) {
idx_t digit_idx = digits.size() - 1;
uint8_t remainder = 0;
idx_t digits_size = digits.size();
for (idx_t i = 0; i < digits_size; i++) {
digits[digit_idx] += static_cast<uint64_t>(remainder * pow(10, max_digits));
remainder = static_cast<uint8_t>(digits[digit_idx] % 256);
digits[digit_idx] /= 256;
if (digits[digit_idx] == 0 && digit_idx == digits.size() - 1) {
// we can cap this
digits.pop_back();
}
digit_idx--;
}
if (is_negative) {
blob_string.push_back(static_cast<char>(~remainder));
} else {
blob_string.push_back(static_cast<char>(remainder));
}
}

uint32_t blob_size = static_cast<uint32_t>(blob_string.size() + Varint::VARINT_HEADER_SIZE);
uint32_t blob_size = static_cast<uint32_t>(blob_string.size());
result_value = StringVector::EmptyString(result, blob_size);
auto writable_blob = result_value.GetDataWriteable();

Varint::SetHeader(writable_blob, blob_string.size(), is_negative);

// Write string_blob into blob
idx_t blob_string_idx = blob_string.size() - 1;
for (idx_t i = Varint::VARINT_HEADER_SIZE; i < blob_size; i++) {
writable_blob[i] = blob_string[blob_string_idx--];
for (idx_t i = 0; i < blob_string.size(); i++) {
writable_blob[i] = blob_string[i];
}
result_value.Finalize();
return true;
Expand Down
1 change: 1 addition & 0 deletions src/duckdb/src/function/table/system/test_all_types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ vector<TestType> TestAllTypesFun::GetTestTypes(bool use_large_enum) {
result.emplace_back(LogicalType::USMALLINT, "usmallint");
result.emplace_back(LogicalType::UINTEGER, "uint");
result.emplace_back(LogicalType::UBIGINT, "ubigint");
result.emplace_back(LogicalType::VARINT, "varint");
result.emplace_back(LogicalType::DATE, "date");
result.emplace_back(LogicalType::TIME, "time");
result.emplace_back(LogicalType::TIMESTAMP, "timestamp");
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "1-dev4474"
#define DUCKDB_PATCH_VERSION "1-dev4488"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 0
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.0.1-dev4474"
#define DUCKDB_VERSION "v1.0.1-dev4488"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "9ad037f3ad"
#define DUCKDB_SOURCE_ID "b33069bb4e"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
2 changes: 1 addition & 1 deletion src/duckdb/src/include/duckdb/common/types/value.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,8 +194,8 @@ class Value {
//! Creates a bitstring by casting a specified string to a bitstring
DUCKDB_API static Value BIT(const_data_ptr_t data, idx_t len);
DUCKDB_API static Value BIT(const string &data);

DUCKDB_API static Value VARINT(const_data_ptr_t data, idx_t len);
DUCKDB_API static Value VARINT(const string &data);

//! Creates an aggregate state
DUCKDB_API static Value AGGREGATE_STATE(const LogicalType &type, const_data_ptr_t data, idx_t len); // NOLINT
Expand Down
7 changes: 5 additions & 2 deletions src/duckdb/src/include/duckdb/common/types/varint.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,15 @@ class Varint {
DUCKDB_API static void SetHeader(char *blob, uint64_t number_of_bytes, bool is_negative);
//! Initializes and returns a blob with value 0, allocated in Vector& result
DUCKDB_API static string_t InitializeVarintZero(Vector &result);
DUCKDB_API static string InitializeVarintZero();

//! Switch case for the conversion to Varint
DUCKDB_API static BoundCastInfo NumericToVarintCastSwitch(const LogicalType &source);

//! ----------------------------------- Varchar Cast ----------------------------------- //
//! Function to prepare a varchar for conversion. We trim zeros, check for negative values, and so on
//! Returns false if this is an invalid varchar
DUCKDB_API static bool VarcharFormatting(string_t &value, idx_t &start_pos, idx_t &end_pos, bool &is_negative,
DUCKDB_API static bool VarcharFormatting(const string_t &value, idx_t &start_pos, idx_t &end_pos, bool &is_negative,
bool &is_zero);

//! Converts a char to a Digit
Expand All @@ -47,7 +49,8 @@ class Varint {
DUCKDB_API static void GetByteArray(vector<uint8_t> &byte_array, bool &is_negative, const string_t &blob);
//! Function to convert VARINT blob to a VARCHAR
DUCKDB_API static string VarIntToVarchar(const string_t &blob);

//! Function to convert Varchar to VARINT blob
DUCKDB_API static string VarcharToVarInt(const string_t &value);
//! ----------------------------------- Double Cast ----------------------------------- //
DUCKDB_API static bool VarintToDouble(string_t &blob, double &result, bool &strict);
};
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/_snaps/types.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

Code
as.list(dbGetQuery(con,
"SELECT * EXCLUDE (timestamp_tz, time_tz, timestamp_ns, timestamp_array, timestamptz_array, bit, \"union\", fixed_int_array, fixed_varchar_array, fixed_nested_int_array, fixed_nested_varchar_array, fixed_struct_array, struct_of_fixed_array, fixed_array_of_int_list, list_of_fixed_int_array) REPLACE(replace(varchar, chr(0), '') AS varchar) FROM test_all_types(use_large_enum=true)"))
"SELECT * EXCLUDE (timestamp_tz, time_tz, timestamp_ns, timestamp_array, timestamptz_array, bit, \"union\", fixed_int_array, fixed_varchar_array, fixed_nested_int_array, fixed_nested_varchar_array, fixed_struct_array, struct_of_fixed_array, fixed_array_of_int_list, list_of_fixed_int_array, varint) REPLACE(replace(varchar, chr(0), '') AS varchar) FROM test_all_types(use_large_enum=true)"))
Output
$bool
[1] FALSE TRUE NA
Expand Down
2 changes: 1 addition & 1 deletion tests/testthat/test-types.R
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,6 @@ test_that("test_all_types() output", {

# Need to omit timestamp columns, likely due to https://bugs.r-project.org/show_bug.cgi?id=16856
expect_snapshot({
as.list(dbGetQuery(con, "SELECT * EXCLUDE (timestamp_tz, time_tz, timestamp_ns, timestamp_array, timestamptz_array, bit, \"union\", fixed_int_array, fixed_varchar_array, fixed_nested_int_array, fixed_nested_varchar_array, fixed_struct_array, struct_of_fixed_array, fixed_array_of_int_list, list_of_fixed_int_array) REPLACE(replace(varchar, chr(0), '') AS varchar) FROM test_all_types(use_large_enum=true)"))
as.list(dbGetQuery(con, "SELECT * EXCLUDE (timestamp_tz, time_tz, timestamp_ns, timestamp_array, timestamptz_array, bit, \"union\", fixed_int_array, fixed_varchar_array, fixed_nested_int_array, fixed_nested_varchar_array, fixed_struct_array, struct_of_fixed_array, fixed_array_of_int_list, list_of_fixed_int_array, varint) REPLACE(replace(varchar, chr(0), '') AS varchar) FROM test_all_types(use_large_enum=true)"))
})
})

0 comments on commit 6b61306

Please sign in to comment.