Skip to content

Commit

Permalink
chore: Update vendored sources to duckdb/duckdb@1844ae5 (#347)
Browse files Browse the repository at this point in the history
Merge pull request duckdb/duckdb#13634 from pdet/file_conversion
Merge pull request duckdb/duckdb#13658 from hawkfish/validate-timezones

Co-authored-by: krlmlr <[email protected]>
  • Loading branch information
github-actions[bot] and krlmlr authored Sep 10, 2024
1 parent 9a56ba4 commit c7301b6
Show file tree
Hide file tree
Showing 9 changed files with 50 additions and 20 deletions.
24 changes: 24 additions & 0 deletions src/duckdb/src/common/enums/file_compression_type.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,28 @@ FileCompressionType FileCompressionTypeFromString(const string &input) {
}
}

//! Maps a file compression type to the file-name suffix (leading dot included) used for it.
//! GZIP yields ".gz" and ZSTD yields ".zst"; every other compression type has no
//! suffix registered here and results in a NotImplementedException.
string CompressionExtensionFromType(const FileCompressionType type) {
	if (type == FileCompressionType::GZIP) {
		return ".gz";
	}
	if (type == FileCompressionType::ZSTD) {
		return ".zst";
	}
	// No other compression type has an extension mapped here
	throw NotImplementedException("Compression Extension of file compression type is not implemented");
}

//! Checks whether `path` ends with the extension belonging to compression type `type`.
//! Unless the path begins with the literal prefix `\\?\`, everything from the first '?'
//! onwards is dropped before the suffix comparison (presumably to skip a URL query
//! string, with the prefix check sparing Windows extended-length paths - confirm with callers).
//! The comparison is done on the path exactly as given; no case folding happens here.
//! Throws whatever CompressionExtensionFromType throws for types without an extension.
bool IsFileCompressed(string path, FileCompressionType type) {
	// Resolve the suffix first so an unsupported type fails before the path is touched
	const auto suffix = CompressionExtensionFromType(type);
	const bool keeps_question_mark = StringUtil::StartsWith(path, "\\\\?\\");
	if (!keeps_question_mark) {
		const auto cut_position = path.find('?');
		if (cut_position != std::string::npos) {
			// Truncate at the first '?', keeping only what precedes it
			path.erase(cut_position);
		}
	}
	return StringUtil::EndsWith(path, suffix);
}

} // namespace duckdb
6 changes: 3 additions & 3 deletions src/duckdb/src/common/virtual_file_system.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,15 @@ unique_ptr<FileHandle> VirtualFileSystem::OpenFile(const string &path, FileOpenF
optional_ptr<FileOpener> opener) {
auto compression = flags.Compression();
if (compression == FileCompressionType::AUTO_DETECT) {
// auto detect compression settings based on file name
// auto-detect compression settings based on file name
auto lower_path = StringUtil::Lower(path);
if (StringUtil::EndsWith(lower_path, ".tmp")) {
// strip .tmp
lower_path = lower_path.substr(0, lower_path.length() - 4);
}
if (StringUtil::EndsWith(lower_path, ".gz")) {
if (IsFileCompressed(path, FileCompressionType::GZIP)) {
compression = FileCompressionType::GZIP;
} else if (StringUtil::EndsWith(lower_path, ".zst")) {
} else if (IsFileCompressed(path, FileCompressionType::ZSTD)) {
compression = FileCompressionType::ZSTD;
} else {
compression = FileCompressionType::UNCOMPRESSED;
Expand Down
8 changes: 4 additions & 4 deletions src/duckdb/src/function/table/copy_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,13 +182,13 @@ static unique_ptr<FunctionData> WriteCSVBind(ClientContext &context, CopyFunctio

switch (bind_data->options.compression) {
case FileCompressionType::GZIP:
if (!StringUtil::EndsWith(input.file_extension, ".gz")) {
input.file_extension += ".gz";
if (!IsFileCompressed(input.file_extension, FileCompressionType::GZIP)) {
input.file_extension += CompressionExtensionFromType(FileCompressionType::GZIP);
}
break;
case FileCompressionType::ZSTD:
if (!StringUtil::EndsWith(input.file_extension, ".zst")) {
input.file_extension += ".zst";
if (!IsFileCompressed(input.file_extension, FileCompressionType::ZSTD)) {
input.file_extension += CompressionExtensionFromType(FileCompressionType::ZSTD);
}
break;
default:
Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/function/table/read_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -390,9 +390,9 @@ unique_ptr<TableRef> ReadCSVReplacement(ClientContext &context, ReplacementScanI
auto table_name = ReplacementScan::GetFullPath(input);
auto lower_name = StringUtil::Lower(table_name);
// remove any compression
if (StringUtil::EndsWith(lower_name, ".gz")) {
if (StringUtil::EndsWith(lower_name, CompressionExtensionFromType(FileCompressionType::GZIP))) {
lower_name = lower_name.substr(0, lower_name.size() - 3);
} else if (StringUtil::EndsWith(lower_name, ".zst")) {
} else if (StringUtil::EndsWith(lower_name, CompressionExtensionFromType(FileCompressionType::ZSTD))) {
if (!Catalog::TryAutoLoad(context, "parquet")) {
throw MissingExtensionException("parquet extension is required for reading zst compressed file");
}
Expand Down
9 changes: 5 additions & 4 deletions src/duckdb/src/function/table/sniff_csv.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,14 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p,
const CSVSniffFunctionData &data = data_p.bind_data->Cast<CSVSniffFunctionData>();
auto &fs = duckdb::FileSystem::GetFileSystem(context);

if (data.path.rfind("http://", 0) != 0 && data.path.rfind("https://", 0) != 0 && fs.HasGlob(data.path)) {
throw NotImplementedException("sniff_csv does not operate on globs yet");
auto paths = fs.GlobFiles(data.path, context, FileGlobOptions::DISALLOW_EMPTY);
if (paths.size() > 1) {
throw NotImplementedException("sniff_csv does not operate on more than one file yet");
}

// We must run the sniffer.
auto sniffer_options = data.options;
sniffer_options.file_path = data.path;
sniffer_options.file_path = paths[0];

auto buffer_manager = make_shared_ptr<CSVBufferManager>(context, sniffer_options, sniffer_options.file_path, 0);
if (sniffer_options.name_list.empty()) {
Expand Down Expand Up @@ -204,7 +205,7 @@ static void CSVSniffFunction(ClientContext &context, TableFunctionInput &data_p,
std::ostringstream csv_read;

// Base, Path and auto_detect=false
csv_read << "FROM read_csv('" << data.path << "'" << separator << "auto_detect=false" << separator;
csv_read << "FROM read_csv('" << paths[0] << "'" << separator << "auto_detect=false" << separator;
// 10.1. Delimiter
if (!sniffer_options.dialect_options.state_machine_options.delimiter.IsSetByUser()) {
csv_read << "delim="
Expand Down
6 changes: 3 additions & 3 deletions src/duckdb/src/function/table/version/pragma_version.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#ifndef DUCKDB_PATCH_VERSION
#define DUCKDB_PATCH_VERSION "1-dev5148"
#define DUCKDB_PATCH_VERSION "1-dev5166"
#endif
#ifndef DUCKDB_MINOR_VERSION
#define DUCKDB_MINOR_VERSION 0
Expand All @@ -8,10 +8,10 @@
#define DUCKDB_MAJOR_VERSION 1
#endif
#ifndef DUCKDB_VERSION
#define DUCKDB_VERSION "v1.0.1-dev5148"
#define DUCKDB_VERSION "v1.0.1-dev5166"
#endif
#ifndef DUCKDB_SOURCE_ID
#define DUCKDB_SOURCE_ID "439bb91fc3"
#define DUCKDB_SOURCE_ID "1844ae5109"
#endif
#include "duckdb/function/table/system_functions.hpp"
#include "duckdb/main/database.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@ enum class FileCompressionType : uint8_t { AUTO_DETECT = 0, UNCOMPRESSED = 1, GZ

FileCompressionType FileCompressionTypeFromString(const string &input);

//! Returns the file extension (including the leading dot) for the given compression type:
//! ".gz" for GZIP and ".zst" for ZSTD. Throws a NotImplementedException for any other type.
string CompressionExtensionFromType(const FileCompressionType type);

//! Returns true if `path` ends with the extension of the given compression type.
//! Anything from the first '?' onwards is ignored, unless the path starts with `\\?\`.
//! The check is case-sensitive with respect to the path as passed in.
bool IsFileCompressed(string path, FileCompressionType type);

} // namespace duckdb
5 changes: 3 additions & 2 deletions src/duckdb/src/include/duckdb/function/replacement_scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

#include "duckdb/common/common.hpp"
#include "duckdb/common/string_util.hpp"
#include "duckdb/common/enums/file_compression_type.hpp"

namespace duckdb {

Expand Down Expand Up @@ -59,9 +60,9 @@ struct ReplacementScan {
static bool CanReplace(const string &table_name, const vector<string> &extensions) {
auto lower_name = StringUtil::Lower(table_name);

if (StringUtil::EndsWith(lower_name, ".gz")) {
if (StringUtil::EndsWith(lower_name, CompressionExtensionFromType(FileCompressionType::GZIP))) {
lower_name = lower_name.substr(0, lower_name.size() - 3);
} else if (StringUtil::EndsWith(lower_name, ".zst")) {
} else if (StringUtil::EndsWith(lower_name, CompressionExtensionFromType(FileCompressionType::ZSTD))) {
lower_name = lower_name.substr(0, lower_name.size() - 4);
}

Expand Down
4 changes: 2 additions & 2 deletions src/duckdb/src/main/extension/extension_install.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ string ExtensionHelper::ExtensionUrlTemplate(optional_ptr<const DatabaseInstance
versioned_path = versioned_path + ".wasm";
#else
string default_endpoint = ExtensionRepository::DEFAULT_REPOSITORY_URL;
versioned_path = versioned_path + ".gz";
versioned_path = versioned_path + CompressionExtensionFromType(FileCompressionType::GZIP);
#endif
string url_template = repository.path + versioned_path;
return url_template;
Expand Down Expand Up @@ -283,7 +283,7 @@ static unique_ptr<ExtensionInstallInfo> DirectInstallExtension(DatabaseInstance
bool exists = fs.FileExists(file);

// Recheck without .gz
if (!exists && StringUtil::EndsWith(file, ".gz")) {
if (!exists && StringUtil::EndsWith(file, CompressionExtensionFromType(FileCompressionType::GZIP))) {
file = file.substr(0, file.size() - 3);
exists = fs.FileExists(file);
}
Expand Down

0 comments on commit c7301b6

Please sign in to comment.