From 452185cbcfbff3d7a374a879336cfe86d9e28cfe Mon Sep 17 00:00:00 2001 From: Antoine Pitrou Date: Sun, 3 Nov 2024 17:06:31 +0100 Subject: [PATCH] Various cleanups --- cpp/src/arrow/filesystem/s3_test_util.cc | 38 ++++++----- cpp/src/arrow/filesystem/s3_test_util.h | 12 +--- cpp/src/arrow/filesystem/s3fs.h | 4 ++ cpp/src/arrow/filesystem/s3fs_benchmark.cc | 2 +- cpp/src/arrow/filesystem/s3fs_test.cc | 75 +++++++++++++--------- 5 files changed, 69 insertions(+), 62 deletions(-) diff --git a/cpp/src/arrow/filesystem/s3_test_util.cc b/cpp/src/arrow/filesystem/s3_test_util.cc index 14f9775afecc0..87359aa1832a5 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.cc +++ b/cpp/src/arrow/filesystem/s3_test_util.cc @@ -50,8 +50,6 @@ const char* kEnvConnectString = "ARROW_TEST_S3_CONNECT_STRING"; const char* kEnvAccessKey = "ARROW_TEST_S3_ACCESS_KEY"; const char* kEnvSecretKey = "ARROW_TEST_S3_SECRET_KEY"; -std::string GenerateConnectString() { return GetListenAddress(); } - } // namespace struct MinioTestServer::Impl { @@ -115,7 +113,7 @@ Status MinioTestServer::GenerateCertificateFile() { return Status::OK(); } -Status MinioTestServer::Start(bool enable_tls_if_supported) { +Status MinioTestServer::Start(bool enable_tls) { const char* connect_str = std::getenv(kEnvConnectString); const char* access_key = std::getenv(kEnvAccessKey); const char* secret_key = std::getenv(kEnvSecretKey); @@ -134,25 +132,23 @@ Status MinioTestServer::Start(bool enable_tls_if_supported) { impl_->server_process_->SetEnv("MINIO_SECRET_KEY", kMinioSecretKey); // Disable the embedded console (one less listening address to care about) impl_->server_process_->SetEnv("MINIO_BROWSER", "off"); - impl_->connect_string_ = GenerateConnectString(); // NOTE: --quiet makes startup faster by suppressing remote version check std::vector minio_args({"server", "--quiet", "--compat"}); - if (enable_tls_if_supported) { -#ifdef MINIO_SERVER_WITH_TLS + if (enable_tls) { ARROW_RETURN_NOT_OK(GenerateCertificateFile()); minio_args.emplace_back("--certs-dir"); minio_args.emplace_back(ca_dir_path()); impl_->scheme_ = "https"; - impl_->connect_string_ = - GetListenAddress("localhost"); // for TLS enabled case, we need to use localhost - // which is the fixed hostname in the certificate -#endif // MINIO_SERVER_WITH_TLS + // With TLS enabled, we need the connection hostname to match the certificate's + // subject name. This also constrains the actual listening IP address. + impl_->connect_string_ = GetListenAddress("localhost"); + } else { + // Without TLS enabled, we want to minimize the likelihood of address collisions + // by varying the listening IP address (note that most tests don't enable TLS). + impl_->connect_string_ = GetListenAddress(); } minio_args.emplace_back("--address"); - minio_args.emplace_back( - impl_->connect_string_); // the connect_string_ differs for the http and https, - // https is fixed localhost while http is the dynamic ip - // in range 127.0.0.1/8 + minio_args.emplace_back(impl_->connect_string_); minio_args.emplace_back(impl_->temp_dir_->path().ToString()); ARROW_RETURN_NOT_OK(impl_->server_process_->SetExecutable(kMinioExecutableName)); @@ -168,16 +164,19 @@ Status MinioTestServer::Stop() { struct MinioTestEnvironment::Impl { std::function>()> server_generator_; + bool enable_tls_; + + explicit Impl(bool enable_tls) : enable_tls_(enable_tls) {} Result> LaunchOneServer() { auto server = std::make_shared(); - RETURN_NOT_OK(server->Start()); + RETURN_NOT_OK(server->Start(enable_tls_)); return server; } }; -MinioTestEnvironment::MinioTestEnvironment(bool enable_tls_if_supported) - : impl_(new Impl), enable_tls_if_supported_(enable_tls_if_supported) {} +MinioTestEnvironment::MinioTestEnvironment(bool enable_tls) + : impl_(new Impl(enable_tls)) {} MinioTestEnvironment::~MinioTestEnvironment() = default; @@ -185,10 +184,9 @@ void MinioTestEnvironment::SetUp() { auto pool = ::arrow::internal::GetCpuThreadPool(); auto launch_one_server = - [enable_tls_if_supported = - enable_tls_if_supported_]() -> Result> { + [enable_tls = impl_->enable_tls_]() -> Result> { auto server = std::make_shared(); - RETURN_NOT_OK(server->Start(enable_tls_if_supported)); + RETURN_NOT_OK(server->Start(enable_tls)); return server; }; impl_->server_generator_ = [pool, launch_one_server]() { diff --git a/cpp/src/arrow/filesystem/s3_test_util.h b/cpp/src/arrow/filesystem/s3_test_util.h index db3a5f80755f2..0a89a7a9d5a15 100644 --- a/cpp/src/arrow/filesystem/s3_test_util.h +++ b/cpp/src/arrow/filesystem/s3_test_util.h @@ -30,10 +30,6 @@ #include "arrow/util/checked_cast.h" #include "arrow/util/macros.h" -#if defined(__linux__) -# define MINIO_SERVER_WITH_TLS -#endif // Linux - namespace arrow { namespace fs { @@ -44,10 +40,7 @@ class MinioTestServer { MinioTestServer(); ~MinioTestServer(); - // enable_tls_if_supported = true: start Minio with TLS if MINIO_SERVER_WITH_TLS is - // defined, Currently only enabled on Linux platfrom. enable_tls_if_supported = false: - // start Minio without TLS in all platfroms - Status Start(bool enable_tls_if_supported = true); + Status Start(bool enable_tls = false); Status Stop(); @@ -74,7 +67,7 @@ class MinioTestServer { class MinioTestEnvironment : public ::testing::Environment { public: - explicit MinioTestEnvironment(bool enable_tls_if_supported = true); + explicit MinioTestEnvironment(bool enable_tls = false); ~MinioTestEnvironment(); void SetUp() override; @@ -84,7 +77,6 @@ class MinioTestEnvironment : public ::testing::Environment { protected: struct Impl; std::unique_ptr impl_; - bool enable_tls_if_supported_ = true; // by default, enable TLS if supported }; // A global test "environment", to ensure that the S3 API is initialized before diff --git a/cpp/src/arrow/filesystem/s3fs.h b/cpp/src/arrow/filesystem/s3fs.h index 4b6457acaec02..ac6342f26a304 100644 --- a/cpp/src/arrow/filesystem/s3fs.h +++ b/cpp/src/arrow/filesystem/s3fs.h @@ -206,6 +206,8 @@ struct ARROW_EXPORT S3Options { /// If empty, global filesystem options will be used (see FileSystemGlobalOptions); /// if the corresponding global filesystem option is also empty, the underlying /// TLS library's defaults will be used. + /// + /// Note this option may be ignored on some systems (Windows, macOS). std::string tls_ca_file_path; /// Optional path to a directory holding TLS CA @@ -216,6 +218,8 @@ struct ARROW_EXPORT S3Options { /// If empty, global filesystem options will be used (see FileSystemGlobalOptions); /// if the corresponding global filesystem option is also empty, the underlying /// TLS library's defaults will be used. + /// + /// Note this option may be ignored on some systems (Windows, macOS). std::string tls_ca_dir_path; /// Whether to verify the S3 endpoint's TLS certificate diff --git a/cpp/src/arrow/filesystem/s3fs_benchmark.cc b/cpp/src/arrow/filesystem/s3fs_benchmark.cc index 2867012b86616..b7b6dda64192a 100644 --- a/cpp/src/arrow/filesystem/s3fs_benchmark.cc +++ b/cpp/src/arrow/filesystem/s3fs_benchmark.cc @@ -61,7 +61,7 @@ class MinioFixture : public benchmark::Fixture { public: void SetUp(const ::benchmark::State& state) override { minio_.reset(new MinioTestServer()); - ASSERT_OK(minio_->Start(/*enable_tls_if_supported=*/false)); + ASSERT_OK(minio_->Start(/*enable_tls=*/false)); const char* region_str = std::getenv(kEnvAwsRegion); if (region_str) { diff --git a/cpp/src/arrow/filesystem/s3fs_test.cc b/cpp/src/arrow/filesystem/s3fs_test.cc index ed99bd1147213..92e441b8d70a0 100644 --- a/cpp/src/arrow/filesystem/s3fs_test.cc +++ b/cpp/src/arrow/filesystem/s3fs_test.cc @@ -71,6 +71,12 @@ #include "arrow/util/range.h" #include "arrow/util/string.h" +// TLS tests require the ability to set a custom CA file when initiating S3 client +// connections, which the AWS SDK currently only supports on Linux. +#if defined(__linux__) +# define ENABLE_TLS_TESTS +#endif // Linux + namespace arrow { namespace fs { @@ -95,14 +101,14 @@ ::testing::Environment* s3_env = ::testing::AddGlobalTestEnvironment(new S3Envir ::testing::Environment* minio_env = ::testing::AddGlobalTestEnvironment(new MinioTestEnvironment); -::testing::Environment* minio_env_http = ::testing::AddGlobalTestEnvironment( - new MinioTestEnvironment(/*enable_tls_if_supported=*/false)); +::testing::Environment* minio_env_https = + ::testing::AddGlobalTestEnvironment(new MinioTestEnvironment(/*enable_tls=*/true)); -MinioTestEnvironment* GetMinioEnv(bool enable_tls_if_supported) { - if (enable_tls_if_supported) { - return ::arrow::internal::checked_cast(minio_env); +MinioTestEnvironment* GetMinioEnv(bool enable_tls) { + if (enable_tls) { + return ::arrow::internal::checked_cast(minio_env_https); } else { - return ::arrow::internal::checked_cast(minio_env_http); + return ::arrow::internal::checked_cast(minio_env); } } @@ -182,24 +188,6 @@ class AwsTestMixin : public ::testing::Test { #endif }; -// Test the CalculateSSECustomerKeyMD5 in AwsTestMixin fixture, since there is AWS SDK -// function call in it -TEST_F(AwsTestMixin, CalculateSSECustomerKeyMD5) { - ASSERT_RAISES(Invalid, CalculateSSECustomerKeyMD5("")); // invalid length - ASSERT_RAISES(Invalid, - CalculateSSECustomerKeyMD5( - "1234567890123456789012345678901234567890")); // invalid length - // valid case, with some non-ASCII character and a null byte in the sse_customer_key - char sse_customer_key[32] = {}; - sse_customer_key[0] = '\x40'; // '@' character - sse_customer_key[1] = '\0'; // null byte - sse_customer_key[2] = '\xFF'; // non-ASCII - sse_customer_key[31] = '\xFA'; // non-ASCII - std::string sse_customer_key_string(sse_customer_key, sizeof(sse_customer_key)); - ASSERT_OK_AND_ASSIGN(auto md5, CalculateSSECustomerKeyMD5(sse_customer_key_string)) - ASSERT_EQ(md5, "97FTa6lj0hE7lshKdBy61g=="); // valid case -} - class S3TestMixin : public AwsTestMixin { public: void SetUp() override { @@ -228,7 +216,7 @@ class S3TestMixin : public AwsTestMixin { protected: Status InitServerAndClient() { - ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv(enable_tls_if_supported_)->GetOneServer()); + ARROW_ASSIGN_OR_RAISE(minio_, GetMinioEnv(enable_tls_)->GetOneServer()); client_config_.reset(new Aws::Client::ClientConfiguration()); client_config_->endpointOverride = ToAwsString(minio_->connect_string()); if (minio_->scheme() == "https") { @@ -255,9 +243,11 @@ class S3TestMixin : public AwsTestMixin { std::unique_ptr client_config_; Aws::Auth::AWSCredentials credentials_; std::unique_ptr client_; - bool enable_tls_if_supported_ = - false; // by default, use the HTTP for all the test cases, since there is the - // random failure when there is stress test against the minio HTTPS Server + // Use plain HTTP by default, as this allows us to listen on different loopback + // addresses and thus minimize the risk of address reuse (HTTPS requires the + // hostname to match the certificate's subject name, constraining us to a + // single loopback address). + bool enable_tls_ = false; }; void AssertGetObject(Aws::S3::Model::GetObjectResult& result, @@ -283,6 +273,27 @@ void AssertObjectContents(Aws::S3::S3Client* client, const std::string& bucket, AssertGetObject(result, expected); } +//////////////////////////////////////////////////////////////////////////// +// Misc tests + +class InternalsTest : public AwsTestMixin {}; + +TEST_F(InternalsTest, CalculateSSECustomerKeyMD5) { + ASSERT_RAISES(Invalid, CalculateSSECustomerKeyMD5("")); // invalid length + ASSERT_RAISES(Invalid, + CalculateSSECustomerKeyMD5( + "1234567890123456789012345678901234567890")); // invalid length + // valid case, with some non-ASCII character and a null byte in the sse_customer_key + char sse_customer_key[32] = {}; + sse_customer_key[0] = '\x40'; // '@' character + sse_customer_key[1] = '\0'; // null byte + sse_customer_key[2] = '\xFF'; // non-ASCII + sse_customer_key[31] = '\xFA'; // non-ASCII + std::string sse_customer_key_string(sse_customer_key, sizeof(sse_customer_key)); + ASSERT_OK_AND_ASSIGN(auto md5, CalculateSSECustomerKeyMD5(sse_customer_key_string)) + ASSERT_EQ(md5, "97FTa6lj0hE7lshKdBy61g=="); // valid case +} + //////////////////////////////////////////////////////////////////////////// // S3Options tests @@ -1344,11 +1355,11 @@ TEST_F(TestS3FS, OpenInputFile) { } // Minio only allows Server Side Encryption on HTTPS client connections. -#ifdef MINIO_SERVER_WITH_TLS +#ifdef ENABLE_TLS_TESTS class TestS3FSHTTPS : public TestS3FS { public: void SetUp() override { - enable_tls_if_supported_ = true; + enable_tls_ = true; TestS3FS::SetUp(); } }; @@ -1374,6 +1385,7 @@ TEST_F(TestS3FSHTTPS, SSECustomerKeyMatch) { TEST_F(TestS3FSHTTPS, SSECustomerKeyMismatch) { std::shared_ptr stream; for (const auto& allow_delayed_open : {false, true}) { + ARROW_SCOPED_TRACE("allow_delayed_open = ", allow_delayed_open); options_.allow_delayed_open = allow_delayed_open; options_.sse_customer_key = "12345678123456781234567812345678"; MakeFileSystem(); @@ -1390,6 +1402,7 @@ TEST_F(TestS3FSHTTPS, SSECustomerKeyMismatch) { TEST_F(TestS3FSHTTPS, SSECustomerKeyMissing) { std::shared_ptr stream; for (const auto& allow_delayed_open : {false, true}) { + ARROW_SCOPED_TRACE("allow_delayed_open = ", allow_delayed_open); options_.allow_delayed_open = allow_delayed_open; options_.sse_customer_key = "12345678123456781234567812345678"; MakeFileSystem(); @@ -1415,7 +1428,7 @@ TEST_F(TestS3FSHTTPS, SSECustomerKeyCopyFile) { AssertBufferEqual(*buf, "some"); ASSERT_OK(RestoreTestBucket()); } -#endif // MINIO_SERVER_WITH_TLS +#endif // ENABLE_TLS_TESTS struct S3OptionsTestParameters { bool background_writes{false};