From f45966f179780a4e4d262b16f39de75d30e3a218 Mon Sep 17 00:00:00 2001 From: Deepak Majeti Date: Fri, 21 Jun 2024 19:22:39 -0700 Subject: [PATCH] Use separate headers for DWRF Reader Writer registration API (#10132) Summary: Currently, registering the DWRF reader/writer pulls in some of the corresponding DWRF implementation details. This introduces additional dependencies in the client. Separate the registrations to their own headers to avoid this. Cleanup files with unused DwrfReader.h. Pull Request resolved: https://github.com/facebookincubator/velox/pull/10132 Reviewed By: Yuhta Differential Revision: D58736823 Pulled By: pedroerp fbshipit-source-id: fc8622f039d983adaa63812bc0f99ceee9d69076 --- velox/connectors/hive/HiveConnector.cpp | 3 +++ velox/dwio/dwrf/RegisterDwrfReader.h | 25 +++++++++++++++++++ velox/dwio/dwrf/RegisterDwrfWriter.h | 25 +++++++++++++++++++ velox/dwio/dwrf/reader/DwrfReader.h | 4 --- velox/dwio/dwrf/writer/Writer.h | 4 --- velox/dwio/parquet/RegisterParquetReader.cpp | 2 -- velox/dwio/parquet/RegisterParquetWriter.cpp | 2 -- velox/examples/ScanAndSort.cpp | 2 +- velox/examples/ScanOrc.cpp | 14 +++++++---- velox/exec/fuzzer/AggregationFuzzerBase.cpp | 1 - velox/exec/fuzzer/FuzzerUtil.cpp | 1 - velox/exec/fuzzer/JoinFuzzer.cpp | 2 -- velox/exec/fuzzer/MemoryArbitrationFuzzer.cpp | 2 -- velox/exec/fuzzer/RowNumberFuzzer.cpp | 2 -- velox/exec/fuzzer/WriterFuzzer.cpp | 2 -- .../tests/utils/HiveConnectorTestBase.cpp | 2 -- 16 files changed, 63 insertions(+), 30 deletions(-) create mode 100644 velox/dwio/dwrf/RegisterDwrfReader.h create mode 100644 velox/dwio/dwrf/RegisterDwrfWriter.h diff --git a/velox/connectors/hive/HiveConnector.cpp b/velox/connectors/hive/HiveConnector.cpp index 5f5faf1671ae..f34a5442f0b5 100644 --- a/velox/connectors/hive/HiveConnector.cpp +++ b/velox/connectors/hive/HiveConnector.cpp @@ -21,6 +21,9 @@ #include "velox/connectors/hive/HiveDataSink.h" #include "velox/connectors/hive/HiveDataSource.h" #include "velox/connectors/hive/HivePartitionFunction.h" +#include "velox/dwio/dwrf/RegisterDwrfReader.h" +#include "velox/dwio/dwrf/RegisterDwrfWriter.h" + // Meta's buck build system needs this check. #ifdef VELOX_ENABLE_GCS #include "velox/connectors/hive/storage_adapters/gcs/RegisterGCSFileSystem.h" // @manual diff --git a/velox/dwio/dwrf/RegisterDwrfReader.h b/velox/dwio/dwrf/RegisterDwrfReader.h new file mode 100644 index 000000000000..ae15ac80965a --- /dev/null +++ b/velox/dwio/dwrf/RegisterDwrfReader.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace facebook::velox::dwrf { + +void registerDwrfReaderFactory(); + +void unregisterDwrfReaderFactory(); + +} // namespace facebook::velox::dwrf diff --git a/velox/dwio/dwrf/RegisterDwrfWriter.h b/velox/dwio/dwrf/RegisterDwrfWriter.h new file mode 100644 index 000000000000..6888da424887 --- /dev/null +++ b/velox/dwio/dwrf/RegisterDwrfWriter.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) Facebook, Inc. and its affiliates. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace facebook::velox::dwrf { + +void registerDwrfWriterFactory(); + +void unregisterDwrfWriterFactory(); + +} // namespace facebook::velox::dwrf diff --git a/velox/dwio/dwrf/reader/DwrfReader.h b/velox/dwio/dwrf/reader/DwrfReader.h index 4cb0d9271932..6ba9a1dd62aa 100644 --- a/velox/dwio/dwrf/reader/DwrfReader.h +++ b/velox/dwio/dwrf/reader/DwrfReader.h @@ -345,8 +345,4 @@ class DwrfReaderFactory : public dwio::common::ReaderFactory { } }; -void registerDwrfReaderFactory(); - -void unregisterDwrfReaderFactory(); - } // namespace facebook::velox::dwrf diff --git a/velox/dwio/dwrf/writer/Writer.h b/velox/dwio/dwrf/writer/Writer.h index d342fce1bf64..f3f67147f82f 100644 --- a/velox/dwio/dwrf/writer/Writer.h +++ b/velox/dwio/dwrf/writer/Writer.h @@ -222,8 +222,4 @@ class DwrfWriterFactory : public dwio::common::WriterFactory { const dwio::common::WriterOptions& options) override; }; -void registerDwrfWriterFactory(); - -void unregisterDwrfWriterFactory(); - } // namespace facebook::velox::dwrf diff --git a/velox/dwio/parquet/RegisterParquetReader.cpp b/velox/dwio/parquet/RegisterParquetReader.cpp index da8afaa5dd8e..995b3acd2a5f 100644 --- a/velox/dwio/parquet/RegisterParquetReader.cpp +++ b/velox/dwio/parquet/RegisterParquetReader.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "velox/dwio/parquet/RegisterParquetReader.h" - #ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/reader/ParquetReader.h" #endif diff --git a/velox/dwio/parquet/RegisterParquetWriter.cpp b/velox/dwio/parquet/RegisterParquetWriter.cpp index d0d3e4825386..b6a9ccee465c 100644 --- a/velox/dwio/parquet/RegisterParquetWriter.cpp +++ b/velox/dwio/parquet/RegisterParquetWriter.cpp @@ -14,8 +14,6 @@ * limitations under the License. */ -#include "velox/dwio/parquet/RegisterParquetWriter.h" - #ifdef VELOX_ENABLE_PARQUET #include "velox/dwio/parquet/writer/Writer.h" #endif diff --git a/velox/examples/ScanAndSort.cpp b/velox/examples/ScanAndSort.cpp index 493dae7b84ee..6625e7a4d7c0 100644 --- a/velox/examples/ScanAndSort.cpp +++ b/velox/examples/ScanAndSort.cpp @@ -19,7 +19,7 @@ #include "velox/common/memory/Memory.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" +#include "velox/dwio/dwrf/RegisterDwrfReader.h" #include "velox/exec/Task.h" #include "velox/exec/tests/utils/HiveConnectorTestBase.h" #include "velox/exec/tests/utils/PlanBuilder.h" diff --git a/velox/examples/ScanOrc.cpp b/velox/examples/ScanOrc.cpp index 2867cba9fd53..bac1f6be4d83 100644 --- a/velox/examples/ScanOrc.cpp +++ b/velox/examples/ScanOrc.cpp @@ -19,7 +19,9 @@ #include "velox/common/file/FileSystems.h" #include "velox/common/memory/Memory.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" +#include "velox/dwio/common/Reader.h" +#include "velox/dwio/common/ReaderFactory.h" +#include "velox/dwio/dwrf/RegisterDwrfReader.h" #include "velox/exec/tests/utils/TempDirectoryPath.h" #include "velox/vector/BaseVector.h" @@ -48,10 +50,12 @@ int main(int argc, char** argv) { dwio::common::ReaderOptions readerOpts{pool.get()}; // To make DwrfReader reads ORC file, setFileFormat to FileFormat::ORC readerOpts.setFileFormat(FileFormat::ORC); - auto reader = DwrfReader::create( - std::make_unique( - std::make_shared(filePath), readerOpts.memoryPool()), - readerOpts); + auto reader = dwio::common::getReaderFactory(FileFormat::ORC) + ->createReader( + std::make_unique( + std::make_shared(filePath), + readerOpts.memoryPool()), + readerOpts); VectorPtr batch; RowReaderOptions rowReaderOptions; diff --git a/velox/exec/fuzzer/AggregationFuzzerBase.cpp b/velox/exec/fuzzer/AggregationFuzzerBase.cpp index 2876e57dc834..aa047d39288e 100644 --- a/velox/exec/fuzzer/AggregationFuzzerBase.cpp +++ b/velox/exec/fuzzer/AggregationFuzzerBase.cpp @@ -19,7 +19,6 @@ #include "velox/common/base/Fs.h" #include "velox/common/base/VeloxException.h" #include "velox/connectors/hive/HiveConnectorSplit.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" #include "velox/dwio/dwrf/writer/Writer.h" #include "velox/exec/fuzzer/DuckQueryRunner.h" #include "velox/exec/fuzzer/PrestoQueryRunner.h" diff --git a/velox/exec/fuzzer/FuzzerUtil.cpp b/velox/exec/fuzzer/FuzzerUtil.cpp index c7d8ddd07e41..7eb6a52f7ec4 100644 --- a/velox/exec/fuzzer/FuzzerUtil.cpp +++ b/velox/exec/fuzzer/FuzzerUtil.cpp @@ -20,7 +20,6 @@ #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/dwio/catalog/fbhive/FileUtils.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" #include "velox/dwio/dwrf/writer/Writer.h" using namespace facebook::velox::dwio::catalog::fbhive; diff --git a/velox/exec/fuzzer/JoinFuzzer.cpp b/velox/exec/fuzzer/JoinFuzzer.cpp index 39742fd0176f..fbdca8831cd4 100644 --- a/velox/exec/fuzzer/JoinFuzzer.cpp +++ b/velox/exec/fuzzer/JoinFuzzer.cpp @@ -19,8 +19,6 @@ #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" #include "velox/connectors/hive/PartitionIdGenerator.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" -#include "velox/dwio/dwrf/writer/Writer.h" #include "velox/exec/OperatorUtils.h" #include "velox/exec/fuzzer/FuzzerUtil.h" #include "velox/exec/fuzzer/ReferenceQueryRunner.h" diff --git a/velox/exec/fuzzer/MemoryArbitrationFuzzer.cpp b/velox/exec/fuzzer/MemoryArbitrationFuzzer.cpp index 6e794259d303..f46363ba383b 100644 --- a/velox/exec/fuzzer/MemoryArbitrationFuzzer.cpp +++ b/velox/exec/fuzzer/MemoryArbitrationFuzzer.cpp @@ -22,8 +22,6 @@ #include "velox/common/memory/SharedArbitrator.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" -#include "velox/dwio/dwrf/writer/Writer.h" #include "velox/exec/MemoryReclaimer.h" #include "velox/exec/TableWriter.h" #include "velox/exec/fuzzer/FuzzerUtil.h" diff --git a/velox/exec/fuzzer/RowNumberFuzzer.cpp b/velox/exec/fuzzer/RowNumberFuzzer.cpp index bd55884092aa..aba81e918025 100644 --- a/velox/exec/fuzzer/RowNumberFuzzer.cpp +++ b/velox/exec/fuzzer/RowNumberFuzzer.cpp @@ -20,8 +20,6 @@ #include "velox/common/file/FileSystems.h" #include "velox/connectors/hive/HiveConnector.h" #include "velox/connectors/hive/HiveConnectorSplit.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" -#include "velox/dwio/dwrf/writer/Writer.h" #include "velox/exec/fuzzer/FuzzerUtil.h" #include "velox/exec/fuzzer/ReferenceQueryRunner.h" #include "velox/exec/tests/utils/AssertQueryBuilder.h" diff --git a/velox/exec/fuzzer/WriterFuzzer.cpp b/velox/exec/fuzzer/WriterFuzzer.cpp index 46f95e1f52b8..eab4bccced8e 100644 --- a/velox/exec/fuzzer/WriterFuzzer.cpp +++ b/velox/exec/fuzzer/WriterFuzzer.cpp @@ -17,8 +17,6 @@ #include -#include "velox/dwio/dwrf/reader/DwrfReader.h" - #include #include "velox/common/base/Fs.h" #include "velox/common/encode/Base64.h" diff --git a/velox/exec/tests/utils/HiveConnectorTestBase.cpp b/velox/exec/tests/utils/HiveConnectorTestBase.cpp index 50c670a37a21..5d492d7cfaa7 100644 --- a/velox/exec/tests/utils/HiveConnectorTestBase.cpp +++ b/velox/exec/tests/utils/HiveConnectorTestBase.cpp @@ -18,9 +18,7 @@ #include "velox/common/file/FileSystems.h" #include "velox/common/file/tests/FaultyFileSystem.h" -#include "velox/connectors/hive/HiveDataSink.h" #include "velox/dwio/common/tests/utils/BatchMaker.h" -#include "velox/dwio/dwrf/reader/DwrfReader.h" #include "velox/dwio/dwrf/writer/Writer.h" #include "velox/exec/tests/utils/AssertQueryBuilder.h"