diff --git a/.github/workflows/ruby.yml b/.github/workflows/ruby.yml index 11e3c93ed0806..04f944f56c665 100644 --- a/.github/workflows/ruby.yml +++ b/.github/workflows/ruby.yml @@ -186,7 +186,7 @@ jobs: shell: bash run: ci/scripts/ruby_test.sh $(pwd) $(pwd)/build - windows: + windows-mingw: name: AMD64 Windows MinGW ${{ matrix.mingw-n-bits }} GLib & Ruby runs-on: windows-2019 if: ${{ !contains(github.event.pull_request.title, 'WIP') }} @@ -267,7 +267,6 @@ jobs: ridk exec bash ci\scripts\cpp_build.sh "${source_dir}" "${build_dir}" - name: Build GLib run: | - $Env:CMAKE_BUILD_PARALLEL_LEVEL = $Env:NUMBER_OF_PROCESSORS $source_dir = "$(ridk exec cygpath --unix "$(Get-Location)")" $build_dir = "$(ridk exec cygpath --unix "$(Get-Location)\build")" $ErrorActionPreference = "Continue" @@ -305,3 +304,96 @@ jobs: $Env:MAKE = "ridk exec make" $ErrorActionPreference = "Continue" rake -f ruby\Rakefile + + windows-msvc: + name: AMD64 Windows MSVC GLib + runs-on: windows-2019 + if: ${{ !contains(github.event.pull_request.title, 'WIP') }} + timeout-minutes: 90 + strategy: + fail-fast: false + env: + ARROW_BOOST_USE_SHARED: OFF + ARROW_BUILD_BENCHMARKS: OFF + ARROW_BUILD_SHARED: ON + ARROW_BUILD_STATIC: OFF + ARROW_BUILD_TESTS: OFF + ARROW_ACERO: ON + ARROW_DATASET: ON + ARROW_FLIGHT: OFF + ARROW_FLIGHT_SQL: OFF + ARROW_GANDIVA: OFF + ARROW_HDFS: OFF + ARROW_HOME: "${{ github.workspace }}/dist" + ARROW_JEMALLOC: OFF + ARROW_MIMALLOC: ON + ARROW_ORC: OFF + ARROW_PARQUET: ON + ARROW_SUBSTRAIT: OFF + ARROW_USE_GLOG: OFF + ARROW_VERBOSE_THIRDPARTY_BUILD: OFF + ARROW_WITH_BROTLI: OFF + ARROW_WITH_BZ2: OFF + ARROW_WITH_LZ4: OFF + ARROW_WITH_OPENTELEMETRY: OFF + ARROW_WITH_SNAPPY: ON + ARROW_WITH_ZLIB: OFF + ARROW_WITH_ZSTD: ON + BOOST_SOURCE: BUNDLED + CMAKE_CXX_STANDARD: "17" + CMAKE_GENERATOR: Ninja + CMAKE_INSTALL_PREFIX: "${{ github.workspace }}/dist" + CMAKE_UNITY_BUILD: ON + steps: + - name: Disable Crash Dialogs + run: | + reg add ` + "HKCU\SOFTWARE\Microsoft\Windows\Windows Error Reporting" ` + /v DontShowUI ` + /t REG_DWORD ` + /d 1 ` + /f + - name: Checkout Arrow + uses: actions/checkout@v4 + with: + fetch-depth: 0 + submodules: recursive + - name: Install vcpkg + shell: bash + run: | + ci/scripts/install_vcpkg.sh ./vcpkg + - name: Install meson + run: | + python -m pip install meson + - name: Install ccache + shell: bash + run: | + ci/scripts/install_ccache.sh 4.6.3 /usr + - name: Setup ccache + shell: bash + run: | + ci/scripts/ccache_setup.sh + - name: ccache info + id: ccache-info + shell: bash + run: | + echo "cache-dir=$(ccache --get-config cache_dir)" >> $GITHUB_OUTPUT + - name: Cache ccache + uses: actions/cache@v4 + with: + path: ${{ steps.ccache-info.outputs.cache-dir }} + key: glib-ccache-msvc-${{ env.CACHE_VERSION }}-${{ hashFiles('cpp/**') }} + restore-keys: glib-ccache-msvc-${{ env.CACHE_VERSION }}- + env: + # We can invalidate the current cache by updating this. 
+ CACHE_VERSION: "2024-05-09" + - name: Build C++ + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + bash -c "ci/scripts/cpp_build.sh $(pwd) $(pwd)/build" + - name: Build GLib + shell: cmd + run: | + call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvarsall.bat" x64 + bash -c "VCPKG_ROOT=\"$(pwd)/vcpkg\" ci/scripts/c_glib_build.sh $(pwd) $(pwd)/build" diff --git a/.gitignore b/.gitignore index c7f5aa90e18e6..e2e84fee57e3c 100644 --- a/.gitignore +++ b/.gitignore @@ -102,4 +102,4 @@ __debug_bin .envrc # Develocity -.mvn/.gradle-enterprise/ +.mvn/.develocity.xml diff --git a/.mvn/gradle-enterprise.xml b/.mvn/develocity.xml similarity index 78% rename from .mvn/gradle-enterprise.xml rename to .mvn/develocity.xml index bae5a3f147e68..df3cbccd2b6cb 100644 --- a/.mvn/gradle-enterprise.xml +++ b/.mvn/develocity.xml @@ -1,4 +1,4 @@ - + - + https://ge.apache.org false - true + true true true #{isFalse(env['CI'])} - ALWAYS + true true #{{'0.0.0.0'}} @@ -42,4 +42,4 @@ false - + diff --git a/.mvn/extensions.xml b/.mvn/extensions.xml index b446c647e47e6..b56ab0fd7772a 100644 --- a/.mvn/extensions.xml +++ b/.mvn/extensions.xml @@ -22,12 +22,12 @@ com.gradle - gradle-enterprise-maven-extension - 1.20 + develocity-maven-extension + 1.21.4 com.gradle common-custom-user-data-maven-extension - 1.12.5 + 2.0 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf5ca08d53c32..e0b8009b03184 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -40,7 +40,7 @@ repos: hooks: - id: flake8 name: Python Format - files: ^(python|dev|integration)/ + files: ^(python|dev|c_glib|integration)/ types: - file - python @@ -116,25 +116,28 @@ repos: name: CMake Format files: >- ( + ?.*CMakeLists\.txt$| ?^ci/.*/.*\.cmake$| ?^cpp/.*/.*\.cmake\.in$| ?^cpp/.*/.*\.cmake$| - ?^cpp/.*/CMakeLists\.txt$| - ?^go/.*/CMakeLists\.txt$| - ?^java/.*/CMakeLists\.txt$| - ?^matlab/.*/CMakeLists\.txt$| - ?^python/.*/CMakeLists\.txt$| ) exclude: >- ( + ?^ci/conan/all/.*CMakeLists\.txt$| ?^cpp/cmake_modules/FindNumPy\.cmake$| ?^cpp/cmake_modules/FindPythonLibsNew\.cmake$| ?^cpp/cmake_modules/UseCython\.cmake$| - ?^cpp/src/arrow/util/config\.h\.cmake$| + ?^cpp/src/arrow/util/.*\.h\.cmake$| ) - repo: https://github.com/sphinx-contrib/sphinx-lint rev: v0.9.1 hooks: - id: sphinx-lint - files: ^docs/ - args: ['--disable', 'all', '--enable', 'trailing-whitespace,missing-final-newline', 'docs'] + files: ^docs/source + exclude: ^docs/source/python/generated + args: [ + '--enable', + 'all', + '--disable', + 'dangling-hyphen,line-too-long', + ] diff --git a/c_glib/arrow-cuda-glib/arrow-cuda-glib.h b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h index b3c7f21087669..572ff92ed9b43 100644 --- a/c_glib/arrow-cuda-glib/arrow-cuda-glib.h +++ b/c_glib/arrow-cuda-glib/arrow-cuda-glib.h @@ -21,4 +21,6 @@ #include +#include + #include diff --git a/c_glib/arrow-cuda-glib/cuda.h b/c_glib/arrow-cuda-glib/cuda.h index 863743a620bf8..f04a3381259bb 100644 --- a/c_glib/arrow-cuda-glib/cuda.h +++ b/c_glib/arrow-cuda-glib/cuda.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GARROW_CUDA_TYPE_DEVICE_MANAGER (garrow_cuda_device_manager_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDADeviceManager, garrow_cuda_device_manager, GARROW_CUDA, @@ -35,6 +38,7 @@ struct _GArrowCUDADeviceManagerClass }; #define GARROW_CUDA_TYPE_CONTEXT (garrow_cuda_context_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 
G_DECLARE_DERIVABLE_TYPE( GArrowCUDAContext, garrow_cuda_context, GARROW_CUDA, CONTEXT, GObject) struct _GArrowCUDAContextClass @@ -43,6 +47,7 @@ struct _GArrowCUDAContextClass }; #define GARROW_CUDA_TYPE_BUFFER (garrow_cuda_buffer_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GArrowCUDABuffer, garrow_cuda_buffer, GARROW_CUDA, BUFFER, GArrowBuffer) struct _GArrowCUDABufferClass @@ -51,6 +56,7 @@ struct _GArrowCUDABufferClass }; #define GARROW_CUDA_TYPE_HOST_BUFFER (garrow_cuda_host_buffer_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDAHostBuffer, garrow_cuda_host_buffer, GARROW_CUDA, @@ -62,6 +68,7 @@ struct _GArrowCUDAHostBufferClass }; #define GARROW_CUDA_TYPE_IPC_MEMORY_HANDLE (garrow_cuda_ipc_memory_handle_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDAIPCMemoryHandle, garrow_cuda_ipc_memory_handle, GARROW_CUDA, @@ -73,6 +80,7 @@ struct _GArrowCUDAIPCMemoryHandleClass }; #define GARROW_CUDA_TYPE_BUFFER_INPUT_STREAM (garrow_cuda_buffer_input_stream_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferInputStream, garrow_cuda_buffer_input_stream, GARROW_CUDA, @@ -85,6 +93,7 @@ struct _GArrowCUDABufferInputStreamClass #define GARROW_CUDA_TYPE_BUFFER_OUTPUT_STREAM \ (garrow_cuda_buffer_output_stream_get_type()) +GARROW_CUDA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowCUDABufferOutputStream, garrow_cuda_buffer_output_stream, GARROW_CUDA, @@ -95,71 +104,100 @@ struct _GArrowCUDABufferOutputStreamClass GArrowOutputStreamClass parent_class; }; +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDADeviceManager * garrow_cuda_device_manager_new(GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAContext * garrow_cuda_device_manager_get_context(GArrowCUDADeviceManager *manager, gint gpu_number, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 gsize garrow_cuda_device_manager_get_n_devices(GArrowCUDADeviceManager *manager); +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_context_get_allocated_size(GArrowCUDAContext *context); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new(GArrowCUDAContext *context, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new_ipc(GArrowCUDAContext *context, GArrowCUDAIPCMemoryHandle *handle, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABuffer * garrow_cuda_buffer_new_record_batch(GArrowCUDAContext *context, GArrowRecordBatch *record_batch, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GBytes * garrow_cuda_buffer_copy_to_host(GArrowCUDABuffer *buffer, gint64 position, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 gboolean garrow_cuda_buffer_copy_from_host(GArrowCUDABuffer *buffer, const guint8 *data, gint64 size, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAIPCMemoryHandle * garrow_cuda_buffer_export(GArrowCUDABuffer *buffer, GError **error); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAContext * garrow_cuda_buffer_get_context(GArrowCUDABuffer *buffer); + +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowRecordBatch * garrow_cuda_buffer_read_record_batch(GArrowCUDABuffer *buffer, GArrowSchema *schema, GArrowReadOptions *options, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAHostBuffer * garrow_cuda_host_buffer_new(gint gpu_number, gint64 size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDAIPCMemoryHandle * garrow_cuda_ipc_memory_handle_new(const guint8 *data, gsize size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 
GArrowBuffer * garrow_cuda_ipc_memory_handle_serialize(GArrowCUDAIPCMemoryHandle *handle, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABufferInputStream * garrow_cuda_buffer_input_stream_new(GArrowCUDABuffer *buffer); +GARROW_CUDA_AVAILABLE_IN_0_12 GArrowCUDABufferOutputStream * garrow_cuda_buffer_output_stream_new(GArrowCUDABuffer *buffer); +GARROW_CUDA_AVAILABLE_IN_0_12 gboolean garrow_cuda_buffer_output_stream_set_buffer_size(GArrowCUDABufferOutputStream *stream, gint64 size, GError **error); +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_buffer_output_stream_get_buffer_size(GArrowCUDABufferOutputStream *stream); + +GARROW_CUDA_AVAILABLE_IN_0_12 gint64 garrow_cuda_buffer_output_stream_get_buffered_size(GArrowCUDABufferOutputStream *stream); diff --git a/c_glib/arrow-cuda-glib/meson.build b/c_glib/arrow-cuda-glib/meson.build index 88029e6dc2073..47bed70f03b60 100644 --- a/c_glib/arrow-cuda-glib/meson.build +++ b/c_glib/arrow-cuda-glib/meson.build @@ -31,10 +31,17 @@ cpp_headers = files( 'cuda.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GARROW_CUDA', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-cuda-glib') - dependencies = [ arrow_cuda, arrow_glib, @@ -45,6 +52,7 @@ libarrow_cuda_glib = library('arrow-cuda-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGARROW_CUDA_COMPILATION'], soversion: so_version, version: library_version) arrow_cuda_glib = declare_dependency(link_with: libarrow_cuda_glib, diff --git a/c_glib/arrow-cuda-glib/version.h.in b/c_glib/arrow-cuda-glib/version.h.in new file mode 100644 index 0000000000000..0ab5bfd562b41 --- /dev/null +++ b/c_glib/arrow-cuda-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-cuda-glib/arrow-cuda-glib.h + * + * Apache Arrow CUDA GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GARROW_CUDA_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GARROW_CUDA_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GARROW_CUDA_VERSION_MICRO: + * + * The micro version. 
+ * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GARROW_CUDA_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_TAG "@VERSION_TAG@" + +/** + * GARROW_CUDA_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GARROW_CUDA_VERSION_CHECK(major, minor, micro) \ + (GARROW_CUDA_VERSION_MAJOR > (major) || \ + (GARROW_CUDA_VERSION_MAJOR == (major) && \ + GARROW_CUDA_VERSION_MINOR > (minor)) || \ + (GARROW_CUDA_VERSION_MAJOR == (major) && \ + GARROW_CUDA_VERSION_MINOR == (minor) && \ + GARROW_CUDA_VERSION_MICRO >= (micro))) + +/** + * GARROW_CUDA_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GARROW_CUDA_DISABLE_DEPRECATION_WARNINGS +# define GARROW_CUDA_DEPRECATED +# define GARROW_CUDA_DEPRECATED_FOR(function) +# define GARROW_CUDA_UNAVAILABLE(major, minor) +#else +# define GARROW_CUDA_DEPRECATED G_DEPRECATED +# define GARROW_CUDA_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GARROW_CUDA_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GARROW_CUDA_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_CUDA_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GARROW_CUDA_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-cuda-glib/arrow-cuda-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GARROW_CUDA_VERSION_MIN_REQUIRED +# define GARROW_CUDA_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GARROW_CUDA_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GARROW_CUDA_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GARROW_CUDA_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-cuda-glib/arrow-cuda-glib.h header. 
+ * + * Since: 17.0.0 + */ +#ifndef GARROW_CUDA_VERSION_MAX_ALLOWED +# define GARROW_CUDA_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h index 58f4e216cc715..7ebf36ddd2b78 100644 --- a/c_glib/arrow-dataset-glib/arrow-dataset-glib.h +++ b/c_glib/arrow-dataset-glib/arrow-dataset-glib.h @@ -21,6 +21,8 @@ #include +#include + #include #include #include diff --git a/c_glib/arrow-dataset-glib/dataset-definition.h b/c_glib/arrow-dataset-glib/dataset-definition.h index f278b05a135f5..bc52d6d3663a3 100644 --- a/c_glib/arrow-dataset-glib/dataset-definition.h +++ b/c_glib/arrow-dataset-glib/dataset-definition.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GADATASET_TYPE_DATASET (gadataset_dataset_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetDataset, gadataset_dataset, GADATASET, DATASET, GObject) struct _GADatasetDatasetClass { diff --git a/c_glib/arrow-dataset-glib/dataset-factory.h b/c_glib/arrow-dataset-glib/dataset-factory.h index 1dc875837fe21..e7d3bc27aea8f 100644 --- a/c_glib/arrow-dataset-glib/dataset-factory.h +++ b/c_glib/arrow-dataset-glib/dataset-factory.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GADATASET_TYPE_FINISH_OPTIONS (gadataset_finish_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFinishOptions, gadataset_finish_options, GADATASET, FINISH_OPTIONS, GObject) struct _GADatasetFinishOptionsClass @@ -31,11 +32,12 @@ struct _GADatasetFinishOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetFinishOptions * gadataset_finish_options_new(void); #define GADATASET_TYPE_DATASET_FACTORY (gadataset_dataset_factory_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GADatasetDatasetFactory, gadataset_dataset_factory, GADATASET, DATASET_FACTORY, GObject) struct _GADatasetDatasetFactoryClass @@ -43,7 +45,7 @@ struct _GADatasetDatasetFactoryClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetDataset * gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, GADatasetFinishOptions *options, @@ -51,6 +53,7 @@ gadataset_dataset_factory_finish(GADatasetDatasetFactory *factory, #define GADATASET_TYPE_FILE_SYSTEM_DATASET_FACTORY \ (gadataset_file_system_dataset_factory_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetFactory, gadataset_file_system_dataset_factory, GADATASET, @@ -61,32 +64,33 @@ struct _GADatasetFileSystemDatasetFactoryClass GADatasetDatasetFactoryClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetFileSystemDatasetFactory * gadataset_file_system_dataset_factory_new(GADatasetFileFormat *file_format); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_set_file_system( GADatasetFileSystemDatasetFactory *factory, GArrowFileSystem *file_system, GError **error); +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_set_file_system_uri( GADatasetFileSystemDatasetFactory *factory, const gchar *uri, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_add_path(GADatasetFileSystemDatasetFactory *factory, const gchar *path, GError **error); /* -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean 
gadataset_file_system_dataset_factory_add_file( GADatasetFileSystemDatasetFactory *factory, GArrowFileInfo *file, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gboolean gadataset_file_system_dataset_factory_add_selector( GADatasetFileSystemDatasetFactory *factory, @@ -94,7 +98,7 @@ gadataset_file_system_dataset_factory_add_selector( GError **error); */ -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetFileSystemDataset * gadataset_file_system_dataset_factory_finish(GADatasetFileSystemDatasetFactory *factory, GADatasetFinishOptions *options, diff --git a/c_glib/arrow-dataset-glib/dataset.h b/c_glib/arrow-dataset-glib/dataset.h index 57f6c7729f073..657de330e6c49 100644 --- a/c_glib/arrow-dataset-glib/dataset.h +++ b/c_glib/arrow-dataset-glib/dataset.h @@ -25,18 +25,19 @@ G_BEGIN_DECLS -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScannerBuilder * gadataset_dataset_begin_scan(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GArrowTable * gadataset_dataset_to_table(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 gchar * gadataset_dataset_get_type_name(GADatasetDataset *dataset); #define GADATASET_TYPE_FILE_SYSTEM_DATASET_WRITE_OPTIONS \ (gadataset_file_system_dataset_write_options_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDatasetWriteOptions, gadataset_file_system_dataset_write_options, GADATASET, @@ -47,11 +48,12 @@ struct _GADatasetFileSystemDatasetWriteOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileSystemDatasetWriteOptions * gadataset_file_system_dataset_write_options_new(void); #define GADATASET_TYPE_FILE_SYSTEM_DATASET (gadataset_file_system_dataset_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileSystemDataset, gadataset_file_system_dataset, GADATASET, @@ -62,7 +64,7 @@ struct _GADatasetFileSystemDatasetClass GADatasetDatasetClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_system_dataset_write_scanner( GADatasetScanner *scanner, diff --git a/c_glib/arrow-dataset-glib/file-format.h b/c_glib/arrow-dataset-glib/file-format.h index 29487e59d70dd..f70523597e7c6 100644 --- a/c_glib/arrow-dataset-glib/file-format.h +++ b/c_glib/arrow-dataset-glib/file-format.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GADATASET_TYPE_FILE_WRITE_OPTIONS (gadataset_file_write_options_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetFileWriteOptions, gadataset_file_write_options, GADATASET, @@ -35,6 +38,7 @@ struct _GADatasetFileWriteOptionsClass }; #define GADATASET_TYPE_FILE_WRITER (gadataset_file_writer_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFileWriter, gadataset_file_writer, GADATASET, FILE_WRITER, GObject) struct _GADatasetFileWriterClass @@ -42,21 +46,22 @@ struct _GADatasetFileWriterClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_write_record_batch(GADatasetFileWriter *writer, GArrowRecordBatch *record_batch, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_write_record_batch_reader(GADatasetFileWriter *writer, GArrowRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_file_writer_finish(GADatasetFileWriter *writer, 
GError **error); #define GADATASET_TYPE_FILE_FORMAT (gadataset_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFileFormat, gadataset_file_format, GADATASET, FILE_FORMAT, GObject) struct _GADatasetFileFormatClass @@ -64,13 +69,13 @@ struct _GADatasetFileFormatClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 gchar * gadataset_file_format_get_type_name(GADatasetFileFormat *format); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileWriteOptions * gadataset_file_format_get_default_write_options(GADatasetFileFormat *format); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetFileWriter * gadataset_file_format_open_writer(GADatasetFileFormat *format, GArrowOutputStream *destination, @@ -80,12 +85,13 @@ gadataset_file_format_open_writer(GADatasetFileFormat *format, GADatasetFileWriteOptions *options, GError **error); -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 gboolean gadataset_file_format_equal(GADatasetFileFormat *format, GADatasetFileFormat *other_format); #define GADATASET_TYPE_CSV_FILE_FORMAT (gadataset_csv_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetCSVFileFormat, gadataset_csv_file_format, GADATASET, @@ -96,11 +102,12 @@ struct _GADatasetCSVFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetCSVFileFormat * gadataset_csv_file_format_new(void); #define GADATASET_TYPE_IPC_FILE_FORMAT (gadataset_ipc_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetIPCFileFormat, gadataset_ipc_file_format, GADATASET, @@ -111,11 +118,12 @@ struct _GADatasetIPCFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetIPCFileFormat * gadataset_ipc_file_format_new(void); #define GADATASET_TYPE_PARQUET_FILE_FORMAT (gadataset_parquet_file_format_get_type()) +GADATASET_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GADatasetParquetFileFormat, gadataset_parquet_file_format, GADATASET, @@ -126,7 +134,7 @@ struct _GADatasetParquetFileFormatClass GADatasetFileFormatClass parent_class; }; -GARROW_AVAILABLE_IN_3_0 +GADATASET_AVAILABLE_IN_3_0 GADatasetParquetFileFormat * gadataset_parquet_file_format_new(void); diff --git a/c_glib/arrow-dataset-glib/fragment.h b/c_glib/arrow-dataset-glib/fragment.h index 49acc360a3679..80eb9e19df3cc 100644 --- a/c_glib/arrow-dataset-glib/fragment.h +++ b/c_glib/arrow-dataset-glib/fragment.h @@ -21,11 +21,14 @@ #include +#include + G_BEGIN_DECLS /* arrow::dataset::Fragment */ #define GADATASET_TYPE_FRAGMENT (gadataset_fragment_get_type()) +GADATASET_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GADatasetFragment, gadataset_fragment, GADATASET, FRAGMENT, GObject) struct _GADatasetFragmentClass @@ -36,6 +39,7 @@ struct _GADatasetFragmentClass /* arrow::dataset::InMemoryFragment */ #define GADATASET_TYPE_IN_MEMORY_FRAGMENT (gadataset_in_memory_fragment_get_type()) +GADATASET_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GADatasetInMemoryFragment, gadataset_in_memory_fragment, GADATASET, @@ -46,7 +50,7 @@ struct _GADatasetInMemoryFragmentClass GADatasetFragmentClass parent_class; }; -GARROW_AVAILABLE_IN_4_0 +GADATASET_AVAILABLE_IN_4_0 GADatasetInMemoryFragment * gadataset_in_memory_fragment_new(GArrowSchema *schema, GArrowRecordBatch **record_batches, diff --git a/c_glib/arrow-dataset-glib/meson.build b/c_glib/arrow-dataset-glib/meson.build index 0c869a4183efa..2d54efadfa230 100644 --- 
a/c_glib/arrow-dataset-glib/meson.build +++ b/c_glib/arrow-dataset-glib/meson.build @@ -17,6 +17,8 @@ # specific language governing permissions and limitations # under the License. +project_name = 'arrow-dataset-glib' + sources = files( 'dataset-factory.cpp', 'dataset.cpp', @@ -47,20 +49,27 @@ cpp_headers = files( 'scanner.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GADATASET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + enums = gnome.mkenums('enums', sources: c_headers, identifier_prefix: 'GADataset', symbol_prefix: 'gadataset', c_template: 'enums.c.template', h_template: 'enums.h.template', - install_dir: join_paths(include_dir, 'arrow-dataset-glib'), + install_dir: join_paths(include_dir, project_name), install_header: true) enums_source = enums[0] enums_header = enums[1] - headers = c_headers + cpp_headers -install_headers(headers, subdir: 'arrow-dataset-glib') +install_headers(headers, subdir: project_name) dependencies = [ arrow_dataset, @@ -72,6 +81,7 @@ libarrow_dataset_glib = library('arrow-dataset-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGADATASET_COMPILATION'], soversion: so_version, version: library_version) arrow_dataset_glib = declare_dependency(link_with: libarrow_dataset_glib, diff --git a/c_glib/arrow-dataset-glib/partitioning.h b/c_glib/arrow-dataset-glib/partitioning.h index ccf476272e429..7671958d88e61 100644 --- a/c_glib/arrow-dataset-glib/partitioning.h +++ b/c_glib/arrow-dataset-glib/partitioning.h @@ -21,6 +21,8 @@ #include +#include + G_BEGIN_DECLS /** @@ -39,6 +41,7 @@ typedef enum { #define GADATASET_TYPE_PARTITIONING_FACTORY_OPTIONS \ (gadataset_partitioning_factory_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetPartitioningFactoryOptions, gadataset_partitioning_factory_options, GADATASET, @@ -49,11 +52,12 @@ struct _GADatasetPartitioningFactoryOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetPartitioningFactoryOptions * gadataset_partitioning_factory_options_new(void); #define GADATASET_TYPE_PARTITIONING (gadataset_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GADatasetPartitioning, gadataset_partitioning, GADATASET, PARTITIONING, GObject) struct _GADatasetPartitioningClass @@ -61,16 +65,17 @@ struct _GADatasetPartitioningClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gchar * gadataset_partitioning_get_type_name(GADatasetPartitioning *partitioning); -GARROW_AVAILABLE_IN_12_0 +GADATASET_AVAILABLE_IN_12_0 GADatasetPartitioning * gadataset_partitioning_create_default(void); #define GADATASET_TYPE_KEY_VALUE_PARTITIONING_OPTIONS \ (gadataset_key_value_partitioning_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioningOptions, gadataset_key_value_partitioning_options, GADATASET, @@ -81,12 +86,13 @@ struct _GADatasetKeyValuePartitioningOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetKeyValuePartitioningOptions * gadataset_key_value_partitioning_options_new(void); #define GADATASET_TYPE_KEY_VALUE_PARTITIONING \ (gadataset_key_value_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetKeyValuePartitioning, 
gadataset_key_value_partitioning, GADATASET, @@ -99,6 +105,7 @@ struct _GADatasetKeyValuePartitioningClass #define GADATASET_TYPE_DIRECTORY_PARTITIONING \ (gadataset_directory_partitioning_get_type()) +GADATASET_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GADatasetDirectoryPartitioning, gadataset_directory_partitioning, GADATASET, @@ -109,7 +116,7 @@ struct _GADatasetDirectoryPartitioningClass GADatasetKeyValuePartitioningClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetDirectoryPartitioning * gadataset_directory_partitioning_new(GArrowSchema *schema, GList *dictionaries, @@ -118,6 +125,7 @@ gadataset_directory_partitioning_new(GArrowSchema *schema, #define GADATASET_TYPE_HIVE_PARTITIONING_OPTIONS \ (gadataset_hive_partitioning_options_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetHivePartitioningOptions, gadataset_hive_partitioning_options, GADATASET, @@ -128,11 +136,12 @@ struct _GADatasetHivePartitioningOptionsClass GADatasetKeyValuePartitioningOptionsClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetHivePartitioningOptions * gadataset_hive_partitioning_options_new(void); #define GADATASET_TYPE_HIVE_PARTITIONING (gadataset_hive_partitioning_get_type()) +GADATASET_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GADatasetHivePartitioning, gadataset_hive_partitioning, GADATASET, @@ -143,13 +152,13 @@ struct _GADatasetHivePartitioningClass GADatasetKeyValuePartitioningClass parent_class; }; -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 GADatasetHivePartitioning * gadataset_hive_partitioning_new(GArrowSchema *schema, GList *dictionaries, GADatasetHivePartitioningOptions *options, GError **error); -GARROW_AVAILABLE_IN_11_0 +GADATASET_AVAILABLE_IN_11_0 gchar * gadataset_hive_partitioning_get_null_fallback(GADatasetHivePartitioning *partitioning); diff --git a/c_glib/arrow-dataset-glib/scanner.h b/c_glib/arrow-dataset-glib/scanner.h index 3c7432fb268e4..ad462391568a3 100644 --- a/c_glib/arrow-dataset-glib/scanner.h +++ b/c_glib/arrow-dataset-glib/scanner.h @@ -21,21 +21,24 @@ #include #include +#include G_BEGIN_DECLS #define GADATASET_TYPE_SCANNER (gadataset_scanner_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GADatasetScanner, gadataset_scanner, GADATASET, SCANNER, GObject) struct _GADatasetScannerClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GArrowTable * gadataset_scanner_to_table(GADatasetScanner *scanner, GError **error); #define GADATASET_TYPE_SCANNER_BUILDER (gadataset_scanner_builder_get_type()) +GADATASET_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GADatasetScannerBuilder, gadataset_scanner_builder, GADATASET, SCANNER_BUILDER, GObject) struct _GADatasetScannerBuilderClass @@ -43,20 +46,20 @@ struct _GADatasetScannerBuilderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScannerBuilder * gadataset_scanner_builder_new(GADatasetDataset *dataset, GError **error); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 GADatasetScannerBuilder * gadataset_scanner_builder_new_record_batch_reader(GArrowRecordBatchReader *reader); -GARROW_AVAILABLE_IN_6_0 +GADATASET_AVAILABLE_IN_6_0 gboolean gadataset_scanner_builder_set_filter(GADatasetScannerBuilder *builder, GArrowExpression *expression, GError **error); -GARROW_AVAILABLE_IN_5_0 +GADATASET_AVAILABLE_IN_5_0 GADatasetScanner * gadataset_scanner_builder_finish(GADatasetScannerBuilder *builder, GError **error); diff --git 
a/c_glib/arrow-dataset-glib/version.h.in b/c_glib/arrow-dataset-glib/version.h.in new file mode 100644 index 0000000000000..7e678bda3a875 --- /dev/null +++ b/c_glib/arrow-dataset-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-dataset-glib/arrow-dataset-glib.h + * + * Apache Arrow Dataset GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GADATASET_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GADATASET_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GADATASET_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GADATASET_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_TAG "@VERSION_TAG@" + +/** + * GADATASET_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GADATASET_VERSION_CHECK(major, minor, micro) \ + (GADATASET_VERSION_MAJOR > (major) || \ + (GADATASET_VERSION_MAJOR == (major) && \ + GADATASET_VERSION_MINOR > (minor)) || \ + (GADATASET_VERSION_MAJOR == (major) && \ + GADATASET_VERSION_MINOR == (minor) && \ + GADATASET_VERSION_MICRO >= (micro))) + +/** + * GADATASET_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * arrow-glib/arrow-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GADATASET_DISABLE_DEPRECATION_WARNINGS +# define GADATASET_DEPRECATED +# define GADATASET_DEPRECATED_FOR(function) +# define GADATASET_UNAVAILABLE(major, minor) +#else +# define GADATASET_DEPRECATED G_DEPRECATED +# define GADATASET_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GADATASET_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GADATASET_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GADATASET_VERSION_0_10. 
+ * + * If you use any functions that is defined by newer version than + * %GADATASET_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-dataset-glib/arrow-dataset-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GADATASET_VERSION_MIN_REQUIRED +# define GADATASET_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GADATASET_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GADATASET_VERSION_0_10. + * + * If you use any functions that is defined by newer version than + * %GADATASET_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-dataset-glib/arrow-dataset-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GADATASET_VERSION_MAX_ALLOWED +# define GADATASET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-flight-glib/arrow-flight-glib.h b/c_glib/arrow-flight-glib/arrow-flight-glib.h index 6fc8f43d8406e..7e973dd125dd4 100644 --- a/c_glib/arrow-flight-glib/arrow-flight-glib.h +++ b/c_glib/arrow-flight-glib/arrow-flight-glib.h @@ -19,6 +19,8 @@ #pragma once +#include + #include #include #include diff --git a/c_glib/arrow-flight-glib/client.h b/c_glib/arrow-flight-glib/client.h index f67d58371d583..a91bbe55e3c04 100644 --- a/c_glib/arrow-flight-glib/client.h +++ b/c_glib/arrow-flight-glib/client.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GAFLIGHT_TYPE_STREAM_READER (gaflight_stream_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightStreamReader, gaflight_stream_reader, GAFLIGHT, @@ -35,6 +36,7 @@ struct _GAFlightStreamReaderClass }; #define GAFLIGHT_TYPE_CALL_OPTIONS (gaflight_call_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightCallOptions, gaflight_call_options, GAFLIGHT, CALL_OPTIONS, GObject) struct _GAFlightCallOptionsClass @@ -42,25 +44,26 @@ struct _GAFlightCallOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCallOptions * gaflight_call_options_new(void); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_add_header(GAFlightCallOptions *options, const gchar *name, const gchar *value); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_clear_headers(GAFlightCallOptions *options); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 void gaflight_call_options_foreach_header(GAFlightCallOptions *options, GAFlightHeaderFunc func, gpointer user_data); #define GAFLIGHT_TYPE_CLIENT_OPTIONS (gaflight_client_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightClientOptions, gaflight_client_options, GAFLIGHT, CLIENT_OPTIONS, GObject) struct _GAFlightClientOptionsClass @@ -68,28 +71,29 @@ struct _GAFlightClientOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClientOptions * gaflight_client_options_new(void); #define GAFLIGHT_TYPE_CLIENT (gaflight_client_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightClient, gaflight_client, GAFLIGHT, CLIENT, GObject) struct _GAFlightClientClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightClient * gaflight_client_new(GAFlightLocation *location, 
GAFlightClientOptions *options, GError **error); -GARROW_AVAILABLE_IN_8_0 +GAFLIGHT_AVAILABLE_IN_8_0 gboolean gaflight_client_close(GAFlightClient *client, GError **error); -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 gboolean gaflight_client_authenticate_basic_token(GAFlightClient *client, const gchar *user, @@ -99,21 +103,21 @@ gaflight_client_authenticate_basic_token(GAFlightClient *client, gchar **bearer_value, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_client_list_flights(GAFlightClient *client, GAFlightCriteria *criteria, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 GAFlightInfo * gaflight_client_get_flight_info(GAFlightClient *client, GAFlightDescriptor *descriptor, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightStreamReader * gaflight_client_do_get(GAFlightClient *client, GAFlightTicket *ticket, diff --git a/c_glib/arrow-flight-glib/client.hpp b/c_glib/arrow-flight-glib/client.hpp index 6d7bdcecf3006..185a28e6dc4bd 100644 --- a/c_glib/arrow-flight-glib/client.hpp +++ b/c_glib/arrow-flight-glib/client.hpp @@ -23,17 +23,23 @@ #include +GAFLIGHT_EXTERN GAFlightStreamReader * gaflight_stream_reader_new_raw(arrow::flight::FlightStreamReader *flight_reader, gboolean is_owner); +GAFLIGHT_EXTERN arrow::flight::FlightCallOptions * gaflight_call_options_get_raw(GAFlightCallOptions *options); +GAFLIGHT_EXTERN arrow::flight::FlightClientOptions * gaflight_client_options_get_raw(GAFlightClientOptions *options); +GAFLIGHT_EXTERN std::shared_ptr gaflight_client_get_raw(GAFlightClient *client); + +GAFLIGHT_EXTERN GAFlightClient * gaflight_client_new_raw(std::shared_ptr *flight_client); diff --git a/c_glib/arrow-flight-glib/common.h b/c_glib/arrow-flight-glib/common.h index fcb23b1885ea7..b1d89f79c357e 100644 --- a/c_glib/arrow-flight-glib/common.h +++ b/c_glib/arrow-flight-glib/common.h @@ -21,6 +21,8 @@ #include +#include + G_BEGIN_DECLS typedef void (*GAFlightHeaderFunc)(const gchar *name, @@ -28,40 +30,43 @@ typedef void (*GAFlightHeaderFunc)(const gchar *name, gpointer user_data); #define GAFLIGHT_TYPE_CRITERIA (gaflight_criteria_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightCriteria, gaflight_criteria, GAFLIGHT, CRITERIA, GObject) struct _GAFlightCriteriaClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCriteria * gaflight_criteria_new(GBytes *expression); #define GAFLIGHT_TYPE_LOCATION (gaflight_location_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightLocation, gaflight_location, GAFLIGHT, LOCATION, GObject) struct _GAFlightLocationClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightLocation * gaflight_location_new(const gchar *uri, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_location_to_string(GAFlightLocation *location); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_location_get_scheme(GAFlightLocation *location); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_location_equal(GAFlightLocation *location, GAFlightLocation *other_location); #define GAFLIGHT_TYPE_DESCRIPTOR (gaflight_descriptor_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightDescriptor, gaflight_descriptor, GAFLIGHT, DESCRIPTOR, GObject) struct _GAFlightDescriptorClass @@ -69,16 +74,17 @@ struct 
_GAFlightDescriptorClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_descriptor_to_string(GAFlightDescriptor *descriptor); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_descriptor_equal(GAFlightDescriptor *descriptor, GAFlightDescriptor *other_descriptor); #define GAFLIGHT_TYPE_PATH_DESCRIPTOR (gaflight_path_descriptor_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightPathDescriptor, gaflight_path_descriptor, GAFLIGHT, @@ -89,15 +95,16 @@ struct _GAFlightPathDescriptorClass GAFlightDescriptorClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightPathDescriptor * gaflight_path_descriptor_new(const gchar **paths, gsize n_paths); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar ** gaflight_path_descriptor_get_paths(GAFlightPathDescriptor *descriptor); #define GAFLIGHT_TYPE_COMMAND_DESCRIPTOR (gaflight_command_descriptor_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightCommandDescriptor, gaflight_command_descriptor, GAFLIGHT, @@ -108,56 +115,59 @@ struct _GAFlightCommandDescriptorClass GAFlightDescriptorClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightCommandDescriptor * gaflight_command_descriptor_new(const gchar *command); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gchar * gaflight_command_descriptor_get_command(GAFlightCommandDescriptor *descriptor); #define GAFLIGHT_TYPE_TICKET (gaflight_ticket_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightTicket, gaflight_ticket, GAFLIGHT, TICKET, GObject) struct _GAFlightTicketClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightTicket * gaflight_ticket_new(GBytes *data); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_ticket_equal(GAFlightTicket *ticket, GAFlightTicket *other_ticket); #define GAFLIGHT_TYPE_ENDPOINT (gaflight_endpoint_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightEndpoint, gaflight_endpoint, GAFLIGHT, ENDPOINT, GObject) struct _GAFlightEndpointClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightEndpoint * gaflight_endpoint_new(GAFlightTicket *ticket, GList *locations); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_endpoint_equal(GAFlightEndpoint *endpoint, GAFlightEndpoint *other_endpoint); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_endpoint_get_locations(GAFlightEndpoint *endpoint); #define GAFLIGHT_TYPE_INFO (gaflight_info_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightInfo, gaflight_info, GAFLIGHT, INFO, GObject) struct _GAFlightInfoClass { GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightInfo * gaflight_info_new(GArrowSchema *schema, GAFlightDescriptor *descriptor, @@ -166,27 +176,28 @@ gaflight_info_new(GArrowSchema *schema, gint64 total_bytes, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_info_equal(GAFlightInfo *info, GAFlightInfo *other_info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GArrowSchema * gaflight_info_get_schema(GAFlightInfo *info, GArrowReadOptions *options, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightDescriptor * gaflight_info_get_descriptor(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * 
gaflight_info_get_endpoints(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint64 gaflight_info_get_total_records(GAFlightInfo *info); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint64 gaflight_info_get_total_bytes(GAFlightInfo *info); #define GAFLIGHT_TYPE_STREAM_CHUNK (gaflight_stream_chunk_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GAFlightStreamChunk, gaflight_stream_chunk, GAFLIGHT, STREAM_CHUNK, GObject) struct _GAFlightStreamChunkClass @@ -194,14 +205,15 @@ struct _GAFlightStreamChunkClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowRecordBatch * gaflight_stream_chunk_get_data(GAFlightStreamChunk *chunk); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowBuffer * gaflight_stream_chunk_get_metadata(GAFlightStreamChunk *chunk); #define GAFLIGHT_TYPE_RECORD_BATCH_READER (gaflight_record_batch_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchReader, gaflight_record_batch_reader, GAFLIGHT, @@ -212,11 +224,11 @@ struct _GAFlightRecordBatchReaderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightStreamChunk * gaflight_record_batch_reader_read_next(GAFlightRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GArrowTable * gaflight_record_batch_reader_read_all(GAFlightRecordBatchReader *reader, GError **error); diff --git a/c_glib/arrow-flight-glib/common.hpp b/c_glib/arrow-flight-glib/common.hpp index b748d6f382184..db56fff579baf 100644 --- a/c_glib/arrow-flight-glib/common.hpp +++ b/c_glib/arrow-flight-glib/common.hpp @@ -23,39 +23,59 @@ #include +GAFLIGHT_EXTERN GAFlightCriteria * gaflight_criteria_new_raw(const arrow::flight::Criteria *flight_criteria); + +GAFLIGHT_EXTERN arrow::flight::Criteria * gaflight_criteria_get_raw(GAFlightCriteria *criteria); +GAFLIGHT_EXTERN arrow::flight::Location * gaflight_location_get_raw(GAFlightLocation *location); +GAFLIGHT_EXTERN GAFlightDescriptor * gaflight_descriptor_new_raw(const arrow::flight::FlightDescriptor *flight_descriptor); + +GAFLIGHT_EXTERN arrow::flight::FlightDescriptor * gaflight_descriptor_get_raw(GAFlightDescriptor *descriptor); +GAFLIGHT_EXTERN GAFlightTicket * gaflight_ticket_new_raw(const arrow::flight::Ticket *flight_ticket); + +GAFLIGHT_EXTERN arrow::flight::Ticket * gaflight_ticket_get_raw(GAFlightTicket *ticket); +GAFLIGHT_EXTERN GAFlightEndpoint * gaflight_endpoint_new_raw(const arrow::flight::FlightEndpoint *flight_endpoint, GAFlightTicket *ticket); + +GAFLIGHT_EXTERN arrow::flight::FlightEndpoint * gaflight_endpoint_get_raw(GAFlightEndpoint *endpoint); +GAFLIGHT_EXTERN GAFlightInfo * gaflight_info_new_raw(arrow::flight::FlightInfo *flight_info); + +GAFLIGHT_EXTERN arrow::flight::FlightInfo * gaflight_info_get_raw(GAFlightInfo *info); +GAFLIGHT_EXTERN GAFlightStreamChunk * gaflight_stream_chunk_new_raw(arrow::flight::FlightStreamChunk *flight_chunk); + +GAFLIGHT_EXTERN arrow::flight::FlightStreamChunk * gaflight_stream_chunk_get_raw(GAFlightStreamChunk *chunk); +GAFLIGHT_EXTERN arrow::flight::MetadataRecordBatchReader * gaflight_record_batch_reader_get_raw(GAFlightRecordBatchReader *reader); diff --git a/c_glib/arrow-flight-glib/meson.build b/c_glib/arrow-flight-glib/meson.build index 70db7400b124a..c1422e0d10a7d 100644 --- a/c_glib/arrow-flight-glib/meson.build +++ b/c_glib/arrow-flight-glib/meson.build @@ -37,6 +37,14 @@ cpp_headers = files( 'server.hpp', ) +version_h = configure_file( + 
input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GAFLIGHT', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-glib') @@ -50,6 +58,7 @@ libarrow_flight_glib = library('arrow-flight-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHT_COMPILATION'], soversion: so_version, version: library_version) arrow_flight_glib = declare_dependency(link_with: libarrow_flight_glib, diff --git a/c_glib/arrow-flight-glib/server.h b/c_glib/arrow-flight-glib/server.h index 89f5a0a596e9e..7e594febb172f 100644 --- a/c_glib/arrow-flight-glib/server.h +++ b/c_glib/arrow-flight-glib/server.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GAFLIGHT_TYPE_DATA_STREAM (gaflight_data_stream_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GAFlightDataStream, gaflight_data_stream, GAFLIGHT, DATA_STREAM, GObject) struct _GAFlightDataStreamClass @@ -32,6 +33,7 @@ struct _GAFlightDataStreamClass }; #define GAFLIGHT_TYPE_RECORD_BATCH_STREAM (gaflight_record_batch_stream_get_type()) +GAFLIGHT_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GAFlightRecordBatchStream, gaflight_record_batch_stream, GAFLIGHT, @@ -42,12 +44,13 @@ struct _GAFlightRecordBatchStreamClass GAFlightDataStreamClass parent_class; }; -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightRecordBatchStream * gaflight_record_batch_stream_new(GArrowRecordBatchReader *reader, GArrowWriteOptions *options); #define GAFLIGHT_TYPE_MESSAGE_READER (gaflight_message_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightMessageReader, gaflight_message_reader, GAFLIGHT, @@ -58,11 +61,12 @@ struct _GAFlightMessageReaderClass GAFlightRecordBatchReaderClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHT_AVAILABLE_IN_14_0 GAFlightDescriptor * gaflight_message_reader_get_descriptor(GAFlightMessageReader *reader); #define GAFLIGHT_TYPE_SERVER_CALL_CONTEXT (gaflight_server_call_context_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCallContext, gaflight_server_call_context, GAFLIGHT, @@ -73,13 +77,14 @@ struct _GAFlightServerCallContextClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHT_AVAILABLE_IN_14_0 void gaflight_server_call_context_foreach_incoming_header(GAFlightServerCallContext *context, GAFlightHeaderFunc func, gpointer user_data); #define GAFLIGHT_TYPE_SERVER_AUTH_SENDER (gaflight_server_auth_sender_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthSender, gaflight_server_auth_sender, GAFLIGHT, @@ -90,13 +95,14 @@ struct _GAFlightServerAuthSenderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 gboolean gaflight_server_auth_sender_write(GAFlightServerAuthSender *sender, GBytes *message, GError **error); #define GAFLIGHT_TYPE_SERVER_AUTH_READER (gaflight_server_auth_reader_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthReader, gaflight_server_auth_reader, GAFLIGHT, @@ -107,11 +113,12 @@ struct _GAFlightServerAuthReaderClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 GBytes * gaflight_server_auth_reader_read(GAFlightServerAuthReader *reader, GError **error); #define GAFLIGHT_TYPE_SERVER_AUTH_HANDLER (gaflight_server_auth_handler_get_type()) 
+GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerAuthHandler, gaflight_server_auth_handler, GAFLIGHT, @@ -124,6 +131,7 @@ struct _GAFlightServerAuthHandlerClass #define GAFLIGHT_TYPE_SERVER_CUSTOM_AUTH_HANDLER \ (gaflight_server_custom_auth_handler_get_type()) +GAFLIGHT_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServerCustomAuthHandler, gaflight_server_custom_auth_handler, GAFLIGHT, @@ -152,7 +160,7 @@ struct _GAFlightServerCustomAuthHandlerClass GError **error); }; -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 void gaflight_server_custom_auth_handler_authenticate(GAFlightServerCustomAuthHandler *handler, GAFlightServerCallContext *context, @@ -160,7 +168,7 @@ gaflight_server_custom_auth_handler_authenticate(GAFlightServerCustomAuthHandler GAFlightServerAuthReader *reader, GError **error); -GARROW_AVAILABLE_IN_12_0 +GAFLIGHT_AVAILABLE_IN_12_0 GBytes * gaflight_server_custom_auth_handler_is_valid(GAFlightServerCustomAuthHandler *handler, GAFlightServerCallContext *context, @@ -168,6 +176,7 @@ gaflight_server_custom_auth_handler_is_valid(GAFlightServerCustomAuthHandler *ha GError **error); #define GAFLIGHT_TYPE_SERVER_OPTIONS (gaflight_server_options_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GAFlightServerOptions, gaflight_server_options, GAFLIGHT, SERVER_OPTIONS, GObject) struct _GAFlightServerOptionsClass @@ -175,14 +184,16 @@ struct _GAFlightServerOptionsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GAFlightServerOptions * gaflight_server_options_new(GAFlightLocation *location); #define GAFLIGHT_TYPE_SERVABLE (gaflight_servable_get_type()) +GAFLIGHT_AVAILABLE_IN_9_0 G_DECLARE_INTERFACE(GAFlightServable, gaflight_servable, GAFLIGHT, SERVABLE, GObject) #define GAFLIGHT_TYPE_SERVER (gaflight_server_get_type()) +GAFLIGHT_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GAFlightServer, gaflight_server, GAFLIGHT, SERVER, GObject) /** * GAFlightServerClass: @@ -209,34 +220,34 @@ struct _GAFlightServerClass GError **error); }; -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_listen(GAFlightServer *server, GAFlightServerOptions *options, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gint gaflight_server_get_port(GAFlightServer *server); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_shutdown(GAFlightServer *server, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 gboolean gaflight_server_wait(GAFlightServer *server, GError **error); -GARROW_AVAILABLE_IN_5_0 +GAFLIGHT_AVAILABLE_IN_5_0 GList * gaflight_server_list_flights(GAFlightServer *server, GAFlightServerCallContext *context, GAFlightCriteria *criteria, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHT_AVAILABLE_IN_9_0 GAFlightInfo * gaflight_server_get_flight_info(GAFlightServer *server, GAFlightServerCallContext *context, GAFlightDescriptor *request, GError **error); -GARROW_AVAILABLE_IN_6_0 +GAFLIGHT_AVAILABLE_IN_6_0 GAFlightDataStream * gaflight_server_do_get(GAFlightServer *server, GAFlightServerCallContext *context, diff --git a/c_glib/arrow-flight-glib/server.hpp b/c_glib/arrow-flight-glib/server.hpp index 70da867d5b0e9..ec4815751c8d8 100644 --- a/c_glib/arrow-flight-glib/server.hpp +++ b/c_glib/arrow-flight-glib/server.hpp @@ -23,34 +23,49 @@ #include +GAFLIGHT_EXTERN arrow::flight::FlightDataStream * gaflight_data_stream_get_raw(GAFlightDataStream *stream); +GAFLIGHT_EXTERN GAFlightMessageReader * 
gaflight_message_reader_new_raw(arrow::flight::FlightMessageReader *flight_reader, gboolean is_owner); + +GAFLIGHT_EXTERN arrow::flight::FlightMessageReader * gaflight_message_reader_get_raw(GAFlightMessageReader *reader); +GAFLIGHT_EXTERN GAFlightServerCallContext * gaflight_server_call_context_new_raw( const arrow::flight::ServerCallContext *flight_call_context); + +GAFLIGHT_EXTERN const arrow::flight::ServerCallContext * gaflight_server_call_context_get_raw(GAFlightServerCallContext *call_context); +GAFLIGHT_EXTERN GAFlightServerAuthSender * gaflight_server_auth_sender_new_raw(arrow::flight::ServerAuthSender *flight_sender); + +GAFLIGHT_EXTERN arrow::flight::ServerAuthSender * gaflight_server_auth_sender_get_raw(GAFlightServerAuthSender *sender); +GAFLIGHT_EXTERN GAFlightServerAuthReader * gaflight_server_auth_reader_new_raw(arrow::flight::ServerAuthReader *flight_reader); + +GAFLIGHT_EXTERN arrow::flight::ServerAuthReader * gaflight_server_auth_reader_get_raw(GAFlightServerAuthReader *reader); +GAFLIGHT_EXTERN std::shared_ptr gaflight_server_auth_handler_get_raw(GAFlightServerAuthHandler *handler); +GAFLIGHT_EXTERN arrow::flight::FlightServerOptions * gaflight_server_options_get_raw(GAFlightServerOptions *options); @@ -61,6 +76,7 @@ struct _GAFlightServableInterface arrow::flight::FlightServerBase *(*get_raw)(GAFlightServable *servable); }; +GAFLIGHT_EXTERN arrow::flight::FlightServerBase * gaflight_servable_get_raw(GAFlightServable *servable); diff --git a/c_glib/arrow-flight-glib/version.h.in b/c_glib/arrow-flight-glib/version.h.in new file mode 100644 index 0000000000000..4a42c7f5aa91e --- /dev/null +++ b/c_glib/arrow-flight-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-flight-glib/arrow-flight-glib.h + * + * Apache Arrow Flight GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GAFLIGHT_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GAFLIGHT_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GAFLIGHT_VERSION_MICRO: + * + * The micro version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GAFLIGHT_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_TAG "@VERSION_TAG@" + +/** + * GAFLIGHT_VERSION_CHECK: + * @major: A major version to check for. 
+ * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow Flight GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GAFLIGHT_VERSION_CHECK(major, minor, micro) \ + (GAFLIGHT_VERSION_MAJOR > (major) || \ + (GAFLIGHT_VERSION_MAJOR == (major) && \ + GAFLIGHT_VERSION_MINOR > (minor)) || \ + (GAFLIGHT_VERSION_MAJOR == (major) && \ + GAFLIGHT_VERSION_MINOR == (minor) && \ + GAFLIGHT_VERSION_MICRO >= (micro))) + +/** + * GAFLIGHT_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecation warnings are produced. + * + * You must define this macro before including the + * arrow-flight-glib/arrow-flight-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GAFLIGHT_DISABLE_DEPRECATION_WARNINGS +# define GAFLIGHT_DEPRECATED +# define GAFLIGHT_DEPRECATED_FOR(function) +# define GAFLIGHT_UNAVAILABLE(major, minor) +#else +# define GAFLIGHT_DEPRECATED G_DEPRECATED +# define GAFLIGHT_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GAFLIGHT_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GAFLIGHT_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHT_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GAFLIGHT_VERSION_MIN_REQUIRED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-flight-glib/arrow-flight-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GAFLIGHT_VERSION_MIN_REQUIRED +# define GAFLIGHT_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GAFLIGHT_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHT_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GAFLIGHT_VERSION_MAX_ALLOWED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-flight-glib/arrow-flight-glib.h header.
+ * + * Since: 17.0.0 + */ +#ifndef GAFLIGHT_VERSION_MAX_ALLOWED +# define GAFLIGHT_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h b/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h index 8ebe39aee57a8..94e72d06f2b47 100644 --- a/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h +++ b/c_glib/arrow-flight-sql-glib/arrow-flight-sql-glib.h @@ -19,5 +19,7 @@ #pragma once +#include + #include #include diff --git a/c_glib/arrow-flight-sql-glib/client.h b/c_glib/arrow-flight-sql-glib/client.h index 9a5a8987f7195..b9e9baf41a59f 100644 --- a/c_glib/arrow-flight-sql-glib/client.h +++ b/c_glib/arrow-flight-sql-glib/client.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GAFLIGHTSQL_TYPE_PREPARED_STATEMENT (gaflightsql_prepared_statement_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLPreparedStatement, gaflightsql_prepared_statement, GAFLIGHTSQL, @@ -34,52 +37,53 @@ struct _GAFlightSQLPreparedStatementClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightInfo * gaflightsql_prepared_statement_execute(GAFlightSQLPreparedStatement *statement, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gint64 gaflightsql_prepared_statement_execute_update(GAFlightSQLPreparedStatement *statement, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_prepared_statement_get_parameter_schema( GAFlightSQLPreparedStatement *statement); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_prepared_statement_get_dataset_schema( GAFlightSQLPreparedStatement *statement); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_set_record_batch(GAFlightSQLPreparedStatement *statement, GArrowRecordBatch *record_batch, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_set_record_batch_reader( GAFlightSQLPreparedStatement *statement, GArrowRecordBatchReader *reader, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_close(GAFlightSQLPreparedStatement *statement, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 gboolean gaflightsql_prepared_statement_is_closed(GAFlightSQLPreparedStatement *statement); #define GAFLIGHTSQL_TYPE_CLIENT (gaflightsql_client_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLClient, gaflightsql_client, GAFLIGHTSQL, CLIENT, GObject) struct _GAFlightSQLClientClass @@ -87,32 +91,32 @@ struct _GAFlightSQLClientClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightSQLClient * gaflightsql_client_new(GAFlightClient *client); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightInfo * gaflightsql_client_execute(GAFlightSQLClient *client, const gchar *query, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 gint64 gaflightsql_client_execute_update(GAFlightSQLClient *client, const gchar *query, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightStreamReader * gaflightsql_client_do_get(GAFlightSQLClient *client, GAFlightTicket 
*ticket, GAFlightCallOptions *options, GError **error); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLPreparedStatement * gaflightsql_client_prepare(GAFlightSQLClient *client, const gchar *query, diff --git a/c_glib/arrow-flight-sql-glib/meson.build b/c_glib/arrow-flight-sql-glib/meson.build index e7abc605bb819..d588ba4917c76 100644 --- a/c_glib/arrow-flight-sql-glib/meson.build +++ b/c_glib/arrow-flight-sql-glib/meson.build @@ -34,6 +34,14 @@ cpp_headers = files( 'server.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GAFLIGHTSQL', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: 'arrow-flight-sql-glib') @@ -47,6 +55,7 @@ libarrow_flight_sql_glib = library('arrow-flight-sql-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGAFLIGHTSQL_COMPILATION'], soversion: so_version, version: library_version) arrow_flight_sql_glib = \ diff --git a/c_glib/arrow-flight-sql-glib/server.h b/c_glib/arrow-flight-sql-glib/server.h index d6fd7e4d10394..8cf0aace77644 100644 --- a/c_glib/arrow-flight-sql-glib/server.h +++ b/c_glib/arrow-flight-sql-glib/server.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GAFLIGHTSQL_TYPE_COMMAND (gaflightsql_command_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLCommand, gaflightsql_command, GAFLIGHTSQL, COMMAND, GObject) struct _GAFlightSQLCommandClass @@ -32,6 +35,7 @@ struct _GAFlightSQLCommandClass }; #define GAFLIGHTSQL_TYPE_STATEMENT_QUERY (gaflightsql_statement_query_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementQuery, gaflightsql_statement_query, GAFLIGHTSQL, @@ -42,11 +46,12 @@ struct _GAFlightSQLStatementQueryClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 const gchar * gaflightsql_statement_query_get_query(GAFlightSQLStatementQuery *command); #define GAFLIGHTSQL_TYPE_STATEMENT_UPDATE (gaflightsql_statement_update_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_13_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementUpdate, gaflightsql_statement_update, GAFLIGHTSQL, @@ -57,12 +62,13 @@ struct _GAFlightSQLStatementUpdateClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 const gchar * gaflightsql_statement_update_get_query(GAFlightSQLStatementUpdate *command); #define GAFLIGHTSQL_TYPE_PREPARED_STATEMENT_UPDATE \ (gaflightsql_prepared_statement_update_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLPreparedStatementUpdate, gaflightsql_prepared_statement_update, GAFLIGHTSQL, @@ -73,13 +79,14 @@ struct _GAFlightSQLPreparedStatementUpdateClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_prepared_statement_update_get_handle( GAFlightSQLPreparedStatementUpdate *command); #define GAFLIGHTSQL_TYPE_STATEMENT_QUERY_TICKET \ (gaflightsql_statement_query_ticket_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLStatementQueryTicket, gaflightsql_statement_query_ticket, GAFLIGHTSQL, @@ -90,15 +97,16 @@ struct _GAFlightSQLStatementQueryTicketClass GAFlightSQLCommandClass parent_class; }; -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GBytes * 
gaflightsql_statement_query_ticket_generate_handle(const gchar *query, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GBytes * gaflightsql_statement_query_ticket_get_handle(GAFlightSQLStatementQueryTicket *command); #define GAFLIGHTSQL_TYPE_CREATE_PREPARED_STATEMENT_REQUEST \ (gaflightsql_create_prepared_statement_request_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLCreatePreparedStatementRequest, gaflightsql_create_prepared_statement_request, GAFLIGHTSQL, @@ -109,18 +117,19 @@ struct _GAFlightSQLCreatePreparedStatementRequestClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 const gchar * gaflightsql_create_prepared_statement_request_get_query( GAFlightSQLCreatePreparedStatementRequest *request); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 const gchar * gaflightsql_create_prepared_statement_request_get_transaction_id( GAFlightSQLCreatePreparedStatementRequest *request); #define GAFLIGHTSQL_TYPE_CREATE_PREPARED_STATEMENT_RESULT \ (gaflightsql_create_prepared_statement_result_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLCreatePreparedStatementResult, gaflightsql_create_prepared_statement_result, GAFLIGHTSQL, @@ -131,36 +140,37 @@ struct _GAFlightSQLCreatePreparedStatementResultClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLCreatePreparedStatementResult * gaflightsql_create_prepared_statement_result_new(void); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_dataset_schema( GAFlightSQLCreatePreparedStatementResult *result, GArrowSchema *schema); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_create_prepared_statement_result_get_dataset_schema( GAFlightSQLCreatePreparedStatementResult *result); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_parameter_schema( GAFlightSQLCreatePreparedStatementResult *result, GArrowSchema *schema); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GArrowSchema * gaflightsql_create_prepared_statement_result_get_parameter_schema( GAFlightSQLCreatePreparedStatementResult *result); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_create_prepared_statement_result_set_handle( GAFlightSQLCreatePreparedStatementResult *result, GBytes *handle); -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_create_prepared_statement_result_get_handle( GAFlightSQLCreatePreparedStatementResult *result); #define GAFLIGHTSQL_TYPE_CLOSE_PREPARED_STATEMENT_REQUEST \ (gaflightsql_close_prepared_statement_request_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GAFlightSQLClosePreparedStatementRequest, gaflightsql_close_prepared_statement_request, GAFLIGHTSQL, @@ -171,12 +181,13 @@ struct _GAFlightSQLClosePreparedStatementRequestClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_14_0 +GAFLIGHTSQL_AVAILABLE_IN_14_0 GBytes * gaflightsql_close_prepared_statement_request_get_handle( GAFlightSQLClosePreparedStatementRequest *request); #define GAFLIGHTSQL_TYPE_SERVER (gaflightsql_server_get_type()) +GAFLIGHTSQL_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE( GAFlightSQLServer, gaflightsql_server, GAFLIGHTSQL, SERVER, GAFlightServer) /** @@ -231,27 +242,27 @@ struct _GAFlightSQLServerClass GError **error); }; -GARROW_AVAILABLE_IN_9_0 
+GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightInfo * gaflightsql_server_get_flight_info_statement(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementQuery *command, GAFlightDescriptor *descriptor, GError **error); -GARROW_AVAILABLE_IN_9_0 +GAFLIGHTSQL_AVAILABLE_IN_9_0 GAFlightDataStream * gaflightsql_server_do_get_statement(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementQueryTicket *ticket, GError **error); -GARROW_AVAILABLE_IN_13_0 +GAFLIGHTSQL_AVAILABLE_IN_13_0 gint64 gaflightsql_server_do_put_command_statement_update(GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLStatementUpdate *command, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 gint64 gaflightsql_server_do_put_prepared_statement_update( GAFlightSQLServer *server, @@ -259,16 +270,16 @@ gaflightsql_server_do_put_prepared_statement_update( GAFlightSQLPreparedStatementUpdate *command, GAFlightMessageReader *reader, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 GAFlightSQLCreatePreparedStatementResult * gaflightsql_server_create_prepared_statement( GAFlightSQLServer *server, GAFlightServerCallContext *context, GAFlightSQLCreatePreparedStatementRequest *request, GError **error); -/* We can restore this after we bump version to 14.0.0-SNAPSHOT. */ -/* GARROW_AVAILABLE_IN_14_0 */ + +GAFLIGHTSQL_AVAILABLE_IN_14_0 void gaflightsql_server_close_prepared_statement( GAFlightSQLServer *server, diff --git a/c_glib/arrow-flight-sql-glib/version.h.in b/c_glib/arrow-flight-sql-glib/version.h.in new file mode 100644 index 0000000000000..e4373109b9008 --- /dev/null +++ b/c_glib/arrow-flight-sql-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: arrow-flight-sql-glib/arrow-flight-sql-glib.h + * + * Apache Arrow Flight SQL GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GAFLIGHTSQL_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GAFLIGHTSQL_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GAFLIGHTSQL_VERSION_MICRO: + * + * The micro version. 
+ * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GAFLIGHTSQL_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_TAG "@VERSION_TAG@" + +/** + * GAFLIGHTSQL_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Arrow Flight SQL GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GAFLIGHTSQL_VERSION_CHECK(major, minor, micro) \ + (GAFLIGHTSQL_VERSION_MAJOR > (major) || \ + (GAFLIGHTSQL_VERSION_MAJOR == (major) && \ + GAFLIGHTSQL_VERSION_MINOR > (minor)) || \ + (GAFLIGHTSQL_VERSION_MAJOR == (major) && \ + GAFLIGHTSQL_VERSION_MINOR == (minor) && \ + GAFLIGHTSQL_VERSION_MICRO >= (micro))) + +/** + * GAFLIGHTSQL_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecation warnings are produced. + * + * You must define this macro before including the + * arrow-flight-sql-glib/arrow-flight-sql-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GAFLIGHTSQL_DISABLE_DEPRECATION_WARNINGS +# define GAFLIGHTSQL_DEPRECATED +# define GAFLIGHTSQL_DEPRECATED_FOR(function) +# define GAFLIGHTSQL_UNAVAILABLE(major, minor) +#else +# define GAFLIGHTSQL_DEPRECATED G_DEPRECATED +# define GAFLIGHTSQL_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GAFLIGHTSQL_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GAFLIGHTSQL_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHTSQL_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GAFLIGHTSQL_VERSION_MIN_REQUIRED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-flight-sql-glib/arrow-flight-sql-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GAFLIGHTSQL_VERSION_MIN_REQUIRED +# define GAFLIGHTSQL_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GAFLIGHTSQL_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GAFLIGHTSQL_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GAFLIGHTSQL_VERSION_MAX_ALLOWED, deprecation warnings are produced at + * compile time. + * + * You must define this macro before including the + * arrow-flight-sql-glib/arrow-flight-sql-glib.h header.
+ * + * Since: 17.0.0 + */ +#ifndef GAFLIGHTSQL_VERSION_MAX_ALLOWED +# define GAFLIGHTSQL_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index b498ecb51cedb..9b7c608ca8a5b 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -231,8 +231,8 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder, if (n_remains > 0) { ++n_loops; } + std::vector data(value_size * chunk_size); for (gint64 i = 0; i < n_loops; ++i) { - uint8_t data[value_size * chunk_size]; uint8_t *valid_bytes = nullptr; uint8_t valid_bytes_buffer[chunk_size]; if (is_valids_length > 0) { @@ -255,7 +255,7 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder, value = values[offset + j]; } if (value) { - get_value_function(data + (value_size * j), value, value_size); + get_value_function(data.data() + (value_size * j), value, value_size); } else { is_valid = false; if (!valid_bytes) { @@ -267,7 +267,7 @@ garrow_array_builder_append_values(GArrowArrayBuilder *builder, valid_bytes_buffer[j] = is_valid; } } - auto status = arrow_builder->AppendValues(data, n_values, valid_bytes); + auto status = arrow_builder->AppendValues(data.data(), n_values, valid_bytes); if (!garrow_error_check(error, status, context)) { return FALSE; } @@ -1035,13 +1035,13 @@ garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, gint64 is_valids_length, GError **error) { - guint8 arrow_values[values_length]; + std::vector arrow_values(values_length); for (gint64 i = 0; i < values_length; ++i) { arrow_values[i] = values[i]; } return garrow_array_builder_append_values( GARROW_ARRAY_BUILDER(builder), - arrow_values, + arrow_values.data(), values_length, is_valids, is_valids_length, diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index 8a1385b9b8c1b..6a0d0154833a7 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_ARRAY_BUILDER (garrow_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowArrayBuilder, garrow_array_builder, GARROW, ARRAY_BUILDER, GObject) struct _GArrowArrayBuilderClass @@ -33,11 +34,15 @@ struct _GArrowArrayBuilderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_array_builder_get_value_data_type(GArrowArrayBuilder *builder); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_array_builder_get_value_type(GArrowArrayBuilder *builder); +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_array_builder_finish(GArrowArrayBuilder *builder, GError **error); @@ -86,6 +91,7 @@ garrow_array_builder_append_empty_values(GArrowArrayBuilder *builder, GError **error); #define GARROW_TYPE_NULL_ARRAY_BUILDER (garrow_null_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_13 G_DECLARE_DERIVABLE_TYPE(GArrowNullArrayBuilder, garrow_null_array_builder, GARROW, @@ -114,6 +120,7 @@ garrow_null_array_builder_append_nulls(GArrowNullArrayBuilder *builder, #endif #define GARROW_TYPE_BOOLEAN_ARRAY_BUILDER (garrow_boolean_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBooleanArrayBuilder, garrow_boolean_array_builder, GARROW, @@ -124,10 +131,12 @@ struct _GArrowBooleanArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBooleanArrayBuilder * garrow_boolean_array_builder_new(void); #ifndef 
GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_boolean_array_builder_append_value) gboolean garrow_boolean_array_builder_append(GArrowBooleanArrayBuilder *builder, @@ -139,6 +148,8 @@ gboolean garrow_boolean_array_builder_append_value(GArrowBooleanArrayBuilder *builder, gboolean value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_boolean_array_builder_append_values(GArrowBooleanArrayBuilder *builder, const gboolean *values, @@ -159,6 +170,7 @@ garrow_boolean_array_builder_append_nulls(GArrowBooleanArrayBuilder *builder, #endif #define GARROW_TYPE_INT_ARRAY_BUILDER (garrow_int_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowIntArrayBuilder, garrow_int_array_builder, GARROW, @@ -169,10 +181,12 @@ struct _GArrowIntArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowIntArrayBuilder * garrow_int_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int_array_builder_append_value) gboolean garrow_int_array_builder_append(GArrowIntArrayBuilder *builder, @@ -184,6 +198,7 @@ gboolean garrow_int_array_builder_append_value(GArrowIntArrayBuilder *builder, gint64 value, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, const gint64 *values, @@ -192,9 +207,12 @@ garrow_int_array_builder_append_values(GArrowIntArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int_array_builder_append_null(GArrowIntArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder, @@ -203,6 +221,7 @@ garrow_int_array_builder_append_nulls(GArrowIntArrayBuilder *builder, #endif #define GARROW_TYPE_UINT_ARRAY_BUILDER (garrow_uint_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUIntArrayBuilder, garrow_uint_array_builder, GARROW, @@ -213,10 +232,12 @@ struct _GArrowUIntArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUIntArrayBuilder * garrow_uint_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint_array_builder_append_value) gboolean garrow_uint_array_builder_append(GArrowUIntArrayBuilder *builder, @@ -228,6 +249,8 @@ gboolean garrow_uint_array_builder_append_value(GArrowUIntArrayBuilder *builder, guint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, const guint64 *values, @@ -236,9 +259,12 @@ garrow_uint_array_builder_append_values(GArrowUIntArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint_array_builder_append_null(GArrowUIntArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder, @@ -247,6 +273,7 @@ garrow_uint_array_builder_append_nulls(GArrowUIntArrayBuilder *builder, #endif #define GARROW_TYPE_INT8_ARRAY_BUILDER 
(garrow_int8_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt8ArrayBuilder, garrow_int8_array_builder, GARROW, @@ -257,10 +284,12 @@ struct _GArrowInt8ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt8ArrayBuilder * garrow_int8_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int8_array_builder_append_value) gboolean garrow_int8_array_builder_append(GArrowInt8ArrayBuilder *builder, @@ -272,6 +301,8 @@ gboolean garrow_int8_array_builder_append_value(GArrowInt8ArrayBuilder *builder, gint8 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, const gint8 *values, @@ -280,9 +311,12 @@ garrow_int8_array_builder_append_values(GArrowInt8ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int8_array_builder_append_null(GArrowInt8ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder, @@ -291,6 +325,7 @@ garrow_int8_array_builder_append_nulls(GArrowInt8ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT8_ARRAY_BUILDER (garrow_uint8_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt8ArrayBuilder, garrow_uint8_array_builder, GARROW, @@ -301,10 +336,12 @@ struct _GArrowUInt8ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt8ArrayBuilder * garrow_uint8_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint8_array_builder_append_value) gboolean garrow_uint8_array_builder_append(GArrowUInt8ArrayBuilder *builder, @@ -316,6 +353,8 @@ gboolean garrow_uint8_array_builder_append_value(GArrowUInt8ArrayBuilder *builder, guint8 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, const guint8 *values, @@ -324,9 +363,12 @@ garrow_uint8_array_builder_append_values(GArrowUInt8ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint8_array_builder_append_null(GArrowUInt8ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder, @@ -335,6 +377,7 @@ garrow_uint8_array_builder_append_nulls(GArrowUInt8ArrayBuilder *builder, #endif #define GARROW_TYPE_INT16_ARRAY_BUILDER (garrow_int16_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt16ArrayBuilder, garrow_int16_array_builder, GARROW, @@ -345,6 +388,7 @@ struct _GArrowInt16ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt16ArrayBuilder * garrow_int16_array_builder_new(void); @@ -360,6 +404,7 @@ gboolean garrow_int16_array_builder_append_value(GArrowInt16ArrayBuilder *builder, gint16 value, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, const gint16 
*values, @@ -368,9 +413,12 @@ garrow_int16_array_builder_append_values(GArrowInt16ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int16_array_builder_append_null(GArrowInt16ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder, @@ -379,6 +427,7 @@ garrow_int16_array_builder_append_nulls(GArrowInt16ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT16_ARRAY_BUILDER (garrow_uint16_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt16ArrayBuilder, garrow_uint16_array_builder, GARROW, @@ -389,10 +438,12 @@ struct _GArrowUInt16ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt16ArrayBuilder * garrow_uint16_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint16_array_builder_append_value) gboolean garrow_uint16_array_builder_append(GArrowUInt16ArrayBuilder *builder, @@ -404,6 +455,8 @@ gboolean garrow_uint16_array_builder_append_value(GArrowUInt16ArrayBuilder *builder, guint16 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, const guint16 *values, @@ -412,10 +465,13 @@ garrow_uint16_array_builder_append_values(GArrowUInt16ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint16_array_builder_append_null(GArrowUInt16ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder, @@ -424,6 +480,7 @@ garrow_uint16_array_builder_append_nulls(GArrowUInt16ArrayBuilder *builder, #endif #define GARROW_TYPE_INT32_ARRAY_BUILDER (garrow_int32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt32ArrayBuilder, garrow_int32_array_builder, GARROW, @@ -434,10 +491,12 @@ struct _GArrowInt32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt32ArrayBuilder * garrow_int32_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int32_array_builder_append_value) gboolean garrow_int32_array_builder_append(GArrowInt32ArrayBuilder *builder, @@ -449,6 +508,8 @@ gboolean garrow_int32_array_builder_append_value(GArrowInt32ArrayBuilder *builder, gint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder, const gint32 *values, @@ -457,9 +518,12 @@ garrow_int32_array_builder_append_values(GArrowInt32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int32_array_builder_append_null(GArrowInt32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder, @@ -468,6 
+532,7 @@ garrow_int32_array_builder_append_nulls(GArrowInt32ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT32_ARRAY_BUILDER (garrow_uint32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt32ArrayBuilder, garrow_uint32_array_builder, GARROW, @@ -478,10 +543,12 @@ struct _GArrowUInt32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt32ArrayBuilder * garrow_uint32_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint32_array_builder_append_value) gboolean garrow_uint32_array_builder_append(GArrowUInt32ArrayBuilder *builder, @@ -493,6 +560,8 @@ gboolean garrow_uint32_array_builder_append_value(GArrowUInt32ArrayBuilder *builder, guint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder, const guint32 *values, @@ -501,10 +570,13 @@ garrow_uint32_array_builder_append_values(GArrowUInt32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint32_array_builder_append_null(GArrowUInt32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder, @@ -513,6 +585,7 @@ garrow_uint32_array_builder_append_nulls(GArrowUInt32ArrayBuilder *builder, #endif #define GARROW_TYPE_INT64_ARRAY_BUILDER (garrow_int64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt64ArrayBuilder, garrow_int64_array_builder, GARROW, @@ -523,10 +596,12 @@ struct _GArrowInt64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt64ArrayBuilder * garrow_int64_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_int64_array_builder_append_value) gboolean garrow_int64_array_builder_append(GArrowInt64ArrayBuilder *builder, @@ -538,6 +613,8 @@ gboolean garrow_int64_array_builder_append_value(GArrowInt64ArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder, const gint64 *values, @@ -546,9 +623,12 @@ garrow_int64_array_builder_append_values(GArrowInt64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_int64_array_builder_append_null(GArrowInt64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder, @@ -557,6 +637,7 @@ garrow_int64_array_builder_append_nulls(GArrowInt64ArrayBuilder *builder, #endif #define GARROW_TYPE_UINT64_ARRAY_BUILDER (garrow_uint64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt64ArrayBuilder, garrow_uint64_array_builder, GARROW, @@ -567,10 +648,12 @@ struct _GArrowUInt64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt64ArrayBuilder * garrow_uint64_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL 
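Editorial note on the array-builder.cpp hunk earlier in this patch: the on-stack buffers declared as C99 variable-length arrays (uint8_t data[value_size * chunk_size] and guint8 arrow_values[values_length]) are replaced with std::vector because MSVC, targeted by the new windows-msvc job, does not implement VLAs. A minimal illustration of the pattern, not taken from the patch:

#include <cstdint>
#include <vector>

// Sketch only: mirrors the shape of garrow_array_builder_append_values().
void
fill_chunk(int64_t value_size, int64_t chunk_size)
{
  // uint8_t data[value_size * chunk_size];  // VLA: GCC/Clang extension, rejected by MSVC
  std::vector<uint8_t> data(value_size * chunk_size);  // portable and zero-initialized
  // ... write into data.data() and hand it to arrow_builder->AppendValues(data.data(), ...)
}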
GARROW_DEPRECATED_IN_0_12_FOR(garrow_uint64_array_builder_append_value) gboolean garrow_uint64_array_builder_append(GArrowUInt64ArrayBuilder *builder, @@ -582,6 +665,8 @@ gboolean garrow_uint64_array_builder_append_value(GArrowUInt64ArrayBuilder *builder, guint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder, const guint64 *values, @@ -590,10 +675,13 @@ garrow_uint64_array_builder_append_values(GArrowUInt64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_uint64_array_builder_append_null(GArrowUInt64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder, @@ -602,6 +690,7 @@ garrow_uint64_array_builder_append_nulls(GArrowUInt64ArrayBuilder *builder, #endif #define GARROW_TYPE_HALF_FLOAT_ARRAY_BUILDER (garrow_half_float_array_builder_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatArrayBuilder, garrow_half_float_array_builder, GARROW, @@ -631,6 +720,7 @@ garrow_half_float_array_builder_append_values(GArrowHalfFloatArrayBuilder *build GError **error); #define GARROW_TYPE_FLOAT_ARRAY_BUILDER (garrow_float_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFloatArrayBuilder, garrow_float_array_builder, GARROW, @@ -641,10 +731,12 @@ struct _GArrowFloatArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFloatArrayBuilder * garrow_float_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_float_array_builder_append_value) gboolean garrow_float_array_builder_append(GArrowFloatArrayBuilder *builder, @@ -656,6 +748,8 @@ gboolean garrow_float_array_builder_append_value(GArrowFloatArrayBuilder *builder, gfloat value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder, const gfloat *values, @@ -664,9 +758,12 @@ garrow_float_array_builder_append_values(GArrowFloatArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_float_array_builder_append_null(GArrowFloatArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder, @@ -675,6 +772,7 @@ garrow_float_array_builder_append_nulls(GArrowFloatArrayBuilder *builder, #endif #define GARROW_TYPE_DOUBLE_ARRAY_BUILDER (garrow_double_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDoubleArrayBuilder, garrow_double_array_builder, GARROW, @@ -685,10 +783,12 @@ struct _GArrowDoubleArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDoubleArrayBuilder * garrow_double_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_double_array_builder_append_value) gboolean garrow_double_array_builder_append(GArrowDoubleArrayBuilder *builder, @@ -700,6 +800,8 @@ gboolean 
garrow_double_array_builder_append_value(GArrowDoubleArrayBuilder *builder, gdouble value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder, const gdouble *values, @@ -708,10 +810,13 @@ garrow_double_array_builder_append_values(GArrowDoubleArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_double_array_builder_append_null(GArrowDoubleArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder, @@ -720,6 +825,7 @@ garrow_double_array_builder_append_nulls(GArrowDoubleArrayBuilder *builder, #endif #define GARROW_TYPE_BINARY_ARRAY_BUILDER (garrow_binary_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBinaryArrayBuilder, garrow_binary_array_builder, GARROW, @@ -730,10 +836,12 @@ struct _GArrowBinaryArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBinaryArrayBuilder * garrow_binary_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_binary_array_builder_append_value) gboolean garrow_binary_array_builder_append(GArrowBinaryArrayBuilder *builder, @@ -761,10 +869,12 @@ garrow_binary_array_builder_append_values(GArrowBinaryArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_binary_array_builder_append_null(GArrowBinaryArrayBuilder *builder, GError **error); + GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) GARROW_AVAILABLE_IN_0_16 gboolean @@ -775,6 +885,7 @@ garrow_binary_array_builder_append_nulls(GArrowBinaryArrayBuilder *builder, #define GARROW_TYPE_LARGE_BINARY_ARRAY_BUILDER \ (garrow_large_binary_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArrayBuilder, garrow_large_binary_array_builder, GARROW, @@ -821,6 +932,7 @@ garrow_large_binary_array_builder_append_nulls(GArrowLargeBinaryArrayBuilder *bu #endif #define GARROW_TYPE_STRING_ARRAY_BUILDER (garrow_string_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowStringArrayBuilder, garrow_string_array_builder, GARROW, @@ -831,10 +943,12 @@ struct _GArrowStringArrayBuilderClass GArrowBinaryArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStringArrayBuilder * garrow_string_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_string_array_builder_append_value) gboolean garrow_string_array_builder_append(GArrowStringArrayBuilder *builder, @@ -863,6 +977,7 @@ garrow_string_array_builder_append_string_len(GArrowStringArrayBuilder *builder, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_16_FOR(garrow_string_array_builder_append_strings) gboolean garrow_string_array_builder_append_values(GArrowStringArrayBuilder *builder, @@ -883,6 +998,7 @@ garrow_string_array_builder_append_strings(GArrowStringArrayBuilder *builder, #define GARROW_TYPE_LARGE_STRING_ARRAY_BUILDER \ (garrow_large_string_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_16 
G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArrayBuilder, garrow_large_string_array_builder, GARROW, @@ -919,6 +1035,7 @@ garrow_large_string_array_builder_append_strings(GArrowLargeStringArrayBuilder * #define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY_BUILDER \ (garrow_fixed_size_binary_array_builder_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArrayBuilder, garrow_fixed_size_binary_array_builder, GARROW, @@ -963,6 +1080,7 @@ garrow_fixed_size_binary_array_builder_append_values_packed( GError **error); #define GARROW_TYPE_DATE32_ARRAY_BUILDER (garrow_date32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDate32ArrayBuilder, garrow_date32_array_builder, GARROW, @@ -973,10 +1091,12 @@ struct _GArrowDate32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate32ArrayBuilder * garrow_date32_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_date32_array_builder_append_value) gboolean garrow_date32_array_builder_append(GArrowDate32ArrayBuilder *builder, @@ -988,6 +1108,8 @@ gboolean garrow_date32_array_builder_append_value(GArrowDate32ArrayBuilder *builder, gint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, const gint32 *values, @@ -996,10 +1118,13 @@ garrow_date32_array_builder_append_values(GArrowDate32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_date32_array_builder_append_null(GArrowDate32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder, @@ -1008,6 +1133,7 @@ garrow_date32_array_builder_append_nulls(GArrowDate32ArrayBuilder *builder, #endif #define GARROW_TYPE_DATE64_ARRAY_BUILDER (garrow_date64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDate64ArrayBuilder, garrow_date64_array_builder, GARROW, @@ -1018,10 +1144,12 @@ struct _GArrowDate64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate64ArrayBuilder * garrow_date64_array_builder_new(void); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_date64_array_builder_append_value) gboolean garrow_date64_array_builder_append(GArrowDate64ArrayBuilder *builder, @@ -1033,6 +1161,8 @@ gboolean garrow_date64_array_builder_append_value(GArrowDate64ArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, const gint64 *values, @@ -1041,10 +1171,13 @@ garrow_date64_array_builder_append_values(GArrowDate64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_date64_array_builder_append_null(GArrowDate64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder, @@ -1053,6 +1186,7 @@ 
garrow_date64_array_builder_append_nulls(GArrowDate64ArrayBuilder *builder, #endif #define GARROW_TYPE_TIMESTAMP_ARRAY_BUILDER (garrow_timestamp_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArrayBuilder, garrow_timestamp_array_builder, GARROW, @@ -1063,10 +1197,12 @@ struct _GArrowTimestampArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTimestampArrayBuilder * garrow_timestamp_array_builder_new(GArrowTimestampDataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_timestamp_array_builder_append_value) gboolean garrow_timestamp_array_builder_append(GArrowTimestampArrayBuilder *builder, @@ -1078,6 +1214,8 @@ gboolean garrow_timestamp_array_builder_append_value(GArrowTimestampArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builder, const gint64 *values, @@ -1086,10 +1224,13 @@ garrow_timestamp_array_builder_append_values(GArrowTimestampArrayBuilder *builde gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_timestamp_array_builder_append_null(GArrowTimestampArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder *builder, @@ -1098,6 +1239,7 @@ garrow_timestamp_array_builder_append_nulls(GArrowTimestampArrayBuilder *builder #endif #define GARROW_TYPE_TIME32_ARRAY_BUILDER (garrow_time32_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTime32ArrayBuilder, garrow_time32_array_builder, GARROW, @@ -1108,10 +1250,12 @@ struct _GArrowTime32ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime32ArrayBuilder * garrow_time32_array_builder_new(GArrowTime32DataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_time32_array_builder_append_value) gboolean garrow_time32_array_builder_append(GArrowTime32ArrayBuilder *builder, @@ -1123,6 +1267,8 @@ gboolean garrow_time32_array_builder_append_value(GArrowTime32ArrayBuilder *builder, gint32 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, const gint32 *values, @@ -1131,10 +1277,13 @@ garrow_time32_array_builder_append_values(GArrowTime32ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_time32_array_builder_append_null(GArrowTime32ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder, @@ -1143,6 +1292,7 @@ garrow_time32_array_builder_append_nulls(GArrowTime32ArrayBuilder *builder, #endif #define GARROW_TYPE_TIME64_ARRAY_BUILDER (garrow_time64_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTime64ArrayBuilder, garrow_time64_array_builder, GARROW, @@ -1153,10 +1303,12 @@ struct _GArrowTime64ArrayBuilderClass GArrowArrayBuilderClass parent_class; }; 
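Editorial note: the many GARROW_AVAILABLE_IN_ALL / GARROW_AVAILABLE_IN_x_y annotations being added in this header let consumers pin an API range before including the headers and get compile-time warnings for symbols outside that range, presumably in addition to carrying the library's export attribute on Windows builds. A minimal consumer-side sketch, assuming per-version macros such as GARROW_VERSION_12_0 are generated into arrow-glib/version.h:

/* Pin the Arrow GLib API range before the first include. */
#define GARROW_VERSION_MIN_REQUIRED GARROW_VERSION_12_0
#define GARROW_VERSION_MAX_ALLOWED GARROW_VERSION_12_0
#include <arrow-glib/arrow-glib.h>

int
main(void)
{
  /* garrow_int32_array_builder_new() is annotated GARROW_AVAILABLE_IN_ALL,
   * so this compiles without warnings; calling a function introduced after
   * 12.0 would produce a "not yet available" warning instead. */
  GArrowInt32ArrayBuilder *builder = garrow_int32_array_builder_new();
  g_object_unref(builder);
  return 0;
}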
+GARROW_AVAILABLE_IN_ALL GArrowTime64ArrayBuilder * garrow_time64_array_builder_new(GArrowTime64DataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_time64_array_builder_append_value) gboolean garrow_time64_array_builder_append(GArrowTime64ArrayBuilder *builder, @@ -1168,6 +1320,8 @@ gboolean garrow_time64_array_builder_append_value(GArrowTime64ArrayBuilder *builder, gint64 value, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, const gint64 *values, @@ -1176,10 +1330,13 @@ garrow_time64_array_builder_append_values(GArrowTime64ArrayBuilder *builder, gint64 is_valids_length, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_time64_array_builder_append_null(GArrowTime64ArrayBuilder *builder, GError **error); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_nulls) gboolean garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder, @@ -1189,6 +1346,7 @@ garrow_time64_array_builder_append_nulls(GArrowTime64ArrayBuilder *builder, #define GARROW_TYPE_MONTH_INTERVAL_ARRAY_BUILDER \ (garrow_month_interval_array_builder_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalArrayBuilder, garrow_month_interval_array_builder, GARROW, @@ -1220,6 +1378,7 @@ garrow_month_interval_array_builder_append_values( #define GARROW_TYPE_DAY_TIME_INTERVAL_ARRAY_BUILDER \ (garrow_day_time_interval_array_builder_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalArrayBuilder, garrow_day_time_interval_array_builder, GARROW, @@ -1252,6 +1411,7 @@ garrow_day_time_interval_array_builder_append_values( #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_ARRAY_BUILDER \ (garrow_month_day_nano_interval_array_builder_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalArrayBuilder, garrow_month_day_nano_interval_array_builder, GARROW, @@ -1284,6 +1444,7 @@ garrow_month_day_nano_interval_array_builder_append_values( #define GARROW_TYPE_BINARY_DICTIONARY_ARRAY_BUILDER \ (garrow_binary_dictionary_array_builder_get_type()) +GARROW_AVAILABLE_IN_2_0 G_DECLARE_DERIVABLE_TYPE(GArrowBinaryDictionaryArrayBuilder, garrow_binary_dictionary_array_builder, GARROW, @@ -1350,6 +1511,7 @@ garrow_binary_dictionary_array_builder_reset_full( #define GARROW_TYPE_STRING_DICTIONARY_ARRAY_BUILDER \ (garrow_string_dictionary_array_builder_get_type()) +GARROW_AVAILABLE_IN_2_0 G_DECLARE_DERIVABLE_TYPE(GArrowStringDictionaryArrayBuilder, garrow_string_dictionary_array_builder, GARROW, @@ -1408,6 +1570,7 @@ garrow_string_dictionary_array_builder_reset_full( GArrowStringDictionaryArrayBuilder *builder); #define GARROW_TYPE_LIST_ARRAY_BUILDER (garrow_list_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowListArrayBuilder, garrow_list_array_builder, GARROW, @@ -1418,10 +1581,12 @@ struct _GArrowListArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowListArrayBuilder * garrow_list_array_builder_new(GArrowListDataType *data_type, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_list_array_builder_append_value) gboolean garrow_list_array_builder_append(GArrowListArrayBuilder *builder, GError **error); @@ -1430,15 +1595,18 @@ 
GARROW_AVAILABLE_IN_0_12 gboolean garrow_list_array_builder_append_value(GArrowListArrayBuilder *builder, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_list_array_builder_append_null(GArrowListArrayBuilder *builder, GError **error); #endif +GARROW_AVAILABLE_IN_ALL GArrowArrayBuilder * garrow_list_array_builder_get_value_builder(GArrowListArrayBuilder *builder); #define GARROW_TYPE_LARGE_LIST_ARRAY_BUILDER (garrow_large_list_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListArrayBuilder, garrow_large_list_array_builder, GARROW, @@ -1468,6 +1636,7 @@ GArrowArrayBuilder * garrow_large_list_array_builder_get_value_builder(GArrowLargeListArrayBuilder *builder); #define GARROW_TYPE_STRUCT_ARRAY_BUILDER (garrow_struct_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowStructArrayBuilder, garrow_struct_array_builder, GARROW, @@ -1478,10 +1647,12 @@ struct _GArrowStructArrayBuilderClass GArrowArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStructArrayBuilder * garrow_struct_array_builder_new(GArrowStructDataType *data_type, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_struct_array_builder_append_value) gboolean garrow_struct_array_builder_append(GArrowStructArrayBuilder *builder, GError **error); @@ -1491,6 +1662,7 @@ gboolean garrow_struct_array_builder_append_value(GArrowStructArrayBuilder *builder, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_3_0_FOR(garrow_array_builder_append_null) gboolean garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder, @@ -1498,15 +1670,19 @@ garrow_struct_array_builder_append_null(GArrowStructArrayBuilder *builder, #endif #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_12_0_FOR(garrow_array_builder_get_child) GArrowArrayBuilder * garrow_struct_array_builder_get_field_builder(GArrowStructArrayBuilder *builder, gint i); + +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_12_0_FOR(garrow_array_builder_get_children) GList * garrow_struct_array_builder_get_field_builders(GArrowStructArrayBuilder *builder); #endif #define GARROW_TYPE_MAP_ARRAY_BUILDER (garrow_map_array_builder_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowMapArrayBuilder, garrow_map_array_builder, GARROW, @@ -1554,6 +1730,7 @@ GArrowArrayBuilder * garrow_map_array_builder_get_value_builder(GArrowMapArrayBuilder *builder); #define GARROW_TYPE_DECIMAL128_ARRAY_BUILDER (garrow_decimal128_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128ArrayBuilder, garrow_decimal128_array_builder, GARROW, @@ -1564,10 +1741,12 @@ struct _GArrowDecimal128ArrayBuilderClass GArrowFixedSizeBinaryArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDecimal128ArrayBuilder * garrow_decimal128_array_builder_new(GArrowDecimal128DataType *data_type); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_decimal128_array_builder_append_value) gboolean garrow_decimal128_array_builder_append(GArrowDecimal128ArrayBuilder *builder, @@ -1596,6 +1775,7 @@ garrow_decimal128_array_builder_append_null(GArrowDecimal128ArrayBuilder *builde #endif #define GARROW_TYPE_DECIMAL256_ARRAY_BUILDER (garrow_decimal256_array_builder_get_type()) +GARROW_AVAILABLE_IN_ALL 
G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256ArrayBuilder, garrow_decimal256_array_builder, GARROW, @@ -1606,6 +1786,7 @@ struct _GArrowDecimal256ArrayBuilderClass GArrowFixedSizeBinaryArrayBuilderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDecimal256ArrayBuilder * garrow_decimal256_array_builder_new(GArrowDecimal256DataType *data_type); @@ -1624,6 +1805,7 @@ garrow_decimal256_array_builder_append_values(GArrowDecimal256ArrayBuilder *buil GError **error); #define GARROW_TYPE_UNION_ARRAY_BUILDER (garrow_union_array_builder_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowUnionArrayBuilder, garrow_union_array_builder, GARROW, @@ -1648,6 +1830,7 @@ garrow_union_array_builder_append_value(GArrowUnionArrayBuilder *builder, #define GARROW_TYPE_DENSE_UNION_ARRAY_BUILDER \ (garrow_dense_union_array_builder_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArrayBuilder, garrow_dense_union_array_builder, GARROW, @@ -1664,6 +1847,7 @@ garrow_dense_union_array_builder_new(GArrowDenseUnionDataType *data_type, GError #define GARROW_TYPE_SPARSE_UNION_ARRAY_BUILDER \ (garrow_sparse_union_array_builder_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArrayBuilder, garrow_sparse_union_array_builder, GARROW, diff --git a/c_glib/arrow-glib/basic-array-definition.h b/c_glib/arrow-glib/basic-array-definition.h index 54642dae018ec..2fa67c09c1cc4 100644 --- a/c_glib/arrow-glib/basic-array-definition.h +++ b/c_glib/arrow-glib/basic-array-definition.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GARROW_TYPE_ARRAY (garrow_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowArray, garrow_array, GARROW, ARRAY, GObject) struct _GArrowArrayClass { @@ -31,6 +34,7 @@ struct _GArrowArrayClass }; #define GARROW_TYPE_EXTENSION_ARRAY (garrow_extension_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowExtensionArray, garrow_extension_array, GARROW, EXTENSION_ARRAY, GArrowArray) struct _GArrowExtensionArrayClass diff --git a/c_glib/arrow-glib/basic-array.h b/c_glib/arrow-glib/basic-array.h index ee6f40b1ddc24..95679aa37c57a 100644 --- a/c_glib/arrow-glib/basic-array.h +++ b/c_glib/arrow-glib/basic-array.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_EQUAL_OPTIONS (garrow_equal_options_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowEqualOptions, garrow_equal_options, GARROW, EQUAL_OPTIONS, GObject) struct _GArrowEqualOptionsClass @@ -52,6 +53,7 @@ garrow_array_export(GArrowArray *array, gpointer *c_abi_schema, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_equal(GArrowArray *array, GArrowArray *other_array); GARROW_AVAILABLE_IN_5_0 @@ -59,8 +61,11 @@ gboolean garrow_array_equal_options(GArrowArray *array, GArrowArray *other_array, GArrowEqualOptions *options); +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_equal_approx(GArrowArray *array, GArrowArray *other_array); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_equal_range(GArrowArray *array, gint64 start_index, @@ -69,37 +74,60 @@ garrow_array_equal_range(GArrowArray *array, gint64 end_index, GArrowEqualOptions *options); +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_is_null(GArrowArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_array_is_valid(GArrowArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_array_get_length(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_array_get_offset(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL gint64 
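/*
 * Editorial note, not part of the patch: the GARROW_AVAILABLE_IN_ALL /
 * GARROW_AVAILABLE_IN_x_y annotations added throughout these headers attach
 * an explicit export attribute to every public symbol (needed for shared
 * builds with MSVC, which does not export symbols by default) and, for
 * versioned symbols, can warn when the caller targets an older Arrow GLib
 * release.  The real definitions are generated into arrow-glib's version
 * header; the sketch below only illustrates the usual GLib-style pattern,
 * and GARROW_VERSION_MAX_ALLOWED / GARROW_UNAVAILABLE are assumed names
 * used here for illustration.
 */
#define GARROW_AVAILABLE_IN_ALL GARROW_EXTERN

#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_8_0
#  define GARROW_AVAILABLE_IN_8_0 GARROW_EXTERN GARROW_UNAVAILABLE(8, 0)
#else
#  define GARROW_AVAILABLE_IN_8_0 GARROW_EXTERN
#endif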
garrow_array_get_n_nulls(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_array_get_null_bitmap(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_array_get_value_data_type(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_array_get_value_type(GArrowArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_array_slice(GArrowArray *array, gint64 offset, gint64 length); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_array_to_string(GArrowArray *array, GError **error); + GARROW_AVAILABLE_IN_0_15 GArrowArray * garrow_array_view(GArrowArray *array, GArrowDataType *return_type, GError **error); + GARROW_AVAILABLE_IN_0_15 gchar * garrow_array_diff_unified(GArrowArray *array, GArrowArray *other_array); + GARROW_AVAILABLE_IN_4_0 GArrowArray * garrow_array_concatenate(GArrowArray *array, GList *other_arrays, GError **error); #define GARROW_TYPE_NULL_ARRAY (garrow_null_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowNullArray, garrow_null_array, GARROW, NULL_ARRAY, GArrowArray) struct _GArrowNullArrayClass @@ -107,10 +135,12 @@ struct _GArrowNullArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowNullArray * garrow_null_array_new(gint64 length); #define GARROW_TYPE_PRIMITIVE_ARRAY (garrow_primitive_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowPrimitiveArray, garrow_primitive_array, GARROW, PRIMITIVE_ARRAY, GArrowArray) struct _GArrowPrimitiveArrayClass @@ -119,6 +149,7 @@ struct _GArrowPrimitiveArrayClass }; #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_1_0_FOR(garrow_primitive_array_get_data_buffer) GArrowBuffer * garrow_primitive_array_get_buffer(GArrowPrimitiveArray *array); @@ -128,6 +159,7 @@ GArrowBuffer * garrow_primitive_array_get_data_buffer(GArrowPrimitiveArray *array); #define GARROW_TYPE_BOOLEAN_ARRAY (garrow_boolean_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowBooleanArray, garrow_boolean_array, GARROW, BOOLEAN_ARRAY, GArrowPrimitiveArray) struct _GArrowBooleanArrayClass @@ -135,18 +167,23 @@ struct _GArrowBooleanArrayClass GArrowPrimitiveArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBooleanArray * garrow_boolean_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gboolean garrow_boolean_array_get_value(GArrowBooleanArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL gboolean * garrow_boolean_array_get_values(GArrowBooleanArray *array, gint64 *length); #define GARROW_TYPE_NUMERIC_ARRAY (garrow_numeric_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowNumericArray, garrow_numeric_array, GARROW, NUMERIC_ARRAY, GArrowPrimitiveArray) struct _GArrowNumericArrayClass @@ -155,6 +192,7 @@ struct _GArrowNumericArrayClass }; #define GARROW_TYPE_INT8_ARRAY (garrow_int8_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt8Array, garrow_int8_array, GARROW, INT8_ARRAY, GArrowNumericArray) struct _GArrowInt8ArrayClass @@ -162,18 +200,23 @@ struct _GArrowInt8ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt8Array * garrow_int8_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint8 garrow_int8_array_get_value(GArrowInt8Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint8 * garrow_int8_array_get_values(GArrowInt8Array *array, gint64 *length); #define 
GARROW_TYPE_UINT8_ARRAY (garrow_uint8_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt8Array, garrow_uint8_array, GARROW, UINT8_ARRAY, GArrowNumericArray) struct _GArrowUInt8ArrayClass @@ -181,18 +224,23 @@ struct _GArrowUInt8ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt8Array * garrow_uint8_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint8 garrow_uint8_array_get_value(GArrowUInt8Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint8 * garrow_uint8_array_get_values(GArrowUInt8Array *array, gint64 *length); #define GARROW_TYPE_INT16_ARRAY (garrow_int16_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt16Array, garrow_int16_array, GARROW, INT16_ARRAY, GArrowNumericArray) struct _GArrowInt16ArrayClass @@ -200,18 +248,23 @@ struct _GArrowInt16ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt16Array * garrow_int16_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint16 garrow_int16_array_get_value(GArrowInt16Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint16 * garrow_int16_array_get_values(GArrowInt16Array *array, gint64 *length); #define GARROW_TYPE_UINT16_ARRAY (garrow_uint16_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt16Array, garrow_uint16_array, GARROW, UINT16_ARRAY, GArrowNumericArray) struct _GArrowUInt16ArrayClass @@ -219,18 +272,23 @@ struct _GArrowUInt16ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt16Array * garrow_uint16_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint16 garrow_uint16_array_get_value(GArrowUInt16Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint16 * garrow_uint16_array_get_values(GArrowUInt16Array *array, gint64 *length); #define GARROW_TYPE_INT32_ARRAY (garrow_int32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt32Array, garrow_int32_array, GARROW, INT32_ARRAY, GArrowNumericArray) struct _GArrowInt32ArrayClass @@ -238,18 +296,23 @@ struct _GArrowInt32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt32Array * garrow_int32_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint32 garrow_int32_array_get_value(GArrowInt32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint32 * garrow_int32_array_get_values(GArrowInt32Array *array, gint64 *length); #define GARROW_TYPE_UINT32_ARRAY (garrow_uint32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt32Array, garrow_uint32_array, GARROW, UINT32_ARRAY, GArrowNumericArray) struct _GArrowUInt32ArrayClass @@ -257,18 +320,23 @@ struct _GArrowUInt32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt32Array * garrow_uint32_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint32 garrow_uint32_array_get_value(GArrowUInt32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint32 * garrow_uint32_array_get_values(GArrowUInt32Array *array, gint64 *length); #define GARROW_TYPE_INT64_ARRAY (garrow_int64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInt64Array, garrow_int64_array, GARROW, 
INT64_ARRAY, GArrowNumericArray) struct _GArrowInt64ArrayClass @@ -276,18 +344,23 @@ struct _GArrowInt64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt64Array * garrow_int64_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_int64_array_get_value(GArrowInt64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_int64_array_get_values(GArrowInt64Array *array, gint64 *length); #define GARROW_TYPE_UINT64_ARRAY (garrow_uint64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUInt64Array, garrow_uint64_array, GARROW, UINT64_ARRAY, GArrowNumericArray) struct _GArrowUInt64ArrayClass @@ -295,18 +368,23 @@ struct _GArrowUInt64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt64Array * garrow_uint64_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL guint64 garrow_uint64_array_get_value(GArrowUInt64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const guint64 * garrow_uint64_array_get_values(GArrowUInt64Array *array, gint64 *length); #define GARROW_TYPE_HALF_FLOAT_ARRAY (garrow_half_float_array_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatArray, garrow_half_float_array, GARROW, @@ -332,6 +410,7 @@ const guint16 * garrow_half_float_array_get_values(GArrowHalfFloatArray *array, gint64 *length); #define GARROW_TYPE_FLOAT_ARRAY (garrow_float_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowFloatArray, garrow_float_array, GARROW, FLOAT_ARRAY, GArrowNumericArray) struct _GArrowFloatArrayClass @@ -339,18 +418,23 @@ struct _GArrowFloatArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFloatArray * garrow_float_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gfloat garrow_float_array_get_value(GArrowFloatArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gfloat * garrow_float_array_get_values(GArrowFloatArray *array, gint64 *length); #define GARROW_TYPE_DOUBLE_ARRAY (garrow_double_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowDoubleArray, garrow_double_array, GARROW, DOUBLE_ARRAY, GArrowNumericArray) struct _GArrowDoubleArrayClass @@ -358,18 +442,23 @@ struct _GArrowDoubleArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDoubleArray * garrow_double_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gdouble garrow_double_array_get_value(GArrowDoubleArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gdouble * garrow_double_array_get_values(GArrowDoubleArray *array, gint64 *length); #define GARROW_TYPE_BINARY_ARRAY (garrow_binary_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowBinaryArray, garrow_binary_array, GARROW, BINARY_ARRAY, GArrowArray) struct _GArrowBinaryArrayClass @@ -377,6 +466,7 @@ struct _GArrowBinaryArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBinaryArray * garrow_binary_array_new(gint64 length, GArrowBuffer *value_offsets, @@ -384,9 +474,12 @@ garrow_binary_array_new(gint64 length, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL GBytes * garrow_binary_array_get_value(GArrowBinaryArray *array, gint64 i); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL 
GARROW_DEPRECATED_IN_1_0_FOR(garrow_binary_array_get_data_buffer) GArrowBuffer * garrow_binary_array_get_buffer(GArrowBinaryArray *array); @@ -394,10 +487,13 @@ garrow_binary_array_get_buffer(GArrowBinaryArray *array); GARROW_AVAILABLE_IN_1_0 GArrowBuffer * garrow_binary_array_get_data_buffer(GArrowBinaryArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_binary_array_get_offsets_buffer(GArrowBinaryArray *array); #define GARROW_TYPE_LARGE_BINARY_ARRAY (garrow_large_binary_array_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryArray, garrow_large_binary_array, GARROW, @@ -428,11 +524,13 @@ garrow_large_binary_array_get_buffer(GArrowLargeBinaryArray *array); GARROW_AVAILABLE_IN_1_0 GArrowBuffer * garrow_large_binary_array_get_data_buffer(GArrowLargeBinaryArray *array); + GARROW_AVAILABLE_IN_0_16 GArrowBuffer * garrow_large_binary_array_get_offsets_buffer(GArrowLargeBinaryArray *array); #define GARROW_TYPE_STRING_ARRAY (garrow_string_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowStringArray, garrow_string_array, GARROW, STRING_ARRAY, GArrowBinaryArray) struct _GArrowStringArrayClass @@ -440,6 +538,7 @@ struct _GArrowStringArrayClass GArrowBinaryArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStringArray * garrow_string_array_new(gint64 length, GArrowBuffer *value_offsets, @@ -447,10 +546,12 @@ garrow_string_array_new(gint64 length, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gchar * garrow_string_array_get_string(GArrowStringArray *array, gint64 i); #define GARROW_TYPE_LARGE_STRING_ARRAY (garrow_large_string_array_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringArray, garrow_large_string_array, GARROW, @@ -474,6 +575,7 @@ gchar * garrow_large_string_array_get_string(GArrowLargeStringArray *array, gint64 i); #define GARROW_TYPE_DATE32_ARRAY (garrow_date32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowDate32Array, garrow_date32_array, GARROW, DATE32_ARRAY, GArrowNumericArray) struct _GArrowDate32ArrayClass @@ -481,18 +583,23 @@ struct _GArrowDate32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate32Array * garrow_date32_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint32 garrow_date32_array_get_value(GArrowDate32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint32 * garrow_date32_array_get_values(GArrowDate32Array *array, gint64 *length); #define GARROW_TYPE_DATE64_ARRAY (garrow_date64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowDate64Array, garrow_date64_array, GARROW, DATE64_ARRAY, GArrowNumericArray) struct _GArrowDate64ArrayClass @@ -500,18 +607,23 @@ struct _GArrowDate64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate64Array * garrow_date64_array_new(gint64 length, GArrowBuffer *data, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_date64_array_get_value(GArrowDate64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_date64_array_get_values(GArrowDate64Array *array, gint64 *length); #define GARROW_TYPE_TIMESTAMP_ARRAY (garrow_timestamp_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTimestampArray, garrow_timestamp_array, GARROW, @@ -522,6 +634,7 @@ struct _GArrowTimestampArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL 
GArrowTimestampArray * garrow_timestamp_array_new(GArrowTimestampDataType *data_type, gint64 length, @@ -529,12 +642,16 @@ garrow_timestamp_array_new(GArrowTimestampDataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_timestamp_array_get_value(GArrowTimestampArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_timestamp_array_get_values(GArrowTimestampArray *array, gint64 *length); #define GARROW_TYPE_TIME32_ARRAY (garrow_time32_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowTime32Array, garrow_time32_array, GARROW, TIME32_ARRAY, GArrowNumericArray) struct _GArrowTime32ArrayClass @@ -542,6 +659,7 @@ struct _GArrowTime32ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime32Array * garrow_time32_array_new(GArrowTime32DataType *data_type, gint64 length, @@ -549,12 +667,16 @@ garrow_time32_array_new(GArrowTime32DataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint32 garrow_time32_array_get_value(GArrowTime32Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint32 * garrow_time32_array_get_values(GArrowTime32Array *array, gint64 *length); #define GARROW_TYPE_TIME64_ARRAY (garrow_time64_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowTime64Array, garrow_time64_array, GARROW, TIME64_ARRAY, GArrowNumericArray) struct _GArrowTime64ArrayClass @@ -562,6 +684,7 @@ struct _GArrowTime64ArrayClass GArrowNumericArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime64Array * garrow_time64_array_new(GArrowTime64DataType *data_type, gint64 length, @@ -569,12 +692,16 @@ garrow_time64_array_new(GArrowTime64DataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL gint64 garrow_time64_array_get_value(GArrowTime64Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL const gint64 * garrow_time64_array_get_values(GArrowTime64Array *array, gint64 *length); #define GARROW_TYPE_MONTH_INTERVAL_ARRAY (garrow_month_interval_array_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalArray, garrow_month_interval_array, GARROW, @@ -594,11 +721,13 @@ garrow_month_interval_array_new(gint64 length, GARROW_AVAILABLE_IN_8_0 gint32 garrow_month_interval_array_get_value(GArrowMonthIntervalArray *array, gint64 i); + GARROW_AVAILABLE_IN_8_0 const gint32 * garrow_month_interval_array_get_values(GArrowMonthIntervalArray *array, gint64 *length); #define GARROW_TYPE_DAY_TIME_INTERVAL_ARRAY (garrow_day_time_interval_array_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalArray, garrow_day_time_interval_array, GARROW, @@ -618,12 +747,14 @@ garrow_day_time_interval_array_new(gint64 length, GARROW_AVAILABLE_IN_8_0 GArrowDayMillisecond * garrow_day_time_interval_array_get_value(GArrowDayTimeIntervalArray *array, gint64 i); + GARROW_AVAILABLE_IN_8_0 GList * garrow_day_time_interval_array_get_values(GArrowDayTimeIntervalArray *array); #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_ARRAY \ (garrow_month_day_nano_interval_array_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalArray, garrow_month_day_nano_interval_array, GARROW, @@ -649,6 +780,7 @@ GList * garrow_month_day_nano_interval_array_get_values(GArrowMonthDayNanoIntervalArray *array); #define GARROW_TYPE_FIXED_SIZE_BINARY_ARRAY (garrow_fixed_size_binary_array_get_type()) +GARROW_AVAILABLE_IN_3_0 
G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryArray, garrow_fixed_size_binary_array, GARROW, @@ -669,14 +801,17 @@ garrow_fixed_size_binary_array_new(GArrowFixedSizeBinaryDataType *data_type, GARROW_AVAILABLE_IN_3_0 gint32 garrow_fixed_size_binary_array_get_byte_width(GArrowFixedSizeBinaryArray *array); + GARROW_AVAILABLE_IN_3_0 GBytes * garrow_fixed_size_binary_array_get_value(GArrowFixedSizeBinaryArray *array, gint64 i); + GARROW_AVAILABLE_IN_3_0 GBytes * garrow_fixed_size_binary_array_get_values_bytes(GArrowFixedSizeBinaryArray *array); #define GARROW_TYPE_DECIMAL128_ARRAY (garrow_decimal128_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Array, garrow_decimal128_array, GARROW, @@ -687,12 +822,16 @@ struct _GArrowDecimal128ArrayClass GArrowFixedSizeBinaryArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gchar * garrow_decimal128_array_format_value(GArrowDecimal128Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_array_get_value(GArrowDecimal128Array *array, gint64 i); #define GARROW_TYPE_DECIMAL256_ARRAY (garrow_decimal256_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Array, garrow_decimal256_array, GARROW, @@ -703,8 +842,11 @@ struct _GArrowDecimal256ArrayClass GArrowFixedSizeBinaryArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gchar * garrow_decimal256_array_format_value(GArrowDecimal256Array *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL GArrowDecimal256 * garrow_decimal256_array_get_value(GArrowDecimal256Array *array, gint64 i); diff --git a/c_glib/arrow-glib/basic-array.hpp b/c_glib/arrow-glib/basic-array.hpp index f010cf3db4bc3..b2a7ed6ae075f 100644 --- a/c_glib/arrow-glib/basic-array.hpp +++ b/c_glib/arrow-glib/basic-array.hpp @@ -23,22 +23,32 @@ #include +GARROW_EXTERN arrow::EqualOptions * garrow_equal_options_get_raw(GArrowEqualOptions *equal_options); +GARROW_EXTERN GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array); + +GARROW_EXTERN GArrowArray * garrow_array_new_raw(std::shared_ptr *arrow_array, const gchar *first_property_name, ...); + +GARROW_EXTERN GArrowArray * garrow_array_new_raw_valist(std::shared_ptr *arrow_array, const gchar *first_property_name, va_list args); + +GARROW_EXTERN GArrowExtensionArray * garrow_extension_array_new_raw(std::shared_ptr *arrow_array, GArrowArray *storage); + +GARROW_EXTERN std::shared_ptr garrow_array_get_raw(GArrowArray *array); diff --git a/c_glib/arrow-glib/basic-data-type.cpp b/c_glib/arrow-glib/basic-data-type.cpp index 36646a9733cd3..d1c06000065dc 100644 --- a/c_glib/arrow-glib/basic-data-type.cpp +++ b/c_glib/arrow-glib/basic-data-type.cpp @@ -1801,6 +1801,8 @@ garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type return garrow_chunked_array_new_raw(&arrow_extension_chunked_array); } +G_END_DECLS + static std::shared_ptr garrow_extension_data_type_get_storage_data_type_raw(GArrowExtensionDataType *data_type) { @@ -1808,8 +1810,6 @@ garrow_extension_data_type_get_storage_data_type_raw(GArrowExtensionDataType *da return garrow_data_type_get_raw(priv->storage_data_type); } -G_END_DECLS - namespace garrow { GExtensionType::GExtensionType(GArrowExtensionDataType *garrow_data_type) : arrow::ExtensionType( diff --git a/c_glib/arrow-glib/basic-data-type.h b/c_glib/arrow-glib/basic-data-type.h index 01c9e5ef6e40a..77180018c9be8 100644 --- a/c_glib/arrow-glib/basic-data-type.h +++ b/c_glib/arrow-glib/basic-data-type.h @@ -28,6 +28,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_DATA_TYPE 
(garrow_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDataType, garrow_data_type, GARROW, DATA_TYPE, GObject) struct _GArrowDataTypeClass { @@ -42,17 +43,24 @@ GARROW_AVAILABLE_IN_6_0 gpointer garrow_data_type_export(GArrowDataType *data_type, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_data_type_equal(GArrowDataType *data_type, GArrowDataType *other_data_type); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_data_type_to_string(GArrowDataType *data_type); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_data_type_get_id(GArrowDataType *data_type); + GARROW_AVAILABLE_IN_3_0 gchar * garrow_data_type_get_name(GArrowDataType *data_type); #define GARROW_TYPE_FIXED_WIDTH_DATA_TYPE (garrow_fixed_width_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFixedWidthDataType, garrow_fixed_width_data_type, GARROW, @@ -63,6 +71,7 @@ struct _GArrowFixedWidthDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gint garrow_fixed_width_data_type_get_bit_width(GArrowFixedWidthDataType *data_type); /* TODO: @@ -71,6 +80,7 @@ GList *garrow_fixed_width_data_type_get_buffer_layout(GArrowFixedWidthDataType */ #define GARROW_TYPE_NULL_DATA_TYPE (garrow_null_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowNullDataType, garrow_null_data_type, GARROW, NULL_DATA_TYPE, GArrowDataType) struct _GArrowNullDataTypeClass @@ -78,10 +88,12 @@ struct _GArrowNullDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowNullDataType * garrow_null_data_type_new(void); #define GARROW_TYPE_BOOLEAN_DATA_TYPE (garrow_boolean_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBooleanDataType, garrow_boolean_data_type, GARROW, @@ -92,10 +104,12 @@ struct _GArrowBooleanDataTypeClass GArrowFixedWidthDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBooleanDataType * garrow_boolean_data_type_new(void); #define GARROW_TYPE_NUMERIC_DATA_TYPE (garrow_numeric_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowNumericDataType, garrow_numeric_data_type, GARROW, @@ -107,6 +121,7 @@ struct _GArrowNumericDataTypeClass }; #define GARROW_TYPE_INTEGER_DATA_TYPE (garrow_integer_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowIntegerDataType, garrow_integer_data_type, GARROW, @@ -122,6 +137,7 @@ gboolean garrow_integer_data_type_is_signed(GArrowIntegerDataType *data_type); #define GARROW_TYPE_INT8_DATA_TYPE (garrow_int8_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt8DataType, garrow_int8_data_type, GARROW, @@ -132,10 +148,12 @@ struct _GArrowInt8DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt8DataType * garrow_int8_data_type_new(void); #define GARROW_TYPE_UINT8_DATA_TYPE (garrow_uint8_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt8DataType, garrow_uint8_data_type, GARROW, @@ -146,10 +164,12 @@ struct _GArrowUInt8DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt8DataType * garrow_uint8_data_type_new(void); #define GARROW_TYPE_INT16_DATA_TYPE (garrow_int16_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt16DataType, garrow_int16_data_type, GARROW, @@ -160,10 +180,12 @@ struct _GArrowInt16DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt16DataType * garrow_int16_data_type_new(void); #define 
GARROW_TYPE_UINT16_DATA_TYPE (garrow_uint16_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt16DataType, garrow_uint16_data_type, GARROW, @@ -174,10 +196,12 @@ struct _GArrowUInt16DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt16DataType * garrow_uint16_data_type_new(void); #define GARROW_TYPE_INT32_DATA_TYPE (garrow_int32_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt32DataType, garrow_int32_data_type, GARROW, @@ -188,10 +212,12 @@ struct _GArrowInt32DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt32DataType * garrow_int32_data_type_new(void); #define GARROW_TYPE_UINT32_DATA_TYPE (garrow_uint32_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt32DataType, garrow_uint32_data_type, GARROW, @@ -202,10 +228,12 @@ struct _GArrowUInt32DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt32DataType * garrow_uint32_data_type_new(void); #define GARROW_TYPE_INT64_DATA_TYPE (garrow_int64_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowInt64DataType, garrow_int64_data_type, GARROW, @@ -216,10 +244,12 @@ struct _GArrowInt64DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowInt64DataType * garrow_int64_data_type_new(void); #define GARROW_TYPE_UINT64_DATA_TYPE (garrow_uint64_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowUInt64DataType, garrow_uint64_data_type, GARROW, @@ -230,10 +260,12 @@ struct _GArrowUInt64DataTypeClass GArrowIntegerDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowUInt64DataType * garrow_uint64_data_type_new(void); #define GARROW_TYPE_FLOATING_POINT_DATA_TYPE (garrow_floating_point_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFloatingPointDataType, garrow_floating_point_data_type, GARROW, @@ -245,6 +277,7 @@ struct _GArrowFloatingPointDataTypeClass }; #define GARROW_TYPE_HALF_FLOAT_DATA_TYPE (garrow_half_float_data_type_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatDataType, garrow_half_float_data_type, GARROW, @@ -260,6 +293,7 @@ GArrowHalfFloatDataType * garrow_half_float_data_type_new(void); #define GARROW_TYPE_FLOAT_DATA_TYPE (garrow_float_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFloatDataType, garrow_float_data_type, GARROW, @@ -270,10 +304,12 @@ struct _GArrowFloatDataTypeClass GArrowFloatingPointDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFloatDataType * garrow_float_data_type_new(void); #define GARROW_TYPE_DOUBLE_DATA_TYPE (garrow_double_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDoubleDataType, garrow_double_data_type, GARROW, @@ -284,10 +320,12 @@ struct _GArrowDoubleDataTypeClass GArrowFloatingPointDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDoubleDataType * garrow_double_data_type_new(void); #define GARROW_TYPE_BINARY_DATA_TYPE (garrow_binary_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowBinaryDataType, garrow_binary_data_type, GARROW, BINARY_DATA_TYPE, GArrowDataType) struct _GArrowBinaryDataTypeClass @@ -295,11 +333,13 @@ struct _GArrowBinaryDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBinaryDataType * garrow_binary_data_type_new(void); #define GARROW_TYPE_FIXED_SIZE_BINARY_DATA_TYPE \ 
(garrow_fixed_size_binary_data_type_get_type()) +GARROW_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryDataType, garrow_fixed_size_binary_data_type, GARROW, @@ -319,6 +359,7 @@ garrow_fixed_size_binary_data_type_get_byte_width( GArrowFixedSizeBinaryDataType *data_type); #define GARROW_TYPE_LARGE_BINARY_DATA_TYPE (garrow_large_binary_data_type_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryDataType, garrow_large_binary_data_type, GARROW, @@ -334,6 +375,7 @@ GArrowLargeBinaryDataType * garrow_large_binary_data_type_new(void); #define GARROW_TYPE_STRING_DATA_TYPE (garrow_string_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowStringDataType, garrow_string_data_type, GARROW, @@ -344,10 +386,12 @@ struct _GArrowStringDataTypeClass GArrowBinaryDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStringDataType * garrow_string_data_type_new(void); #define GARROW_TYPE_LARGE_STRING_DATA_TYPE (garrow_large_string_data_type_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringDataType, garrow_large_string_data_type, GARROW, @@ -363,6 +407,7 @@ GArrowLargeStringDataType * garrow_large_string_data_type_new(void); #define GARROW_TYPE_TEMPORAL_DATA_TYPE (garrow_temporal_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTemporalDataType, garrow_temporal_data_type, GARROW, @@ -374,6 +419,7 @@ struct _GArrowTemporalDataTypeClass }; #define GARROW_TYPE_DATE32_DATA_TYPE (garrow_date32_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDate32DataType, garrow_date32_data_type, GARROW, @@ -384,10 +430,12 @@ struct _GArrowDate32DataTypeClass GArrowTemporalDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate32DataType * garrow_date32_data_type_new(void); #define GARROW_TYPE_DATE64_DATA_TYPE (garrow_date64_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDate64DataType, garrow_date64_data_type, GARROW, @@ -398,10 +446,12 @@ struct _GArrowDate64DataTypeClass GArrowTemporalDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDate64DataType * garrow_date64_data_type_new(void); #define GARROW_TYPE_TIMESTAMP_DATA_TYPE (garrow_timestamp_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTimestampDataType, garrow_timestamp_data_type, GARROW, @@ -412,12 +462,16 @@ struct _GArrowTimestampDataTypeClass GArrowTemporalDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTimestampDataType * garrow_timestamp_data_type_new(GArrowTimeUnit unit, GTimeZone *time_zone); + +GARROW_AVAILABLE_IN_ALL GArrowTimeUnit garrow_timestamp_data_type_get_unit(GArrowTimestampDataType *data_type); #define GARROW_TYPE_TIME_DATA_TYPE (garrow_time_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTimeDataType, garrow_time_data_type, GARROW, @@ -428,10 +482,12 @@ struct _GArrowTimeDataTypeClass GArrowTemporalDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTimeUnit garrow_time_data_type_get_unit(GArrowTimeDataType *time_data_type); #define GARROW_TYPE_TIME32_DATA_TYPE (garrow_time32_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTime32DataType, garrow_time32_data_type, GARROW, @@ -442,10 +498,12 @@ struct _GArrowTime32DataTypeClass GArrowTimeDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime32DataType * garrow_time32_data_type_new(GArrowTimeUnit unit, GError **error); #define GARROW_TYPE_TIME64_DATA_TYPE 
(garrow_time64_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTime64DataType, garrow_time64_data_type, GARROW, @@ -456,10 +514,12 @@ struct _GArrowTime64DataTypeClass GArrowTimeDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTime64DataType * garrow_time64_data_type_new(GArrowTimeUnit unit, GError **error); #define GARROW_TYPE_INTERVAL_DATA_TYPE (garrow_interval_data_type_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE(GArrowIntervalDataType, garrow_interval_data_type, GARROW, @@ -475,6 +535,7 @@ GArrowIntervalType garrow_interval_data_type_get_interval_type(GArrowIntervalDataType *type); #define GARROW_TYPE_MONTH_INTERVAL_DATA_TYPE (garrow_month_interval_data_type_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalDataType, garrow_month_interval_data_type, GARROW, @@ -491,6 +552,7 @@ garrow_month_interval_data_type_new(void); #define GARROW_TYPE_DAY_TIME_INTERVAL_DATA_TYPE \ (garrow_day_time_interval_data_type_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalDataType, garrow_day_time_interval_data_type, GARROW, @@ -507,6 +569,7 @@ garrow_day_time_interval_data_type_new(void); #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_DATA_TYPE \ (garrow_month_day_nano_interval_data_type_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalDataType, garrow_month_day_nano_interval_data_type, GARROW, @@ -522,6 +585,7 @@ GArrowMonthDayNanoIntervalDataType * garrow_month_day_nano_interval_data_type_new(void); #define GARROW_TYPE_DECIMAL_DATA_TYPE (garrow_decimal_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimalDataType, garrow_decimal_data_type, GARROW, @@ -532,14 +596,20 @@ struct _GArrowDecimalDataTypeClass GArrowFixedSizeBinaryDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDecimalDataType * garrow_decimal_data_type_new(gint32 precision, gint32 scale, GError **error); + +GARROW_AVAILABLE_IN_ALL gint32 garrow_decimal_data_type_get_precision(GArrowDecimalDataType *decimal_data_type); + +GARROW_AVAILABLE_IN_ALL gint32 garrow_decimal_data_type_get_scale(GArrowDecimalDataType *decimal_data_type); #define GARROW_TYPE_DECIMAL128_DATA_TYPE (garrow_decimal128_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128DataType, garrow_decimal128_data_type, GARROW, @@ -559,6 +629,7 @@ GArrowDecimal128DataType * garrow_decimal128_data_type_new(gint32 precision, gint32 scale, GError **error); #define GARROW_TYPE_DECIMAL256_DATA_TYPE (garrow_decimal256_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256DataType, garrow_decimal256_data_type, GARROW, @@ -578,6 +649,7 @@ GArrowDecimal256DataType * garrow_decimal256_data_type_new(gint32 precision, gint32 scale, GError **error); #define GARROW_TYPE_EXTENSION_DATA_TYPE (garrow_extension_data_type_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataType, garrow_extension_data_type, GARROW, @@ -628,6 +700,7 @@ garrow_extension_data_type_wrap_chunked_array(GArrowExtensionDataType *data_type #define GARROW_TYPE_EXTENSION_DATA_TYPE_REGISTRY \ (garrow_extension_data_type_registry_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowExtensionDataTypeRegistry, garrow_extension_data_type_registry, GARROW, diff --git a/c_glib/arrow-glib/buffer.h b/c_glib/arrow-glib/buffer.h index 8f93a5ef0ddb2..29308e935aba2 100644 --- a/c_glib/arrow-glib/buffer.h +++ 
b/c_glib/arrow-glib/buffer.h @@ -21,44 +21,70 @@ #include +#include + G_BEGIN_DECLS #define GARROW_TYPE_BUFFER (garrow_buffer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBuffer, garrow_buffer, GARROW, BUFFER, GObject) struct _GArrowBufferClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_buffer_new(const guint8 *data, gint64 size); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_buffer_new_bytes(GBytes *data); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_buffer_equal(GArrowBuffer *buffer, GArrowBuffer *other_buffer); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_buffer_equal_n_bytes(GArrowBuffer *buffer, GArrowBuffer *other_buffer, gint64 n_bytes); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_buffer_is_mutable(GArrowBuffer *buffer); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_buffer_get_capacity(GArrowBuffer *buffer); + +GARROW_AVAILABLE_IN_ALL GBytes * garrow_buffer_get_data(GArrowBuffer *buffer); + +GARROW_AVAILABLE_IN_ALL GBytes * garrow_buffer_get_mutable_data(GArrowBuffer *buffer); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_buffer_get_size(GArrowBuffer *buffer); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_buffer_get_parent(GArrowBuffer *buffer); +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_buffer_copy(GArrowBuffer *buffer, gint64 start, gint64 size, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_buffer_slice(GArrowBuffer *buffer, gint64 offset, gint64 size); #define GARROW_TYPE_MUTABLE_BUFFER (garrow_mutable_buffer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowMutableBuffer, garrow_mutable_buffer, GARROW, MUTABLE_BUFFER, GArrowBuffer) struct _GArrowMutableBufferClass @@ -66,12 +92,19 @@ struct _GArrowMutableBufferClass GArrowBufferClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowMutableBuffer * garrow_mutable_buffer_new(guint8 *data, gint64 size); + +GARROW_AVAILABLE_IN_ALL GArrowMutableBuffer * garrow_mutable_buffer_new_bytes(GBytes *data); + +GARROW_AVAILABLE_IN_ALL GArrowMutableBuffer * garrow_mutable_buffer_slice(GArrowMutableBuffer *buffer, gint64 offset, gint64 size); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer, gint64 offset, @@ -80,6 +113,7 @@ garrow_mutable_buffer_set_data(GArrowMutableBuffer *buffer, GError **error); #define GARROW_TYPE_RESIZABLE_BUFFER (garrow_resizable_buffer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowResizableBuffer, garrow_resizable_buffer, GARROW, @@ -90,12 +124,17 @@ struct _GArrowResizableBufferClass GArrowMutableBufferClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowResizableBuffer * garrow_resizable_buffer_new(gint64 initial_size, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_resizable_buffer_resize(GArrowResizableBuffer *buffer, gint64 new_size, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_resizable_buffer_reserve(GArrowResizableBuffer *buffer, gint64 new_capacity, diff --git a/c_glib/arrow-glib/buffer.hpp b/c_glib/arrow-glib/buffer.hpp index 5d922371c3b6b..7e4d7ecee1c1c 100644 --- a/c_glib/arrow-glib/buffer.hpp +++ b/c_glib/arrow-glib/buffer.hpp @@ -23,20 +23,32 @@ #include +GARROW_EXTERN GArrowBuffer * garrow_buffer_new_raw(std::shared_ptr *arrow_buffer); + +GARROW_EXTERN GArrowBuffer * garrow_buffer_new_raw_bytes(std::shared_ptr *arrow_buffer, GBytes *data); + +GARROW_EXTERN GArrowBuffer * garrow_buffer_new_raw_parent(std::shared_ptr *arrow_buffer, GArrowBuffer *parent); + +GARROW_EXTERN std::shared_ptr garrow_buffer_get_raw(GArrowBuffer 
*buffer); +GARROW_EXTERN GArrowMutableBuffer * garrow_mutable_buffer_new_raw(std::shared_ptr *arrow_buffer); + +GARROW_EXTERN GArrowMutableBuffer * garrow_mutable_buffer_new_raw_bytes(std::shared_ptr *arrow_buffer, GBytes *data); + +GARROW_EXTERN GArrowResizableBuffer * garrow_resizable_buffer_new_raw(std::shared_ptr *arrow_buffer); diff --git a/c_glib/arrow-glib/chunked-array-definition.h b/c_glib/arrow-glib/chunked-array-definition.h index b687735419eeb..744f1077ea754 100644 --- a/c_glib/arrow-glib/chunked-array-definition.h +++ b/c_glib/arrow-glib/chunked-array-definition.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_CHUNKED_ARRAY (garrow_chunked_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowChunkedArray, garrow_chunked_array, GARROW, CHUNKED_ARRAY, GObject) struct _GArrowChunkedArrayClass diff --git a/c_glib/arrow-glib/chunked-array.h b/c_glib/arrow-glib/chunked-array.h index 6ca497942ff2e..712d16504f624 100644 --- a/c_glib/arrow-glib/chunked-array.h +++ b/c_glib/arrow-glib/chunked-array.h @@ -24,42 +24,61 @@ G_BEGIN_DECLS +GARROW_AVAILABLE_IN_ALL GArrowChunkedArray * garrow_chunked_array_new(GList *chunks, GError **error); + GARROW_AVAILABLE_IN_11_0 GArrowChunkedArray * garrow_chunked_array_new_empty(GArrowDataType *data_type, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_chunked_array_equal(GArrowChunkedArray *chunked_array, GArrowChunkedArray *other_chunked_array); +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_chunked_array_get_value_data_type(GArrowChunkedArray *chunked_array); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_chunked_array_get_value_type(GArrowChunkedArray *chunked_array); GARROW_DEPRECATED_IN_0_15_FOR(garrow_chunked_array_get_n_rows) guint64 garrow_chunked_array_get_length(GArrowChunkedArray *chunked_array); + GARROW_AVAILABLE_IN_0_15 guint64 garrow_chunked_array_get_n_rows(GArrowChunkedArray *chunked_array); + +GARROW_AVAILABLE_IN_ALL guint64 garrow_chunked_array_get_n_nulls(GArrowChunkedArray *chunked_array); + +GARROW_AVAILABLE_IN_ALL guint garrow_chunked_array_get_n_chunks(GArrowChunkedArray *chunked_array); +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_chunked_array_get_chunk(GArrowChunkedArray *chunked_array, guint i); + +GARROW_AVAILABLE_IN_ALL GList * garrow_chunked_array_get_chunks(GArrowChunkedArray *chunked_array); + +GARROW_AVAILABLE_IN_ALL GArrowChunkedArray * garrow_chunked_array_slice(GArrowChunkedArray *chunked_array, guint64 offset, guint64 length); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_chunked_array_to_string(GArrowChunkedArray *chunked_array, GError **error); + GARROW_AVAILABLE_IN_4_0 GArrowArray * garrow_chunked_array_combine(GArrowChunkedArray *chunked_array, GError **error); diff --git a/c_glib/arrow-glib/chunked-array.hpp b/c_glib/arrow-glib/chunked-array.hpp index 9ce6cc76adfbf..674ef9606b96e 100644 --- a/c_glib/arrow-glib/chunked-array.hpp +++ b/c_glib/arrow-glib/chunked-array.hpp @@ -23,10 +23,15 @@ #include +GARROW_EXTERN GArrowChunkedArray * garrow_chunked_array_new_raw(std::shared_ptr *arrow_chunked_array); + +GARROW_EXTERN GArrowChunkedArray * garrow_chunked_array_new_raw(std::shared_ptr *arrow_chunked_array, GArrowDataType *data_type); + +GARROW_EXTERN std::shared_ptr garrow_chunked_array_get_raw(GArrowChunkedArray *chunked_array); diff --git a/c_glib/arrow-glib/codec.h b/c_glib/arrow-glib/codec.h index 9b8611bb0a7ee..5865634a7d8e4 100644 --- a/c_glib/arrow-glib/codec.h +++ b/c_glib/arrow-glib/codec.h @@ -50,20 +50,25 @@ typedef enum { } GArrowCompressionType; #define 
GARROW_TYPE_CODEC (garrow_codec_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowCodec, garrow_codec, GARROW, CODEC, GObject) struct _GArrowCodecClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCodec * garrow_codec_new(GArrowCompressionType type, GError **error); +GARROW_AVAILABLE_IN_ALL const gchar * garrow_codec_get_name(GArrowCodec *codec); + GARROW_AVAILABLE_IN_2_0 GArrowCompressionType garrow_codec_get_compression_type(GArrowCodec *codec); + GARROW_AVAILABLE_IN_2_0 gint garrow_codec_get_compression_level(GArrowCodec *codec); diff --git a/c_glib/arrow-glib/codec.hpp b/c_glib/arrow-glib/codec.hpp index f4cfaba18a00e..baea842ddf6b5 100644 --- a/c_glib/arrow-glib/codec.hpp +++ b/c_glib/arrow-glib/codec.hpp @@ -23,12 +23,18 @@ #include +GARROW_EXTERN GArrowCompressionType garrow_compression_type_from_raw(arrow::Compression::type arrow_type); + +GARROW_EXTERN arrow::Compression::type garrow_compression_type_to_raw(GArrowCompressionType type); +GARROW_EXTERN GArrowCodec * garrow_codec_new_raw(std::shared_ptr *arrow_codec); + +GARROW_EXTERN std::shared_ptr garrow_codec_get_raw(GArrowCodec *codec); diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h index c6e19f1c74e22..b8ba901363d0a 100644 --- a/c_glib/arrow-glib/composite-array.h +++ b/c_glib/arrow-glib/composite-array.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_LIST_ARRAY (garrow_list_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowListArray, garrow_list_array, GARROW, LIST_ARRAY, GArrowArray) struct _GArrowListArrayClass @@ -34,6 +35,7 @@ struct _GArrowListArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowListArray * garrow_list_array_new(GArrowDataType *data_type, gint64 length, @@ -42,24 +44,32 @@ garrow_list_array_new(GArrowDataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_list_array_get_value_type(GArrowListArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_list_array_get_value(GArrowListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 GArrowArray * garrow_list_array_get_values(GArrowListArray *array); + GARROW_AVAILABLE_IN_2_0 gint32 garrow_list_array_get_value_offset(GArrowListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 gint32 garrow_list_array_get_value_length(GArrowListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 const gint32 * garrow_list_array_get_value_offsets(GArrowListArray *array, gint64 *n_offsets); #define GARROW_TYPE_LARGE_LIST_ARRAY (garrow_large_list_array_get_type()) +GARROW_AVAILABLE_IN_0_16 G_DECLARE_DERIVABLE_TYPE( GArrowLargeListArray, garrow_large_list_array, GARROW, LARGE_LIST_ARRAY, GArrowArray) struct _GArrowLargeListArrayClass @@ -79,23 +89,29 @@ garrow_large_list_array_new(GArrowDataType *data_type, GARROW_AVAILABLE_IN_0_16 GArrowDataType * garrow_large_list_array_get_value_type(GArrowLargeListArray *array); + GARROW_AVAILABLE_IN_0_16 GArrowArray * garrow_large_list_array_get_value(GArrowLargeListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 GArrowArray * garrow_large_list_array_get_values(GArrowLargeListArray *array); + GARROW_AVAILABLE_IN_2_0 gint64 garrow_large_list_array_get_value_offset(GArrowLargeListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 gint64 garrow_large_list_array_get_value_length(GArrowLargeListArray *array, gint64 i); + GARROW_AVAILABLE_IN_2_0 const gint64 * garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n_offsets); #define 
GARROW_TYPE_STRUCT_ARRAY (garrow_struct_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowStructArray, garrow_struct_array, GARROW, STRUCT_ARRAY, GArrowArray) struct _GArrowStructArrayClass @@ -103,6 +119,7 @@ struct _GArrowStructArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStructArray * garrow_struct_array_new(GArrowDataType *data_type, gint64 length, @@ -110,9 +127,11 @@ garrow_struct_array_new(GArrowDataType *data_type, GArrowBuffer *null_bitmap, gint64 n_nulls); +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_struct_array_get_field(GArrowStructArray *array, gint i); +GARROW_AVAILABLE_IN_ALL GList * garrow_struct_array_get_fields(GArrowStructArray *array); @@ -121,6 +140,7 @@ GList * garrow_struct_array_flatten(GArrowStructArray *array, GError **error); #define GARROW_TYPE_MAP_ARRAY (garrow_map_array_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GArrowMapArray, garrow_map_array, GARROW, MAP_ARRAY, GArrowListArray) struct _GArrowMapArrayClass @@ -134,14 +154,17 @@ garrow_map_array_new(GArrowArray *offsets, GArrowArray *keys, GArrowArray *items, GError **error); + GARROW_AVAILABLE_IN_0_17 GArrowArray * garrow_map_array_get_keys(GArrowMapArray *array); + GARROW_AVAILABLE_IN_0_17 GArrowArray * garrow_map_array_get_items(GArrowMapArray *array); #define GARROW_TYPE_UNION_ARRAY (garrow_union_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUnionArray, garrow_union_array, GARROW, UNION_ARRAY, GArrowArray) struct _GArrowUnionArrayClass @@ -152,13 +175,17 @@ struct _GArrowUnionArrayClass GARROW_AVAILABLE_IN_12_0 gint8 garrow_union_array_get_type_code(GArrowUnionArray *array, gint64 i); + GARROW_AVAILABLE_IN_12_0 gint garrow_union_array_get_child_id(GArrowUnionArray *array, gint64 i); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_union_array_get_field(GArrowUnionArray *array, gint i); #define GARROW_TYPE_SPARSE_UNION_ARRAY (garrow_sparse_union_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionArray, garrow_sparse_union_array, GARROW, @@ -169,8 +196,11 @@ struct _GArrowSparseUnionArrayClass GArrowUnionArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowSparseUnionArray * garrow_sparse_union_array_new(GArrowInt8Array *type_ids, GList *fields, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowSparseUnionArray * garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type, GArrowInt8Array *type_ids, @@ -178,6 +208,7 @@ garrow_sparse_union_array_new_data_type(GArrowSparseUnionDataType *data_type, GError **error); #define GARROW_TYPE_DENSE_UNION_ARRAY (garrow_dense_union_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionArray, garrow_dense_union_array, GARROW, @@ -188,22 +219,27 @@ struct _GArrowDenseUnionArrayClass GArrowUnionArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDenseUnionArray * garrow_dense_union_array_new(GArrowInt8Array *type_ids, GArrowInt32Array *value_offsets, GList *fields, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowDenseUnionArray * garrow_dense_union_array_new_data_type(GArrowDenseUnionDataType *data_type, GArrowInt8Array *type_ids, GArrowInt32Array *value_offsets, GList *fields, GError **error); + GARROW_AVAILABLE_IN_12_0 gint32 garrow_dense_union_array_get_value_offset(GArrowDenseUnionArray *array, gint64 i); #define GARROW_TYPE_DICTIONARY_ARRAY (garrow_dictionary_array_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowDictionaryArray, 
garrow_dictionary_array, GARROW, DICTIONARY_ARRAY, GArrowArray) struct _GArrowDictionaryArrayClass @@ -211,22 +247,29 @@ struct _GArrowDictionaryArrayClass GArrowArrayClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDictionaryArray * garrow_dictionary_array_new(GArrowDataType *data_type, GArrowArray *indices, GArrowArray *dictionary, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_dictionary_array_get_indices(GArrowDictionaryArray *array); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_dictionary_array_get_dictionary(GArrowDictionaryArray *array); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_1_0_FOR(garrow_array_get_value_data_type) GArrowDictionaryDataType * garrow_dictionary_array_get_dictionary_data_type(GArrowDictionaryArray *array); #endif #define GARROW_TYPE_RUN_END_ENCODED_ARRAY (garrow_run_end_encoded_array_get_type()) +GARROW_AVAILABLE_IN_13_0 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodedArray, garrow_run_end_encoded_array, GARROW, @@ -248,9 +291,11 @@ garrow_run_end_encoded_array_new(GArrowDataType *data_type, GARROW_AVAILABLE_IN_13_0 GArrowArray * garrow_run_end_encoded_array_get_run_ends(GArrowRunEndEncodedArray *array); + GARROW_AVAILABLE_IN_13_0 GArrowArray * garrow_run_end_encoded_array_get_values(GArrowRunEndEncodedArray *array); + GARROW_AVAILABLE_IN_13_0 GArrowArray * garrow_run_end_encoded_array_get_logical_run_ends(GArrowRunEndEncodedArray *array, @@ -258,9 +303,11 @@ garrow_run_end_encoded_array_get_logical_run_ends(GArrowRunEndEncodedArray *arra GARROW_AVAILABLE_IN_13_0 GArrowArray * garrow_run_end_encoded_array_get_logical_values(GArrowRunEndEncodedArray *array); + GARROW_AVAILABLE_IN_13_0 gint64 garrow_run_end_encoded_array_find_physical_offset(GArrowRunEndEncodedArray *array); + GARROW_AVAILABLE_IN_13_0 gint64 garrow_run_end_encoded_array_find_physical_length(GArrowRunEndEncodedArray *array); diff --git a/c_glib/arrow-glib/composite-data-type.h b/c_glib/arrow-glib/composite-data-type.h index e71d277a305c6..7a0a462af00f9 100644 --- a/c_glib/arrow-glib/composite-data-type.h +++ b/c_glib/arrow-glib/composite-data-type.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_LIST_DATA_TYPE (garrow_list_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowListDataType, garrow_list_data_type, GARROW, LIST_DATA_TYPE, GArrowDataType) struct _GArrowListDataTypeClass @@ -34,18 +35,23 @@ struct _GArrowListDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowListDataType * garrow_list_data_type_new(GArrowField *field); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_13_FOR(garrow_list_data_type_get_field) GArrowField * garrow_list_data_type_get_value_field(GArrowListDataType *list_data_type); #endif + GARROW_AVAILABLE_IN_0_13 GArrowField * garrow_list_data_type_get_field(GArrowListDataType *list_data_type); #define GARROW_TYPE_LARGE_LIST_DATA_TYPE (garrow_large_list_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowLargeListDataType, garrow_large_list_data_type, GARROW, @@ -59,11 +65,13 @@ struct _GArrowLargeListDataTypeClass GARROW_AVAILABLE_IN_0_16 GArrowLargeListDataType * garrow_large_list_data_type_new(GArrowField *field); + GARROW_AVAILABLE_IN_0_16 GArrowField * garrow_large_list_data_type_get_field(GArrowLargeListDataType *large_list_data_type); #define GARROW_TYPE_STRUCT_DATA_TYPE (garrow_struct_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowStructDataType, 
garrow_struct_data_type, GARROW, STRUCT_DATA_TYPE, GArrowDataType) struct _GArrowStructDataTypeClass @@ -71,22 +79,34 @@ struct _GArrowStructDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowStructDataType * garrow_struct_data_type_new(GList *fields); + +GARROW_AVAILABLE_IN_ALL gint garrow_struct_data_type_get_n_fields(GArrowStructDataType *struct_data_type); + +GARROW_AVAILABLE_IN_ALL GList * garrow_struct_data_type_get_fields(GArrowStructDataType *struct_data_type); + +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_struct_data_type_get_field(GArrowStructDataType *struct_data_type, gint i); + +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_struct_data_type_get_field_by_name(GArrowStructDataType *struct_data_type, const gchar *name); + +GARROW_AVAILABLE_IN_ALL gint garrow_struct_data_type_get_field_index(GArrowStructDataType *struct_data_type, const gchar *name); #define GARROW_TYPE_MAP_DATA_TYPE (garrow_map_data_type_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GArrowMapDataType, garrow_map_data_type, GARROW, MAP_DATA_TYPE, GArrowListDataType) struct _GArrowMapDataTypeClass @@ -105,6 +125,7 @@ GArrowDataType * garrow_map_data_type_get_item_type(GArrowMapDataType *map_data_type); #define GARROW_TYPE_UNION_DATA_TYPE (garrow_union_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowUnionDataType, garrow_union_data_type, GARROW, UNION_DATA_TYPE, GArrowDataType) struct _GArrowUnionDataTypeClass @@ -112,17 +133,25 @@ struct _GArrowUnionDataTypeClass GArrowDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gint garrow_union_data_type_get_n_fields(GArrowUnionDataType *union_data_type); + +GARROW_AVAILABLE_IN_ALL GList * garrow_union_data_type_get_fields(GArrowUnionDataType *union_data_type); + +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_union_data_type_get_field(GArrowUnionDataType *union_data_type, gint i); + +GARROW_AVAILABLE_IN_ALL gint8 * garrow_union_data_type_get_type_codes(GArrowUnionDataType *union_data_type, gsize *n_type_codes); #define GARROW_TYPE_SPARSE_UNION_DATA_TYPE (garrow_sparse_union_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionDataType, garrow_sparse_union_data_type, GARROW, @@ -133,10 +162,12 @@ struct _GArrowSparseUnionDataTypeClass GArrowUnionDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowSparseUnionDataType * garrow_sparse_union_data_type_new(GList *fields, gint8 *type_codes, gsize n_type_codes); #define GARROW_TYPE_DENSE_UNION_DATA_TYPE (garrow_dense_union_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionDataType, garrow_dense_union_data_type, GARROW, @@ -147,10 +178,12 @@ struct _GArrowDenseUnionDataTypeClass GArrowUnionDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDenseUnionDataType * garrow_dense_union_data_type_new(GList *fields, gint8 *type_codes, gsize n_type_codes); #define GARROW_TYPE_DICTIONARY_DATA_TYPE (garrow_dictionary_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryDataType, garrow_dictionary_data_type, GARROW, @@ -161,22 +194,29 @@ struct _GArrowDictionaryDataTypeClass GArrowFixedWidthDataTypeClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDictionaryDataType * garrow_dictionary_data_type_new(GArrowDataType *index_data_type, GArrowDataType *value_data_type, gboolean ordered); + +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_dictionary_data_type_get_index_data_type( GArrowDictionaryDataType *dictionary_data_type); + 
GARROW_AVAILABLE_IN_0_14 GArrowDataType * garrow_dictionary_data_type_get_value_data_type( GArrowDictionaryDataType *dictionary_data_type); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_dictionary_data_type_is_ordered(GArrowDictionaryDataType *dictionary_data_type); #define GARROW_TYPE_RUN_END_ENCODED_DATA_TYPE \ (garrow_run_end_encoded_data_type_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodedDataType, garrow_run_end_encoded_data_type, GARROW, @@ -195,6 +235,7 @@ GARROW_AVAILABLE_IN_13_0 GArrowDataType * garrow_run_end_encoded_data_type_get_run_end_data_type( GArrowRunEndEncodedDataType *data_type); + GARROW_AVAILABLE_IN_13_0 GArrowDataType * garrow_run_end_encoded_data_type_get_value_data_type( diff --git a/c_glib/arrow-glib/compute-definition.h b/c_glib/arrow-glib/compute-definition.h index b699e9e99a9fc..a060f16f62cf6 100644 --- a/c_glib/arrow-glib/compute-definition.h +++ b/c_glib/arrow-glib/compute-definition.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GARROW_TYPE_FUNCTION_OPTIONS (garrow_function_options_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowFunctionOptions, garrow_function_options, GARROW, FUNCTION_OPTIONS, GObject) struct _GArrowFunctionOptionsClass @@ -32,6 +35,7 @@ struct _GArrowFunctionOptionsClass }; #define GARROW_TYPE_CAST_OPTIONS (garrow_cast_options_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowCastOptions, garrow_cast_options, GARROW, CAST_OPTIONS, GArrowFunctionOptions) struct _GArrowCastOptionsClass @@ -40,6 +44,7 @@ struct _GArrowCastOptionsClass }; #define GARROW_TYPE_EXPRESSION (garrow_expression_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowExpression, garrow_expression, GARROW, EXPRESSION, GObject) struct _GArrowExpressionClass { diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 029cab136ad8f..54b0ddb014fbb 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_EXECUTE_CONTEXT (garrow_execute_context_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( GArrowExecuteContext, garrow_execute_context, GARROW, EXECUTE_CONTEXT, GObject) struct _GArrowExecuteContextClass @@ -46,6 +47,7 @@ gchar * garrow_function_options_to_string(GArrowFunctionOptions *options); #define GARROW_TYPE_FUNCTION_DOC (garrow_function_doc_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GArrowFunctionDoc, garrow_function_doc, GARROW, FUNCTION_DOC, GObject) struct _GArrowFunctionDocClass @@ -67,6 +69,7 @@ gchar * garrow_function_doc_get_options_class_name(GArrowFunctionDoc *doc); #define GARROW_TYPE_FUNCTION (garrow_function_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE(GArrowFunction, garrow_function, GARROW, FUNCTION, GObject) struct _GArrowFunctionClass { @@ -110,6 +113,7 @@ gchar * garrow_function_to_string(GArrowFunction *function); #define GARROW_TYPE_EXECUTE_NODE_OPTIONS (garrow_execute_node_options_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowExecuteNodeOptions, garrow_execute_node_options, GARROW, @@ -121,6 +125,7 @@ struct _GArrowExecuteNodeOptionsClass }; #define GARROW_TYPE_SOURCE_NODE_OPTIONS (garrow_source_node_options_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowSourceNodeOptions, garrow_source_node_options, GARROW, @@ -142,6 +147,7 @@ GArrowSourceNodeOptions * garrow_source_node_options_new_table(GArrowTable *table); #define GARROW_TYPE_FILTER_NODE_OPTIONS 
(garrow_filter_node_options_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowFilterNodeOptions, garrow_filter_node_options, GARROW, @@ -157,6 +163,7 @@ GArrowFilterNodeOptions * garrow_filter_node_options_new(GArrowExpression *expression); #define GARROW_TYPE_PROJECT_NODE_OPTIONS (garrow_project_node_options_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowProjectNodeOptions, garrow_project_node_options, GARROW, @@ -172,6 +179,7 @@ GArrowProjectNodeOptions * garrow_project_node_options_new(GList *expressions, gchar **names, gsize n_names); #define GARROW_TYPE_AGGREGATION (garrow_aggregation_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GArrowAggregation, garrow_aggregation, GARROW, AGGREGATION, GObject) struct _GArrowAggregationClass @@ -187,6 +195,7 @@ garrow_aggregation_new(const gchar *function, const gchar *output); #define GARROW_TYPE_AGGREGATE_NODE_OPTIONS (garrow_aggregate_node_options_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowAggregateNodeOptions, garrow_aggregate_node_options, GARROW, @@ -205,6 +214,7 @@ garrow_aggregate_node_options_new(GList *aggregations, GError **error); #define GARROW_TYPE_SINK_NODE_OPTIONS (garrow_sink_node_options_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowSinkNodeOptions, garrow_sink_node_options, GARROW, @@ -249,6 +259,7 @@ typedef enum { } GArrowJoinType; #define GARROW_TYPE_HASH_JOIN_NODE_OPTIONS (garrow_hash_join_node_options_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE(GArrowHashJoinNodeOptions, garrow_hash_join_node_options, GARROW, @@ -281,6 +292,7 @@ garrow_hash_join_node_options_set_right_outputs(GArrowHashJoinNodeOptions *optio GError **error); #define GARROW_TYPE_EXECUTE_NODE (garrow_execute_node_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GArrowExecuteNode, garrow_execute_node, GARROW, EXECUTE_NODE, GObject) struct _GArrowExecuteNodeClass @@ -296,6 +308,7 @@ GArrowSchema * garrow_execute_node_get_output_schema(GArrowExecuteNode *node); #define GARROW_TYPE_EXECUTE_PLAN (garrow_execute_plan_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GArrowExecutePlan, garrow_execute_plan, GARROW, EXECUTE_PLAN, GObject) struct _GArrowExecutePlanClass @@ -365,10 +378,12 @@ GARROW_AVAILABLE_IN_6_0 gboolean garrow_execute_plan_wait(GArrowExecutePlan *plan, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowCastOptions * garrow_cast_options_new(void); #define GARROW_TYPE_SCALAR_AGGREGATE_OPTIONS (garrow_scalar_aggregate_options_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowScalarAggregateOptions, garrow_scalar_aggregate_options, GARROW, @@ -401,6 +416,7 @@ typedef enum { } GArrowCountMode; #define GARROW_TYPE_COUNT_OPTIONS (garrow_count_options_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GArrowCountOptions, garrow_count_options, GARROW, COUNT_OPTIONS, GArrowFunctionOptions) struct _GArrowCountOptionsClass @@ -428,6 +444,7 @@ typedef enum { } GArrowFilterNullSelectionBehavior; #define GARROW_TYPE_FILTER_OPTIONS (garrow_filter_options_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowFilterOptions, garrow_filter_options, GARROW, @@ -443,6 +460,7 @@ GArrowFilterOptions * garrow_filter_options_new(void); #define GARROW_TYPE_TAKE_OPTIONS (garrow_take_options_get_type()) +GARROW_AVAILABLE_IN_0_14 G_DECLARE_DERIVABLE_TYPE( GArrowTakeOptions, garrow_take_options, GARROW, TAKE_OPTIONS, GArrowFunctionOptions) struct _GArrowTakeOptionsClass @@ -487,6 +505,7 @@ 
typedef enum /**/ { } GArrowNullPlacement; #define GARROW_TYPE_ARRAY_SORT_OPTIONS (garrow_array_sort_options_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowArraySortOptions, garrow_array_sort_options, GARROW, @@ -506,6 +525,7 @@ garrow_array_sort_options_equal(GArrowArraySortOptions *options, GArrowArraySortOptions *other_options); #define GARROW_TYPE_SORT_KEY (garrow_sort_key_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowSortKey, garrow_sort_key, GARROW, SORT_KEY, GObject) struct _GArrowSortKeyClass { @@ -521,6 +541,7 @@ gboolean garrow_sort_key_equal(GArrowSortKey *sort_key, GArrowSortKey *other_sort_key); #define GARROW_TYPE_SORT_OPTIONS (garrow_sort_options_get_type()) +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE( GArrowSortOptions, garrow_sort_options, GARROW, SORT_OPTIONS, GArrowFunctionOptions) struct _GArrowSortOptionsClass @@ -545,6 +566,7 @@ void garrow_sort_options_add_sort_key(GArrowSortOptions *options, GArrowSortKey *sort_key); #define GARROW_TYPE_SET_LOOKUP_OPTIONS (garrow_set_lookup_options_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowSetLookupOptions, garrow_set_lookup_options, GARROW, @@ -560,6 +582,7 @@ GArrowSetLookupOptions * garrow_set_lookup_options_new(GArrowDatum *value_set); #define GARROW_TYPE_VARIANCE_OPTIONS (garrow_variance_options_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowVarianceOptions, garrow_variance_options, GARROW, @@ -620,6 +643,7 @@ typedef enum { } GArrowRoundMode; #define GARROW_TYPE_ROUND_OPTIONS (garrow_round_options_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE( GArrowRoundOptions, garrow_round_options, GARROW, ROUND_OPTIONS, GArrowFunctionOptions) struct _GArrowRoundOptionsClass @@ -633,6 +657,7 @@ garrow_round_options_new(void); #define GARROW_TYPE_ROUND_TO_MULTIPLE_OPTIONS \ (garrow_round_to_multiple_options_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE(GArrowRoundToMultipleOptions, garrow_round_to_multiple_options, GARROW, @@ -648,6 +673,7 @@ GArrowRoundToMultipleOptions * garrow_round_to_multiple_options_new(void); #define GARROW_TYPE_MATCH_SUBSTRING_OPTIONS (garrow_match_substring_options_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE(GArrowMatchSubstringOptions, garrow_match_substring_options, GARROW, @@ -683,6 +709,7 @@ typedef enum /*< underscore_name=garrow_utf8_normalize_form >*/ { } GArrowUTF8NormalizeForm; #define GARROW_TYPE_UTF8_NORMALIZE_OPTIONS (garrow_utf8_normalize_options_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowUTF8NormalizeOptions, garrow_utf8_normalize_options, GARROW, @@ -719,6 +746,7 @@ typedef enum { } GArrowQuantileInterpolation; #define GARROW_TYPE_QUANTILE_OPTIONS (garrow_quantile_options_get_type()) +GARROW_AVAILABLE_IN_9_0 G_DECLARE_DERIVABLE_TYPE(GArrowQuantileOptions, garrow_quantile_options, GARROW, @@ -745,6 +773,7 @@ garrow_quantile_options_set_qs(GArrowQuantileOptions *options, gsize n); #define GARROW_TYPE_INDEX_OPTIONS (garrow_index_options_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE( GArrowIndexOptions, garrow_index_options, GARROW, INDEX_OPTIONS, GArrowFunctionOptions) struct _GArrowIndexOptionsClass @@ -782,6 +811,7 @@ typedef enum { } GArrowRankTiebreaker; #define GARROW_TYPE_RANK_OPTIONS (garrow_rank_options_get_type()) +GARROW_AVAILABLE_IN_12_0 G_DECLARE_DERIVABLE_TYPE( GArrowRankOptions, garrow_rank_options, GARROW, RANK_OPTIONS, GArrowFunctionOptions) struct _GArrowRankOptionsClass @@ -805,18 +835,25 @@ 
GARROW_AVAILABLE_IN_12_0 void garrow_rank_options_add_sort_key(GArrowRankOptions *options, GArrowSortKey *sort_key); +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_array_cast(GArrowArray *array, GArrowDataType *target_data_type, GArrowCastOptions *options, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowArray * garrow_array_unique(GArrowArray *array, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowDictionaryArray * garrow_array_dictionary_encode(GArrowArray *array, GError **error); + GARROW_AVAILABLE_IN_0_13 gint64 garrow_array_count(GArrowArray *array, GArrowCountOptions *options, GError **error); + GARROW_AVAILABLE_IN_0_13 GArrowStructArray * garrow_array_count_values(GArrowArray *array, GError **error); @@ -987,6 +1024,7 @@ garrow_record_batch_filter(GArrowRecordBatch *record_batch, GError **error); #define GARROW_TYPE_RUN_END_ENCODE_OPTIONS (garrow_run_end_encode_options_get_type()) +GARROW_AVAILABLE_IN_13_0 G_DECLARE_DERIVABLE_TYPE(GArrowRunEndEncodeOptions, garrow_run_end_encode_options, GARROW, @@ -1011,6 +1049,7 @@ GArrowArray * garrow_run_end_encoded_array_decode(GArrowRunEndEncodedArray *array, GError **error); #define GARROW_TYPE_STRPTIME_OPTIONS (garrow_strptime_options_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowStrptimeOptions, garrow_strptime_options, GARROW, @@ -1026,6 +1065,7 @@ GArrowStrptimeOptions * garrow_strptime_options_new(void); #define GARROW_TYPE_STRFTIME_OPTIONS (garrow_strftime_options_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowStrftimeOptions, garrow_strftime_options, GARROW, @@ -1041,6 +1081,7 @@ GArrowStrftimeOptions * garrow_strftime_options_new(void); #define GARROW_TYPE_SPLIT_PATTERN_OPTIONS (garrow_split_pattern_options_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowSplitPatternOptions, garrow_split_pattern_options, GARROW, @@ -1056,6 +1097,7 @@ GArrowSplitPatternOptions * garrow_split_pattern_options_new(void); #define GARROW_TYPE_STRUCT_FIELD_OPTIONS (garrow_struct_field_options_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowStructFieldOptions, garrow_struct_field_options, GARROW, diff --git a/c_glib/arrow-glib/datum.h b/c_glib/arrow-glib/datum.h index df5e9a1c2cf4f..fc9a2fe7ab907 100644 --- a/c_glib/arrow-glib/datum.h +++ b/c_glib/arrow-glib/datum.h @@ -28,6 +28,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_DATUM (garrow_datum_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE(GArrowDatum, garrow_datum, GARROW, DATUM, GObject) struct _GArrowDatumClass { @@ -60,6 +61,7 @@ garrow_datum_to_string(GArrowDatum *datum); /* GARROW_TYPE_NONE_DATUM */ #define GARROW_TYPE_SCALAR_DATUM (garrow_scalar_datum_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowScalarDatum, garrow_scalar_datum, GARROW, SCALAR_DATUM, GArrowDatum) struct _GArrowScalarDatumClass @@ -72,6 +74,7 @@ GArrowScalarDatum * garrow_scalar_datum_new(GArrowScalar *value); #define GARROW_TYPE_ARRAY_DATUM (garrow_array_datum_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( GArrowArrayDatum, garrow_array_datum, GARROW, ARRAY_DATUM, GArrowDatum) struct _GArrowArrayDatumClass @@ -84,6 +87,7 @@ GArrowArrayDatum * garrow_array_datum_new(GArrowArray *value); #define GARROW_TYPE_CHUNKED_ARRAY_DATUM (garrow_chunked_array_datum_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE(GArrowChunkedArrayDatum, garrow_chunked_array_datum, GARROW, @@ -99,6 +103,7 @@ GArrowChunkedArrayDatum * garrow_chunked_array_datum_new(GArrowChunkedArray *value); #define 
GARROW_TYPE_RECORD_BATCH_DATUM (garrow_record_batch_datum_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchDatum, garrow_record_batch_datum, GARROW, @@ -114,6 +119,7 @@ GArrowRecordBatchDatum * garrow_record_batch_datum_new(GArrowRecordBatch *value); #define GARROW_TYPE_TABLE_DATUM (garrow_table_datum_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( GArrowTableDatum, garrow_table_datum, GARROW, TABLE_DATUM, GArrowDatum) struct _GArrowTableDatumClass diff --git a/c_glib/arrow-glib/decimal.h b/c_glib/arrow-glib/decimal.h index b967fa36d5611..f64afa800a19b 100644 --- a/c_glib/arrow-glib/decimal.h +++ b/c_glib/arrow-glib/decimal.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS /* Disabled because it conflicts with GARROW_TYPE_DECIMAL128 in GArrowType. */ /* #define GARROW_TYPE_DECIMAL128 (garrow_decimal128_get_type()) */ +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128, garrow_decimal128, GARROW, DECIMAL128, GObject) struct _GArrowDecimal128Class @@ -34,8 +35,10 @@ struct _GArrowDecimal128Class GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_new_string(const gchar *data, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_new_integer(const gint64 data); GARROW_AVAILABLE_IN_3_0 @@ -62,25 +65,34 @@ GARROW_AVAILABLE_IN_0_12 gboolean garrow_decimal128_greater_than_or_equal(GArrowDecimal128 *decimal, GArrowDecimal128 *other_decimal); +GARROW_AVAILABLE_IN_ALL gchar * garrow_decimal128_to_string_scale(GArrowDecimal128 *decimal, gint32 scale); +GARROW_AVAILABLE_IN_ALL gchar * garrow_decimal128_to_string(GArrowDecimal128 *decimal); GARROW_AVAILABLE_IN_3_0 GBytes * garrow_decimal128_to_bytes(GArrowDecimal128 *decimal); +GARROW_AVAILABLE_IN_ALL void garrow_decimal128_abs(GArrowDecimal128 *decimal); +GARROW_AVAILABLE_IN_ALL void garrow_decimal128_negate(GArrowDecimal128 *decimal); +GARROW_AVAILABLE_IN_ALL gint64 garrow_decimal128_to_integer(GArrowDecimal128 *decimal); +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_plus(GArrowDecimal128 *left, GArrowDecimal128 *right); +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_minus(GArrowDecimal128 *left, GArrowDecimal128 *right); +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_multiply(GArrowDecimal128 *left, GArrowDecimal128 *right); +GARROW_AVAILABLE_IN_ALL GArrowDecimal128 * garrow_decimal128_divide(GArrowDecimal128 *left, GArrowDecimal128 *right, @@ -95,6 +107,7 @@ garrow_decimal128_rescale(GArrowDecimal128 *decimal, /* Disabled because it conflicts with GARROW_TYPE_DECIMAL256 in GArrowType. 
*/ /* #define GARROW_TYPE_DECIMAL256 (garrow_decimal256_get_type()) */ +GARROW_AVAILABLE_IN_3_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256, garrow_decimal256, GARROW, DECIMAL256, GObject) struct _GArrowDecimal256Class diff --git a/c_glib/arrow-glib/error.h b/c_glib/arrow-glib/error.h index 4414417a1a25b..e0c6a591a021b 100644 --- a/c_glib/arrow-glib/error.h +++ b/c_glib/arrow-glib/error.h @@ -21,6 +21,8 @@ #include +#include + G_BEGIN_DECLS /** @@ -66,6 +68,7 @@ typedef enum { #define GARROW_ERROR garrow_error_quark() +GARROW_AVAILABLE_IN_ALL GQuark garrow_error_quark(void); diff --git a/c_glib/arrow-glib/error.hpp b/c_glib/arrow-glib/error.hpp index 90a0f3161878e..c2c9b3c63028a 100644 --- a/c_glib/arrow-glib/error.hpp +++ b/c_glib/arrow-glib/error.hpp @@ -23,18 +23,26 @@ #include +GARROW_EXTERN gboolean garrow_error_check(GError **error, const arrow::Status &status, const char *context); + +GARROW_EXTERN GArrowError garrow_error_from_status(const arrow::Status &status); + +GARROW_EXTERN arrow::StatusCode garrow_error_to_status_code(GError *error, arrow::StatusCode default_code); + +GARROW_EXTERN arrow::Status garrow_error_to_status(GError *error, arrow::StatusCode default_code, const char *context); namespace garrow { + GARROW_EXTERN gboolean check(GError **error, const arrow::Status &status, const char *context); diff --git a/c_glib/arrow-glib/expression.h b/c_glib/arrow-glib/expression.h index 3141ed4df18b7..5a6bfb456fc64 100644 --- a/c_glib/arrow-glib/expression.h +++ b/c_glib/arrow-glib/expression.h @@ -31,6 +31,7 @@ gboolean garrow_expression_equal(GArrowExpression *expression, GArrowExpression *other_expression); #define GARROW_TYPE_LITERAL_EXPRESSION (garrow_literal_expression_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowLiteralExpression, garrow_literal_expression, GARROW, @@ -46,6 +47,7 @@ GArrowLiteralExpression * garrow_literal_expression_new(GArrowDatum *datum); #define GARROW_TYPE_FIELD_EXPRESSION (garrow_field_expression_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE(GArrowFieldExpression, garrow_field_expression, GARROW, @@ -61,6 +63,7 @@ GArrowFieldExpression * garrow_field_expression_new(const gchar *reference, GError **error); #define GARROW_TYPE_CALL_EXPRESSION (garrow_call_expression_get_type()) +GARROW_AVAILABLE_IN_6_0 G_DECLARE_DERIVABLE_TYPE( GArrowCallExpression, garrow_call_expression, GARROW, CALL_EXPRESSION, GArrowExpression) struct _GArrowCallExpressionClass diff --git a/c_glib/arrow-glib/expression.hpp b/c_glib/arrow-glib/expression.hpp index 60d5c9fe2f1bd..cc96badbe67aa 100644 --- a/c_glib/arrow-glib/expression.hpp +++ b/c_glib/arrow-glib/expression.hpp @@ -23,7 +23,10 @@ #include +GARROW_EXTERN GArrowExpression * garrow_expression_new_raw(const arrow::compute::Expression &arrow_expression); + +GARROW_EXTERN arrow::compute::Expression * garrow_expression_get_raw(GArrowExpression *expression); diff --git a/c_glib/arrow-glib/field.h b/c_glib/arrow-glib/field.h index 8de63757878c9..4be13f6135975 100644 --- a/c_glib/arrow-glib/field.h +++ b/c_glib/arrow-glib/field.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_FIELD (garrow_field_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowField, garrow_field, GARROW, FIELD, GObject) struct _GArrowFieldClass { @@ -34,8 +35,10 @@ GARROW_AVAILABLE_IN_6_0 GArrowField * garrow_field_import(gpointer c_abi_schema, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_field_new(const gchar *name, GArrowDataType *data_type); +GARROW_AVAILABLE_IN_ALL GArrowField * 
garrow_field_new_full(const gchar *name, GArrowDataType *data_type, gboolean nullable); @@ -43,18 +46,26 @@ GARROW_AVAILABLE_IN_6_0 gpointer garrow_field_export(GArrowField *field, GError **error); +GARROW_AVAILABLE_IN_ALL const gchar * garrow_field_get_name(GArrowField *field); + +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_field_get_data_type(GArrowField *field); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_field_is_nullable(GArrowField *field); +GARROW_AVAILABLE_IN_ALL gboolean garrow_field_equal(GArrowField *field, GArrowField *other_field); +GARROW_AVAILABLE_IN_ALL gchar * garrow_field_to_string(GArrowField *field); + GARROW_AVAILABLE_IN_3_0 gchar * garrow_field_to_string_metadata(GArrowField *field, gboolean show_metadata); diff --git a/c_glib/arrow-glib/file-system.h b/c_glib/arrow-glib/file-system.h index d3d5fde73fe23..2e500672e145c 100644 --- a/c_glib/arrow-glib/file-system.h +++ b/c_glib/arrow-glib/file-system.h @@ -53,6 +53,7 @@ typedef enum { /* arrow::fs::FileInfo */ #define GARROW_TYPE_FILE_INFO (garrow_file_info_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowFileInfo, garrow_file_info, GARROW, FILE_INFO, GObject) struct _GArrowFileInfoClass { @@ -80,6 +81,7 @@ garrow_file_info_to_string(GArrowFileInfo *file_info); /* arrow::fs::FileSelector */ #define GARROW_TYPE_FILE_SELECTOR (garrow_file_selector_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GArrowFileSelector, garrow_file_selector, GARROW, FILE_SELECTOR, GObject) struct _GArrowFileSelectorClass @@ -90,6 +92,7 @@ struct _GArrowFileSelectorClass /* arrow::fs::FileSystem */ #define GARROW_TYPE_FILE_SYSTEM (garrow_file_system_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GArrowFileSystem, garrow_file_system, GARROW, FILE_SYSTEM, GObject) struct _GArrowFileSystemClass @@ -197,6 +200,7 @@ garrow_file_system_open_append_stream(GArrowFileSystem *file_system, /* arrow::fs::SubTreeFileSystem */ #define GARROW_TYPE_SUB_TREE_FILE_SYSTEM (garrow_sub_tree_file_system_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowSubTreeFileSystem, garrow_sub_tree_file_system, GARROW, @@ -215,6 +219,7 @@ garrow_sub_tree_file_system_new(const gchar *base_path, /* arrow::fs::SlowFileSystem */ #define GARROW_TYPE_SLOW_FILE_SYSTEM (garrow_slow_file_system_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowSlowFileSystem, garrow_slow_file_system, GARROW, @@ -244,6 +249,7 @@ garrow_slow_file_system_new_average_latency_and_seed(GArrowFileSystem *base_file gint32 seed); #define GARROW_TYPE_MOCK_FILE_SYSTEM (garrow_mock_file_system_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowMockFileSystem, garrow_mock_file_system, GARROW, @@ -255,6 +261,7 @@ struct _GArrowMockFileSystemClass }; #define GARROW_TYPE_HDFS_FILE_SYSTEM (garrow_hdfs_file_system_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowHDFSFileSystem, garrow_hdfs_file_system, GARROW, @@ -290,6 +297,7 @@ typedef enum { } GArrowS3LogLevel; #define GARROW_TYPE_S3_GLOBAL_OPTIONS (garrow_s3_global_options_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE( GArrowS3GlobalOptions, garrow_s3_global_options, GARROW, S3_GLOBAL_OPTIONS, GObject) struct _GArrowS3GlobalOptionsClass @@ -312,6 +320,7 @@ gboolean garrow_s3_finalize(GError **error); #define GARROW_TYPE_S3_FILE_SYSTEM (garrow_s3_file_system_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE( GArrowS3FileSystem, garrow_s3_file_system, GARROW, S3_FILE_SYSTEM, GArrowFileSystem) struct 
_GArrowS3FileSystemClass @@ -320,6 +329,7 @@ struct _GArrowS3FileSystemClass }; #define GARROW_TYPE_GCS_FILE_SYSTEM (garrow_gcs_file_system_get_type()) +GARROW_AVAILABLE_IN_7_0 G_DECLARE_DERIVABLE_TYPE( GArrowGCSFileSystem, garrow_gcs_file_system, GARROW, GCS_FILE_SYSTEM, GArrowFileSystem) struct _GArrowGCSFileSystemClass diff --git a/c_glib/arrow-glib/file-system.hpp b/c_glib/arrow-glib/file-system.hpp index f41fc6e9c75b0..c535958301c5c 100644 --- a/c_glib/arrow-glib/file-system.hpp +++ b/c_glib/arrow-glib/file-system.hpp @@ -23,28 +23,35 @@ #include +GARROW_EXTERN GArrowFileInfo * garrow_file_info_new_raw(const arrow::fs::FileInfo &arrow_file_info); +GARROW_EXTERN arrow::fs::FileInfo * garrow_file_info_get_raw(GArrowFileInfo *file_info); +GARROW_EXTERN GArrowFileSystem * garrow_file_system_new_raw(std::shared_ptr *arrow_file_system); +GARROW_EXTERN std::shared_ptr garrow_file_system_get_raw(GArrowFileSystem *file_system); +GARROW_EXTERN GArrowSubTreeFileSystem * garrow_sub_tree_file_system_new_raw( std::shared_ptr *arrow_file_system, GArrowFileSystem *base_file_system); +GARROW_EXTERN GArrowSlowFileSystem * garrow_slow_file_system_new_raw(std::shared_ptr *arrow_file_system, GArrowFileSystem *base_file_system); #ifdef ARROW_S3 +GARROW_EXTERN arrow::fs::S3GlobalOptions * garrow_s3_global_options_get_raw(GArrowS3GlobalOptions *options); #endif diff --git a/c_glib/arrow-glib/file.h b/c_glib/arrow-glib/file.h index 42afed139463c..799dd83b9c243 100644 --- a/c_glib/arrow-glib/file.h +++ b/c_glib/arrow-glib/file.h @@ -27,15 +27,22 @@ G_BEGIN_DECLS #define GARROW_TYPE_FILE (garrow_file_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE(GArrowFile, garrow_file, GARROW, FILE, GObject) +GARROW_AVAILABLE_IN_ALL gboolean garrow_file_close(GArrowFile *file, GError **error); + GARROW_AVAILABLE_IN_0_13 gboolean garrow_file_is_closed(GArrowFile *file); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_file_tell(GArrowFile *file, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowFileMode garrow_file_get_mode(GArrowFile *file); diff --git a/c_glib/arrow-glib/input-stream.cpp b/c_glib/arrow-glib/input-stream.cpp index 03a3f03fff7ce..52c79993e4ca8 100644 --- a/c_glib/arrow-glib/input-stream.cpp +++ b/c_glib/arrow-glib/input-stream.cpp @@ -35,6 +35,22 @@ #include #include +static std::shared_ptr +garrow_input_stream_get_raw_file_interface(GArrowFile *file) +{ + auto input_stream = GARROW_INPUT_STREAM(file); + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + return arrow_input_stream; +} + +static std::shared_ptr +garrow_input_stream_get_raw_readable_interface(GArrowReadable *readable) +{ + auto input_stream = GARROW_INPUT_STREAM(readable); + auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); + return arrow_input_stream; +} + G_BEGIN_DECLS /** @@ -71,28 +87,12 @@ enum { PROP_INPUT_STREAM = 1 }; -static std::shared_ptr -garrow_input_stream_get_raw_file_interface(GArrowFile *file) -{ - auto input_stream = GARROW_INPUT_STREAM(file); - auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); - return arrow_input_stream; -} - static void garrow_input_stream_file_interface_init(GArrowFileInterface *iface) { iface->get_raw = garrow_input_stream_get_raw_file_interface; } -static std::shared_ptr -garrow_input_stream_get_raw_readable_interface(GArrowReadable *readable) -{ - auto input_stream = GARROW_INPUT_STREAM(readable); - auto arrow_input_stream = garrow_input_stream_get_raw(input_stream); - return arrow_input_stream; -} - static void 
garrow_input_stream_readable_interface_init(GArrowReadableInterface *iface) { diff --git a/c_glib/arrow-glib/input-stream.h b/c_glib/arrow-glib/input-stream.h index 3e2a2ecdbd4fa..676f2f44b0041 100644 --- a/c_glib/arrow-glib/input-stream.h +++ b/c_glib/arrow-glib/input-stream.h @@ -30,6 +30,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_INPUT_STREAM (garrow_input_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowInputStream, garrow_input_stream, GARROW, INPUT_STREAM, GInputStream) struct _GArrowInputStreamClass @@ -37,16 +38,22 @@ struct _GArrowInputStreamClass GInputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gboolean garrow_input_stream_advance(GArrowInputStream *input_stream, gint64 n_bytes, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_input_stream_align(GArrowInputStream *input_stream, gint32 alignment, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTensor * garrow_input_stream_read_tensor(GArrowInputStream *input_stream, GError **error); + GARROW_AVAILABLE_IN_1_0 GArrowRecordBatch * garrow_input_stream_read_record_batch(GArrowInputStream *input_stream, @@ -55,6 +62,7 @@ garrow_input_stream_read_record_batch(GArrowInputStream *input_stream, GError **error); #define GARROW_TYPE_SEEKABLE_INPUT_STREAM (garrow_seekable_input_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowSeekableInputStream, garrow_seekable_input_stream, GARROW, @@ -65,12 +73,17 @@ struct _GArrowSeekableInputStreamClass GArrowInputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL guint64 garrow_seekable_input_stream_get_size(GArrowSeekableInputStream *input_stream, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_seekable_input_stream_get_support_zero_copy( GArrowSeekableInputStream *input_stream); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_seekable_input_stream_read_at(GArrowSeekableInputStream *input_stream, gint64 position, @@ -89,6 +102,7 @@ garrow_seekable_input_stream_peek(GArrowSeekableInputStream *input_stream, GError **error); #define GARROW_TYPE_BUFFER_INPUT_STREAM (garrow_buffer_input_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBufferInputStream, garrow_buffer_input_stream, GARROW, @@ -99,13 +113,16 @@ struct _GArrowBufferInputStreamClass GArrowSeekableInputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBufferInputStream * garrow_buffer_input_stream_new(GArrowBuffer *buffer); +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_buffer_input_stream_get_buffer(GArrowBufferInputStream *input_stream); #define GARROW_TYPE_FILE_INPUT_STREAM (garrow_file_input_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFileInputStream, garrow_file_input_stream, GARROW, @@ -116,15 +133,21 @@ struct _GArrowFileInputStreamClass GArrowSeekableInputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFileInputStream * garrow_file_input_stream_new(const gchar *path, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowFileInputStream * garrow_file_input_stream_new_file_descriptor(gint file_descriptor, GError **error); + +GARROW_AVAILABLE_IN_ALL gint garrow_file_input_stream_get_file_descriptor(GArrowFileInputStream *stream); #define GARROW_TYPE_MEMORY_MAPPED_INPUT_STREAM \ (garrow_memory_mapped_input_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowMemoryMappedInputStream, garrow_memory_mapped_input_stream, GARROW, @@ -135,10 +158,12 @@ struct _GArrowMemoryMappedInputStreamClass GArrowSeekableInputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL 
GArrowMemoryMappedInputStream * garrow_memory_mapped_input_stream_new(const gchar *path, GError **error); #define GARROW_TYPE_GIO_INPUT_STREAM (garrow_gio_input_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowGIOInputStream, garrow_gio_input_stream, GARROW, @@ -149,15 +174,19 @@ struct _GArrowGIOInputStreamClass GArrowSeekableInputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowGIOInputStream * garrow_gio_input_stream_new(GInputStream *gio_input_stream); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED GInputStream * garrow_gio_input_stream_get_raw(GArrowGIOInputStream *input_stream); #endif #define GARROW_TYPE_COMPRESSED_INPUT_STREAM (garrow_compressed_input_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowCompressedInputStream, garrow_compressed_input_stream, GARROW, @@ -168,6 +197,7 @@ struct _GArrowCompressedInputStreamClass GArrowInputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCompressedInputStream * garrow_compressed_input_stream_new(GArrowCodec *codec, GArrowInputStream *raw, diff --git a/c_glib/arrow-glib/input-stream.hpp b/c_glib/arrow-glib/input-stream.hpp index 7ae759370ddbd..0400398c4777f 100644 --- a/c_glib/arrow-glib/input-stream.hpp +++ b/c_glib/arrow-glib/input-stream.hpp @@ -26,34 +26,48 @@ #include +GARROW_EXTERN GArrowInputStream * garrow_input_stream_new_raw(std::shared_ptr *arrow_input_stream); + +GARROW_EXTERN std::shared_ptr garrow_input_stream_get_raw(GArrowInputStream *input_stream); +GARROW_EXTERN GArrowSeekableInputStream * garrow_seekable_input_stream_new_raw( std::shared_ptr *arrow_random_access_file); + +GARROW_EXTERN std::shared_ptr garrow_seekable_input_stream_get_raw(GArrowSeekableInputStream *input_stream); +GARROW_EXTERN GArrowBufferInputStream * garrow_buffer_input_stream_new_raw( std::shared_ptr *arrow_buffer_reader, GArrowBuffer *buffer); + +GARROW_EXTERN std::shared_ptr garrow_buffer_input_stream_get_raw(GArrowBufferInputStream *input_stream); +GARROW_EXTERN GArrowFileInputStream * garrow_file_input_stream_new_raw(std::shared_ptr *arrow_stream); +GARROW_EXTERN GArrowMemoryMappedInputStream * garrow_memory_mapped_input_stream_new_raw( std::shared_ptr *arrow_stream); +GARROW_EXTERN GArrowCompressedInputStream * garrow_compressed_input_stream_new_raw( std::shared_ptr *arrow_raw, GArrowCodec *codec, GArrowInputStream *raw); + +GARROW_EXTERN std::shared_ptr garrow_compressed_input_stream_get_raw(GArrowCompressedInputStream *stream); diff --git a/c_glib/arrow-glib/interval.h b/c_glib/arrow-glib/interval.h index a6c9e1ff1e1ef..8c23b9a509bb4 100644 --- a/c_glib/arrow-glib/interval.h +++ b/c_glib/arrow-glib/interval.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_DAY_MILLISECOND (garrow_day_millisecond_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GArrowDayMillisecond, garrow_day_millisecond, GARROW, DAY_MILLISECOND, GObject) @@ -47,6 +48,7 @@ garrow_day_millisecond_less_than(GArrowDayMillisecond *day_millisecond, GArrowDayMillisecond *other_day_millisecond); #define GARROW_TYPE_MONTH_DAY_NANO (garrow_month_day_nano_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GArrowMonthDayNano, garrow_month_day_nano, GARROW, MONTH_DAY_NANO, GObject) diff --git a/c_glib/arrow-glib/ipc-options.h b/c_glib/arrow-glib/ipc-options.h index 418b08f080152..1ddff059d2faf 100644 --- a/c_glib/arrow-glib/ipc-options.h +++ b/c_glib/arrow-glib/ipc-options.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_READ_OPTIONS 
(garrow_read_options_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( GArrowReadOptions, garrow_read_options, GARROW, READ_OPTIONS, GObject) struct _GArrowReadOptionsClass @@ -46,6 +47,7 @@ garrow_read_options_set_included_fields(GArrowReadOptions *options, gsize n_fields); #define GARROW_TYPE_WRITE_OPTIONS (garrow_write_options_get_type()) +GARROW_AVAILABLE_IN_1_0 G_DECLARE_DERIVABLE_TYPE( GArrowWriteOptions, garrow_write_options, GARROW, WRITE_OPTIONS, GObject) struct _GArrowWriteOptionsClass diff --git a/c_glib/arrow-glib/ipc-options.hpp b/c_glib/arrow-glib/ipc-options.hpp index f57fbd3c11e5a..838d05d41dbac 100644 --- a/c_glib/arrow-glib/ipc-options.hpp +++ b/c_glib/arrow-glib/ipc-options.hpp @@ -23,10 +23,14 @@ #include +GARROW_EXTERN arrow::ipc::IpcReadOptions * garrow_read_options_get_raw(GArrowReadOptions *options); + +GARROW_EXTERN arrow::ipc::DictionaryMemo * garrow_read_options_get_dictionary_memo_raw(GArrowReadOptions *options); +GARROW_EXTERN arrow::ipc::IpcWriteOptions * garrow_write_options_get_raw(GArrowWriteOptions *options); diff --git a/c_glib/arrow-glib/local-file-system.h b/c_glib/arrow-glib/local-file-system.h index 9af4f8e8b168d..6ad2ee9f231ab 100644 --- a/c_glib/arrow-glib/local-file-system.h +++ b/c_glib/arrow-glib/local-file-system.h @@ -27,6 +27,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_LOCAL_FILE_SYSTEM_OPTIONS \ (garrow_local_file_system_options_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystemOptions, garrow_local_file_system_options, GARROW, @@ -44,6 +45,7 @@ garrow_local_file_system_options_new(void); /* arrow::fs::LocalFileSystem */ #define GARROW_TYPE_LOCAL_FILE_SYSTEM (garrow_local_file_system_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowLocalFileSystem, garrow_local_file_system, GARROW, diff --git a/c_glib/arrow-glib/memory-pool.h b/c_glib/arrow-glib/memory-pool.h index de2a5d717a183..7da15a9eb1b47 100644 --- a/c_glib/arrow-glib/memory-pool.h +++ b/c_glib/arrow-glib/memory-pool.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GARROW_TYPE_MEMORY_POOL (garrow_memory_pool_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowMemoryPool, garrow_memory_pool, GARROW, MEMORY_POOL, GObject) struct _GArrowMemoryPoolClass @@ -31,12 +34,19 @@ struct _GArrowMemoryPoolClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowMemoryPool * garrow_memory_pool_default(); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_memory_pool_get_bytes_allocated(GArrowMemoryPool *memory_pool); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_memory_pool_get_max_memory(GArrowMemoryPool *memory_pool); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_memory_pool_get_backend_name(GArrowMemoryPool *memory_pool); diff --git a/c_glib/arrow-glib/meson.build b/c_glib/arrow-glib/meson.build index 67909ff22c428..fd32b35badcb1 100644 --- a/c_glib/arrow-glib/meson.build +++ b/c_glib/arrow-glib/meson.build @@ -205,14 +205,12 @@ cpp_internal_headers = files( 'internal-index.hpp', ) -version_h_conf = configuration_data() -version_h_conf.set('GARROW_VERSION_MAJOR', version_major) -version_h_conf.set('GARROW_VERSION_MINOR', version_minor) -version_h_conf.set('GARROW_VERSION_MICRO', version_micro) -version_h_conf.set('GARROW_VERSION_TAG', version_tag) -version_h = configure_file(input: 'version.h.in', - output: 'version.h', - configuration: version_h_conf) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GARROW', '--version', 
version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + c_headers += version_h enums = gnome.mkenums('enums', @@ -226,11 +224,9 @@ enums = gnome.mkenums('enums', enums_source = enums[0] enums_header = enums[1] - headers = c_headers + cpp_headers install_headers(headers, subdir: meson.project_name()) - gobject = dependency('gobject-2.0') gobject_libdir = gobject.get_variable(pkgconfig: 'libdir') # This is for Homebrew. "pkg-config --cflags gio-2.0" includes the @@ -253,6 +249,7 @@ libarrow_glib = library('arrow-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGARROW_COMPILATION'], soversion: so_version, version: library_version) arrow_glib = declare_dependency(link_with: libarrow_glib, diff --git a/c_glib/arrow-glib/orc-file-reader.h b/c_glib/arrow-glib/orc-file-reader.h index 20089eb2866c6..4eb3df5242e48 100644 --- a/c_glib/arrow-glib/orc-file-reader.h +++ b/c_glib/arrow-glib/orc-file-reader.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_ORC_FILE_READER (garrow_orc_file_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowORCFileReader, garrow_orc_file_reader, GARROW, ORC_FILE_READER, GObject) struct _GArrowORCFileReaderClass @@ -31,10 +32,12 @@ struct _GArrowORCFileReaderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowORCFileReader * garrow_orc_file_reader_new(GArrowSeekableInputStream *file, GError **error); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_set_field_indices) void garrow_orc_file_reader_set_field_indexes(GArrowORCFileReader *reader, @@ -47,6 +50,7 @@ garrow_orc_file_reader_set_field_indices(GArrowORCFileReader *reader, const gint *field_indices, guint n_field_indices); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_12_FOR(garrow_orc_file_reader_get_field_indices) const gint * garrow_orc_file_reader_get_field_indexes(GArrowORCFileReader *reader, @@ -56,14 +60,24 @@ GARROW_AVAILABLE_IN_0_12 const gint * garrow_orc_file_reader_get_field_indices(GArrowORCFileReader *reader, guint *n_field_indices); + +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_orc_file_reader_read_type(GArrowORCFileReader *reader, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_orc_file_reader_read_stripes(GArrowORCFileReader *reader, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_orc_file_reader_read_stripe(GArrowORCFileReader *reader, gint64 i, GError **error); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_orc_file_reader_get_n_stripes(GArrowORCFileReader *reader); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_orc_file_reader_get_n_rows(GArrowORCFileReader *reader); diff --git a/c_glib/arrow-glib/output-stream.cpp b/c_glib/arrow-glib/output-stream.cpp index 83de2eb38a72a..d9bdf7ad8b786 100644 --- a/c_glib/arrow-glib/output-stream.cpp +++ b/c_glib/arrow-glib/output-stream.cpp @@ -33,6 +33,22 @@ #include #include +static std::shared_ptr +garrow_output_stream_get_raw_file_interface(GArrowFile *file) +{ + auto output_stream = GARROW_OUTPUT_STREAM(file); + auto arrow_output_stream = garrow_output_stream_get_raw(output_stream); + return arrow_output_stream; +} + +static std::shared_ptr +garrow_output_stream_get_raw_writable_interface(GArrowWritable *writable) +{ + auto output_stream = GARROW_OUTPUT_STREAM(writable); + auto arrow_output_stream = garrow_output_stream_get_raw(output_stream); + return arrow_output_stream; +} + G_BEGIN_DECLS /** @@ 
-65,28 +81,12 @@ enum { PROP_OUTPUT_STREAM }; -static std::shared_ptr -garrow_output_stream_get_raw_file_interface(GArrowFile *file) -{ - auto output_stream = GARROW_OUTPUT_STREAM(file); - auto arrow_output_stream = garrow_output_stream_get_raw(output_stream); - return arrow_output_stream; -} - static void garrow_output_stream_file_interface_init(GArrowFileInterface *iface) { iface->get_raw = garrow_output_stream_get_raw_file_interface; } -static std::shared_ptr -garrow_output_stream_get_raw_writable_interface(GArrowWritable *writable) -{ - auto output_stream = GARROW_OUTPUT_STREAM(writable); - auto arrow_output_stream = garrow_output_stream_get_raw(output_stream); - return arrow_output_stream; -} - static void garrow_output_stream_writable_interface_init(GArrowWritableInterface *iface) { diff --git a/c_glib/arrow-glib/output-stream.h b/c_glib/arrow-glib/output-stream.h index 1b18c08c14a5f..5c8b4b9374fc6 100644 --- a/c_glib/arrow-glib/output-stream.h +++ b/c_glib/arrow-glib/output-stream.h @@ -30,6 +30,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_OUTPUT_STREAM (garrow_output_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowOutputStream, garrow_output_stream, GARROW, OUTPUT_STREAM, GObject) struct _GArrowOutputStreamClass @@ -37,8 +38,11 @@ struct _GArrowOutputStreamClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gboolean garrow_output_stream_align(GArrowOutputStream *stream, gint32 alignment, GError **error); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_output_stream_write_tensor(GArrowOutputStream *stream, GArrowTensor *tensor, @@ -51,6 +55,7 @@ garrow_output_stream_write_record_batch(GArrowOutputStream *stream, GError **error); #define GARROW_TYPE_FILE_OUTPUT_STREAM (garrow_file_output_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFileOutputStream, garrow_file_output_stream, GARROW, @@ -61,10 +66,12 @@ struct _GArrowFileOutputStreamClass GArrowOutputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFileOutputStream * garrow_file_output_stream_new(const gchar *path, gboolean append, GError **error); #define GARROW_TYPE_BUFFER_OUTPUT_STREAM (garrow_buffer_output_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowBufferOutputStream, garrow_buffer_output_stream, GARROW, @@ -75,10 +82,12 @@ struct _GArrowBufferOutputStreamClass GArrowOutputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowBufferOutputStream * garrow_buffer_output_stream_new(GArrowResizableBuffer *buffer); #define GARROW_TYPE_GIO_OUTPUT_STREAM (garrow_gio_output_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowGIOOutputStream, garrow_gio_output_stream, GARROW, @@ -89,15 +98,19 @@ struct _GArrowGIOOutputStreamClass GArrowOutputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowGIOOutputStream * garrow_gio_output_stream_new(GOutputStream *gio_output_stream); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED GOutputStream * garrow_gio_output_stream_get_raw(GArrowGIOOutputStream *output_stream); #endif #define GARROW_TYPE_COMPRESSED_OUTPUT_STREAM (garrow_compressed_output_stream_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowCompressedOutputStream, garrow_compressed_output_stream, GARROW, @@ -108,6 +121,7 @@ struct _GArrowCompressedOutputStreamClass GArrowOutputStreamClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCompressedOutputStream * garrow_compressed_output_stream_new(GArrowCodec *codec, GArrowOutputStream *raw, diff --git 
a/c_glib/arrow-glib/output-stream.hpp b/c_glib/arrow-glib/output-stream.hpp index e41c65da88d82..515d969efc245 100644 --- a/c_glib/arrow-glib/output-stream.hpp +++ b/c_glib/arrow-glib/output-stream.hpp @@ -25,23 +25,32 @@ #include +GARROW_EXTERN GArrowOutputStream * garrow_output_stream_new_raw( std::shared_ptr *arrow_output_stream); + +GARROW_EXTERN std::shared_ptr garrow_output_stream_get_raw(GArrowOutputStream *output_stream); +GARROW_EXTERN GArrowFileOutputStream * garrow_file_output_stream_new_raw( std::shared_ptr *arrow_file_output_stream); + +GARROW_EXTERN GArrowBufferOutputStream * garrow_buffer_output_stream_new_raw( std::shared_ptr *arrow_buffer_output_stream); +GARROW_EXTERN GArrowCompressedOutputStream * garrow_compressed_output_stream_new_raw( std::shared_ptr *arrow_raw, GArrowCodec *codec, GArrowOutputStream *raw); + +GARROW_EXTERN std::shared_ptr garrow_compressed_output_stream_get_raw(GArrowCompressedOutputStream *stream); diff --git a/c_glib/arrow-glib/readable.h b/c_glib/arrow-glib/readable.h index d0b1f5b6a99ee..266b45849057e 100644 --- a/c_glib/arrow-glib/readable.h +++ b/c_glib/arrow-glib/readable.h @@ -25,10 +25,13 @@ G_BEGIN_DECLS #define GARROW_TYPE_READABLE (garrow_readable_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE(GArrowReadable, garrow_readable, GARROW, READABLE, GObject) +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_readable_read(GArrowReadable *readable, gint64 n_bytes, GError **error); + GARROW_AVAILABLE_IN_0_17 GBytes * garrow_readable_read_bytes(GArrowReadable *readable, gint64 n_bytes, GError **error); diff --git a/c_glib/arrow-glib/reader.h b/c_glib/arrow-glib/reader.h index 96e4c5bbb5890..5401aa3bb1fc5 100644 --- a/c_glib/arrow-glib/reader.h +++ b/c_glib/arrow-glib/reader.h @@ -29,6 +29,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH_READER (garrow_record_batch_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchReader, garrow_record_batch_reader, GARROW, @@ -53,22 +54,29 @@ GARROW_AVAILABLE_IN_6_0 gpointer garrow_record_batch_reader_export(GArrowRecordBatchReader *reader, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_reader_get_schema(GArrowRecordBatchReader *reader); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next) GArrowRecordBatch * garrow_record_batch_reader_get_next_record_batch(GArrowRecordBatchReader *reader, GError **error); #endif #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED_FOR(garrow_record_batch_reader_read_next) GArrowRecordBatch * garrow_record_batch_reader_read_next_record_batch(GArrowRecordBatchReader *reader, GError **error); #endif + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_reader_read_next(GArrowRecordBatchReader *reader, GError **error); + GARROW_AVAILABLE_IN_6_0 GArrowTable * garrow_record_batch_reader_read_all(GArrowRecordBatchReader *reader, GError **error); @@ -78,6 +86,7 @@ GList * garrow_record_batch_reader_get_sources(GArrowRecordBatchReader *reader); #define GARROW_TYPE_TABLE_BATCH_READER (garrow_table_batch_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTableBatchReader, garrow_table_batch_reader, GARROW, @@ -88,6 +97,7 @@ struct _GArrowTableBatchReaderClass GArrowRecordBatchReaderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTableBatchReader * garrow_table_batch_reader_new(GArrowTable *table); @@ -98,6 +108,7 @@ garrow_table_batch_reader_set_max_chunk_size(GArrowTableBatchReader 
*reader, #define GARROW_TYPE_RECORD_BATCH_STREAM_READER \ (garrow_record_batch_stream_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchStreamReader, garrow_record_batch_stream_reader, GARROW, @@ -108,10 +119,12 @@ struct _GArrowRecordBatchStreamReaderClass GArrowRecordBatchReaderClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamReader * garrow_record_batch_stream_reader_new(GArrowInputStream *stream, GError **error); #define GARROW_TYPE_RECORD_BATCH_FILE_READER (garrow_record_batch_file_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchFileReader, garrow_record_batch_file_reader, GARROW, @@ -122,28 +135,39 @@ struct _GArrowRecordBatchFileReaderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileReader * garrow_record_batch_file_reader_new(GArrowSeekableInputStream *file, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_file_reader_get_schema(GArrowRecordBatchFileReader *reader); + +GARROW_AVAILABLE_IN_ALL guint garrow_record_batch_file_reader_get_n_record_batches(GArrowRecordBatchFileReader *reader); + +GARROW_AVAILABLE_IN_ALL GArrowMetadataVersion garrow_record_batch_file_reader_get_version(GArrowRecordBatchFileReader *reader); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL G_GNUC_DEPRECATED_FOR(garrow_record_batch_file_reader_read_record_batch) GArrowRecordBatch * garrow_record_batch_file_reader_get_record_batch(GArrowRecordBatchFileReader *reader, guint i, GError **error); #endif + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_file_reader_read_record_batch(GArrowRecordBatchFileReader *reader, guint i, GError **error); #define GARROW_TYPE_FEATHER_FILE_READER (garrow_feather_file_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowFeatherFileReader, garrow_feather_file_reader, GARROW, @@ -154,18 +178,26 @@ struct _GArrowFeatherFileReaderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowFeatherFileReader * garrow_feather_file_reader_new(GArrowSeekableInputStream *file, GError **error); +GARROW_AVAILABLE_IN_ALL gint garrow_feather_file_reader_get_version(GArrowFeatherFileReader *reader); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_feather_file_reader_read(GArrowFeatherFileReader *reader, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_feather_file_reader_read_indices(GArrowFeatherFileReader *reader, const gint *indices, guint n_indices, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader, const gchar **names, @@ -173,6 +205,7 @@ garrow_feather_file_reader_read_names(GArrowFeatherFileReader *reader, GError **error); #define GARROW_TYPE_CSV_READ_OPTIONS (garrow_csv_read_options_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowCSVReadOptions, garrow_csv_read_options, GARROW, CSV_READ_OPTIONS, GObject) struct _GArrowCSVReadOptionsClass @@ -180,16 +213,23 @@ struct _GArrowCSVReadOptionsClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCSVReadOptions * garrow_csv_read_options_new(void); + +GARROW_AVAILABLE_IN_ALL void garrow_csv_read_options_add_column_type(GArrowCSVReadOptions *options, const gchar *name, GArrowDataType *data_type); +GARROW_AVAILABLE_IN_ALL void garrow_csv_read_options_add_schema(GArrowCSVReadOptions *options, GArrowSchema *schema); + +GARROW_AVAILABLE_IN_ALL GHashTable * 
garrow_csv_read_options_get_column_types(GArrowCSVReadOptions *options); + GARROW_AVAILABLE_IN_0_14 void garrow_csv_read_options_set_null_values(GArrowCSVReadOptions *options, @@ -251,16 +291,20 @@ garrow_csv_read_options_add_timestamp_parser(GArrowCSVReadOptions *options, GArrowTimestampParser *parser); #define GARROW_TYPE_CSV_READER (garrow_csv_reader_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowCSVReader, garrow_csv_reader, GARROW, CSV_READER, GObject) struct _GArrowCSVReaderClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowCSVReader * garrow_csv_reader_new(GArrowInputStream *input, GArrowCSVReadOptions *options, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_csv_reader_read(GArrowCSVReader *reader, GError **error); @@ -279,6 +323,7 @@ typedef enum { } GArrowJSONReadUnexpectedFieldBehavior; #define GARROW_TYPE_JSON_READ_OPTIONS (garrow_json_read_options_get_type()) +GARROW_AVAILABLE_IN_0_14 G_DECLARE_DERIVABLE_TYPE( GArrowJSONReadOptions, garrow_json_read_options, GARROW, JSON_READ_OPTIONS, GObject) struct _GArrowJSONReadOptionsClass @@ -291,6 +336,7 @@ GArrowJSONReadOptions * garrow_json_read_options_new(void); #define GARROW_TYPE_JSON_READER (garrow_json_reader_get_type()) +GARROW_AVAILABLE_IN_0_14 G_DECLARE_DERIVABLE_TYPE( GArrowJSONReader, garrow_json_reader, GARROW, JSON_READER, GObject) struct _GArrowJSONReaderClass diff --git a/c_glib/arrow-glib/reader.hpp b/c_glib/arrow-glib/reader.hpp index 192497ef52e31..beec6766af2e6 100644 --- a/c_glib/arrow-glib/reader.hpp +++ b/c_glib/arrow-glib/reader.hpp @@ -27,42 +27,61 @@ #include +GARROW_EXTERN GArrowRecordBatchReader * garrow_record_batch_reader_new_raw( std::shared_ptr *arrow_reader, GList *sources); + +GARROW_EXTERN std::shared_ptr garrow_record_batch_reader_get_raw(GArrowRecordBatchReader *reader); +GARROW_EXTERN GArrowTableBatchReader * garrow_table_batch_reader_new_raw(std::shared_ptr *arrow_reader, GArrowTable *table); + +GARROW_EXTERN std::shared_ptr garrow_table_batch_reader_get_raw(GArrowTableBatchReader *reader); +GARROW_EXTERN GArrowRecordBatchStreamReader * garrow_record_batch_stream_reader_new_raw( std::shared_ptr *arrow_reader); +GARROW_EXTERN GArrowRecordBatchFileReader * garrow_record_batch_file_reader_new_raw( std::shared_ptr *arrow_reader); + +GARROW_EXTERN std::shared_ptr garrow_record_batch_file_reader_get_raw(GArrowRecordBatchFileReader *reader); +GARROW_EXTERN GArrowFeatherFileReader * garrow_feather_file_reader_new_raw( std::shared_ptr *arrow_reader); + +GARROW_EXTERN std::shared_ptr garrow_feather_file_reader_get_raw(GArrowFeatherFileReader *reader); +GARROW_EXTERN GArrowCSVReader * garrow_csv_reader_new_raw(std::shared_ptr *arrow_reader, GArrowInputStream *input); + +GARROW_EXTERN std::shared_ptr garrow_csv_reader_get_raw(GArrowCSVReader *reader); +GARROW_EXTERN GArrowJSONReader * garrow_json_reader_new_raw(std::shared_ptr *arrow_reader, GArrowInputStream *input); + +GARROW_EXTERN std::shared_ptr garrow_json_reader_get_raw(GArrowJSONReader *reader); diff --git a/c_glib/arrow-glib/record-batch.h b/c_glib/arrow-glib/record-batch.h index 3c995658224cb..e7ffd83795ed4 100644 --- a/c_glib/arrow-glib/record-batch.h +++ b/c_glib/arrow-glib/record-batch.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH (garrow_record_batch_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( GArrowRecordBatch, garrow_record_batch, GARROW, RECORD_BATCH, GObject) struct _GArrowRecordBatchClass @@ -37,6 +38,7 @@ GARROW_AVAILABLE_IN_6_0 
GArrowRecordBatch * garrow_record_batch_import(gpointer c_abi_array, GArrowSchema *schema, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_new(GArrowSchema *schema, guint32 n_rows, @@ -50,6 +52,7 @@ garrow_record_batch_export(GArrowRecordBatch *record_batch, gpointer *c_abi_schema, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_equal(GArrowRecordBatch *record_batch, GArrowRecordBatch *other_record_batch); @@ -59,28 +62,43 @@ garrow_record_batch_equal_metadata(GArrowRecordBatch *record_batch, GArrowRecordBatch *other_record_batch, gboolean check_metadata); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_get_schema(GArrowRecordBatch *record_batch); + GARROW_AVAILABLE_IN_0_15 GArrowArray * garrow_record_batch_get_column_data(GArrowRecordBatch *record_batch, gint i); + +GARROW_AVAILABLE_IN_ALL const gchar * garrow_record_batch_get_column_name(GArrowRecordBatch *record_batch, gint i); + +GARROW_AVAILABLE_IN_ALL guint garrow_record_batch_get_n_columns(GArrowRecordBatch *record_batch); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_record_batch_get_n_rows(GArrowRecordBatch *record_batch); + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_slice(GArrowRecordBatch *record_batch, gint64 offset, gint64 length); +GARROW_AVAILABLE_IN_ALL gchar * garrow_record_batch_to_string(GArrowRecordBatch *record_batch, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_add_column(GArrowRecordBatch *record_batch, guint i, GArrowField *field, GArrowArray *column, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_remove_column(GArrowRecordBatch *record_batch, guint i, @@ -92,6 +110,7 @@ garrow_record_batch_serialize(GArrowRecordBatch *record_batch, GError **error); #define GARROW_TYPE_RECORD_BATCH_ITERATOR (garrow_record_batch_iterator_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchIterator, garrow_record_batch_iterator, GARROW, diff --git a/c_glib/arrow-glib/record-batch.hpp b/c_glib/arrow-glib/record-batch.hpp index 4c3e5e8a78231..75c0432b390ba 100644 --- a/c_glib/arrow-glib/record-batch.hpp +++ b/c_glib/arrow-glib/record-batch.hpp @@ -23,13 +23,18 @@ #include +GARROW_EXTERN GArrowRecordBatch * garrow_record_batch_new_raw(std::shared_ptr *arrow_record_batch); + +GARROW_EXTERN std::shared_ptr garrow_record_batch_get_raw(GArrowRecordBatch *record_batch); +GARROW_EXTERN GArrowRecordBatchIterator * garrow_record_batch_iterator_new_raw(arrow::RecordBatchIterator *arrow_iterator); +GARROW_EXTERN arrow::RecordBatchIterator * garrow_record_batch_iterator_get_raw(GArrowRecordBatchIterator *iterator); diff --git a/c_glib/arrow-glib/scalar.h b/c_glib/arrow-glib/scalar.h index b4a6229c62fd1..5f9015d29c61c 100644 --- a/c_glib/arrow-glib/scalar.h +++ b/c_glib/arrow-glib/scalar.h @@ -25,6 +25,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_SCALAR (garrow_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowScalar, garrow_scalar, GARROW, SCALAR, GObject) struct _GArrowScalarClass { @@ -64,6 +65,7 @@ garrow_scalar_cast(GArrowScalar *scalar, GError **error); #define GARROW_TYPE_NULL_SCALAR (garrow_null_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowNullScalar, garrow_null_scalar, GARROW, NULL_SCALAR, GArrowScalar) struct _GArrowNullScalarClass @@ -76,6 +78,7 @@ GArrowNullScalar * garrow_null_scalar_new(void); #define GARROW_TYPE_BOOLEAN_SCALAR (garrow_boolean_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 
G_DECLARE_DERIVABLE_TYPE( GArrowBooleanScalar, garrow_boolean_scalar, GARROW, BOOLEAN_SCALAR, GArrowScalar) struct _GArrowBooleanScalarClass @@ -91,6 +94,7 @@ gboolean garrow_boolean_scalar_get_value(GArrowBooleanScalar *scalar); #define GARROW_TYPE_INT8_SCALAR (garrow_int8_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt8Scalar, garrow_int8_scalar, GARROW, INT8_SCALAR, GArrowScalar) struct _GArrowInt8ScalarClass @@ -106,6 +110,7 @@ gint8 garrow_int8_scalar_get_value(GArrowInt8Scalar *scalar); #define GARROW_TYPE_INT16_SCALAR (garrow_int16_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt16Scalar, garrow_int16_scalar, GARROW, INT16_SCALAR, GArrowScalar) struct _GArrowInt16ScalarClass @@ -121,6 +126,7 @@ gint16 garrow_int16_scalar_get_value(GArrowInt16Scalar *scalar); #define GARROW_TYPE_INT32_SCALAR (garrow_int32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt32Scalar, garrow_int32_scalar, GARROW, INT32_SCALAR, GArrowScalar) struct _GArrowInt32ScalarClass @@ -136,6 +142,7 @@ gint32 garrow_int32_scalar_get_value(GArrowInt32Scalar *scalar); #define GARROW_TYPE_INT64_SCALAR (garrow_int64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowInt64Scalar, garrow_int64_scalar, GARROW, INT64_SCALAR, GArrowScalar) struct _GArrowInt64ScalarClass @@ -151,6 +158,7 @@ gint64 garrow_int64_scalar_get_value(GArrowInt64Scalar *scalar); #define GARROW_TYPE_UINT8_SCALAR (garrow_uint8_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt8Scalar, garrow_uint8_scalar, GARROW, UINT8_SCALAR, GArrowScalar) struct _GArrowUInt8ScalarClass @@ -166,6 +174,7 @@ guint8 garrow_uint8_scalar_get_value(GArrowUInt8Scalar *scalar); #define GARROW_TYPE_UINT16_SCALAR (garrow_uint16_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt16Scalar, garrow_uint16_scalar, GARROW, UINT16_SCALAR, GArrowScalar) struct _GArrowUInt16ScalarClass @@ -181,6 +190,7 @@ guint16 garrow_uint16_scalar_get_value(GArrowUInt16Scalar *scalar); #define GARROW_TYPE_UINT32_SCALAR (garrow_uint32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt32Scalar, garrow_uint32_scalar, GARROW, UINT32_SCALAR, GArrowScalar) struct _GArrowUInt32ScalarClass @@ -196,6 +206,7 @@ guint32 garrow_uint32_scalar_get_value(GArrowUInt32Scalar *scalar); #define GARROW_TYPE_UINT64_SCALAR (garrow_uint64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUInt64Scalar, garrow_uint64_scalar, GARROW, UINT64_SCALAR, GArrowScalar) struct _GArrowUInt64ScalarClass @@ -211,6 +222,7 @@ guint64 garrow_uint64_scalar_get_value(GArrowUInt64Scalar *scalar); #define GARROW_TYPE_HALF_FLOAT_SCALAR (garrow_half_float_scalar_get_type()) +GARROW_AVAILABLE_IN_11_0 G_DECLARE_DERIVABLE_TYPE(GArrowHalfFloatScalar, garrow_half_float_scalar, GARROW, @@ -229,6 +241,7 @@ guint16 garrow_half_float_scalar_get_value(GArrowHalfFloatScalar *scalar); #define GARROW_TYPE_FLOAT_SCALAR (garrow_float_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowFloatScalar, garrow_float_scalar, GARROW, FLOAT_SCALAR, GArrowScalar) struct _GArrowFloatScalarClass @@ -244,6 +257,7 @@ gfloat garrow_float_scalar_get_value(GArrowFloatScalar *scalar); #define GARROW_TYPE_DOUBLE_SCALAR (garrow_double_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowDoubleScalar, garrow_double_scalar, GARROW, DOUBLE_SCALAR, GArrowScalar) struct _GArrowDoubleScalarClass @@ -259,6 +273,7 
@@ gdouble garrow_double_scalar_get_value(GArrowDoubleScalar *scalar); #define GARROW_TYPE_BASE_BINARY_SCALAR (garrow_base_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowBaseBinaryScalar, garrow_base_binary_scalar, GARROW, @@ -274,6 +289,7 @@ GArrowBuffer * garrow_base_binary_scalar_get_value(GArrowBaseBinaryScalar *scalar); #define GARROW_TYPE_BINARY_SCALAR (garrow_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowBinaryScalar, garrow_binary_scalar, GARROW, BINARY_SCALAR, GArrowBaseBinaryScalar) struct _GArrowBinaryScalarClass @@ -286,6 +302,7 @@ GArrowBinaryScalar * garrow_binary_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_STRING_SCALAR (garrow_string_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowStringScalar, garrow_string_scalar, GARROW, STRING_SCALAR, GArrowBaseBinaryScalar) struct _GArrowStringScalarClass @@ -298,6 +315,7 @@ GArrowStringScalar * garrow_string_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_LARGE_BINARY_SCALAR (garrow_large_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowLargeBinaryScalar, garrow_large_binary_scalar, GARROW, @@ -313,6 +331,7 @@ GArrowLargeBinaryScalar * garrow_large_binary_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_LARGE_STRING_SCALAR (garrow_large_string_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowLargeStringScalar, garrow_large_string_scalar, GARROW, @@ -328,6 +347,7 @@ GArrowLargeStringScalar * garrow_large_string_scalar_new(GArrowBuffer *value); #define GARROW_TYPE_FIXED_SIZE_BINARY_SCALAR (garrow_fixed_size_binary_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeBinaryScalar, garrow_fixed_size_binary_scalar, GARROW, @@ -344,6 +364,7 @@ garrow_fixed_size_binary_scalar_new(GArrowFixedSizeBinaryDataType *data_type, GArrowBuffer *value); #define GARROW_TYPE_DATE32_SCALAR (garrow_date32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowDate32Scalar, garrow_date32_scalar, GARROW, DATE32_SCALAR, GArrowScalar) struct _GArrowDate32ScalarClass @@ -359,6 +380,7 @@ gint32 garrow_date32_scalar_get_value(GArrowDate32Scalar *scalar); #define GARROW_TYPE_DATE64_SCALAR (garrow_date64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowDate64Scalar, garrow_date64_scalar, GARROW, DATE64_SCALAR, GArrowScalar) struct _GArrowDate64ScalarClass @@ -374,6 +396,7 @@ gint64 garrow_date64_scalar_get_value(GArrowDate64Scalar *scalar); #define GARROW_TYPE_TIME32_SCALAR (garrow_time32_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowTime32Scalar, garrow_time32_scalar, GARROW, TIME32_SCALAR, GArrowScalar) struct _GArrowTime32ScalarClass @@ -389,6 +412,7 @@ gint32 garrow_time32_scalar_get_value(GArrowTime32Scalar *scalar); #define GARROW_TYPE_TIME64_SCALAR (garrow_time64_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowTime64Scalar, garrow_time64_scalar, GARROW, TIME64_SCALAR, GArrowScalar) struct _GArrowTime64ScalarClass @@ -404,6 +428,7 @@ gint64 garrow_time64_scalar_get_value(GArrowTime64Scalar *scalar); #define GARROW_TYPE_TIMESTAMP_SCALAR (garrow_timestamp_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowTimestampScalar, garrow_timestamp_scalar, GARROW, TIMESTAMP_SCALAR, GArrowScalar) struct _GArrowTimestampScalarClass @@ -419,6 +444,7 @@ gint64 garrow_timestamp_scalar_get_value(GArrowTimestampScalar *scalar); #define 
GARROW_TYPE_MONTH_INTERVAL_SCALAR (garrow_month_interval_scalar_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthIntervalScalar, garrow_month_interval_scalar, GARROW, @@ -437,6 +463,7 @@ gint32 garrow_month_interval_scalar_get_value(GArrowMonthIntervalScalar *scalar); #define GARROW_TYPE_DAY_TIME_INTERVAL_SCALAR (garrow_day_time_interval_scalar_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowDayTimeIntervalScalar, garrow_day_time_interval_scalar, GARROW, @@ -456,6 +483,7 @@ garrow_day_time_interval_scalar_get_value(GArrowDayTimeIntervalScalar *scalar); #define GARROW_TYPE_MONTH_DAY_NANO_INTERVAL_SCALAR \ (garrow_month_day_nano_interval_scalar_get_type()) +GARROW_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GArrowMonthDayNanoIntervalScalar, garrow_month_day_nano_interval_scalar, GARROW, @@ -474,6 +502,7 @@ GArrowMonthDayNano * garrow_month_day_nano_interval_scalar_get_value(GArrowMonthDayNanoIntervalScalar *scalar); #define GARROW_TYPE_DECIMAL128_SCALAR (garrow_decimal128_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal128Scalar, garrow_decimal128_scalar, GARROW, @@ -493,6 +522,7 @@ GArrowDecimal128 * garrow_decimal128_scalar_get_value(GArrowDecimal128Scalar *scalar); #define GARROW_TYPE_DECIMAL256_SCALAR (garrow_decimal256_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowDecimal256Scalar, garrow_decimal256_scalar, GARROW, @@ -512,6 +542,7 @@ GArrowDecimal256 * garrow_decimal256_scalar_get_value(GArrowDecimal256Scalar *scalar); #define GARROW_TYPE_BASE_LIST_SCALAR (garrow_base_list_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowBaseListScalar, garrow_base_list_scalar, GARROW, BASE_LIST_SCALAR, GArrowScalar) struct _GArrowBaseListScalarClass @@ -524,6 +555,7 @@ GArrowArray * garrow_base_list_scalar_get_value(GArrowBaseListScalar *scalar); #define GARROW_TYPE_LIST_SCALAR (garrow_list_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowListScalar, garrow_list_scalar, GARROW, LIST_SCALAR, GArrowBaseListScalar) struct _GArrowListScalarClass @@ -536,6 +568,7 @@ GArrowListScalar * garrow_list_scalar_new(GArrowListArray *value); #define GARROW_TYPE_LARGE_LIST_SCALAR (garrow_large_list_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowLargeListScalar, garrow_large_list_scalar, GARROW, @@ -551,6 +584,7 @@ GArrowLargeListScalar * garrow_large_list_scalar_new(GArrowLargeListArray *value); #define GARROW_TYPE_MAP_SCALAR (garrow_map_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowMapScalar, garrow_map_scalar, GARROW, MAP_SCALAR, GArrowBaseListScalar) struct _GArrowMapScalarClass @@ -563,6 +597,7 @@ GArrowMapScalar * garrow_map_scalar_new(GArrowStructArray *value); #define GARROW_TYPE_STRUCT_SCALAR (garrow_struct_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowStructScalar, garrow_struct_scalar, GARROW, STRUCT_SCALAR, GArrowScalar) struct _GArrowStructScalarClass @@ -578,6 +613,7 @@ GList * garrow_struct_scalar_get_value(GArrowStructScalar *scalar); #define GARROW_TYPE_UNION_SCALAR (garrow_union_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowUnionScalar, garrow_union_scalar, GARROW, UNION_SCALAR, GArrowScalar) struct _GArrowUnionScalarClass @@ -593,6 +629,7 @@ GArrowScalar * garrow_union_scalar_get_value(GArrowUnionScalar *scalar); #define GARROW_TYPE_SPARSE_UNION_SCALAR (garrow_sparse_union_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 
G_DECLARE_DERIVABLE_TYPE(GArrowSparseUnionScalar, garrow_sparse_union_scalar, GARROW, @@ -610,6 +647,7 @@ garrow_sparse_union_scalar_new(GArrowSparseUnionDataType *data_type, GArrowScalar *value); #define GARROW_TYPE_DENSE_UNION_SCALAR (garrow_dense_union_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE(GArrowDenseUnionScalar, garrow_dense_union_scalar, GARROW, @@ -627,6 +665,7 @@ garrow_dense_union_scalar_new(GArrowDenseUnionDataType *data_type, GArrowScalar *value); #define GARROW_TYPE_EXTENSION_SCALAR (garrow_extension_scalar_get_type()) +GARROW_AVAILABLE_IN_5_0 G_DECLARE_DERIVABLE_TYPE( GArrowExtensionScalar, garrow_extension_scalar, GARROW, EXTENSION_SCALAR, GArrowScalar) struct _GArrowExtensionScalarClass diff --git a/c_glib/arrow-glib/schema.h b/c_glib/arrow-glib/schema.h index 93cd5bd542cf8..aab740397b7d6 100644 --- a/c_glib/arrow-glib/schema.h +++ b/c_glib/arrow-glib/schema.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_SCHEMA (garrow_schema_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowSchema, garrow_schema, GARROW, SCHEMA, GObject) struct _GArrowSchemaClass { @@ -34,6 +35,7 @@ GARROW_AVAILABLE_IN_6_0 GArrowSchema * garrow_schema_import(gpointer c_abi_schema, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_new(GList *fields); @@ -41,34 +43,48 @@ GARROW_AVAILABLE_IN_6_0 gpointer garrow_schema_export(GArrowSchema *schema, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_schema_equal(GArrowSchema *schema, GArrowSchema *other_schema); + +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_schema_get_field(GArrowSchema *schema, guint i); + +GARROW_AVAILABLE_IN_ALL GArrowField * garrow_schema_get_field_by_name(GArrowSchema *schema, const gchar *name); GARROW_AVAILABLE_IN_0_15 gint garrow_schema_get_field_index(GArrowSchema *schema, const gchar *name); +GARROW_AVAILABLE_IN_ALL guint garrow_schema_n_fields(GArrowSchema *schema); + +GARROW_AVAILABLE_IN_ALL GList * garrow_schema_get_fields(GArrowSchema *schema); +GARROW_AVAILABLE_IN_ALL gchar * garrow_schema_to_string(GArrowSchema *schema); + GARROW_AVAILABLE_IN_0_17 gchar * garrow_schema_to_string_metadata(GArrowSchema *schema, gboolean show_metadata); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_add_field(GArrowSchema *schema, guint i, GArrowField *field, GError **error); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_remove_field(GArrowSchema *schema, guint i, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_schema_replace_field(GArrowSchema *schema, guint i, diff --git a/c_glib/arrow-glib/schema.hpp b/c_glib/arrow-glib/schema.hpp index 333f73391c900..ba6c459495461 100644 --- a/c_glib/arrow-glib/schema.hpp +++ b/c_glib/arrow-glib/schema.hpp @@ -23,7 +23,10 @@ #include +GARROW_EXTERN GArrowSchema * garrow_schema_new_raw(std::shared_ptr *arrow_schema); + +GARROW_EXTERN std::shared_ptr garrow_schema_get_raw(GArrowSchema *schema); diff --git a/c_glib/arrow-glib/table-builder.h b/c_glib/arrow-glib/table-builder.h index 0e13352bbdde3..6fad1ae79a40f 100644 --- a/c_glib/arrow-glib/table-builder.h +++ b/c_glib/arrow-glib/table-builder.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH_BUILDER (garrow_record_batch_builder_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchBuilder, garrow_record_batch_builder, GARROW, @@ -36,34 +37,45 @@ struct _GArrowRecordBatchBuilderClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchBuilder * garrow_record_batch_builder_new(GArrowSchema 
*schema, GError **error); +GARROW_AVAILABLE_IN_ALL gint64 garrow_record_batch_builder_get_initial_capacity(GArrowRecordBatchBuilder *builder); + +GARROW_AVAILABLE_IN_ALL void garrow_record_batch_builder_set_initial_capacity(GArrowRecordBatchBuilder *builder, gint64 capacity); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_record_batch_builder_get_schema(GArrowRecordBatchBuilder *builder); #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_n_columns) gint garrow_record_batch_builder_get_n_fields(GArrowRecordBatchBuilder *builder); #endif + GARROW_AVAILABLE_IN_0_13 gint garrow_record_batch_builder_get_n_columns(GArrowRecordBatchBuilder *builder); + #ifndef GARROW_DISABLE_DEPRECATED +GARROW_AVAILABLE_IN_ALL GARROW_DEPRECATED_IN_0_13_FOR(garrow_record_batch_builder_get_column_builder) GArrowArrayBuilder * garrow_record_batch_builder_get_field(GArrowRecordBatchBuilder *builder, gint i); #endif + GARROW_AVAILABLE_IN_0_13 GArrowArrayBuilder * garrow_record_batch_builder_get_column_builder(GArrowRecordBatchBuilder *builder, gint i); +GARROW_AVAILABLE_IN_ALL GArrowRecordBatch * garrow_record_batch_builder_flush(GArrowRecordBatchBuilder *builder, GError **error); diff --git a/c_glib/arrow-glib/table.h b/c_glib/arrow-glib/table.h index 1bf64d25a4f3f..d790e413df5fc 100644 --- a/c_glib/arrow-glib/table.h +++ b/c_glib/arrow-glib/table.h @@ -29,6 +29,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_TABLE_CONCATENATE_OPTIONS \ (garrow_table_concatenate_options_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTableConcatenateOptions, garrow_table_concatenate_options, GARROW, @@ -44,6 +45,7 @@ GArrowTableConcatenateOptions * garrow_table_concatenate_options_new(void); #define GARROW_TYPE_TABLE (garrow_table_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTable, garrow_table, GARROW, TABLE, GObject) struct _GArrowTableClass { @@ -53,18 +55,21 @@ struct _GArrowTableClass GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_values(GArrowSchema *schema, GList *values, GError **error); + GARROW_AVAILABLE_IN_0_15 GArrowTable * garrow_table_new_chunked_arrays(GArrowSchema *schema, GArrowChunkedArray **chunked_arrays, gsize n_chunked_arrays, GError **error); + GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_arrays(GArrowSchema *schema, GArrowArray **arrays, gsize n_arrays, GError **error); + GARROW_AVAILABLE_IN_0_12 GArrowTable * garrow_table_new_record_batches(GArrowSchema *schema, @@ -72,22 +77,29 @@ garrow_table_new_record_batches(GArrowSchema *schema, gsize n_record_batches, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_table_equal(GArrowTable *table, GArrowTable *other_table); + GARROW_AVAILABLE_IN_0_17 gboolean garrow_table_equal_metadata(GArrowTable *table, GArrowTable *other_table, gboolean check_metadata); +GARROW_AVAILABLE_IN_ALL GArrowSchema * garrow_table_get_schema(GArrowTable *table); + GARROW_AVAILABLE_IN_0_15 GArrowChunkedArray * garrow_table_get_column_data(GArrowTable *table, gint i); +GARROW_AVAILABLE_IN_ALL guint garrow_table_get_n_columns(GArrowTable *table); + +GARROW_AVAILABLE_IN_ALL guint64 garrow_table_get_n_rows(GArrowTable *table); @@ -98,8 +110,11 @@ garrow_table_add_column(GArrowTable *table, GArrowField *field, GArrowChunkedArray *chunked_array, GError **error); + +GARROW_AVAILABLE_IN_ALL GArrowTable * garrow_table_remove_column(GArrowTable *table, guint i, GError **error); + GARROW_AVAILABLE_IN_0_15 GArrowTable * garrow_table_replace_column(GArrowTable *table, @@ 
-107,22 +122,28 @@ garrow_table_replace_column(GArrowTable *table, GArrowField *field, GArrowChunkedArray *chunked_array, GError **error); + +GARROW_AVAILABLE_IN_ALL gchar * garrow_table_to_string(GArrowTable *table, GError **error); + GARROW_AVAILABLE_IN_0_14 GArrowTable * garrow_table_concatenate(GArrowTable *table, GList *other_tables, GArrowTableConcatenateOptions *options, GError **error); + GARROW_AVAILABLE_IN_0_14 GArrowTable * garrow_table_slice(GArrowTable *table, gint64 offset, gint64 length); + GARROW_AVAILABLE_IN_0_16 GArrowTable * garrow_table_combine_chunks(GArrowTable *table, GError **error); #define GARROW_TYPE_FEATHER_WRITE_PROPERTIES (garrow_feather_write_properties_get_type()) +GARROW_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GArrowFeatherWriteProperties, garrow_feather_write_properties, GARROW, diff --git a/c_glib/arrow-glib/table.hpp b/c_glib/arrow-glib/table.hpp index 3077c2ece9b37..79fc97471a42c 100644 --- a/c_glib/arrow-glib/table.hpp +++ b/c_glib/arrow-glib/table.hpp @@ -24,10 +24,14 @@ #include +GARROW_EXTERN GArrowTable * garrow_table_new_raw(std::shared_ptr *arrow_table); + +GARROW_EXTERN std::shared_ptr garrow_table_get_raw(GArrowTable *table); +GARROW_EXTERN arrow::ipc::feather::WriteProperties * garrow_feather_write_properties_get_raw(GArrowFeatherWriteProperties *properties); diff --git a/c_glib/arrow-glib/tensor.h b/c_glib/arrow-glib/tensor.h index a6d11b248110e..5971c3af12600 100644 --- a/c_glib/arrow-glib/tensor.h +++ b/c_glib/arrow-glib/tensor.h @@ -25,12 +25,14 @@ G_BEGIN_DECLS #define GARROW_TYPE_TENSOR (garrow_tensor_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowTensor, garrow_tensor, GARROW, TENSOR, GObject) struct _GArrowTensorClass { GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowTensor * garrow_tensor_new(GArrowDataType *data_type, GArrowBuffer *data, @@ -40,30 +42,55 @@ garrow_tensor_new(GArrowDataType *data_type, gsize n_strides, gchar **dimension_names, gsize n_dimension_names); +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_equal(GArrowTensor *tensor, GArrowTensor *other_tensor); + +GARROW_AVAILABLE_IN_ALL GArrowDataType * garrow_tensor_get_value_data_type(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL GArrowType garrow_tensor_get_value_type(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL GArrowBuffer * garrow_tensor_get_buffer(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gint64 * garrow_tensor_get_shape(GArrowTensor *tensor, gint *n_dimensions); + +GARROW_AVAILABLE_IN_ALL gint64 * garrow_tensor_get_strides(GArrowTensor *tensor, gint *n_strides); + +GARROW_AVAILABLE_IN_ALL gint garrow_tensor_get_n_dimensions(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL const gchar * garrow_tensor_get_dimension_name(GArrowTensor *tensor, gint i); + +GARROW_AVAILABLE_IN_ALL gint64 garrow_tensor_get_size(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_mutable(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_contiguous(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_row_major(GArrowTensor *tensor); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_tensor_is_column_major(GArrowTensor *tensor); diff --git a/c_glib/arrow-glib/timestamp-parser.h b/c_glib/arrow-glib/timestamp-parser.h index 05cad54746eeb..a7265d6ef46fb 100644 --- a/c_glib/arrow-glib/timestamp-parser.h +++ b/c_glib/arrow-glib/timestamp-parser.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_TIMESTAMP_PARSER (garrow_timestamp_parser_get_type()) +GARROW_AVAILABLE_IN_16_0 
G_DECLARE_DERIVABLE_TYPE( GArrowTimestampParser, garrow_timestamp_parser, GARROW, TIMESTAMP_PARSER, GObject) struct _GArrowTimestampParserClass @@ -39,6 +40,7 @@ garrow_timestamp_parser_get_kind(GArrowTimestampParser *parser); #define GARROW_TYPE_STRPTIME_TIMESTAMP_PARSER \ (garrow_strptime_timestamp_parser_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowStrptimeTimestampParser, garrow_strptime_timestamp_parser, GARROW, @@ -58,6 +60,7 @@ const gchar * garrow_strptime_timestamp_parser_get_format(GArrowStrptimeTimestampParser *parser); #define GARROW_TYPE_ISO8601_TIMESTAMP_PARSER (garrow_iso8601_timestamp_parser_get_type()) +GARROW_AVAILABLE_IN_16_0 G_DECLARE_DERIVABLE_TYPE(GArrowISO8601TimestampParser, garrow_iso8601_timestamp_parser, GARROW, diff --git a/c_glib/arrow-glib/version.h.in b/c_glib/arrow-glib/version.h.in index a83c68a2a16dc..b530a088c8e38 100644 --- a/c_glib/arrow-glib/version.h.in +++ b/c_glib/arrow-glib/version.h.in @@ -19,6 +19,8 @@ #pragma once +#include + /** * SECTION: version * @section_id: version-macros @@ -36,7 +38,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_MAJOR (@GARROW_VERSION_MAJOR@) +#define GARROW_VERSION_MAJOR (@VERSION_MAJOR@) /** * GARROW_VERSION_MINOR: @@ -45,7 +47,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_MINOR (@GARROW_VERSION_MINOR@) +#define GARROW_VERSION_MINOR (@VERSION_MINOR@) /** * GARROW_VERSION_MICRO: @@ -54,7 +56,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_MICRO (@GARROW_VERSION_MICRO@) +#define GARROW_VERSION_MICRO (@VERSION_MICRO@) /** * GARROW_VERSION_TAG: @@ -64,7 +66,7 @@ * * Since: 0.10.0 */ -#define GARROW_VERSION_TAG "@GARROW_VERSION_TAG@" +#define GARROW_VERSION_TAG "@VERSION_TAG@" /** * GARROW_VERSION_CHECK: @@ -108,212 +110,7 @@ # define GARROW_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif -/** - * GARROW_VERSION_16_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 16.0.0 - */ -#define GARROW_VERSION_16_0 G_ENCODE_VERSION(16, 0) - -/** - * GARROW_VERSION_15_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 15.0.0 - */ -#define GARROW_VERSION_15_0 G_ENCODE_VERSION(15, 0) - -/** - * GARROW_VERSION_14_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 14.0.0 - */ -#define GARROW_VERSION_14_0 G_ENCODE_VERSION(14, 0) - -/** - * GARROW_VERSION_13_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 13.0.0 - */ -#define GARROW_VERSION_13_0 G_ENCODE_VERSION(13, 0) - -/** - * GARROW_VERSION_12_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 12.0.0 - */ -#define GARROW_VERSION_12_0 G_ENCODE_VERSION(12, 0) - -/** - * GARROW_VERSION_11_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 11.0.0 - */ -#define GARROW_VERSION_11_0 G_ENCODE_VERSION(11, 0) - -/** - * GARROW_VERSION_10_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 10.0.0 - */ -#define GARROW_VERSION_10_0 G_ENCODE_VERSION(10, 0) - -/** - * GARROW_VERSION_9_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 9.0.0 - */ -#define GARROW_VERSION_9_0 G_ENCODE_VERSION(9, 0) - -/** - * GARROW_VERSION_8_0: - * - * You can use this macro value for compile time API version check. 
- * - * Since: 8.0.0 - */ -#define GARROW_VERSION_8_0 G_ENCODE_VERSION(8, 0) - -/** - * GARROW_VERSION_7_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 7.0.0 - */ -#define GARROW_VERSION_7_0 G_ENCODE_VERSION(7, 0) - -/** - * GARROW_VERSION_6_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 6.0.0 - */ -#define GARROW_VERSION_6_0 G_ENCODE_VERSION(6, 0) - -/** - * GARROW_VERSION_5_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 5.0.0 - */ -#define GARROW_VERSION_5_0 G_ENCODE_VERSION(5, 0) - -/** - * GARROW_VERSION_4_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 4.0.0 - */ -#define GARROW_VERSION_4_0 G_ENCODE_VERSION(4, 0) - -/** - * GARROW_VERSION_3_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 3.0.0 - */ -#define GARROW_VERSION_3_0 G_ENCODE_VERSION(3, 0) - -/** - * GARROW_VERSION_2_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 2.0.0 - */ -#define GARROW_VERSION_2_0 G_ENCODE_VERSION(2, 0) - -/** - * GARROW_VERSION_1_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 1.0.0 - */ -#define GARROW_VERSION_1_0 G_ENCODE_VERSION(1, 0) - -/** - * GARROW_VERSION_0_17: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.17.0 - */ -#define GARROW_VERSION_0_17 G_ENCODE_VERSION(0, 17) - -/** - * GARROW_VERSION_0_16: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.16.0 - */ -#define GARROW_VERSION_0_16 G_ENCODE_VERSION(0, 16) - -/** - * GARROW_VERSION_0_15: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.15.0 - */ -#define GARROW_VERSION_0_15 G_ENCODE_VERSION(0, 15) - -/** - * GARROW_VERSION_0_14: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.14.0 - */ -#define GARROW_VERSION_0_14 G_ENCODE_VERSION(0, 14) - -/** - * GARROW_VERSION_0_13: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.13.0 - */ -#define GARROW_VERSION_0_13 G_ENCODE_VERSION(0, 13) - -/** - * GARROW_VERSION_0_12: - * - * You can use this macro value for compile time API version check. - * - * Since: 0.12.0 - */ -#define GARROW_VERSION_0_12 G_ENCODE_VERSION(0, 12) - -/** - * GARROW_VERSION_0_10: - * - * You can use this macro value for compile time API version check. 
- * - * Since: 0.10.0 - */ -#define GARROW_VERSION_0_10 G_ENCODE_VERSION(0, 10) +@ENCODED_VERSIONS@ /** * GARROW_VERSION_MIN_REQUIRED: @@ -359,327 +156,6 @@ G_ENCODE_VERSION(GARROW_VERSION_MAJOR, GARROW_VERSION_MINOR) #endif +@VISIBILITY_MACROS@ -#define GARROW_AVAILABLE_IN_ALL - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_16_0 -# define GARROW_DEPRECATED_IN_16_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_16_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_16_0 -# define GARROW_DEPRECATED_IN_16_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_16_0 -# define GARROW_AVAILABLE_IN_16_0 GARROW_UNAVAILABLE(16, 0) -#else -# define GARROW_AVAILABLE_IN_16_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_15_0 -# define GARROW_DEPRECATED_IN_15_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_15_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_15_0 -# define GARROW_DEPRECATED_IN_15_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_15_0 -# define GARROW_AVAILABLE_IN_15_0 GARROW_UNAVAILABLE(15, 0) -#else -# define GARROW_AVAILABLE_IN_15_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_14_0 -# define GARROW_DEPRECATED_IN_14_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_14_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_14_0 -# define GARROW_DEPRECATED_IN_14_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_14_0 -# define GARROW_AVAILABLE_IN_14_0 GARROW_UNAVAILABLE(14, 0) -#else -# define GARROW_AVAILABLE_IN_14_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_13_0 -# define GARROW_DEPRECATED_IN_13_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_13_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_13_0 -# define GARROW_DEPRECATED_IN_13_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_13_0 -# define GARROW_AVAILABLE_IN_13_0 GARROW_UNAVAILABLE(13, 0) -#else -# define GARROW_AVAILABLE_IN_13_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_12_0 -# define GARROW_DEPRECATED_IN_12_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_12_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_12_0 -# define GARROW_DEPRECATED_IN_12_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_12_0 -# define GARROW_AVAILABLE_IN_12_0 GARROW_UNAVAILABLE(12, 0) -#else -# define GARROW_AVAILABLE_IN_12_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_11_0 -# define GARROW_DEPRECATED_IN_11_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_11_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_11_0 -# define GARROW_DEPRECATED_IN_11_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_11_0 -# define GARROW_AVAILABLE_IN_11_0 GARROW_UNAVAILABLE(11, 0) -#else -# define GARROW_AVAILABLE_IN_11_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_10_0 -# define GARROW_DEPRECATED_IN_10_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_10_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_10_0 -# define GARROW_DEPRECATED_IN_10_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_10_0 -# define GARROW_AVAILABLE_IN_10_0 GARROW_UNAVAILABLE(10, 0) -#else -# define GARROW_AVAILABLE_IN_10_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= 
GARROW_VERSION_9_0 -# define GARROW_DEPRECATED_IN_9_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_9_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_9_0 -# define GARROW_DEPRECATED_IN_9_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_9_0 -# define GARROW_AVAILABLE_IN_9_0 GARROW_UNAVAILABLE(9, 0) -#else -# define GARROW_AVAILABLE_IN_9_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_8_0 -# define GARROW_DEPRECATED_IN_8_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_8_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_8_0 -# define GARROW_DEPRECATED_IN_8_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_8_0 -# define GARROW_AVAILABLE_IN_8_0 GARROW_UNAVAILABLE(8, 0) -#else -# define GARROW_AVAILABLE_IN_8_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_7_0 -# define GARROW_DEPRECATED_IN_7_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_7_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_7_0 -# define GARROW_DEPRECATED_IN_7_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_7_0 -# define GARROW_AVAILABLE_IN_7_0 GARROW_UNAVAILABLE(7, 0) -#else -# define GARROW_AVAILABLE_IN_7_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_6_0 -# define GARROW_DEPRECATED_IN_6_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_6_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_6_0 -# define GARROW_DEPRECATED_IN_6_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_6_0 -# define GARROW_AVAILABLE_IN_6_0 GARROW_UNAVAILABLE(6, 0) -#else -# define GARROW_AVAILABLE_IN_6_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_5_0 -# define GARROW_DEPRECATED_IN_5_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_5_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_5_0 -# define GARROW_DEPRECATED_IN_5_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_5_0 -# define GARROW_AVAILABLE_IN_5_0 GARROW_UNAVAILABLE(5, 0) -#else -# define GARROW_AVAILABLE_IN_5_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_4_0 -# define GARROW_DEPRECATED_IN_4_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_4_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_4_0 -# define GARROW_DEPRECATED_IN_4_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_4_0 -# define GARROW_AVAILABLE_IN_4_0 GARROW_UNAVAILABLE(4, 0) -#else -# define GARROW_AVAILABLE_IN_4_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_3_0 -# define GARROW_DEPRECATED_IN_3_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_3_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_3_0 -# define GARROW_DEPRECATED_IN_3_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_3_0 -# define GARROW_AVAILABLE_IN_3_0 GARROW_UNAVAILABLE(3, 0) -#else -# define GARROW_AVAILABLE_IN_3_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_2_0 -# define GARROW_DEPRECATED_IN_2_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_2_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_2_0 -# define GARROW_DEPRECATED_IN_2_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_2_0 -# define GARROW_AVAILABLE_IN_2_0 GARROW_UNAVAILABLE(2, 0) -#else -# define 
GARROW_AVAILABLE_IN_2_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_1_0 -# define GARROW_DEPRECATED_IN_1_0 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_1_0_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_1_0 -# define GARROW_DEPRECATED_IN_1_0_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_1_0 -# define GARROW_AVAILABLE_IN_1_0 GARROW_UNAVAILABLE(1, 0) -#else -# define GARROW_AVAILABLE_IN_1_0 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_17 -# define GARROW_DEPRECATED_IN_0_17 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_17_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_17 -# define GARROW_DEPRECATED_IN_0_17_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_17 -# define GARROW_AVAILABLE_IN_0_17 GARROW_UNAVAILABLE(0, 17) -#else -# define GARROW_AVAILABLE_IN_0_17 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_16 -# define GARROW_DEPRECATED_IN_0_16 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_16_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_16 -# define GARROW_DEPRECATED_IN_0_16_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_16 -# define GARROW_AVAILABLE_IN_0_16 GARROW_UNAVAILABLE(0, 16) -#else -# define GARROW_AVAILABLE_IN_0_16 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_15 -# define GARROW_DEPRECATED_IN_0_15 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_15_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_15 -# define GARROW_DEPRECATED_IN_0_15_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_15 -# define GARROW_AVAILABLE_IN_0_15 GARROW_UNAVAILABLE(0, 15) -#else -# define GARROW_AVAILABLE_IN_0_15 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_14 -# define GARROW_DEPRECATED_IN_0_14 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_14_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_14 -# define GARROW_DEPRECATED_IN_0_14_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_14 -# define GARROW_AVAILABLE_IN_0_14 GARROW_UNAVAILABLE(0, 14) -#else -# define GARROW_AVAILABLE_IN_0_14 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_13 -# define GARROW_DEPRECATED_IN_0_13 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_13_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_13 -# define GARROW_DEPRECATED_IN_0_13_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_13 -# define GARROW_AVAILABLE_IN_0_13 GARROW_UNAVAILABLE(0, 13) -#else -# define GARROW_AVAILABLE_IN_0_13 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_12 -# define GARROW_DEPRECATED_IN_0_12 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_12_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_12 -# define GARROW_DEPRECATED_IN_0_12_FOR(function) -#endif - -#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_12 -# define GARROW_AVAILABLE_IN_0_12 GARROW_UNAVAILABLE(0, 12) -#else -# define GARROW_AVAILABLE_IN_0_12 -#endif - -#if GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_0_10 -# define GARROW_DEPRECATED_IN_0_10 GARROW_DEPRECATED -# define GARROW_DEPRECATED_IN_0_10_FOR(function) GARROW_DEPRECATED_FOR(function) -#else -# define GARROW_DEPRECATED_IN_0_10 -# define GARROW_DEPRECATED_IN_0_10_FOR(function) -#endif - 
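Aside: every hand-written block in this removed region follows the same two-part pattern, keyed on GARROW_VERSION_MIN_REQUIRED (deprecation warnings) and GARROW_VERSION_MAX_ALLOWED (availability warnings), which is why it can be replaced wholesale by generated macros. A minimal consumer-side sketch, assuming an installed arrow-glib and purely illustrative pin values:

/* Sketch only: pin the accepted GArrow API range before including the headers.
 * The concrete versions are illustrative, not a recommendation. */
#define GARROW_VERSION_MIN_REQUIRED GARROW_VERSION_15_0
#define GARROW_VERSION_MAX_ALLOWED GARROW_VERSION_15_0

#include <arrow-glib/arrow-glib.h>

/* With these pins:
 * - GARROW_AVAILABLE_IN_16_0 expands to GARROW_UNAVAILABLE(16, 0), so using a
 *   16.0+ symbol warns, because GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_16_0;
 * - GARROW_DEPRECATED_IN_15_0 (and older) expands to GARROW_DEPRECATED, so those
 *   symbols warn as deprecated, because GARROW_VERSION_MIN_REQUIRED >= GARROW_VERSION_15_0;
 * - GARROW_AVAILABLE_IN_ALL symbols stay usable everywhere. */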
-#if GARROW_VERSION_MAX_ALLOWED < GARROW_VERSION_0_10 -# define GARROW_AVAILABLE_IN_0_10 GARROW_UNAVAILABLE(0, 10) -#else -# define GARROW_AVAILABLE_IN_0_10 -#endif +@AVAILABILITY_MACROS@ diff --git a/c_glib/arrow-glib/writable-file.h b/c_glib/arrow-glib/writable-file.h index 555705767e4aa..e9aa9122e92fa 100644 --- a/c_glib/arrow-glib/writable-file.h +++ b/c_glib/arrow-glib/writable-file.h @@ -24,9 +24,11 @@ G_BEGIN_DECLS #define GARROW_TYPE_WRITABLE_FILE (garrow_writable_file_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE( GArrowWritableFile, garrow_writable_file, GARROW, WRITABLE_FILE, GObject) +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_file_write_at(GArrowWritableFile *writable_file, gint64 position, diff --git a/c_glib/arrow-glib/writable.h b/c_glib/arrow-glib/writable.h index a556443967b5a..dcc1e67668e78 100644 --- a/c_glib/arrow-glib/writable.h +++ b/c_glib/arrow-glib/writable.h @@ -24,13 +24,17 @@ G_BEGIN_DECLS #define GARROW_TYPE_WRITABLE (garrow_writable_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_INTERFACE(GArrowWritable, garrow_writable, GARROW, WRITABLE, GObject) +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_write(GArrowWritable *writable, const guint8 *data, gint64 n_bytes, GError **error); + +GARROW_AVAILABLE_IN_ALL gboolean garrow_writable_flush(GArrowWritable *writable, GError **error); diff --git a/c_glib/arrow-glib/writer.h b/c_glib/arrow-glib/writer.h index 30b0ea987da39..46bbdddec8c9d 100644 --- a/c_glib/arrow-glib/writer.h +++ b/c_glib/arrow-glib/writer.h @@ -28,6 +28,7 @@ G_BEGIN_DECLS #define GARROW_TYPE_RECORD_BATCH_WRITER (garrow_record_batch_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchWriter, garrow_record_batch_writer, GARROW, @@ -38,19 +39,23 @@ struct _GArrowRecordBatchWriterClass GObjectClass parent_class; }; +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_write_record_batch(GArrowRecordBatchWriter *writer, GArrowRecordBatch *record_batch, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_write_table(GArrowRecordBatchWriter *writer, GArrowTable *table, GError **error); +GARROW_AVAILABLE_IN_ALL gboolean garrow_record_batch_writer_close(GArrowRecordBatchWriter *writer, GError **error); #define GARROW_TYPE_RECORD_BATCH_STREAM_WRITER \ (garrow_record_batch_stream_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchStreamWriter, garrow_record_batch_stream_writer, GARROW, @@ -61,12 +66,14 @@ struct _GArrowRecordBatchStreamWriterClass GArrowRecordBatchWriterClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchStreamWriter * garrow_record_batch_stream_writer_new(GArrowOutputStream *sink, GArrowSchema *schema, GError **error); #define GARROW_TYPE_RECORD_BATCH_FILE_WRITER (garrow_record_batch_file_writer_get_type()) +GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowRecordBatchFileWriter, garrow_record_batch_file_writer, GARROW, @@ -77,6 +84,7 @@ struct _GArrowRecordBatchFileWriterClass GArrowRecordBatchStreamWriterClass parent_class; }; +GARROW_AVAILABLE_IN_ALL GArrowRecordBatchFileWriter * garrow_record_batch_file_writer_new(GArrowOutputStream *sink, GArrowSchema *schema, diff --git a/c_glib/gandiva-glib/expression.h b/c_glib/gandiva-glib/expression.h index f8f061ceb08fa..bb7eb22ac01dc 100644 --- a/c_glib/gandiva-glib/expression.h +++ b/c_glib/gandiva-glib/expression.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_EXPRESSION (ggandiva_expression_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 
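For orientation, the writer entry points annotated just above compose in the usual open, write, close sequence. A rough usage sketch; garrow_file_output_stream_new() and the GARROW_OUTPUT_STREAM() cast are assumed from the wider arrow-glib API rather than taken from this patch, and error handling is abbreviated:

#include <arrow-glib/arrow-glib.h>

/* Rough sketch: persist one record batch via the annotated writer API.
 * garrow_file_output_stream_new() is assumed to exist outside this patch. */
static gboolean
write_one_batch(GArrowSchema *schema, GArrowRecordBatch *record_batch, GError **error)
{
  GArrowFileOutputStream *sink =
    garrow_file_output_stream_new("/tmp/batches.arrow", FALSE, error);
  if (!sink)
    return FALSE;
  GArrowRecordBatchFileWriter *writer =
    garrow_record_batch_file_writer_new(GARROW_OUTPUT_STREAM(sink), schema, error);
  gboolean success =
    writer &&
    garrow_record_batch_writer_write_record_batch(
      GARROW_RECORD_BATCH_WRITER(writer), record_batch, error) &&
    garrow_record_batch_writer_close(GARROW_RECORD_BATCH_WRITER(writer), error);
  if (writer)
    g_object_unref(writer);
  g_object_unref(sink);
  return success;
}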
G_DECLARE_DERIVABLE_TYPE( GGandivaExpression, ggandiva_expression, GGANDIVA, EXPRESSION, GObject) @@ -34,12 +35,16 @@ struct _GGandivaExpressionClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaExpression * ggandiva_expression_new(GGandivaNode *root_node, GArrowField *result_field); + +GGANDIVA_AVAILABLE_IN_0_12 gchar * ggandiva_expression_to_string(GGandivaExpression *expression); #define GGANDIVA_TYPE_CONDITION (ggandiva_condition_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GGandivaCondition, ggandiva_condition, GGANDIVA, CONDITION, GGandivaExpression) diff --git a/c_glib/gandiva-glib/filter.h b/c_glib/gandiva-glib/filter.h index b95981198e0c4..0a2199ccfa106 100644 --- a/c_glib/gandiva-glib/filter.h +++ b/c_glib/gandiva-glib/filter.h @@ -25,6 +25,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_FILTER (ggandiva_filter_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFilter, ggandiva_filter, GGANDIVA, FILTER, GObject) struct _GGandivaFilterClass @@ -32,8 +33,11 @@ struct _GGandivaFilterClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_4_0 GGandivaFilter * ggandiva_filter_new(GArrowSchema *schema, GGandivaCondition *condition, GError **error); + +GGANDIVA_AVAILABLE_IN_4_0 gboolean ggandiva_filter_evaluate(GGandivaFilter *filter, GArrowRecordBatch *record_batch, diff --git a/c_glib/gandiva-glib/function-registry.h b/c_glib/gandiva-glib/function-registry.h index ed21e120a2533..e13f4b36d28dc 100644 --- a/c_glib/gandiva-glib/function-registry.h +++ b/c_glib/gandiva-glib/function-registry.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_FUNCTION_REGISTRY (ggandiva_function_registry_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionRegistry, ggandiva_function_registry, GGANDIVA, @@ -35,14 +36,20 @@ struct _GGandivaFunctionRegistryClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_15_0 +GGANDIVA_AVAILABLE_IN_15_0 GGandivaFunctionRegistry * ggandiva_function_registry_default(void); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaFunctionRegistry * ggandiva_function_registry_new(void); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaNativeFunction * ggandiva_function_registry_lookup(GGandivaFunctionRegistry *function_registry, GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_function_registry_get_native_functions( GGandivaFunctionRegistry *function_registry); diff --git a/c_glib/gandiva-glib/function-signature.h b/c_glib/gandiva-glib/function-signature.h index ef6834ea85723..4fd8cc8a7e761 100644 --- a/c_glib/gandiva-glib/function-signature.h +++ b/c_glib/gandiva-glib/function-signature.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GGANDIVA_TYPE_FUNCTION_SIGNATURE (ggandiva_function_signature_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE(GGandivaFunctionSignature, ggandiva_function_signature, GGANDIVA, @@ -35,20 +38,31 @@ struct _GGandivaFunctionSignatureClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_14_0 GGandivaFunctionSignature * ggandiva_function_signature_new(const gchar *base_name, GList *parameter_types, GArrowDataType *return_type); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_function_signature_equal(GGandivaFunctionSignature *function_signature, GGandivaFunctionSignature *other_function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_function_signature_to_string(GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GArrowDataType * 
ggandiva_function_signature_get_return_type( GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_function_signature_get_base_name(GGandivaFunctionSignature *function_signature); + +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_function_signature_get_param_types( GGandivaFunctionSignature *function_signature); diff --git a/c_glib/gandiva-glib/meson.build b/c_glib/gandiva-glib/meson.build index d5cab109dcf89..8cd00b3805b91 100644 --- a/c_glib/gandiva-glib/meson.build +++ b/c_glib/gandiva-glib/meson.build @@ -53,14 +53,12 @@ cpp_headers = files( 'selection-vector.hpp', ) -version_h_conf = configuration_data() -version_h_conf.set('GGANDIVA_VERSION_MAJOR', version_major) -version_h_conf.set('GGANDIVA_VERSION_MINOR', version_minor) -version_h_conf.set('GGANDIVA_VERSION_MICRO', version_micro) -version_h_conf.set('GGANDIVA_VERSION_TAG', version_tag) -version_h = configure_file(input: 'version.h.in', - output: 'version.h', - configuration: version_h_conf) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GGANDIVA', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + c_headers += version_h enums = gnome.mkenums('enums', @@ -87,6 +85,7 @@ libgandiva_glib = library('gandiva-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGGANDIVA_COMPILATION'], soversion: so_version, version: library_version) gandiva_glib = declare_dependency(link_with: libgandiva_glib, diff --git a/c_glib/gandiva-glib/native-function.h b/c_glib/gandiva-glib/native-function.h index 5ceef396ef40c..934d29ab7e33b 100644 --- a/c_glib/gandiva-glib/native-function.h +++ b/c_glib/gandiva-glib/native-function.h @@ -40,6 +40,7 @@ typedef enum { } GGandivaResultNullableType; #define GGANDIVA_TYPE_NATIVE_FUNCTION (ggandiva_native_function_get_type()) +GGANDIVA_AVAILABLE_IN_14_0 G_DECLARE_DERIVABLE_TYPE( GGandivaNativeFunction, ggandiva_native_function, GGANDIVA, NATIVE_FUNCTION, GObject) @@ -48,20 +49,33 @@ struct _GGandivaNativeFunctionClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_14_0 GList * ggandiva_native_function_get_signatures(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_equal(GGandivaNativeFunction *native_function, GGandivaNativeFunction *other_native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gchar * ggandiva_native_function_to_string(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 GGandivaResultNullableType ggandiva_native_function_get_result_nullable_type( GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_need_context(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_need_function_holder(GGandivaNativeFunction *native_function); + +GGANDIVA_AVAILABLE_IN_14_0 gboolean ggandiva_native_function_can_return_errors(GGandivaNativeFunction *native_function); diff --git a/c_glib/gandiva-glib/node.h b/c_glib/gandiva-glib/node.h index 715a3d6ebaf18..1733cac918c51 100644 --- a/c_glib/gandiva-glib/node.h +++ b/c_glib/gandiva-glib/node.h @@ -26,6 +26,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_NODE (ggandiva_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaNode, ggandiva_node, GGANDIVA, NODE, GObject) struct _GGandivaNodeClass @@ -33,10 +34,12 @@ struct _GGandivaNodeClass 
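A note on the -DGGANDIVA_COMPILATION flag added to the library target above, and on the GARROW_EXTERN annotations in the earlier .hpp diffs: the generated visibility header itself is not part of this patch, so the block below is only a guess at the common shape such export macros take (names like GARROW_STATIC_COMPILATION are hypothetical), not the actual arrow-glib definition:

/* Hypothetical shape of an export macro such as GARROW_EXTERN; the *_COMPILATION
 * define set while building the library selects dllexport, consumers get
 * dllimport, and non-Windows builds fall back to default symbol visibility. */
#if defined(_WIN32) && !defined(GARROW_STATIC_COMPILATION)
#  ifdef GARROW_COMPILATION
#    define GARROW_EXTERN __declspec(dllexport) extern
#  else
#    define GARROW_EXTERN __declspec(dllimport) extern
#  endif
#else
#  define GARROW_EXTERN __attribute__((visibility("default"))) extern
#endif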
GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_16 gchar * ggandiva_node_to_string(GGandivaNode *node); #define GGANDIVA_TYPE_FIELD_NODE (ggandiva_field_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFieldNode, ggandiva_field_node, GGANDIVA, FIELD_NODE, GGandivaNode) struct _GGandivaFieldNodeClass @@ -44,10 +47,12 @@ struct _GGandivaFieldNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFieldNode * ggandiva_field_node_new(GArrowField *field); #define GGANDIVA_TYPE_FUNCTION_NODE (ggandiva_function_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaFunctionNode, ggandiva_function_node, GGANDIVA, FUNCTION_NODE, GGandivaNode) struct _GGandivaFunctionNodeClass @@ -55,14 +60,18 @@ struct _GGandivaFunctionNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFunctionNode * ggandiva_function_node_new(const gchar *name, GList *parameters, GArrowDataType *return_type); + +GGANDIVA_AVAILABLE_IN_0_12 GList * ggandiva_function_node_get_parameters(GGandivaFunctionNode *node); #define GGANDIVA_TYPE_LITERAL_NODE (ggandiva_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaLiteralNode, ggandiva_literal_node, GGANDIVA, LITERAL_NODE, GGandivaNode) struct _GGandivaLiteralNodeClass @@ -71,6 +80,7 @@ struct _GGandivaLiteralNodeClass }; #define GGANDIVA_TYPE_NULL_LITERAL_NODE (ggandiva_null_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaNullLiteralNode, ggandiva_null_literal_node, GGANDIVA, @@ -81,10 +91,12 @@ struct _GGandivaNullLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaNullLiteralNode * ggandiva_null_literal_node_new(GArrowDataType *return_type, GError **error); #define GGANDIVA_TYPE_BOOLEAN_LITERAL_NODE (ggandiva_boolean_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBooleanLiteralNode, ggandiva_boolean_literal_node, GGANDIVA, @@ -95,12 +107,16 @@ struct _GGandivaBooleanLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBooleanLiteralNode * ggandiva_boolean_literal_node_new(gboolean value); + +GGANDIVA_AVAILABLE_IN_0_12 gboolean ggandiva_boolean_literal_node_get_value(GGandivaBooleanLiteralNode *node); #define GGANDIVA_TYPE_INT8_LITERAL_NODE (ggandiva_int8_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt8LiteralNode, ggandiva_int8_literal_node, GGANDIVA, @@ -111,12 +127,16 @@ struct _GGandivaInt8LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt8LiteralNode * ggandiva_int8_literal_node_new(gint8 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint8 ggandiva_int8_literal_node_get_value(GGandivaInt8LiteralNode *node); #define GGANDIVA_TYPE_UINT8_LITERAL_NODE (ggandiva_uint8_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt8LiteralNode, ggandiva_uint8_literal_node, GGANDIVA, @@ -127,12 +147,16 @@ struct _GGandivaUInt8LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt8LiteralNode * ggandiva_uint8_literal_node_new(guint8 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint8 ggandiva_uint8_literal_node_get_value(GGandivaUInt8LiteralNode *node); #define GGANDIVA_TYPE_INT16_LITERAL_NODE (ggandiva_int16_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt16LiteralNode, 
ggandiva_int16_literal_node, GGANDIVA, @@ -143,12 +167,16 @@ struct _GGandivaInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt16LiteralNode * ggandiva_int16_literal_node_new(gint16 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint16 ggandiva_int16_literal_node_get_value(GGandivaInt16LiteralNode *node); #define GGANDIVA_TYPE_UINT16_LITERAL_NODE (ggandiva_uint16_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16LiteralNode, ggandiva_uint16_literal_node, GGANDIVA, @@ -159,12 +187,16 @@ struct _GGandivaUInt16LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt16LiteralNode * ggandiva_uint16_literal_node_new(guint16 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint16 ggandiva_uint16_literal_node_get_value(GGandivaUInt16LiteralNode *node); #define GGANDIVA_TYPE_INT32_LITERAL_NODE (ggandiva_int32_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt32LiteralNode, ggandiva_int32_literal_node, GGANDIVA, @@ -175,12 +207,16 @@ struct _GGandivaInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt32LiteralNode * ggandiva_int32_literal_node_new(gint32 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint32 ggandiva_int32_literal_node_get_value(GGandivaInt32LiteralNode *node); #define GGANDIVA_TYPE_UINT32_LITERAL_NODE (ggandiva_uint32_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32LiteralNode, ggandiva_uint32_literal_node, GGANDIVA, @@ -191,12 +227,16 @@ struct _GGandivaUInt32LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt32LiteralNode * ggandiva_uint32_literal_node_new(guint32 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint32 ggandiva_uint32_literal_node_get_value(GGandivaUInt32LiteralNode *node); #define GGANDIVA_TYPE_INT64_LITERAL_NODE (ggandiva_int64_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaInt64LiteralNode, ggandiva_int64_literal_node, GGANDIVA, @@ -207,12 +247,16 @@ struct _GGandivaInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaInt64LiteralNode * ggandiva_int64_literal_node_new(gint64 value); + +GGANDIVA_AVAILABLE_IN_0_12 gint64 ggandiva_int64_literal_node_get_value(GGandivaInt64LiteralNode *node); #define GGANDIVA_TYPE_UINT64_LITERAL_NODE (ggandiva_uint64_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64LiteralNode, ggandiva_uint64_literal_node, GGANDIVA, @@ -223,12 +267,16 @@ struct _GGandivaUInt64LiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaUInt64LiteralNode * ggandiva_uint64_literal_node_new(guint64 value); + +GGANDIVA_AVAILABLE_IN_0_12 guint64 ggandiva_uint64_literal_node_get_value(GGandivaUInt64LiteralNode *node); #define GGANDIVA_TYPE_FLOAT_LITERAL_NODE (ggandiva_float_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaFloatLiteralNode, ggandiva_float_literal_node, GGANDIVA, @@ -239,12 +287,16 @@ struct _GGandivaFloatLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaFloatLiteralNode * ggandiva_float_literal_node_new(gfloat value); + +GGANDIVA_AVAILABLE_IN_0_12 gfloat ggandiva_float_literal_node_get_value(GGandivaFloatLiteralNode *node); #define GGANDIVA_TYPE_DOUBLE_LITERAL_NODE (ggandiva_double_literal_node_get_type()) 
+GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaDoubleLiteralNode, ggandiva_double_literal_node, GGANDIVA, @@ -255,12 +307,16 @@ struct _GGandivaDoubleLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaDoubleLiteralNode * ggandiva_double_literal_node_new(gdouble value); + +GGANDIVA_AVAILABLE_IN_0_12 gdouble ggandiva_double_literal_node_get_value(GGandivaDoubleLiteralNode *node); #define GGANDIVA_TYPE_BINARY_LITERAL_NODE (ggandiva_binary_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaBinaryLiteralNode, ggandiva_binary_literal_node, GGANDIVA, @@ -271,14 +327,20 @@ struct _GGandivaBinaryLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new(const guint8 *value, gsize size); + +GGANDIVA_AVAILABLE_IN_0_12 GGandivaBinaryLiteralNode * ggandiva_binary_literal_node_new_bytes(GBytes *value); + +GGANDIVA_AVAILABLE_IN_0_12 GBytes * ggandiva_binary_literal_node_get_value(GGandivaBinaryLiteralNode *node); #define GGANDIVA_TYPE_STRING_LITERAL_NODE (ggandiva_string_literal_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE(GGandivaStringLiteralNode, ggandiva_string_literal_node, GGANDIVA, @@ -289,12 +351,16 @@ struct _GGandivaStringLiteralNodeClass GGandivaLiteralNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaStringLiteralNode * ggandiva_string_literal_node_new(const gchar *value); + +GGANDIVA_AVAILABLE_IN_0_12 const gchar * ggandiva_string_literal_node_get_value(GGandivaStringLiteralNode *node); #define GGANDIVA_TYPE_IF_NODE (ggandiva_if_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaIfNode, ggandiva_if_node, GGANDIVA, IF_NODE, GGandivaNode) struct _GGandivaIfNodeClass @@ -302,6 +368,7 @@ struct _GGandivaIfNodeClass GGandivaNodeClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaIfNode * ggandiva_if_node_new(GGandivaNode *condition_node, GGandivaNode *then_node, @@ -310,6 +377,7 @@ ggandiva_if_node_new(GGandivaNode *condition_node, GError **error); #define GGANDIVA_TYPE_BOOLEAN_NODE (ggandiva_boolean_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaBooleanNode, ggandiva_boolean_node, GGANDIVA, BOOLEAN_NODE, GGandivaNode) @@ -323,6 +391,7 @@ GList * ggandiva_boolean_node_get_children(GGandivaBooleanNode *node); #define GGANDIVA_TYPE_AND_NODE (ggandiva_and_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaAndNode, ggandiva_and_node, GGANDIVA, AND_NODE, GGandivaBooleanNode) struct _GGandivaAndNodeClass @@ -335,6 +404,7 @@ GGandivaAndNode * ggandiva_and_node_new(GList *children); #define GGANDIVA_TYPE_OR_NODE (ggandiva_or_node_get_type()) +GGANDIVA_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE( GGandivaOrNode, ggandiva_or_node, GGANDIVA, OR_NODE, GGandivaBooleanNode) struct _GGandivaOrNodeClass diff --git a/c_glib/gandiva-glib/projector.h b/c_glib/gandiva-glib/projector.h index e0afec5cb1ba1..5fbf9c290beab 100644 --- a/c_glib/gandiva-glib/projector.h +++ b/c_glib/gandiva-glib/projector.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GGANDIVA_TYPE_PROJECTOR (ggandiva_projector_get_type()) +GGANDIVA_AVAILABLE_IN_0_12 G_DECLARE_DERIVABLE_TYPE( GGandivaProjector, ggandiva_projector, GGANDIVA, PROJECTOR, GObject) @@ -32,14 +33,18 @@ struct _GGandivaProjectorClass GObjectClass parent_class; }; +GGANDIVA_AVAILABLE_IN_0_12 GGandivaProjector * ggandiva_projector_new(GArrowSchema *schema, GList *expressions, 
GError **error); + +GGANDIVA_AVAILABLE_IN_0_12 GList * ggandiva_projector_evaluate(GGandivaProjector *projector, GArrowRecordBatch *record_batch, GError **error); #define GGANDIVA_TYPE_SELECTABLE_PROJECTOR (ggandiva_selectable_projector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaSelectableProjector, ggandiva_selectable_projector, GGANDIVA, diff --git a/c_glib/gandiva-glib/selection-vector.h b/c_glib/gandiva-glib/selection-vector.h index 6d78192e35e28..558b9b950cf84 100644 --- a/c_glib/gandiva-glib/selection-vector.h +++ b/c_glib/gandiva-glib/selection-vector.h @@ -47,6 +47,7 @@ typedef enum { } GGandivaSelectionVectorMode; #define GGANDIVA_TYPE_SELECTION_VECTOR (ggandiva_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE( GGandivaSelectionVector, ggandiva_selection_vector, GGANDIVA, SELECTION_VECTOR, GObject) @@ -65,6 +66,7 @@ ggandiva_selection_vector_to_array(GGandivaSelectionVector *selection_vector); #define GGANDIVA_TYPE_UINT16_SELECTION_VECTOR \ (ggandiva_uint16_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt16SelectionVector, ggandiva_uint16_selection_vector, GGANDIVA, @@ -82,6 +84,7 @@ ggandiva_uint16_selection_vector_new(gint64 max_slots, GError **error); #define GGANDIVA_TYPE_UINT32_SELECTION_VECTOR \ (ggandiva_uint32_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt32SelectionVector, ggandiva_uint32_selection_vector, GGANDIVA, @@ -99,6 +102,7 @@ ggandiva_uint32_selection_vector_new(gint64 max_slots, GError **error); #define GGANDIVA_TYPE_UINT64_SELECTION_VECTOR \ (ggandiva_uint64_selection_vector_get_type()) +GGANDIVA_AVAILABLE_IN_4_0 G_DECLARE_DERIVABLE_TYPE(GGandivaUInt64SelectionVector, ggandiva_uint64_selection_vector, GGANDIVA, diff --git a/c_glib/gandiva-glib/version.h.in b/c_glib/gandiva-glib/version.h.in index 3c9e87c9d52e1..857c7367bd7e2 100644 --- a/c_glib/gandiva-glib/version.h.in +++ b/c_glib/gandiva-glib/version.h.in @@ -38,7 +38,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MAJOR (@GGANDIVA_VERSION_MAJOR@) +#define GGANDIVA_VERSION_MAJOR (@VERSION_MAJOR@) /** * GGANDIVA_VERSION_MINOR: @@ -47,7 +47,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MINOR (@GGANDIVA_VERSION_MINOR@) +#define GGANDIVA_VERSION_MINOR (@VERSION_MINOR@) /** * GGANDIVA_VERSION_MICRO: @@ -56,7 +56,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_MICRO (@GGANDIVA_VERSION_MICRO@) +#define GGANDIVA_VERSION_MICRO (@VERSION_MICRO@) /** * GGANDIVA_VERSION_TAG: @@ -66,7 +66,7 @@ * * Since: 1.0.0 */ -#define GGANDIVA_VERSION_TAG "@GGANDIVA_VERSION_TAG@" +#define GGANDIVA_VERSION_TAG "@VERSION_TAG@" /** * GGANDIVA_VERSION_CHECK: @@ -110,23 +110,7 @@ # define GGANDIVA_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) #endif -/** - * GGANDIVA_VERSION_1_0: - * - * You can use this macro value for compile time API version check. - * - * Since: 1.0.0 - */ -#define GGANDIVA_VERSION_1_0 G_ENCODE_VERSION(1, 0) - -/** - * GGANDIVA_VERSION_4_0: - * - * You can use this macro value for compile time API version check. 
- * - * Since: 4.0.0 - */ -#define GGANDIVA_VERSION_4_0 G_ENCODE_VERSION(4, 0) +@ENCODED_VERSIONS@ /** * GGANDIVA_VERSION_MIN_REQUIRED: @@ -172,47 +156,6 @@ G_ENCODE_VERSION(GGANDIVA_VERSION_MAJOR, GGANDIVA_VERSION_MINOR) #endif +@VISIBILITY_MACROS@ -#define GGANDIVA_AVAILABLE_IN_ALL - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_4_0 -# define GGANDIVA_DEPRECATED_IN_4_0 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_4_0 -# define GGANDIVA_DEPRECATED_IN_4_0_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_4_0 -# define GGANDIVA_AVAILABLE_IN_4_0 GGANDIVA_UNAVAILABLE(4, 0) -#else -# define GGANDIVA_AVAILABLE_IN_4_0 -#endif - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_1_0 -# define GGANDIVA_DEPRECATED_IN_1_0 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_1_0 -# define GGANDIVA_DEPRECATED_IN_1_0_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_1_0 -# define GGANDIVA_AVAILABLE_IN_1_0 GGANDIVA_UNAVAILABLE(1, 0) -#else -# define GGANDIVA_AVAILABLE_IN_1_0 -#endif - -#if GGANDIVA_VERSION_MIN_REQUIRED >= GGANDIVA_VERSION_0_17 -# define GGANDIVA_DEPRECATED_IN_0_17 GGANDIVA_DEPRECATED -# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) GGANDIVA_DEPRECATED_FOR(function) -#else -# define GGANDIVA_DEPRECATED_IN_0_17 -# define GGANDIVA_DEPRECATED_IN_0_17_FOR(function) -#endif - -#if GGANDIVA_VERSION_MAX_ALLOWED < GGANDIVA_VERSION_0_17 -# define GGANDIVA_AVAILABLE_IN_0_17 GGANDIVA_UNAVAILABLE(0, 17) -#else -# define GGANDIVA_AVAILABLE_IN_0_17 -#endif +@AVAILABILITY_MACROS@ diff --git a/c_glib/meson.build b/c_glib/meson.build index 08a9cd182e02e..06aa5b941e77c 100644 --- a/c_glib/meson.build +++ b/c_glib/meson.build @@ -171,6 +171,10 @@ if cxx.get_id() != 'msvc' endif add_project_arguments(cxx.get_supported_arguments(cxx_flags), language: 'cpp') +python = import('python') +python3 = python.find_installation('python3') +generate_version_header_py = project_source_root / 'tool' / 'generate-version-header.py' + subdir('arrow-glib') if arrow_cuda.found() subdir('arrow-cuda-glib') diff --git a/c_glib/parquet-glib/arrow-file-reader.h b/c_glib/parquet-glib/arrow-file-reader.h index 63c14ac71da86..52d7293bad0fa 100644 --- a/c_glib/parquet-glib/arrow-file-reader.h +++ b/c_glib/parquet-glib/arrow-file-reader.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GPARQUET_TYPE_ARROW_FILE_READER (gparquet_arrow_file_reader_get_type()) +GPARQUET_AVAILABLE_IN_0_11 G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileReader, gparquet_arrow_file_reader, GPARQUET, @@ -34,15 +35,19 @@ struct _GParquetArrowFileReaderClass GObjectClass parent_class; }; +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileReader * gparquet_arrow_file_reader_new_arrow(GArrowSeekableInputStream *source, GError **error); + +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileReader * gparquet_arrow_file_reader_new_path(const gchar *path, GError **error); +GPARQUET_AVAILABLE_IN_0_11 GArrowTable * gparquet_arrow_file_reader_read_table(GParquetArrowFileReader *reader, GError **error); -GARROW_AVAILABLE_IN_1_0 +GPARQUET_AVAILABLE_IN_1_0 GArrowTable * gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, gint row_group_index, @@ -50,26 +55,30 @@ gparquet_arrow_file_reader_read_row_group(GParquetArrowFileReader *reader, gsize n_column_indices, GError **error); +GPARQUET_AVAILABLE_IN_0_12 GArrowSchema * 
gparquet_arrow_file_reader_get_schema(GParquetArrowFileReader *reader, GError **error); +GPARQUET_AVAILABLE_IN_0_15 GArrowChunkedArray * gparquet_arrow_file_reader_read_column_data(GParquetArrowFileReader *reader, gint i, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gint gparquet_arrow_file_reader_get_n_row_groups(GParquetArrowFileReader *reader); -GARROW_AVAILABLE_IN_6_0 +GPARQUET_AVAILABLE_IN_6_0 gint64 gparquet_arrow_file_reader_get_n_rows(GParquetArrowFileReader *reader); +GPARQUET_AVAILABLE_IN_0_11 void gparquet_arrow_file_reader_set_use_threads(GParquetArrowFileReader *reader, gboolean use_threads); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetFileMetadata * gparquet_arrow_file_reader_get_metadata(GParquetArrowFileReader *reader); diff --git a/c_glib/parquet-glib/arrow-file-writer.h b/c_glib/parquet-glib/arrow-file-writer.h index 592ea4ae3f1ba..71cbfa195e842 100644 --- a/c_glib/parquet-glib/arrow-file-writer.h +++ b/c_glib/parquet-glib/arrow-file-writer.h @@ -20,10 +20,12 @@ #pragma once #include +#include G_BEGIN_DECLS #define GPARQUET_TYPE_WRITER_PROPERTIES (gparquet_writer_properties_get_type()) +GPARQUET_AVAILABLE_IN_0_17 G_DECLARE_DERIVABLE_TYPE(GParquetWriterProperties, gparquet_writer_properties, GPARQUET, @@ -34,61 +36,62 @@ struct _GParquetWriterPropertiesClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 GParquetWriterProperties * gparquet_writer_properties_new(void); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_compression(GParquetWriterProperties *properties, GArrowCompressionType compression_type, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 GArrowCompressionType gparquet_writer_properties_get_compression_path(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_enable_dictionary(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_disable_dictionary(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gboolean gparquet_writer_properties_is_dictionary_enabled(GParquetWriterProperties *properties, const gchar *path); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_dictionary_page_size_limit( GParquetWriterProperties *properties, gint64 limit); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_dictionary_page_size_limit( GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_batch_size(GParquetWriterProperties *properties, gint64 batch_size); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_batch_size(GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_max_row_group_length(GParquetWriterProperties *properties, gint64 length); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 gparquet_writer_properties_get_max_row_group_length(GParquetWriterProperties *properties); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 void gparquet_writer_properties_set_data_page_size(GParquetWriterProperties *properties, gint64 data_page_size); -GARROW_AVAILABLE_IN_0_17 +GPARQUET_AVAILABLE_IN_0_17 gint64 
gparquet_writer_properties_get_data_page_size(GParquetWriterProperties *properties); #define GPARQUET_TYPE_ARROW_FILE_WRITER (gparquet_arrow_file_writer_get_type()) +GPARQUET_AVAILABLE_IN_0_11 G_DECLARE_DERIVABLE_TYPE(GParquetArrowFileWriter, gparquet_arrow_file_writer, GPARQUET, @@ -99,23 +102,28 @@ struct _GParquetArrowFileWriterClass GObjectClass parent_class; }; +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileWriter * gparquet_arrow_file_writer_new_arrow(GArrowSchema *schema, GArrowOutputStream *sink, GParquetWriterProperties *writer_properties, GError **error); + +GPARQUET_AVAILABLE_IN_0_11 GParquetArrowFileWriter * gparquet_arrow_file_writer_new_path(GArrowSchema *schema, const gchar *path, GParquetWriterProperties *writer_properties, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_write_table(GParquetArrowFileWriter *writer, GArrowTable *table, guint64 chunk_size, GError **error); +GPARQUET_AVAILABLE_IN_0_11 gboolean gparquet_arrow_file_writer_close(GParquetArrowFileWriter *writer, GError **error); diff --git a/c_glib/parquet-glib/meson.build b/c_glib/parquet-glib/meson.build index 67de0bf2d91fb..a3de1d0933f7f 100644 --- a/c_glib/parquet-glib/meson.build +++ b/c_glib/parquet-glib/meson.build @@ -42,10 +42,17 @@ cpp_headers = files( 'parquet-glib.hpp', ) +version_h = configure_file( + input: 'version.h.in', + output: 'version.h', + command: [python3, generate_version_header_py, '--library', 'GPARQUET', '--version', version, '--input', '@INPUT@', '--output', '@OUTPUT@'], +) + +c_headers += version_h + headers = c_headers + cpp_headers install_headers(headers, subdir: project_name) - dependencies = [ arrow, parquet, @@ -57,6 +64,7 @@ libparquet_glib = library('parquet-glib', dependencies: dependencies, implicit_include_directories: false, include_directories: base_include_directories, + cpp_args: ['-DGPARQUET_COMPILATION'], soversion: so_version, version: library_version) parquet_glib = declare_dependency(link_with: libparquet_glib, diff --git a/c_glib/parquet-glib/metadata.h b/c_glib/parquet-glib/metadata.h index 1c9fce7cc778d..d79bf009751ca 100644 --- a/c_glib/parquet-glib/metadata.h +++ b/c_glib/parquet-glib/metadata.h @@ -24,6 +24,7 @@ G_BEGIN_DECLS #define GPARQUET_TYPE_COLUMN_CHUNK_METADATA (gparquet_column_chunk_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetColumnChunkMetadata, gparquet_column_chunk_metadata, GPARQUET, @@ -34,28 +35,29 @@ struct _GParquetColumnChunkMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_column_chunk_metadata_equal(GParquetColumnChunkMetadata *metadata, GParquetColumnChunkMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_total_size(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_total_compressed_size( GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_column_chunk_metadata_get_file_offset(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_column_chunk_metadata_can_decompress(GParquetColumnChunkMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetStatistics * gparquet_column_chunk_metadata_get_statistics(GParquetColumnChunkMetadata *metadata); #define GPARQUET_TYPE_ROW_GROUP_METADATA (gparquet_row_group_metadata_get_type()) 
+GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetRowGroupMetadata, gparquet_row_group_metadata, GPARQUET, @@ -66,35 +68,36 @@ struct _GParquetRowGroupMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_row_group_metadata_equal(GParquetRowGroupMetadata *metadata, GParquetRowGroupMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_row_group_metadata_get_n_columns(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetColumnChunkMetadata * gparquet_row_group_metadata_get_column_chunk(GParquetRowGroupMetadata *metadata, gint index, GError **error); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_n_rows(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_total_size(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_total_compressed_size(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_row_group_metadata_get_file_offset(GParquetRowGroupMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_row_group_metadata_can_decompress(GParquetRowGroupMetadata *metadata); #define GPARQUET_TYPE_FILE_METADATA (gparquet_file_metadata_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GParquetFileMetadata, gparquet_file_metadata, GPARQUET, FILE_METADATA, GObject) struct _GParquetFileMetadataClass @@ -102,34 +105,34 @@ struct _GParquetFileMetadataClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_file_metadata_equal(GParquetFileMetadata *metadata, GParquetFileMetadata *other_metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_columns(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_schema_elements(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_file_metadata_get_n_rows(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint gparquet_file_metadata_get_n_row_groups(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GParquetRowGroupMetadata * gparquet_file_metadata_get_row_group(GParquetFileMetadata *metadata, gint index, GError **error); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 const gchar * gparquet_file_metadata_get_created_by(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 guint32 gparquet_file_metadata_get_size(GParquetFileMetadata *metadata); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_file_metadata_can_decompress(GParquetFileMetadata *metadata); diff --git a/c_glib/parquet-glib/parquet-glib.h b/c_glib/parquet-glib/parquet-glib.h index 23659421ce3d8..308adb87a7ed2 100644 --- a/c_glib/parquet-glib/parquet-glib.h +++ b/c_glib/parquet-glib/parquet-glib.h @@ -19,6 +19,8 @@ #pragma once +#include + #include #include #include diff --git a/c_glib/parquet-glib/statistics.h b/c_glib/parquet-glib/statistics.h index f28e2a3713638..25e02df8774b2 100644 --- a/c_glib/parquet-glib/statistics.h +++ b/c_glib/parquet-glib/statistics.h @@ -21,9 +21,12 @@ #include +#include + G_BEGIN_DECLS #define GPARQUET_TYPE_STATISTICS 
(gparquet_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE( GParquetStatistics, gparquet_statistics, GPARQUET, STATISTICS, GObject) struct _GParquetStatisticsClass @@ -31,30 +34,31 @@ struct _GParquetStatisticsClass GObjectClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_equal(GParquetStatistics *statistics, GParquetStatistics *other_statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_n_nulls(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_nulls(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_n_distinct_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_distinct_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_statistics_get_n_values(GParquetStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_statistics_has_min_max(GParquetStatistics *statistics); #define GPARQUET_TYPE_BOOLEAN_STATISTICS (gparquet_boolean_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetBooleanStatistics, gparquet_boolean_statistics, GPARQUET, @@ -65,14 +69,15 @@ struct _GParquetBooleanStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_boolean_statistics_get_min(GParquetBooleanStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gboolean gparquet_boolean_statistics_get_max(GParquetBooleanStatistics *statistics); #define GPARQUET_TYPE_INT32_STATISTICS (gparquet_int32_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetInt32Statistics, gparquet_int32_statistics, GPARQUET, @@ -83,14 +88,15 @@ struct _GParquetInt32StatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint32 gparquet_int32_statistics_get_min(GParquetInt32Statistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint32 gparquet_int32_statistics_get_max(GParquetInt32Statistics *statistics); #define GPARQUET_TYPE_INT64_STATISTICS (gparquet_int64_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetInt64Statistics, gparquet_int64_statistics, GPARQUET, @@ -101,14 +107,15 @@ struct _GParquetInt64StatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_int64_statistics_get_min(GParquetInt64Statistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gint64 gparquet_int64_statistics_get_max(GParquetInt64Statistics *statistics); #define GPARQUET_TYPE_FLOAT_STATISTICS (gparquet_float_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetFloatStatistics, gparquet_float_statistics, GPARQUET, @@ -119,14 +126,15 @@ struct _GParquetFloatStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gfloat gparquet_float_statistics_get_min(GParquetFloatStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gfloat gparquet_float_statistics_get_max(GParquetFloatStatistics *statistics); #define GPARQUET_TYPE_DOUBLE_STATISTICS (gparquet_double_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 
G_DECLARE_DERIVABLE_TYPE(GParquetDoubleStatistics, gparquet_double_statistics, GPARQUET, @@ -137,14 +145,15 @@ struct _GParquetDoubleStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gdouble gparquet_double_statistics_get_min(GParquetDoubleStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 gdouble gparquet_double_statistics_get_max(GParquetDoubleStatistics *statistics); #define GPARQUET_TYPE_BYTE_ARRAY_STATISTICS (gparquet_byte_array_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetByteArrayStatistics, gparquet_byte_array_statistics, GPARQUET, @@ -155,15 +164,16 @@ struct _GParquetByteArrayStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_byte_array_statistics_get_min(GParquetByteArrayStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_byte_array_statistics_get_max(GParquetByteArrayStatistics *statistics); #define GPARQUET_TYPE_FIXED_LENGTH_BYTE_ARRAY_STATISTICS \ (gparquet_fixed_length_byte_array_statistics_get_type()) +GPARQUET_AVAILABLE_IN_8_0 G_DECLARE_DERIVABLE_TYPE(GParquetFixedLengthByteArrayStatistics, gparquet_fixed_length_byte_array_statistics, GPARQUET, @@ -174,11 +184,11 @@ struct _GParquetFixedLengthByteArrayStatisticsClass GParquetStatisticsClass parent_class; }; -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_fixed_length_byte_array_statistics_get_min( GParquetFixedLengthByteArrayStatistics *statistics); -GARROW_AVAILABLE_IN_8_0 +GPARQUET_AVAILABLE_IN_8_0 GBytes * gparquet_fixed_length_byte_array_statistics_get_max( GParquetFixedLengthByteArrayStatistics *statistics); diff --git a/c_glib/parquet-glib/version.h.in b/c_glib/parquet-glib/version.h.in new file mode 100644 index 0000000000000..142b3b83e0f3d --- /dev/null +++ b/c_glib/parquet-glib/version.h.in @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#pragma once + +#include + +/** + * SECTION: version + * @section_id: version-macros + * @title: Version related macros + * @include: parquet-glib/parquet-glib.h + * + * Parquet GLib provides macros that can be used by C pre-processor. + * They are useful to check version related things at compile time. + */ + +/** + * GPARQUET_VERSION_MAJOR: + * + * The major version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MAJOR (@VERSION_MAJOR@) + +/** + * GPARQUET_VERSION_MINOR: + * + * The minor version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MINOR (@VERSION_MINOR@) + +/** + * GPARQUET_VERSION_MICRO: + * + * The micro version. 
+ * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_MICRO (@VERSION_MICRO@) + +/** + * GPARQUET_VERSION_TAG: + * + * The version tag. Normally, it's an empty string. It's "SNAPSHOT" + * for snapshot version. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_TAG "@VERSION_TAG@" + +/** + * GPARQUET_VERSION_CHECK: + * @major: A major version to check for. + * @minor: A minor version to check for. + * @micro: A micro version to check for. + * + * You can use this macro in C pre-processor. + * + * Returns: %TRUE if the compile time Apache Parquet GLib version is the + * same as or newer than the passed version, %FALSE otherwise. + * + * Since: 17.0.0 + */ +#define GPARQUET_VERSION_CHECK(major, minor, micro) \ + (GPARQUET_VERSION_MAJOR > (major) || \ + (GPARQUET_VERSION_MAJOR == (major) && \ + GPARQUET_VERSION_MINOR > (minor)) || \ + (GPARQUET_VERSION_MAJOR == (major) && \ + GPARQUET_VERSION_MINOR == (minor) && \ + GPARQUET_VERSION_MICRO >= (micro))) + +/** + * GPARQUET_DISABLE_DEPRECATION_WARNINGS: + * + * If this macro is defined, no deprecated warnings are produced. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header. + * + * Since: 17.0.0 + */ + +#ifdef GPARQUET_DISABLE_DEPRECATION_WARNINGS +# define GPARQUET_DEPRECATED +# define GPARQUET_DEPRECATED_FOR(function) +# define GPARQUET_UNAVAILABLE(major, minor) +#else +# define GPARQUET_DEPRECATED G_DEPRECATED +# define GPARQUET_DEPRECATED_FOR(function) G_DEPRECATED_FOR(function) +# define GPARQUET_UNAVAILABLE(major, minor) G_UNAVAILABLE(major, minor) +#endif + +@ENCODED_VERSIONS@ + +/** + * GPARQUET_VERSION_MIN_REQUIRED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GPARQUET_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GPARQUET_VERSION_MIN_REQUIRED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GPARQUET_VERSION_MIN_REQUIRED +# define GPARQUET_VERSION_MIN_REQUIRED GARROW_VERSION_MIN_REQUIRED +#endif + +/** + * GPARQUET_VERSION_MAX_ALLOWED: + * + * You can use this macro for compile time API version check. + * + * This macro value must be one of the predefined version macros such + * as %GPARQUET_VERSION_0_10. + * + * If you use any functions that are defined by a newer version than + * %GPARQUET_VERSION_MAX_ALLOWED, deprecated warnings are produced at + * compile time. + * + * You must define this macro before including the + * parquet-glib/parquet-glib.h header. + * + * Since: 17.0.0 + */ +#ifndef GPARQUET_VERSION_MAX_ALLOWED +# define GPARQUET_VERSION_MAX_ALLOWED GARROW_VERSION_MAX_ALLOWED +#endif + +@VISIBILITY_MACROS@ + +@AVAILABILITY_MACROS@ diff --git a/c_glib/tool/generate-version-header.py b/c_glib/tool/generate-version-header.py new file mode 100755 index 0000000000000..f2fc26132c143 --- /dev/null +++ b/c_glib/tool/generate-version-header.py @@ -0,0 +1,172 @@ +#!/usr/bin/env python3 + +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License.
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + + +import argparse +from io import TextIOBase +from pathlib import Path +import re + + +def main(): + parser = argparse.ArgumentParser( + description="Generate C header with version macros") + parser.add_argument( + "--library", + required=True, + help="The library name to use in macro prefixes") + parser.add_argument( + "--version", + required=True, + help="The library version number") + parser.add_argument( + "--input", + type=Path, + required=True, + help="Path to the input template file") + parser.add_argument( + "--output", + type=Path, + required=True, + help="Path to the output file to generate") + + args = parser.parse_args() + + with open(args.input, "r", encoding="utf-8") as input_file, \ + open(args.output, "w", encoding="utf-8") as output_file: + write_header( + input_file, output_file, args.library, args.version) + + +def write_header( + input_file: TextIOBase, + output_file: TextIOBase, + library_name: str, + version: str): + if "-" in version: + version, version_tag = version.split("-") + else: + version_tag = "" + version_major, version_minor, version_micro = [int(v) for v in version.split(".")] + + encoded_versions = generate_encoded_versions(library_name) + visibility_macros = generate_visibility_macros(library_name) + availability_macros = generate_availability_macros(library_name) + + replacements = { + "VERSION_MAJOR": str(version_major), + "VERSION_MINOR": str(version_minor), + "VERSION_MICRO": str(version_micro), + "VERSION_TAG": version_tag, + "ENCODED_VERSIONS": encoded_versions, + "VISIBILITY_MACROS": visibility_macros, + "AVAILABILITY_MACROS": availability_macros, + } + + output_file.write(re.sub( + r"@([A-Z_]+)@", lambda match: replacements[match[1]], input_file.read())) + + +def generate_visibility_macros(library: str) -> str: + return f"""#if (defined(_WIN32) || defined(__CYGWIN__)) && defined(_MSVC_LANG) && \ + !defined({library}_STATIC_COMPILATION) +# define {library}_EXPORT __declspec(dllexport) +# define {library}_IMPORT __declspec(dllimport) +#else +# define {library}_EXPORT +# define {library}_IMPORT +#endif + +#ifdef {library}_COMPILATION +# define {library}_API {library}_EXPORT +#else +# define {library}_API {library}_IMPORT +#endif + +#define {library}_EXTERN {library}_API extern""" + + +def generate_encoded_versions(library: str) -> str: + macros = [] + + for major_version, minor_version in ALL_VERSIONS: + macros.append(f"""/** + * {library}_VERSION_{major_version}_{minor_version}: + * + * You can use this macro value for compile time API version check. 
+ * + * Since: {major_version}.{minor_version}.0 + */ +#define {library}_VERSION_{major_version}_{minor_version} G_ENCODE_VERSION({major_version}, {minor_version})""") # noqa: E501 + + return "\n\n".join(macros) + + +def generate_availability_macros(library: str) -> str: + macros = [f"""#define {library}_AVAILABLE_IN_ALL {library}_EXTERN"""] + + for major_version, minor_version in ALL_VERSIONS: + macros.append(f"""#if {library}_VERSION_MIN_REQUIRED >= {library}_VERSION_{major_version}_{minor_version} +# define {library}_DEPRECATED_IN_{major_version}_{minor_version} {library}_DEPRECATED +# define {library}_DEPRECATED_IN_{major_version}_{minor_version}_FOR(function) {library}_DEPRECATED_FOR(function) +#else +# define {library}_DEPRECATED_IN_{major_version}_{minor_version} +# define {library}_DEPRECATED_IN_{major_version}_{minor_version}_FOR(function) +#endif + +#if {library}_VERSION_MAX_ALLOWED < {library}_VERSION_{major_version}_{minor_version} +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_EXTERN {library}_UNAVAILABLE({major_version}, {minor_version}) +#else +# define {library}_AVAILABLE_IN_{major_version}_{minor_version} {library}_EXTERN +#endif""") # noqa: E501 + + return "\n\n".join(macros) + + +ALL_VERSIONS = [ + (17, 0), + (16, 0), + (15, 0), + (14, 0), + (13, 0), + (12, 0), + (11, 0), + (10, 0), + (9, 0), + (8, 0), + (7, 0), + (6, 0), + (5, 0), + (4, 0), + (3, 0), + (2, 0), + (1, 0), + (0, 17), + (0, 16), + (0, 15), + (0, 14), + (0, 13), + (0, 12), + (0, 11), + (0, 10), +] + + +if __name__ == '__main__': + main() diff --git a/c_glib/vcpkg.json b/c_glib/vcpkg.json new file mode 100644 index 0000000000000..4a14a1e437ff6 --- /dev/null +++ b/c_glib/vcpkg.json @@ -0,0 +1,8 @@ +{ + "name": "arrow-glib", + "version-string": "17.0.0-SNAPSHOT", + "dependencies": [ + "glib", + "pkgconf" + ] +} diff --git a/ci/appveyor-cpp-build.bat b/ci/appveyor-cpp-build.bat index 8cfa67c437264..f688fbb63a9ad 100644 --- a/ci/appveyor-cpp-build.bat +++ b/ci/appveyor-cpp-build.bat @@ -129,7 +129,6 @@ set PYARROW_WITH_ORC=%ARROW_ORC% set PYARROW_WITH_PARQUET=ON set PYARROW_WITH_PARQUET_ENCRYPTION=ON set PYARROW_WITH_S3=%ARROW_S3% -set PYARROW_WITH_STATIC_BOOST=ON set PYARROW_WITH_SUBSTRAIT=ON set ARROW_HOME=%CONDA_PREFIX%\Library diff --git a/ci/scripts/PKGBUILD b/ci/scripts/PKGBUILD index b0905886dd50f..f6bbc78be710e 100644 --- a/ci/scripts/PKGBUILD +++ b/ci/scripts/PKGBUILD @@ -18,7 +18,7 @@ _realname=arrow pkgbase=mingw-w64-${_realname} pkgname="${MINGW_PACKAGE_PREFIX}-${_realname}" -pkgver=16.0.0.9000 +pkgver=16.1.0.9000 pkgrel=8000 pkgdesc="Apache Arrow is a cross-language development platform for in-memory data (mingw-w64)" arch=("any") diff --git a/ci/scripts/c_glib_build.sh b/ci/scripts/c_glib_build.sh index 6a6295e4ff0bd..ee01bb220710e 100755 --- a/ci/scripts/c_glib_build.sh +++ b/ci/scripts/c_glib_build.sh @@ -28,14 +28,35 @@ build_root=${2} : ${BUILD_DOCS_C_GLIB:=OFF} with_doc=$([ "${BUILD_DOCS_C_GLIB}" == "ON" ] && echo "true" || echo "false") -export PKG_CONFIG_PATH=${ARROW_HOME}/lib/pkgconfig + +if [ -n "${MSYSTEM:-}" ]; then + # Fix ARROW_HOME when running under MSYS2 + export ARROW_HOME="$(cygpath --unix "${ARROW_HOME}")" +fi + +meson_pkg_config_path="${ARROW_HOME}/lib/pkgconfig" mkdir -p ${build_dir} +if [ -n "${VCPKG_ROOT:-}" ]; then + vcpkg_install_root="${build_root}/vcpkg_installed" + $VCPKG_ROOT/vcpkg install --x-manifest-root=${source_dir} --x-install-root=${vcpkg_install_root} + export PKG_CONFIG="${vcpkg_install_root}/x64-windows/tools/pkgconf/pkgconf.exe" + 
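For reference, the block that the new tool/generate-version-header.py emits for a single (major, minor) entry looks roughly like the following, shown here for the GPARQUET prefix and version 8.0; this is a sketch reconstructed from the script's f-strings above, so treat exact whitespace and surrounding doc comments as approximate:

#define GPARQUET_VERSION_8_0 G_ENCODE_VERSION(8, 0)

#if GPARQUET_VERSION_MIN_REQUIRED >= GPARQUET_VERSION_8_0
# define GPARQUET_DEPRECATED_IN_8_0 GPARQUET_DEPRECATED
# define GPARQUET_DEPRECATED_IN_8_0_FOR(function) GPARQUET_DEPRECATED_FOR(function)
#else
# define GPARQUET_DEPRECATED_IN_8_0
# define GPARQUET_DEPRECATED_IN_8_0_FOR(function)
#endif

#if GPARQUET_VERSION_MAX_ALLOWED < GPARQUET_VERSION_8_0
/* still declared, but flagged unavailable for consumers pinned below 8.0 */
# define GPARQUET_AVAILABLE_IN_8_0 GPARQUET_EXTERN GPARQUET_UNAVAILABLE(8, 0)
#else
# define GPARQUET_AVAILABLE_IN_8_0 GPARQUET_EXTERN
#endif

GPARQUET_EXTERN itself comes from the generated visibility block (@VISIBILITY_MACROS@) and expands to GPARQUET_API extern; on Windows MSVC shared builds GPARQUET_API becomes __declspec(dllexport) when GPARQUET_COMPILATION is defined (hence the new -DGPARQUET_COMPILATION cpp_args in parquet-glib/meson.build) and __declspec(dllimport) otherwise, while on other platforms it is empty. A hypothetical manual invocation mirroring the configure_file() command would be: python3 tool/generate-version-header.py --library GPARQUET --version 17.0.0-SNAPSHOT --input parquet-glib/version.h.in --output version.h, where the 17.0.0-SNAPSHOT value is assumed from c_glib/vcpkg.json; Meson passes its own project version.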
meson_pkg_config_path="${vcpkg_install_root}/x64-windows/lib/pkgconfig:${meson_pkg_config_path}" +fi + +if [ -n "${VCToolsInstallDir:-}" -a -n "${MSYSTEM:-}" ]; then + # Meson finds the GNU link.exe instead of MSVC link.exe when running in MSYS2/git bash, + # so we need to make sure the MSVC link.exe is first in $PATH + export PATH="$(cygpath --unix "${VCToolsInstallDir}")/bin/HostX64/x64:${PATH}" +fi + # Build with Meson meson setup \ + --backend=ninja \ --prefix=$ARROW_HOME \ --libdir=lib \ + --pkg-config-path="${meson_pkg_config_path}" \ -Ddoc=${with_doc} \ -Dvapi=${ARROW_GLIB_VAPI} \ -Dwerror=${ARROW_GLIB_WERROR} \ diff --git a/ci/scripts/cpp_build.sh b/ci/scripts/cpp_build.sh index a1f40fc360e2f..6a3a53f2533cd 100755 --- a/ci/scripts/cpp_build.sh +++ b/ci/scripts/cpp_build.sh @@ -120,6 +120,7 @@ else -DARROW_BUILD_BENCHMARKS=${ARROW_BUILD_BENCHMARKS:-OFF} \ -DARROW_BUILD_EXAMPLES=${ARROW_BUILD_EXAMPLES:-OFF} \ -DARROW_BUILD_INTEGRATION=${ARROW_BUILD_INTEGRATION:-OFF} \ + -DARROW_BUILD_OPENMP_BENCHMARKS=${ARROW_BUILD_OPENMP_BENCHMARKS:-OFF} \ -DARROW_BUILD_SHARED=${ARROW_BUILD_SHARED:-ON} \ -DARROW_BUILD_STATIC=${ARROW_BUILD_STATIC:-ON} \ -DARROW_BUILD_TESTS=${ARROW_BUILD_TESTS:-OFF} \ diff --git a/ci/scripts/install_vcpkg.sh b/ci/scripts/install_vcpkg.sh index cc80582326ec5..08989d6444827 100755 --- a/ci/scripts/install_vcpkg.sh +++ b/ci/scripts/install_vcpkg.sh @@ -25,13 +25,16 @@ if [ "$#" -lt 1 ]; then fi arrow_dir=$(cd -- "$(dirname -- "$0")/../.." && pwd -P) -default_vcpkg_version=$(cat "${arrow_dir}/.env" | grep "VCPKG" | cut -d "=" -f2 | tr -d '"') default_vcpkg_ports_patch="${arrow_dir}/ci/vcpkg/ports.patch" vcpkg_destination=$1 -vcpkg_version=${2:-$default_vcpkg_version} +vcpkg_version=${2:-} vcpkg_ports_patch=${3:-$default_vcpkg_ports_patch} +if [ -z "${vcpkg_version}" ]; then + vcpkg_version=$(source "${arrow_dir}/.env" && echo "$VCPKG") +fi + # reduce the fetched data using a shallow clone git clone --shallow-since=2021-04-01 https://github.com/microsoft/vcpkg ${vcpkg_destination} diff --git a/ci/scripts/java_jni_manylinux_build.sh b/ci/scripts/java_jni_manylinux_build.sh index 4921ce170b7a9..6f3769751af42 100755 --- a/ci/scripts/java_jni_manylinux_build.sh +++ b/ci/scripts/java_jni_manylinux_build.sh @@ -35,6 +35,9 @@ echo "=== Clear output directories and leftovers ===" rm -rf ${build_dir} echo "=== Building Arrow C++ libraries ===" +devtoolset_version=$(rpm -qa "devtoolset-*-gcc" --queryformat %{VERSION} | \ + grep -o "^[0-9]*") +devtoolset_include_cpp="/opt/rh/devtoolset-${devtoolset_version}/root/usr/include/c++/${devtoolset_version}" : ${ARROW_ACERO:=ON} export ARROW_ACERO : ${ARROW_BUILD_TESTS:=ON} @@ -55,7 +58,7 @@ export ARROW_ORC : ${VCPKG_ROOT:=/opt/vcpkg} : ${VCPKG_FEATURE_FLAGS:=-manifests} : ${VCPKG_TARGET_TRIPLET:=${VCPKG_DEFAULT_TRIPLET:-x64-linux-static-${CMAKE_BUILD_TYPE}}} -: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-isystem;-lpthread} +: ${GANDIVA_CXX_FLAGS:=-isystem;${devtoolset_include_cpp};-isystem;${devtoolset_include_cpp}/x86_64-redhat-linux;-lpthread} if [ "${ARROW_USE_CCACHE}" == "ON" ]; then echo "=== ccache statistics before build ===" diff --git a/cpp/build-support/update-thrift.sh b/cpp/build-support/update-thrift.sh index 9b8f2539cffe3..9e050a5e49d64 100755 --- a/cpp/build-support/update-thrift.sh +++ b/cpp/build-support/update-thrift.sh @@ -20,4 +20,4 @@ # Run this from cpp/ directory.
thrift is expected to be in your path -thrift --gen cpp:moveable_types -out src/generated src/parquet/parquet.thrift +thrift --gen cpp:moveable_types,templates -out src/generated src/parquet/parquet.thrift diff --git a/cpp/cmake_modules/FindProtobufAlt.cmake b/cpp/cmake_modules/FindProtobufAlt.cmake index f343b42f2b762..703e05c4731b6 100644 --- a/cpp/cmake_modules/FindProtobufAlt.cmake +++ b/cpp/cmake_modules/FindProtobufAlt.cmake @@ -31,6 +31,11 @@ endif() find_package(protobuf CONFIG ${find_package_args}) set(ProtobufAlt_FOUND ${protobuf_FOUND}) if(ProtobufAlt_FOUND) + if(Protobuf_PROTOC_EXECUTABLE) + # work around https://github.com/protocolbuffers/protobuf/issues/14576 + set_target_properties(protobuf::protoc PROPERTIES IMPORTED_LOCATION_RELEASE + "${Protobuf_PROTOC_EXECUTABLE}") + endif() set(ProtobufAlt_VERSION ${protobuf_VERSION}) set(ProtobufAlt_VERSION_MAJOR ${protobuf_VERSION_MAJOR}) set(ProtobufAlt_VERSION_MINOR ${protobuf_VERSION_MINOR}) diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake b/cpp/cmake_modules/ThirdpartyToolchain.cmake index c24442dcb8749..f102c7bb81683 100644 --- a/cpp/cmake_modules/ThirdpartyToolchain.cmake +++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake @@ -5348,9 +5348,3 @@ if(ARROW_WITH_UCX) endif() message(STATUS "All bundled static libraries: ${ARROW_BUNDLED_STATIC_LIBS}") - -# Write out the package configurations. - -configure_file("src/arrow/util/config.h.cmake" "src/arrow/util/config.h" ESCAPE_QUOTES) -install(FILES "${ARROW_BINARY_DIR}/src/arrow/util/config.h" - DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/util") diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt index 5d61112518f5e..150a304975cad 100644 --- a/cpp/src/arrow/CMakeLists.txt +++ b/cpp/src/arrow/CMakeLists.txt @@ -351,6 +351,12 @@ macro(append_runtime_avx512_src SRCS SRC) endif() endmacro() +# Write out compile-time configuration constants +configure_file("util/config.h.cmake" "util/config.h" ESCAPE_QUOTES) +configure_file("util/config_internal.h.cmake" "util/config_internal.h" ESCAPE_QUOTES) +install(FILES "${CMAKE_CURRENT_BINARY_DIR}/util/config.h" + DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/arrow/util") + set(ARROW_SRCS builder.cc buffer.cc @@ -637,6 +643,7 @@ endif() set(ARROW_TESTING_SRCS io/test_common.cc ipc/test_common.cc + testing/fixed_width_test_util.cc testing/gtest_util.cc testing/random.cc testing/generator.cc @@ -716,7 +723,8 @@ set(ARROW_COMPUTE_SRCS compute/row/compare_internal.cc compute/row/grouper.cc compute/row/row_internal.cc - compute/util.cc) + compute/util.cc + compute/util_internal.cc) append_runtime_avx2_src(ARROW_COMPUTE_SRCS compute/key_hash_internal_avx2.cc) append_runtime_avx2_bmi2_src(ARROW_COMPUTE_SRCS compute/key_map_internal_avx2.cc) diff --git a/cpp/src/arrow/acero/asof_join_node.cc b/cpp/src/arrow/acero/asof_join_node.cc index 48cc83dd3d6a9..1d94467df9ee2 100644 --- a/cpp/src/arrow/acero/asof_join_node.cc +++ b/cpp/src/arrow/acero/asof_join_node.cc @@ -548,8 +548,10 @@ class InputState { // true when the queue is empty and, when memo may have future entries (the case of a // positive tolerance), when the memo is empty. // used when checking whether RHS is up to date with LHS. - bool CurrentEmpty() const { - return memo_.no_future_ ? Empty() : memo_.times_.empty() && Empty(); + // NOTE: The emptiness must be decided by a single call to Empty() in caller, due to the + // potential race with Push(), see GH-41614. + bool CurrentEmpty(bool empty) const { + return memo_.no_future_ ? 
empty : (memo_.times_.empty() && empty); } // in case memo may not have future entries (the case of a non-positive tolerance), @@ -650,13 +652,15 @@ class InputState { // timestamp, update latest_time and latest_ref_row to the value that immediately pass // the horizon. Update the memo-store with any entries or future entries so observed. // Returns true if updates were made, false if not. - Result AdvanceAndMemoize(OnType ts) { + // NOTE: The emptiness must be decided by a single call to Empty() in caller, due to the + // potential race with Push(), see GH-41614. + Result AdvanceAndMemoize(OnType ts, bool empty) { // Advance the right side row index until we reach the latest right row (for each key) // for the given left timestamp. DEBUG_SYNC(node_, "Advancing input ", index_, DEBUG_MANIP(std::endl)); // Check if already updated for TS (or if there is no latest) - if (Empty()) { // can't advance if empty and no future entries + if (empty) { // can't advance if empty and no future entries return memo_.no_future_ ? false : memo_.RemoveEntriesWithLesserTime(ts); } @@ -918,34 +922,46 @@ class CompositeTableBuilder { // guaranteeing this probability is below 1 in a billion. The fix is 128-bit hashing. // See ARROW-17653 class AsofJoinNode : public ExecNode { - // Advances the RHS as far as possible to be up to date for the current LHS timestamp - Result UpdateRhs() { + // A simple wrapper for the result of a single call to UpdateRhs(), identifying: + // 1) If any RHS has advanced. + // 2) If all RHS are up to date with LHS. + struct RhsUpdateState { + bool any_advanced; + bool all_up_to_date_with_lhs; + }; + // Advances the RHS as far as possible to be up to date for the current LHS timestamp, + // and checks if all RHS are up to date with LHS. The reason they have to be performed + // together is that they both depend on the emptiness of the RHS, which can be changed + // by Push() executing in another thread. + Result UpdateRhs() { auto& lhs = *state_.at(0); auto lhs_latest_time = lhs.GetLatestTime(); - bool any_updated = false; - for (size_t i = 1; i < state_.size(); ++i) { - ARROW_ASSIGN_OR_RAISE(bool advanced, state_[i]->AdvanceAndMemoize(lhs_latest_time)); - any_updated |= advanced; - } - return any_updated; - } - - // Returns false if RHS not up to date for LHS - bool IsUpToDateWithLhsRow() const { - auto& lhs = *state_[0]; - if (lhs.Empty()) return false; // can't proceed if nothing on the LHS - OnType lhs_ts = lhs.GetLatestTime(); + RhsUpdateState update_state{/*any_advanced=*/false, /*all_up_to_date_with_lhs=*/true}; for (size_t i = 1; i < state_.size(); ++i) { auto& rhs = *state_[i]; - if (!rhs.Finished()) { + + // Obtain RHS emptiness once for subsequent AdvanceAndMemoize() and CurrentEmpty(). + bool rhs_empty = rhs.Empty(); + // Obtain RHS current time here because AdvanceAndMemoize() can change the + // emptiness. + OnType rhs_current_time = rhs_empty ? 
OnType{} : rhs.GetLatestTime(); + + ARROW_ASSIGN_OR_RAISE(bool advanced, + rhs.AdvanceAndMemoize(lhs_latest_time, rhs_empty)); + update_state.any_advanced |= advanced; + + if (update_state.all_up_to_date_with_lhs && !rhs.Finished()) { // If RHS is finished, then we know it's up to date - if (rhs.CurrentEmpty()) - return false; // RHS isn't finished, but is empty --> not up to date - if (lhs_ts > rhs.GetCurrentTime()) - return false; // RHS isn't up to date (and not finished) + if (rhs.CurrentEmpty(rhs_empty)) { + // RHS isn't finished, but is empty --> not up to date + update_state.all_up_to_date_with_lhs = false; + } else if (lhs_latest_time > rhs_current_time) { + // RHS isn't up to date (and not finished) + update_state.all_up_to_date_with_lhs = false; + } } } - return true; + return update_state; } Result> ProcessInner() { @@ -963,20 +979,19 @@ class AsofJoinNode : public ExecNode { // If LHS is finished or empty then there's nothing we can do here if (lhs.Finished() || lhs.Empty()) break; - // Advance each of the RHS as far as possible to be up to date for the LHS timestamp - ARROW_ASSIGN_OR_RAISE(bool any_rhs_advanced, UpdateRhs()); + ARROW_ASSIGN_OR_RAISE(auto rhs_update_state, UpdateRhs()); // If we have received enough inputs to produce the next output batch // (decided by IsUpToDateWithLhsRow), we will perform the join and // materialize the output batch. The join is done by advancing through // the LHS and adding joined row to rows_ (done by Emplace). Finally, // input batches that are no longer needed are removed to free up memory. - if (IsUpToDateWithLhsRow()) { + if (rhs_update_state.all_up_to_date_with_lhs) { dst.Emplace(state_, tolerance_); ARROW_ASSIGN_OR_RAISE(bool advanced, lhs.Advance()); if (!advanced) break; // if we can't advance LHS, we're done for this batch } else { - if (!any_rhs_advanced) break; // need to wait for new data + if (!rhs_update_state.any_advanced) break; // need to wait for new data } } diff --git a/cpp/src/arrow/acero/asof_join_node_test.cc b/cpp/src/arrow/acero/asof_join_node_test.cc index d95d2aaad3643..051e280a4c53c 100644 --- a/cpp/src/arrow/acero/asof_join_node_test.cc +++ b/cpp/src/arrow/acero/asof_join_node_test.cc @@ -1678,5 +1678,59 @@ TEST(AsofJoinTest, BackpressureWithBatchesGen) { /*slow_r0=*/false); } +// Reproduction of GH-40675: A logical race between Process() and Push() that can be more +// easily observed with single small batch. 
+TEST(AsofJoinTest, RhsEmptinessRace) { + auto left_batch = ExecBatchFromJSON( + {int64(), utf8()}, R"([[1, "a"], [1, "b"], [5, "a"], [6, "b"], [7, "f"]])"); + auto right_batch = ExecBatchFromJSON( + {int64(), utf8(), float64()}, R"([[2, "a", 1.0], [9, "b", 3.0], [15, "g", 5.0]])"); + + Declaration left{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colA", int64()), field("col2", utf8())}), + {std::move(left_batch)})}; + Declaration right{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colB", int64()), field("col3", utf8()), + field("colC", float64())}), + {std::move(right_batch)})}; + AsofJoinNodeOptions asof_join_opts({{{"colA"}, {{"col2"}}}, {{"colB"}, {{"col3"}}}}, 1); + Declaration asof_join{ + "asofjoin", {std::move(left), std::move(right)}, std::move(asof_join_opts)}; + + ASSERT_OK_AND_ASSIGN(auto result, DeclarationToExecBatches(std::move(asof_join))); + + auto exp_batch = ExecBatchFromJSON( + {int64(), utf8(), float64()}, + R"([[1, "a", 1.0], [1, "b", null], [5, "a", null], [6, "b", null], [7, "f", null]])"); + AssertExecBatchesEqualIgnoringOrder(result.schema, {exp_batch}, result.batches); +} + +// Reproduction of GH-41149: Another case of the same root cause as GH-40675, but with +// empty "by" columns. +TEST(AsofJoinTest, RhsEmptinessRaceEmptyBy) { + auto left_batch = ExecBatchFromJSON({int64()}, R"([[1], [2], [3]])"); + auto right_batch = + ExecBatchFromJSON({utf8(), int64()}, R"([["Z", 2], ["B", 3], ["A", 4]])"); + + Declaration left{"exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("on", int64())}), + {std::move(left_batch)})}; + Declaration right{ + "exec_batch_source", + ExecBatchSourceNodeOptions(schema({field("colVals", utf8()), field("on", int64())}), + {std::move(right_batch)})}; + AsofJoinNodeOptions asof_join_opts({{{"on"}, {}}, {{"on"}, {}}}, 1); + Declaration asof_join{ + "asofjoin", {std::move(left), std::move(right)}, std::move(asof_join_opts)}; + + ASSERT_OK_AND_ASSIGN(auto result, DeclarationToExecBatches(std::move(asof_join))); + + auto exp_batch = + ExecBatchFromJSON({int64(), utf8()}, R"([[1, "Z"], [2, "Z"], [3, "B"]])"); + AssertExecBatchesEqualIgnoringOrder(result.schema, {exp_batch}, result.batches); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/exec_plan.cc b/cpp/src/arrow/acero/exec_plan.cc index 97119726d4b17..d9fb1942fccd8 100644 --- a/cpp/src/arrow/acero/exec_plan.cc +++ b/cpp/src/arrow/acero/exec_plan.cc @@ -128,7 +128,7 @@ struct ExecPlanImpl : public ExecPlan { Future<> scheduler_finished = arrow::util::AsyncTaskScheduler::Make( [this](arrow::util::AsyncTaskScheduler* async_scheduler) { QueryContext* ctx = query_context(); - RETURN_NOT_OK(ctx->Init(ctx->max_concurrency(), async_scheduler)); + RETURN_NOT_OK(ctx->Init(async_scheduler)); #ifdef ARROW_WITH_OPENTELEMETRY if (HasMetadata()) { diff --git a/cpp/src/arrow/acero/hash_aggregate_test.cc b/cpp/src/arrow/acero/hash_aggregate_test.cc index 2626fd50379dd..d529f443319b9 100644 --- a/cpp/src/arrow/acero/hash_aggregate_test.cc +++ b/cpp/src/arrow/acero/hash_aggregate_test.cc @@ -592,6 +592,12 @@ void TestSegments(std::unique_ptr& segmenter, const ExecSpan& batc ASSERT_EQ(expected_segment, segment); offset = segment.offset + segment.length; } + // Assert next is the last (empty) segment. 
+ ASSERT_OK_AND_ASSIGN(auto segment, segmenter->GetNextSegment(batch, offset)); + ASSERT_GE(segment.offset, batch.length); + ASSERT_EQ(segment.length, 0); + ASSERT_TRUE(segment.is_open); + ASSERT_TRUE(segment.extends); } Result> MakeGrouper(const std::vector& key_types) { @@ -682,48 +688,142 @@ TEST(RowSegmenter, Basics) { } TEST(RowSegmenter, NonOrdered) { - std::vector types = {int32()}; - auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [1], [2]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 1, true, false}, - {5, 0, true, true}}); + { + std::vector types = {int32()}; + auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [1], [2]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 1, true, false}, + {5, 0, true, true}}); + } + { + std::vector types = {int32(), int32()}; + auto batch = ExecBatchFromJSON(types, "[[1, 1], [1, 1], [2, 2], [1, 2], [2, 2]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 1, true, false}, + {5, 0, true, true}}); + } } TEST(RowSegmenter, EmptyBatches) { - std::vector types = {int32()}; - std::vector batches = { - ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), - ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), - }; - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batches[0]), {}); - TestSegments(segmenter, ExecSpan(batches[1]), {}); - TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[3]), {}); - TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); - TestSegments(segmenter, ExecSpan(batches[5]), {}); - TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); - TestSegments(segmenter, ExecSpan(batches[7]), {}); + { + std::vector types = {int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[2], [2]]"), ExecBatchFromJSON(types, "[]"), + }; + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {}); + TestSegments(segmenter, ExecSpan(batches[1]), {}); + TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[3]), {}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[5]), {}); + TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[7]), {}); + } + { + std::vector types = {int32(), int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[]"), + ExecBatchFromJSON(types, "[[2, 2], [2, 2]]"), + 
ExecBatchFromJSON(types, "[]"), + }; + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {}); + TestSegments(segmenter, ExecSpan(batches[1]), {}); + TestSegments(segmenter, ExecSpan(batches[2]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[3]), {}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[5]), {}); + TestSegments(segmenter, ExecSpan(batches[6]), {{0, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[7]), {}); + } } TEST(RowSegmenter, MultipleSegments) { - std::vector types = {int32()}; - auto batch = ExecBatchFromJSON(types, "[[1], [1], [2], [5], [3], [3], [5], [5], [4]]"); - ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); - TestSegments(segmenter, ExecSpan(batch), - {{0, 2, false, true}, - {2, 1, false, false}, - {3, 1, false, false}, - {4, 2, false, false}, - {6, 2, false, false}, - {8, 1, true, false}, - {9, 0, true, true}}); + { + std::vector types = {int32()}; + auto batch = + ExecBatchFromJSON(types, "[[1], [1], [2], [5], [3], [3], [5], [5], [4]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 2, false, false}, + {6, 2, false, false}, + {8, 1, true, false}, + {9, 0, true, true}}); + } + { + std::vector types = {int32(), int32()}; + auto batch = ExecBatchFromJSON( + types, + "[[1, 1], [1, 1], [2, 2], [5, 5], [3, 3], [3, 3], [5, 5], [5, 5], [4, 4]]"); + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batch), + {{0, 2, false, true}, + {2, 1, false, false}, + {3, 1, false, false}, + {4, 2, false, false}, + {6, 2, false, false}, + {8, 1, true, false}, + {9, 0, true, true}}); + } +} + +TEST(RowSegmenter, MultipleSegmentsMultipleBatches) { + { + std::vector types = {int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[[1]]"), ExecBatchFromJSON(types, "[[1], [2]]"), + ExecBatchFromJSON(types, "[[5], [3]]"), + ExecBatchFromJSON(types, "[[3], [5], [5]]"), ExecBatchFromJSON(types, "[[4]]")}; + + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[1]), + {{0, 1, false, true}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[2]), + {{0, 1, false, false}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[3]), + {{0, 1, false, true}, {1, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); + } + { + std::vector types = {int32(), int32()}; + std::vector batches = { + ExecBatchFromJSON(types, "[[1, 1]]"), + ExecBatchFromJSON(types, "[[1, 1], [2, 2]]"), + ExecBatchFromJSON(types, "[[5, 5], [3, 3]]"), + ExecBatchFromJSON(types, "[[3, 3], [5, 5], [5, 5]]"), + ExecBatchFromJSON(types, "[[4, 4]]")}; + + ASSERT_OK_AND_ASSIGN(auto segmenter, MakeRowSegmenter(types)); + TestSegments(segmenter, ExecSpan(batches[0]), {{0, 1, true, true}}); + TestSegments(segmenter, ExecSpan(batches[1]), + {{0, 1, false, true}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[2]), + {{0, 1, false, false}, {1, 1, true, false}}); + TestSegments(segmenter, ExecSpan(batches[3]), + {{0, 1, false, true}, {1, 2, true, false}}); + TestSegments(segmenter, ExecSpan(batches[4]), {{0, 1, true, false}}); + } } namespace { diff --git 
a/cpp/src/arrow/acero/hash_join_benchmark.cc b/cpp/src/arrow/acero/hash_join_benchmark.cc index ad1bd67cc8ec7..1f8e02e9f0fcf 100644 --- a/cpp/src/arrow/acero/hash_join_benchmark.cc +++ b/cpp/src/arrow/acero/hash_join_benchmark.cc @@ -148,7 +148,7 @@ class JoinBenchmark { }; scheduler_ = TaskScheduler::Make(); - DCHECK_OK(ctx_.Init(settings.num_threads, nullptr)); + DCHECK_OK(ctx_.Init(nullptr)); auto register_task_group_callback = [&](std::function task, std::function cont) { diff --git a/cpp/src/arrow/acero/hash_join_node.cc b/cpp/src/arrow/acero/hash_join_node.cc index b49364300dac8..06405f16c8d4c 100644 --- a/cpp/src/arrow/acero/hash_join_node.cc +++ b/cpp/src/arrow/acero/hash_join_node.cc @@ -497,11 +497,11 @@ struct BloomFilterPushdownContext { using BuildFinishedCallback = std::function; using FiltersReceivedCallback = std::function; using FilterFinishedCallback = std::function; - void Init(HashJoinNode* owner, size_t num_threads, - RegisterTaskGroupCallback register_task_group_callback, - StartTaskGroupCallback start_task_group_callback, - FiltersReceivedCallback on_bloom_filters_received, bool disable_bloom_filter, - bool use_sync_execution); + Status Init(HashJoinNode* owner, size_t num_threads, + RegisterTaskGroupCallback register_task_group_callback, + StartTaskGroupCallback start_task_group_callback, + FiltersReceivedCallback on_bloom_filters_received, + bool disable_bloom_filter, bool use_sync_execution); Status StartProducing(size_t thread_index); @@ -559,8 +559,7 @@ struct BloomFilterPushdownContext { std::vector hashes(batch.length); std::vector bv(bit_vector_bytes); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* stack = &tld_[thread_index].stack; // Start with full selection for the current batch memset(selected.data(), 0xff, bit_vector_bytes); @@ -654,7 +653,17 @@ struct BloomFilterPushdownContext { FiltersReceivedCallback all_received_callback_; FilterFinishedCallback on_finished_; } eval_; + + static constexpr auto kTempStackUsage = + Hashing32::kHashBatchTempStackUsage + + (sizeof(uint32_t) + /*extra=*/1) * arrow::util::MiniBatch::kMiniBatchLength; + + struct ThreadLocalData { + arrow::util::TempVectorStack stack; + }; + std::vector tld_; }; + bool HashJoinSchema::HasDictionaries() const { for (int side = 0; side <= 1; ++side) { for (int icol = 0; icol < proj_maps[side].num_cols(HashJoinProjection::INPUT); @@ -930,7 +939,7 @@ class HashJoinNode : public ExecNode, public TracedNode { // we will change it back to just the CPU's thread pool capacity. 
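  // For a rough sense of scale (hypothetical pool sizes, purely illustrative):
  // with 8 CPU threads and 8 I/O threads this yields num_threads = 17, so the
  // pushdown context below resizes tld_ to 17 entries and gives each entry its
  // own TempVectorStack of kTempStackUsage bytes, rather than borrowing a
  // lazily created stack from the QueryContext.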
size_t num_threads = (GetCpuThreadPoolCapacity() + io::GetIOThreadPoolCapacity() + 1); - pushdown_context_.Init( + RETURN_NOT_OK(pushdown_context_.Init( this, num_threads, [ctx](std::function fn, std::function on_finished) { @@ -940,7 +949,7 @@ class HashJoinNode : public ExecNode, public TracedNode { return ctx->StartTaskGroup(task_group_id, num_tasks); }, [this](size_t thread_index) { return OnFiltersReceived(thread_index); }, - disable_bloom_filter_, use_sync_execution); + disable_bloom_filter_, use_sync_execution)); RETURN_NOT_OK(impl_->Init( ctx, join_type_, num_threads, &(schema_mgr_->proj_maps[0]), @@ -1037,7 +1046,7 @@ class HashJoinNode : public ExecNode, public TracedNode { BloomFilterPushdownContext pushdown_context_; }; -void BloomFilterPushdownContext::Init( +Status BloomFilterPushdownContext::Init( HashJoinNode* owner, size_t num_threads, RegisterTaskGroupCallback register_task_group_callback, StartTaskGroupCallback start_task_group_callback, @@ -1074,6 +1083,12 @@ void BloomFilterPushdownContext::Init( return eval_.on_finished_(thread_index, std::move(eval_.batches_)); }); start_task_group_callback_ = std::move(start_task_group_callback); + tld_.resize(num_threads); + for (auto& local_data : tld_) { + RETURN_NOT_OK(local_data.stack.Init(ctx_->memory_pool(), kTempStackUsage)); + } + + return Status::OK(); } Status BloomFilterPushdownContext::StartProducing(size_t thread_index) { @@ -1124,8 +1139,7 @@ Status BloomFilterPushdownContext::BuildBloomFilter_exec_task(size_t thread_inde } ARROW_ASSIGN_OR_RAISE(ExecBatch key_batch, ExecBatch::Make(std::move(key_columns))); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* stack = &tld_[thread_index].stack; arrow::util::TempVectorHolder hash_holder( stack, arrow::util::MiniBatch::kMiniBatchLength); uint32_t* hashes = hash_holder.mutable_data(); diff --git a/cpp/src/arrow/acero/hash_join_node_test.cc b/cpp/src/arrow/acero/hash_join_node_test.cc index 9c3dbc176ff4f..215b1e4d21125 100644 --- a/cpp/src/arrow/acero/hash_join_node_test.cc +++ b/cpp/src/arrow/acero/hash_join_node_test.cc @@ -28,6 +28,7 @@ #include "arrow/api.h" #include "arrow/compute/kernels/row_encoder_internal.h" #include "arrow/compute/kernels/test_util.h" +#include "arrow/compute/light_array_internal.h" #include "arrow/testing/extension_type.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/matchers.h" @@ -41,6 +42,7 @@ namespace arrow { using compute::call; using compute::default_exec_context; +using compute::ExecBatchBuilder; using compute::ExecSpan; using compute::field_ref; using compute::SortIndices; @@ -3201,5 +3203,55 @@ TEST(HashJoin, ChainedIntegerHashJoins) { } } +// Test that a large number of joins don't overflow the temp vector stack, like GH-39582 +// and GH-39951. +TEST(HashJoin, ManyJoins) { + // The idea of this case is to create many nested join nodes that may possibly cause + // recursive usage of temp vector stack. To make sure that the recursion happens: + // 1. A left-deep join tree is created so that the left-most (the final probe side) + // table will go through all the hash tables from the right side. + // 2. Left-outer join is used so that every join will increase the cardinality. + // 3. The left-most table contains rows of unique integers from 0 to N. + // 4. Each right table at level i contains two rows of integer i, so that the probing of + // each level will increase the result by one row. + // 5. 
The left-most table is a single batch of enough rows, so that at each level, the + // probing will accumulate enough result rows to have to output to the subsequent level + // before finishing the current batch (releasing the buffer allocated on the temp vector + // stack), which is essentially the recursive usage of the temp vector stack. + + // A fair number of joins to guarantee temp vector stack overflow before GH-41335. + const int num_joins = 64; + + // `ExecBatchBuilder::num_rows_max()` is the number of rows for swiss join to accumulate + // before outputting. + const int num_left_rows = ExecBatchBuilder::num_rows_max(); + ASSERT_OK_AND_ASSIGN( + auto left_batches, + MakeIntegerBatches({[](int row_id) -> int64_t { return row_id; }}, + schema({field("l_key", int32())}), + /*num_batches=*/1, /*batch_size=*/num_left_rows)); + Declaration root{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(left_batches.schema), + std::move(left_batches.batches))}; + + HashJoinNodeOptions join_opts(JoinType::LEFT_OUTER, /*left_keys=*/{"l_key"}, + /*right_keys=*/{"r_key"}); + + for (int i = 0; i < num_joins; ++i) { + ASSERT_OK_AND_ASSIGN(auto right_batches, + MakeIntegerBatches({[i](int) -> int64_t { return i; }}, + schema({field("r_key", int32())}), + /*num_batches=*/1, /*batch_size=*/2)); + Declaration table{"exec_batch_source", + ExecBatchSourceNodeOptions(std::move(right_batches.schema), + std::move(right_batches.batches))}; + + Declaration new_root{"hashjoin", {std::move(root), std::move(table)}, join_opts}; + root = std::move(new_root); + } + + ASSERT_OK_AND_ASSIGN(std::ignore, DeclarationToTable(std::move(root))); +} + } // namespace acero } // namespace arrow diff --git a/cpp/src/arrow/acero/query_context.cc b/cpp/src/arrow/acero/query_context.cc index a27397d12079d..18beb19ab7f8b 100644 --- a/cpp/src/arrow/acero/query_context.cc +++ b/cpp/src/arrow/acero/query_context.cc @@ -40,8 +40,7 @@ QueryContext::QueryContext(QueryOptions opts, ExecContext exec_context) const CpuInfo* QueryContext::cpu_info() const { return CpuInfo::GetInstance(); } int64_t QueryContext::hardware_flags() const { return cpu_info()->hardware_flags(); } -Status QueryContext::Init(size_t max_num_threads, util::AsyncTaskScheduler* scheduler) { - tld_.resize(max_num_threads); +Status QueryContext::Init(util::AsyncTaskScheduler* scheduler) { async_scheduler_ = scheduler; return Status::OK(); } @@ -50,15 +49,6 @@ size_t QueryContext::GetThreadIndex() { return thread_indexer_(); } size_t QueryContext::max_concurrency() const { return thread_indexer_.Capacity(); } -Result QueryContext::GetTempStack(size_t thread_index) { - if (!tld_[thread_index].is_init) { - RETURN_NOT_OK(tld_[thread_index].stack.Init( - memory_pool(), 32 * util::MiniBatch::kMiniBatchLength * sizeof(uint64_t))); - tld_[thread_index].is_init = true; - } - return &tld_[thread_index].stack; -} - Result> QueryContext::BeginExternalTask(std::string_view name) { Future<> completion_future = Future<>::Make(); if (async_scheduler_->AddSimpleTask([completion_future] { return completion_future; }, diff --git a/cpp/src/arrow/acero/query_context.h b/cpp/src/arrow/acero/query_context.h index 9ea11679cba05..3eff299439828 100644 --- a/cpp/src/arrow/acero/query_context.h +++ b/cpp/src/arrow/acero/query_context.h @@ -38,7 +38,7 @@ class ARROW_ACERO_EXPORT QueryContext { QueryContext(QueryOptions opts = {}, ExecContext exec_context = *default_exec_context()); - Status Init(size_t max_num_threads, arrow::util::AsyncTaskScheduler* scheduler); + Status 
Init(arrow::util::AsyncTaskScheduler* scheduler); const ::arrow::internal::CpuInfo* cpu_info() const; int64_t hardware_flags() const; @@ -52,7 +52,6 @@ class ARROW_ACERO_EXPORT QueryContext { size_t GetThreadIndex(); size_t max_concurrency() const; - Result GetTempStack(size_t thread_index); /// \brief Start an external task /// @@ -145,11 +144,6 @@ class ARROW_ACERO_EXPORT QueryContext { std::unique_ptr task_scheduler_ = TaskScheduler::Make(); ThreadIndexer thread_indexer_; - struct ThreadLocalData { - bool is_init = false; - arrow::util::TempVectorStack stack; - }; - std::vector tld_; std::atomic in_flight_bytes_to_disk_{0}; }; diff --git a/cpp/src/arrow/acero/sink_node.cc b/cpp/src/arrow/acero/sink_node.cc index 4ab6b4537de02..66f447aa87f11 100644 --- a/cpp/src/arrow/acero/sink_node.cc +++ b/cpp/src/arrow/acero/sink_node.cc @@ -423,6 +423,7 @@ class ConsumingSinkNode : public ExecNode, std::atomic backpressure_counter_ = 0; std::unique_ptr sequencer_; }; + static Result MakeTableConsumingSinkNode(ExecPlan* plan, std::vector inputs, const ExecNodeOptions& options) { diff --git a/cpp/src/arrow/acero/swiss_join.cc b/cpp/src/arrow/acero/swiss_join.cc index 542e943c4a82b..17c5212697339 100644 --- a/cpp/src/arrow/acero/swiss_join.cc +++ b/cpp/src/arrow/acero/swiss_join.cc @@ -2470,6 +2470,8 @@ Status JoinProbeProcessor::OnFinished() { class SwissJoin : public HashJoinImpl { public: + static constexpr auto kTempStackUsage = 64 * arrow::util::MiniBatch::kMiniBatchLength; + Status Init(QueryContext* ctx, JoinType join_type, size_t num_threads, const HashJoinProjectionMaps* proj_map_left, const HashJoinProjectionMaps* proj_map_right, @@ -2513,6 +2515,7 @@ class SwissJoin : public HashJoinImpl { local_states_.resize(num_threads_); for (int i = 0; i < num_threads_; ++i) { + RETURN_NOT_OK(local_states_[i].stack.Init(pool_, kTempStackUsage)); local_states_[i].hash_table_ready = false; local_states_[i].num_output_batches = 0; local_states_[i].materialize.Init(pool_, proj_map_left, proj_map_right); @@ -2566,8 +2569,7 @@ class SwissJoin : public HashJoinImpl { ExecBatch keypayload_batch; ARROW_ASSIGN_OR_RAISE(keypayload_batch, KeyPayloadFromInput(/*side=*/0, &batch)); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_index)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_index].stack; return CancelIfNotOK( probe_processor_.OnNextBatch(thread_index, keypayload_batch, temp_stack, @@ -2679,8 +2681,7 @@ class SwissJoin : public HashJoinImpl { input_batch.values[schema->num_cols(HashJoinProjection::KEY) + icol]; } } - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; RETURN_NOT_OK(CancelIfNotOK(hash_table_build_.PushNextBatch( static_cast(thread_id), key_batch, no_payload ? 
nullptr : &payload_batch, temp_stack))); @@ -2715,8 +2716,7 @@ class SwissJoin : public HashJoinImpl { Status MergeFinished(size_t thread_id) { RETURN_NOT_OK(status()); - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; hash_table_build_.FinishPrtnMerge(temp_stack); return CancelIfNotOK(OnBuildHashTableFinished(static_cast(thread_id))); } @@ -2771,8 +2771,7 @@ class SwissJoin : public HashJoinImpl { std::min((task_id + 1) * kNumRowsPerScanTask, hash_table_.num_rows()); // Get thread index and related temp vector stack // - ARROW_ASSIGN_OR_RAISE(arrow::util::TempVectorStack * temp_stack, - ctx_->GetTempStack(thread_id)); + arrow::util::TempVectorStack* temp_stack = &local_states_[thread_id].stack; // Split into mini-batches // @@ -2949,6 +2948,7 @@ class SwissJoin : public HashJoinImpl { FinishedCallback finished_callback_; struct ThreadLocalState { + arrow::util::TempVectorStack stack; JoinResultMaterialize materialize; std::vector temp_column_arrays; int64_t num_output_batches; diff --git a/cpp/src/arrow/array/array_base.h b/cpp/src/arrow/array/array_base.h index 6411aebf80442..716ae0722069e 100644 --- a/cpp/src/arrow/array/array_base.h +++ b/cpp/src/arrow/array/array_base.h @@ -224,6 +224,14 @@ class ARROW_EXPORT Array { /// \return Status Status ValidateFull() const; + /// \brief Return the device_type that this array's data is allocated on + /// + /// This just delegates to calling device_type on the underlying ArrayData + /// object which backs this Array. + /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const { return data_->device_type(); } + protected: Array() = default; ARROW_DEFAULT_MOVE_AND_ASSIGN(Array); diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index 7e25ad61fa2ea..32806d9d2edb3 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -478,6 +478,7 @@ TEST_F(TestArray, TestMakeArrayOfNull) { ASSERT_EQ(array->type(), type); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); if (is_union(type->id())) { ASSERT_EQ(array->null_count(), 0); ASSERT_EQ(array->ComputeLogicalNullCount(), length); @@ -719,6 +720,7 @@ TEST_F(TestArray, TestMakeArrayFromScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), length); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); // test case for ARROW-13321 for (int64_t i : {int64_t{0}, length / 2, length - 1}) { @@ -744,6 +746,7 @@ TEST_F(TestArray, TestMakeArrayFromScalarSliced) { auto sliced = array->Slice(1, 4); ASSERT_EQ(sliced->length(), 4); ASSERT_EQ(sliced->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); ARROW_EXPECT_OK(sliced->ValidateFull()); } } @@ -758,6 +761,7 @@ TEST_F(TestArray, TestMakeArrayFromDictionaryScalar) { ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 4); ASSERT_EQ(array->null_count(), 0); + ASSERT_EQ(array->device_type(), DeviceAllocationType::kCPU); for (int i = 0; i < 4; i++) { ASSERT_OK_AND_ASSIGN(auto item, array->GetScalar(i)); @@ -797,6 +801,7 @@ TEST_F(TestArray, TestMakeEmptyArray) { ASSERT_OK_AND_ASSIGN(auto array, MakeEmptyArray(type)); ASSERT_OK(array->ValidateFull()); ASSERT_EQ(array->length(), 0); + CheckSpanRoundTrip(*array); } } diff --git a/cpp/src/arrow/array/data.cc 
b/cpp/src/arrow/array/data.cc index ac828a9c35c67..76a43521394c1 100644 --- a/cpp/src/arrow/array/data.cc +++ b/cpp/src/arrow/array/data.cc @@ -224,6 +224,42 @@ int64_t ArrayData::ComputeLogicalNullCount() const { return ArraySpan(*this).ComputeLogicalNullCount(); } +DeviceAllocationType ArrayData::device_type() const { + // we're using 0 as a sentinel value for NOT YET ASSIGNED + // there is explicitly no constant DeviceAllocationType to represent + // the "UNASSIGNED" case as it is invalid for data to not have an + // assigned device type. If it's still 0 at the end, then we return + // CPU as the allocation device type + int type = 0; + for (const auto& buf : buffers) { + if (!buf) continue; + if (type == 0) { + type = static_cast(buf->device_type()); + } else { + DCHECK_EQ(type, static_cast(buf->device_type())); + } + } + + for (const auto& child : child_data) { + if (!child) continue; + if (type == 0) { + type = static_cast(child->device_type()); + } else { + DCHECK_EQ(type, static_cast(child->device_type())); + } + } + + if (dictionary) { + if (type == 0) { + type = static_cast(dictionary->device_type()); + } else { + DCHECK_EQ(type, static_cast(dictionary->device_type())); + } + } + + return type == 0 ? DeviceAllocationType::kCPU : static_cast(type); +} + // ---------------------------------------------------------------------- // Methods for ArraySpan diff --git a/cpp/src/arrow/array/data.h b/cpp/src/arrow/array/data.h index beec29789ad1e..0c49f36229a40 100644 --- a/cpp/src/arrow/array/data.h +++ b/cpp/src/arrow/array/data.h @@ -101,6 +101,11 @@ struct ARROW_EXPORT ArrayData { int64_t null_count = kUnknownNullCount, int64_t offset = 0) : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); +#ifndef NDEBUG + // in debug mode, call the `device_type` function to trigger + // the DCHECKs that validate all the buffers are on the same device + ARROW_UNUSED(this->device_type()); +#endif } ArrayData(std::shared_ptr type, int64_t length, @@ -110,6 +115,12 @@ struct ARROW_EXPORT ArrayData { : ArrayData(std::move(type), length, null_count, offset) { this->buffers = std::move(buffers); this->child_data = std::move(child_data); +#ifndef NDEBUG + // in debug mode, call the `device_type` function to trigger + // the DCHECKs that validate all the buffers (including children) + // are on the same device + ARROW_UNUSED(this->device_type()); +#endif } static std::shared_ptr Make(std::shared_ptr type, int64_t length, @@ -358,6 +369,16 @@ struct ARROW_EXPORT ArrayData { /// \see GetNullCount int64_t ComputeLogicalNullCount() const; + /// \brief Returns the device_type of the underlying buffers and children + /// + /// If there are no buffers in this ArrayData object, it just returns + /// DeviceAllocationType::kCPU as a default. We also assume that all buffers + /// should be allocated on the same device type and perform DCHECKs to confirm + /// this in debug mode. 
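+  ///
+  /// A minimal illustration, assuming an ordinary CPU-allocated array (the
+  /// names below are for exposition only):
+  ///
+  /// \code
+  /// std::shared_ptr<Array> arr = ArrayFromJSON(int32(), "[1, 2, 3]");
+  /// // Every buffer was allocated by the default CPU memory pool, so:
+  /// assert(arr->data()->device_type() == DeviceAllocationType::kCPU);
+  /// \endcode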
+ /// + /// \return DeviceAllocationType + DeviceAllocationType device_type() const; + std::shared_ptr type; int64_t length = 0; mutable std::atomic null_count{0}; diff --git a/cpp/src/arrow/array/util.cc b/cpp/src/arrow/array/util.cc index bdba92c9a11fb..41cd6a1c0b260 100644 --- a/cpp/src/arrow/array/util.cc +++ b/cpp/src/arrow/array/util.cc @@ -548,7 +548,7 @@ class NullArrayFactory { } Status Visit(const StructType& type) { - for (int i = 0; i < type_->num_fields(); ++i) { + for (int i = 0; i < type.num_fields(); ++i) { ARROW_ASSIGN_OR_RAISE(out_->child_data[i], CreateChild(type, i, length_)); } return Status::OK(); diff --git a/cpp/src/arrow/c/bridge.cc b/cpp/src/arrow/c/bridge.cc index 8a530b3798d41..afb664c3bc258 100644 --- a/cpp/src/arrow/c/bridge.cc +++ b/cpp/src/arrow/c/bridge.cc @@ -1059,8 +1059,14 @@ struct SchemaImporter { ARROW_ASSIGN_OR_RAISE( type_, registered_ext_type->Deserialize(std::move(type_), metadata_.extension_serialized)); - RETURN_NOT_OK(metadata_.metadata->DeleteMany( - {metadata_.extension_name_index, metadata_.extension_serialized_index})); + // If metadata is present, delete both metadata keys (otherwise, just remove + // the extension name key) + if (metadata_.extension_serialized_index >= 0) { + RETURN_NOT_OK(metadata_.metadata->DeleteMany( + {metadata_.extension_name_index, metadata_.extension_serialized_index})); + } else { + RETURN_NOT_OK(metadata_.metadata->Delete(metadata_.extension_name_index)); + } } } @@ -1448,6 +1454,7 @@ namespace { // The ArrowArray is released on destruction. struct ImportedArrayData { struct ArrowArray array_; + DeviceAllocationType device_type_; std::shared_ptr device_sync_; ImportedArrayData() { @@ -1514,6 +1521,7 @@ struct ArrayImporter { recursion_level_ = 0; import_ = std::make_shared(); c_struct_ = &import_->array_; + import_->device_type_ = device_type_; ArrowArrayMove(src, c_struct_); return DoImport(); } @@ -1541,7 +1549,8 @@ struct ArrayImporter { "cannot be imported as RecordBatch"); } return RecordBatch::Make(std::move(schema), data_->length, - std::move(data_->child_data)); + std::move(data_->child_data), import_->device_type_, + import_->device_sync_); } Status ImportChild(const ArrayImporter* parent, struct ArrowArray* src) { @@ -1868,24 +1877,17 @@ struct ArrayImporter { template Status ImportStringValuesBuffer(int32_t offsets_buffer_id, int32_t buffer_id, int64_t byte_width = 1) { - if (device_type_ == DeviceAllocationType::kCPU) { - auto offsets = data_->GetValues(offsets_buffer_id); + int64_t buffer_size = 0; + if (c_struct_->length > 0) { + int64_t last_offset_value_offset = + (c_struct_->length + c_struct_->offset) * sizeof(OffsetType); + OffsetType last_offset_value; + RETURN_NOT_OK(MemoryManager::CopyBufferSliceToCPU( + data_->buffers[offsets_buffer_id], last_offset_value_offset, sizeof(OffsetType), + reinterpret_cast(&last_offset_value))); // Compute visible size of buffer - int64_t buffer_size = - (c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0; - return ImportBuffer(buffer_id, buffer_size); - } - - // we only need the value of the last offset so let's just copy that - // one value from device to host. - auto single_value_buf = - SliceBuffer(data_->buffers[offsets_buffer_id], - c_struct_->length * sizeof(OffsetType), sizeof(OffsetType)); - ARROW_ASSIGN_OR_RAISE( - auto cpubuf, Buffer::ViewOrCopy(single_value_buf, default_cpu_memory_manager())); - auto offsets = cpubuf->data_as(); - // Compute visible size of buffer - int64_t buffer_size = (c_struct_->length > 0) ? 
byte_width * offsets[0] : 0; + buffer_size = byte_width * last_offset_value; + } return ImportBuffer(buffer_id, buffer_size); } @@ -2041,6 +2043,23 @@ Status ExportStreamNext(const std::shared_ptr& src, int64_t i } } +// the int64_t i input here is unused, but exists simply to allow utilizing the +// overload of this with the version for ChunkedArrays. If we removed the int64_t +// from the signature despite it being unused, we wouldn't be able to leverage the +// overloading in the templated exporters. +Status ExportStreamNext(const std::shared_ptr& src, int64_t i, + struct ArrowDeviceArray* out_array) { + std::shared_ptr batch; + RETURN_NOT_OK(src->ReadNext(&batch)); + if (batch == nullptr) { + // End of stream + ArrowArrayMarkReleased(&out_array->array); + return Status::OK(); + } else { + return ExportDeviceRecordBatch(*batch, batch->GetSyncEvent(), out_array); + } +} + Status ExportStreamNext(const std::shared_ptr& src, int64_t i, struct ArrowArray* out_array) { if (i >= src->num_chunks()) { @@ -2052,8 +2071,27 @@ Status ExportStreamNext(const std::shared_ptr& src, int64_t i, } } -template +Status ExportStreamNext(const std::shared_ptr& src, int64_t i, + struct ArrowDeviceArray* out_array) { + if (i >= src->num_chunks()) { + // End of stream + ArrowArrayMarkReleased(&out_array->array); + return Status::OK(); + } else { + return ExportDeviceArray(*src->chunk(static_cast(i)), nullptr, out_array); + } +} + +template class ExportedArrayStream { + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: struct PrivateData { explicit PrivateData(std::shared_ptr reader) @@ -2067,13 +2105,13 @@ class ExportedArrayStream { ARROW_DISALLOW_COPY_AND_ASSIGN(PrivateData); }; - explicit ExportedArrayStream(struct ArrowArrayStream* stream) : stream_(stream) {} + explicit ExportedArrayStream(StreamType* stream) : stream_(stream) {} Status GetSchema(struct ArrowSchema* out_schema) { return ExportStreamSchema(reader(), out_schema); } - Status GetNext(struct ArrowArray* out_array) { + Status GetNext(ArrayType* out_array) { return ExportStreamNext(reader(), next_batch_num(), out_array); } @@ -2083,38 +2121,35 @@ class ExportedArrayStream { } void Release() { - if (ArrowArrayStreamIsReleased(stream_)) { + if (StreamTraits::IsReleasedFunc(stream_)) { return; } + DCHECK_NE(private_data(), nullptr); delete private_data(); - ArrowArrayStreamMarkReleased(stream_); + StreamTraits::MarkReleased(stream_); } // C-compatible callbacks - static int StaticGetSchema(struct ArrowArrayStream* stream, - struct ArrowSchema* out_schema) { + static int StaticGetSchema(StreamType* stream, struct ArrowSchema* out_schema) { ExportedArrayStream self{stream}; return self.ToCError(self.GetSchema(out_schema)); } - static int StaticGetNext(struct ArrowArrayStream* stream, - struct ArrowArray* out_array) { + static int StaticGetNext(StreamType* stream, ArrayType* out_array) { ExportedArrayStream self{stream}; return self.ToCError(self.GetNext(out_array)); } - static void StaticRelease(struct ArrowArrayStream* stream) { - ExportedArrayStream{stream}.Release(); - } + static void StaticRelease(StreamType* stream) { ExportedArrayStream{stream}.Release(); } - static const char* StaticGetLastError(struct ArrowArrayStream* stream) { + static const char* StaticGetLastError(StreamType* stream) { return ExportedArrayStream{stream}.GetLastError(); } - static Status Make(std::shared_ptr reader, struct 
ArrowArrayStream* out) { + static Status Make(std::shared_ptr reader, StreamType* out) { out->get_schema = ExportedArrayStream::StaticGetSchema; out->get_next = ExportedArrayStream::StaticGetNext; out->get_last_error = ExportedArrayStream::StaticGetLastError; @@ -2150,19 +2185,36 @@ class ExportedArrayStream { int64_t next_batch_num() { return private_data()->batch_num_++; } - struct ArrowArrayStream* stream_; + StreamType* stream_; }; } // namespace Status ExportRecordBatchReader(std::shared_ptr reader, struct ArrowArrayStream* out) { - return ExportedArrayStream::Make(std::move(reader), out); + memset(out, 0, sizeof(struct ArrowArrayStream)); + return ExportedArrayStream::Make(std::move(reader), out); } Status ExportChunkedArray(std::shared_ptr chunked_array, struct ArrowArrayStream* out) { - return ExportedArrayStream::Make(std::move(chunked_array), out); + memset(out, 0, sizeof(struct ArrowArrayStream)); + return ExportedArrayStream::Make(std::move(chunked_array), out); +} + +Status ExportDeviceRecordBatchReader(std::shared_ptr reader, + struct ArrowDeviceArrayStream* out) { + memset(out, 0, sizeof(struct ArrowDeviceArrayStream)); + out->device_type = static_cast(reader->device_type()); + return ExportedArrayStream::Make(std::move(reader), out); +} + +Status ExportDeviceChunkedArray(std::shared_ptr chunked_array, + DeviceAllocationType device_type, + struct ArrowDeviceArrayStream* out) { + memset(out, 0, sizeof(struct ArrowDeviceArrayStream)); + out->device_type = static_cast(device_type); + return ExportedArrayStream::Make(std::move(chunked_array), out); } ////////////////////////////////////////////////////////////////////////// @@ -2170,33 +2222,65 @@ Status ExportChunkedArray(std::shared_ptr chunked_array, namespace { +template class ArrayStreamReader { + protected: + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamReader(struct ArrowArrayStream* stream) { - ArrowArrayStreamMove(stream, &stream_); - DCHECK(!ArrowArrayStreamIsReleased(&stream_)); + explicit ArrayStreamReader(StreamType* stream, + const DeviceMemoryMapper mapper = DefaultDeviceMemoryMapper) + : mapper_{std::move(mapper)} { + StreamTraits::MoveFunc(stream, &stream_); + DCHECK(!StreamTraits::IsReleasedFunc(&stream_)); } ~ArrayStreamReader() { ReleaseStream(); } void ReleaseStream() { - if (!ArrowArrayStreamIsReleased(&stream_)) { - ArrowArrayStreamRelease(&stream_); - } - DCHECK(ArrowArrayStreamIsReleased(&stream_)); + // all our trait release funcs check IsReleased so we don't + // need to repeat it here + StreamTraits::ReleaseFunc(&stream_); + DCHECK(StreamTraits::IsReleasedFunc(&stream_)); } protected: - Status ReadNextArrayInternal(struct ArrowArray* array) { - ArrowArrayMarkReleased(array); + Status ReadNextArrayInternal(ArrayType* array) { + ArrayTraits::MarkReleased(array); Status status = StatusFromCError(stream_.get_next(&stream_, array)); - if (!status.ok() && !ArrowArrayIsReleased(array)) { - ArrowArrayRelease(array); + if (!status.ok()) { + ArrayTraits::ReleaseFunc(array); } return status; } + Result> ImportRecordBatchInternal( + struct ArrowArray* array, std::shared_ptr schema) { + return ImportRecordBatch(array, schema); + } + + Result> ImportRecordBatchInternal( + struct ArrowDeviceArray* array, std::shared_ptr schema) { + return ImportDeviceRecordBatch(array, schema, mapper_); + } + + Result> ImportArrayInternal( + struct ArrowArray* 
array, std::shared_ptr type) { + return ImportArray(array, type); + } + + Result> ImportArrayInternal( + struct ArrowDeviceArray* array, std::shared_ptr type) { + return ImportDeviceArray(array, type, mapper_); + } + Result> ReadSchema() { struct ArrowSchema c_schema = {}; ARROW_RETURN_NOT_OK( @@ -2214,19 +2298,19 @@ class ArrayStreamReader { } Status CheckNotReleased() { - if (ArrowArrayStreamIsReleased(&stream_)) { + if (StreamTraits::IsReleasedFunc(&stream_)) { return Status::Invalid( "Attempt to read from a stream that has already been closed"); - } else { - return Status::OK(); } + + return Status::OK(); } Status StatusFromCError(int errno_like) const { return StatusFromCError(&stream_, errno_like); } - static Status StatusFromCError(struct ArrowArrayStream* stream, int errno_like) { + static Status StatusFromCError(StreamType* stream, int errno_like) { if (ARROW_PREDICT_TRUE(errno_like == 0)) { return Status::OK(); } @@ -2250,70 +2334,102 @@ class ArrayStreamReader { return {code, last_error ? std::string(last_error) : ""}; } + DeviceAllocationType get_device_type() const { + if constexpr (IsDevice) { + return static_cast(stream_.device_type); + } else { + return DeviceAllocationType::kCPU; + } + } + private: - mutable struct ArrowArrayStream stream_; + mutable StreamType stream_; + const DeviceMemoryMapper mapper_; }; -class ArrayStreamBatchReader : public RecordBatchReader, public ArrayStreamReader { +template +class ArrayStreamBatchReader : public RecordBatchReader, + public ArrayStreamReader { + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamBatchReader(struct ArrowArrayStream* stream) - : ArrayStreamReader(stream) {} + explicit ArrayStreamBatchReader( + StreamType* stream, const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) + : ArrayStreamReader(stream, mapper) {} Status Init() { - ARROW_ASSIGN_OR_RAISE(schema_, ReadSchema()); + ARROW_ASSIGN_OR_RAISE(schema_, this->ReadSchema()); return Status::OK(); } std::shared_ptr schema() const override { return schema_; } Status ReadNext(std::shared_ptr* batch) override { - ARROW_RETURN_NOT_OK(CheckNotReleased()); + ARROW_RETURN_NOT_OK(this->CheckNotReleased()); - struct ArrowArray c_array; - ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + ArrayType c_array; + ARROW_RETURN_NOT_OK(this->ReadNextArrayInternal(&c_array)); - if (ArrowArrayIsReleased(&c_array)) { + if (ArrayTraits::IsReleasedFunc(&c_array)) { // End of stream batch->reset(); return Status::OK(); } else { - return ImportRecordBatch(&c_array, schema_).Value(batch); + return this->ImportRecordBatchInternal(&c_array, schema_).Value(batch); } } Status Close() override { - ReleaseStream(); + this->ReleaseStream(); return Status::OK(); } + DeviceAllocationType device_type() const override { return this->get_device_type(); } + private: std::shared_ptr schema_; }; -class ArrayStreamArrayReader : public ArrayStreamReader { +template +class ArrayStreamArrayReader : public ArrayStreamReader { + using StreamTraits = + std::conditional_t; + using StreamType = typename StreamTraits::CType; + using ArrayTraits = std::conditional_t; + using ArrayType = typename ArrayTraits::CType; + public: - explicit ArrayStreamArrayReader(struct ArrowArrayStream* stream) - : ArrayStreamReader(stream) {} + explicit ArrayStreamArrayReader( + StreamType* stream, const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) + : 
ArrayStreamReader(stream, mapper) {} Status Init() { - ARROW_ASSIGN_OR_RAISE(field_, ReadField()); + ARROW_ASSIGN_OR_RAISE(field_, this->ReadField()); return Status::OK(); } std::shared_ptr data_type() const { return field_->type(); } Status ReadNext(std::shared_ptr* array) { - ARROW_RETURN_NOT_OK(CheckNotReleased()); + ARROW_RETURN_NOT_OK(this->CheckNotReleased()); - struct ArrowArray c_array; - ARROW_RETURN_NOT_OK(ReadNextArrayInternal(&c_array)); + ArrayType c_array; + ARROW_RETURN_NOT_OK(this->ReadNextArrayInternal(&c_array)); - if (ArrowArrayIsReleased(&c_array)) { + if (ArrayTraits::IsReleasedFunc(&c_array)) { // End of stream array->reset(); return Status::OK(); } else { - return ImportArray(&c_array, field_->type()).Value(array); + return this->ImportArrayInternal(&c_array, field_->type()).Value(array); } } @@ -2321,30 +2437,35 @@ class ArrayStreamArrayReader : public ArrayStreamReader { std::shared_ptr field_; }; -} // namespace - -Result> ImportRecordBatchReader( - struct ArrowArrayStream* stream) { - if (ArrowArrayStreamIsReleased(stream)) { - return Status::Invalid("Cannot import released ArrowArrayStream"); +template > +Result> ImportReader( + typename StreamTraits::CType* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) { + if (StreamTraits::IsReleasedFunc(stream)) { + return Status::Invalid("Cannot import released Arrow Stream"); } - auto reader = std::make_shared(stream); + auto reader = std::make_shared>(stream, mapper); ARROW_RETURN_NOT_OK(reader->Init()); return reader; } -Result> ImportChunkedArray( - struct ArrowArrayStream* stream) { - if (ArrowArrayStreamIsReleased(stream)) { - return Status::Invalid("Cannot import released ArrowArrayStream"); +template > +Result> ImportChunked( + typename StreamTraits::CType* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper) { + if (StreamTraits::IsReleasedFunc(stream)) { + return Status::Invalid("Cannot import released Arrow Stream"); } - auto reader = std::make_shared(stream); + auto reader = std::make_shared>(stream, mapper); ARROW_RETURN_NOT_OK(reader->Init()); - std::shared_ptr data_type = reader->data_type(); - + auto data_type = reader->data_type(); ArrayVector chunks; std::shared_ptr chunk; while (true) { @@ -2360,4 +2481,26 @@ Result> ImportChunkedArray( return ChunkedArray::Make(std::move(chunks), std::move(data_type)); } +} // namespace + +Result> ImportRecordBatchReader( + struct ArrowArrayStream* stream) { + return ImportReader(stream); +} + +Result> ImportDeviceRecordBatchReader( + struct ArrowDeviceArrayStream* stream, const DeviceMemoryMapper& mapper) { + return ImportReader(stream, mapper); +} + +Result> ImportChunkedArray( + struct ArrowArrayStream* stream) { + return ImportChunked(stream); +} + +Result> ImportDeviceChunkedArray( + struct ArrowDeviceArrayStream* stream, const DeviceMemoryMapper& mapper) { + return ImportChunked(stream, mapper); +} + } // namespace arrow diff --git a/cpp/src/arrow/c/bridge.h b/cpp/src/arrow/c/bridge.h index 74a302be4c27d..45367e4f93062 100644 --- a/cpp/src/arrow/c/bridge.h +++ b/cpp/src/arrow/c/bridge.h @@ -321,6 +321,31 @@ ARROW_EXPORT Status ExportChunkedArray(std::shared_ptr chunked_array, struct ArrowArrayStream* out); +/// \brief Export C++ RecordBatchReader using the C device stream interface +/// +/// The resulting ArrowDeviceArrayStream struct keeps the record batch reader +/// alive until its release callback is called by the consumer. The device +/// type is determined by calling device_type() on the RecordBatchReader. 
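+///
+/// A producer-side sketch (assumes a Status-returning caller; how `reader` is
+/// obtained is left open):
+///
+/// \code
+/// std::shared_ptr<RecordBatchReader> reader = ...;  // batches on one device
+/// struct ArrowDeviceArrayStream c_stream;
+/// ARROW_RETURN_NOT_OK(ExportDeviceRecordBatchReader(reader, &c_stream));
+/// // c_stream.device_type now mirrors reader->device_type(); the consumer
+/// // must eventually invoke the stream's release callback.
+/// \endcode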
+/// +/// \param[in] reader RecordBatchReader object to export +/// \param[out] out C struct to export the stream to +ARROW_EXPORT +Status ExportDeviceRecordBatchReader(std::shared_ptr reader, + struct ArrowDeviceArrayStream* out); + +/// \brief Export C++ ChunkedArray using the C device data interface format. +/// +/// The resulting ArrowDeviceArrayStream keeps the chunked array data and buffers +/// alive until its release callback is called by the consumer. +/// +/// \param[in] chunked_array ChunkedArray object to export +/// \param[in] device_type the device type the data is located on +/// \param[out] out C struct to export the stream to +ARROW_EXPORT +Status ExportDeviceChunkedArray(std::shared_ptr chunked_array, + DeviceAllocationType device_type, + struct ArrowDeviceArrayStream* out); + /// \brief Import C++ RecordBatchReader from the C stream interface. /// /// The ArrowArrayStream struct has its contents moved to a private object @@ -343,6 +368,42 @@ Result> ImportRecordBatchReader( ARROW_EXPORT Result> ImportChunkedArray(struct ArrowArrayStream* stream); +/// \brief Import C++ RecordBatchReader from the C device stream interface +/// +/// The ArrowDeviceArrayStream struct has its contents moved to a private object +/// held alive by the resulting record batch reader. +/// +/// \note If there was a required sync event, sync events are accessible by individual +/// buffers of columns. We are not yet bubbling the sync events from the buffers up to +/// the `GetSyncEvent` method of an imported RecordBatch. This will be added in a future +/// update. +/// +/// \param[in,out] stream C device stream interface struct +/// \param[in] mapper mapping from device type and ID to memory manager +/// \return Imported RecordBatchReader object +ARROW_EXPORT +Result> ImportDeviceRecordBatchReader( + struct ArrowDeviceArrayStream* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); + +/// \brief Import C++ ChunkedArray from the C device stream interface +/// +/// The ArrowDeviceArrayStream struct has its contents moved to a private object, +/// is consumed in its entirety, and released before returning all chunks as a +/// ChunkedArray. +/// +/// \note Any chunks that require synchronization for their device memory will have +/// the SyncEvent objects available by checking the individual buffers of each chunk. +/// These SyncEvents should be checked before accessing the data in those buffers. 
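+///
+/// A consumer-side sketch, assuming a Status-returning caller and a `c_stream`
+/// filled in by a device-aware producer (e.g. ExportDeviceChunkedArray above):
+///
+/// \code
+/// struct ArrowDeviceArrayStream c_stream;  // populated by the producer
+/// ARROW_ASSIGN_OR_RAISE(std::shared_ptr<ChunkedArray> chunked,
+///                       ImportDeviceChunkedArray(&c_stream));
+/// // The stream has been fully consumed and released; any required sync
+/// // events remain attached to the individual chunk buffers.
+/// \endcode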
+/// +/// \param[in,out] stream C device stream interface struct +/// \param[in] mapper mapping from device type and ID to memory manager +/// \return Imported ChunkedArray object +ARROW_EXPORT +Result> ImportDeviceChunkedArray( + struct ArrowDeviceArrayStream* stream, + const DeviceMemoryMapper& mapper = DefaultDeviceMemoryMapper); + /// @} } // namespace arrow diff --git a/cpp/src/arrow/c/bridge_test.cc b/cpp/src/arrow/c/bridge_test.cc index d64fe67accde0..e3ec262422ba6 100644 --- a/cpp/src/arrow/c/bridge_test.cc +++ b/cpp/src/arrow/c/bridge_test.cc @@ -53,11 +53,15 @@ namespace arrow { +using internal::ArrayDeviceExportTraits; +using internal::ArrayDeviceStreamExportTraits; using internal::ArrayExportGuard; using internal::ArrayExportTraits; using internal::ArrayStreamExportGuard; using internal::ArrayStreamExportTraits; using internal::checked_cast; +using internal::DeviceArrayExportGuard; +using internal::DeviceArrayStreamExportGuard; using internal::SchemaExportGuard; using internal::SchemaExportTraits; using internal::Zip; @@ -4098,6 +4102,23 @@ TEST_F(TestArrayRoundtrip, RegisteredExtension) { TestWithArrayFactory(NestedFactory(ExampleDictExtension)); } +TEST_F(TestArrayRoundtrip, RegisteredExtensionNoMetadata) { + auto ext_type = std::make_shared(); + ExtensionTypeGuard guard(ext_type); + + auto ext_metadata = + KeyValueMetadata::Make({"ARROW:extension:name"}, {ext_type->extension_name()}); + auto ext_field = field("", ext_type->storage_type(), true, std::move(ext_metadata)); + + struct ArrowSchema c_schema {}; + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK(ExportField(*ext_field, &c_schema)); + + ASSERT_OK_AND_ASSIGN(auto ext_type_roundtrip, ImportType(&c_schema)); + ASSERT_EQ(ext_type_roundtrip->id(), Type::EXTENSION); + AssertTypeEqual(ext_type_roundtrip, ext_type); +} + TEST_F(TestArrayRoundtrip, UnregisteredExtension) { auto StorageExtractor = [](ArrayFactory factory) { return [factory]() -> Result> { @@ -4746,4 +4767,516 @@ TEST_F(TestArrayStreamRoundtrip, ChunkedArrayRoundtripEmpty) { }); } +//////////////////////////////////////////////////////////////////////////// +// Array device stream export tests + +class TestArrayDeviceStreamExport : public BaseArrayStreamTest { + public: + void AssertStreamSchema(struct ArrowDeviceArrayStream* c_stream, + const Schema& expected) { + struct ArrowSchema c_schema; + ASSERT_EQ(0, c_stream->get_schema(c_stream, &c_schema)); + + SchemaExportGuard schema_guard(&c_schema); + ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema)); + ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); + AssertSchemaEqual(expected, *schema, /*check_metadata=*/true); + } + + void AssertStreamEnd(struct ArrowDeviceArrayStream* c_stream) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_TRUE(ArrowDeviceArrayIsReleased(&c_array)); + } + + void AssertStreamNext(struct ArrowDeviceArrayStream* c_stream, + const RecordBatch& expected) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_FALSE(ArrowDeviceArrayIsReleased(&c_array)); + + ASSERT_OK_AND_ASSIGN(auto batch, + ImportDeviceRecordBatch(&c_array, expected.schema(), + TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(expected, *batch); + } + + void AssertStreamNext(struct ArrowDeviceArrayStream* c_stream, const Array& expected) { + struct ArrowDeviceArray c_array; + ASSERT_EQ(0, 
c_stream->get_next(c_stream, &c_array)); + + DeviceArrayExportGuard guard(&c_array); + ASSERT_FALSE(ArrowDeviceArrayIsReleased(&c_array)); + + ASSERT_OK_AND_ASSIGN(auto array, + ImportDeviceArray(&c_array, expected.type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + AssertArraysEqual(expected, *array); + } + + static Result> ToDeviceData( + const std::shared_ptr& mm, const ArrayData& data) { + arrow::BufferVector buffers; + for (const auto& buf : data.buffers) { + if (buf) { + ARROW_ASSIGN_OR_RAISE(auto dest, mm->CopyBuffer(buf, mm)); + buffers.push_back(dest); + } else { + buffers.push_back(nullptr); + } + } + + arrow::ArrayDataVector children; + for (const auto& child : data.child_data) { + ARROW_ASSIGN_OR_RAISE(auto dest, ToDeviceData(mm, *child)); + children.push_back(dest); + } + + return ArrayData::Make(data.type, data.length, buffers, children, data.null_count, + data.offset); + } + + static Result> ToDevice(const std::shared_ptr& mm, + const ArrayData& data) { + ARROW_ASSIGN_OR_RAISE(auto result, ToDeviceData(mm, data)); + return MakeArray(result); + } +}; + +TEST_F(TestArrayDeviceStreamExport, Empty) { + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {}); + ASSERT_OK_AND_ASSIGN( + auto reader, + RecordBatchReader::Make(batches, schema, + static_cast(kMyDeviceType))); + + struct ArrowDeviceArrayStream c_stream; + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + AssertStreamSchema(&c_stream, *schema); + AssertStreamEnd(&c_stream); + AssertStreamEnd(&c_stream); +} + +TEST_F(TestArrayDeviceStreamExport, Simple) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN(auto reader, + RecordBatchReader::Make(batches, schema, device->device_type())); + + struct ArrowDeviceArrayStream c_stream; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + AssertStreamSchema(&c_stream, *schema); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + AssertStreamNext(&c_stream, *batches[0]); + AssertStreamNext(&c_stream, *batches[1]); + AssertStreamEnd(&c_stream); + AssertStreamEnd(&c_stream); +} + +TEST_F(TestArrayDeviceStreamExport, ArrayLifetime) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN(auto reader, + RecordBatchReader::Make(batches, schema, device->device_type())); + + struct ArrowDeviceArrayStream 
c_stream; + struct ArrowSchema c_schema; + struct ArrowDeviceArray c_array0, c_array1; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array0)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array1)); + AssertStreamEnd(&c_stream); + } + + DeviceArrayExportGuard guard0(&c_array0), guard1(&c_array1); + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_schema, ImportSchema(&c_schema)); + AssertSchemaEqual(*schema, *got_schema, /*check_metadata=*/true); + } + + ASSERT_EQ(kMyDeviceType, c_array0.device_type); + ASSERT_EQ(kMyDeviceType, c_array1.device_type); + + ASSERT_GT(pool_->bytes_allocated(), orig_allocated_); + ASSERT_OK_AND_ASSIGN( + auto batch, + ImportDeviceRecordBatch(&c_array1, schema, TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(*batches[1], *batch); + ASSERT_EQ(device->device_type(), batch->device_type()); + ASSERT_OK_AND_ASSIGN( + batch, + ImportDeviceRecordBatch(&c_array0, schema, TestDeviceArrayRoundtrip::DeviceMapper)); + AssertBatchesEqual(*batches[0], *batch); + ASSERT_EQ(device->device_type(), batch->device_type()); +} + +TEST_F(TestArrayDeviceStreamExport, Errors) { + auto reader = + std::make_shared(Status::Invalid("some example error")); + + struct ArrowDeviceArrayStream c_stream; + + ASSERT_OK(ExportDeviceRecordBatchReader(reader, &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + struct ArrowSchema c_schema; + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_FALSE(ArrowSchemaIsReleased(&c_schema)); + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto schema, ImportSchema(&c_schema)); + AssertSchemaEqual(schema, arrow::schema({}), /*check_metadata=*/true); + } + + struct ArrowDeviceArray c_array; + ASSERT_EQ(EINVAL, c_stream.get_next(&c_stream, &c_array)); +} + +TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExportEmpty) { + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({}, int32())); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + + ASSERT_OK(ExportDeviceChunkedArray( + chunked_array, static_cast(kMyDeviceType), &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + AssertStreamEnd(&c_stream); + } + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } +} + +TEST_F(TestArrayDeviceStreamExport, ChunkedArrayExport) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + + ASSERT_OK_AND_ASSIGN(auto chunked_array, ChunkedArray::Make({arr1, arr2})); + + struct ArrowDeviceArrayStream c_stream; + struct ArrowSchema c_schema; + struct ArrowDeviceArray 
c_array0, c_array1; + + ASSERT_OK(ExportDeviceChunkedArray(chunked_array, device->device_type(), &c_stream)); + DeviceArrayStreamExportGuard guard(&c_stream); + + { + DeviceArrayStreamExportGuard guard(&c_stream); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + ASSERT_EQ(0, c_stream.get_schema(&c_stream, &c_schema)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array0)); + ASSERT_EQ(0, c_stream.get_next(&c_stream, &c_array1)); + AssertStreamEnd(&c_stream); + } + + DeviceArrayExportGuard guard0(&c_array0), guard1(&c_array1); + + { + SchemaExportGuard schema_guard(&c_schema); + ASSERT_OK_AND_ASSIGN(auto got_type, ImportType(&c_schema)); + AssertTypeEqual(*chunked_array->type(), *got_type); + } + + ASSERT_EQ(kMyDeviceType, c_array0.device_type); + ASSERT_EQ(kMyDeviceType, c_array1.device_type); + + ASSERT_GT(pool_->bytes_allocated(), orig_allocated_); + ASSERT_OK_AND_ASSIGN(auto array, + ImportDeviceArray(&c_array0, chunked_array->type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + ASSERT_EQ(device->device_type(), array->device_type()); + AssertArraysEqual(*chunked_array->chunk(0), *array); + ASSERT_OK_AND_ASSIGN(array, ImportDeviceArray(&c_array1, chunked_array->type(), + TestDeviceArrayRoundtrip::DeviceMapper)); + ASSERT_EQ(device->device_type(), array->device_type()); + AssertArraysEqual(*chunked_array->chunk(1), *array); +} + +//////////////////////////////////////////////////////////////////////////// +// Array device stream roundtrip tests + +class TestArrayDeviceStreamRoundtrip : public BaseArrayStreamTest { + public: + static Result> ToDeviceData( + const std::shared_ptr& mm, const ArrayData& data) { + arrow::BufferVector buffers; + for (const auto& buf : data.buffers) { + if (buf) { + ARROW_ASSIGN_OR_RAISE(auto dest, mm->CopyBuffer(buf, mm)); + buffers.push_back(dest); + } else { + buffers.push_back(nullptr); + } + } + + arrow::ArrayDataVector children; + for (const auto& child : data.child_data) { + ARROW_ASSIGN_OR_RAISE(auto dest, ToDeviceData(mm, *child)); + children.push_back(dest); + } + + return ArrayData::Make(data.type, data.length, buffers, children, data.null_count, + data.offset); + } + + static Result> ToDevice(const std::shared_ptr& mm, + const ArrayData& data) { + ARROW_ASSIGN_OR_RAISE(auto result, ToDeviceData(mm, data)); + return MakeArray(result); + } + + void Roundtrip(std::shared_ptr* reader, + struct ArrowDeviceArrayStream* c_stream) { + ASSERT_OK(ExportDeviceRecordBatchReader(*reader, c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(c_stream)); + + ASSERT_OK_AND_ASSIGN( + auto got_reader, + ImportDeviceRecordBatchReader(c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + *reader = std::move(got_reader); + } + + void Roundtrip( + std::shared_ptr reader, + std::function&)> check_func) { + ArrowDeviceArrayStream c_stream; + + // NOTE: ReleaseCallback<> is not immediately usable with ArrowDeviceArayStream + // because get_next and get_schema need the original private_data. 
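+    // Instead, lifetime is tracked with a weak_ptr to the reader: it must stay
+    // alive while the imported reader is in use and expire once that reader is
+    // destroyed, which the expiration checks below assert.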
+ std::weak_ptr weak_reader(reader); + ASSERT_EQ(weak_reader.use_count(), 1); // Expiration check will fail otherwise + + ASSERT_OK(ExportDeviceRecordBatchReader(std::move(reader), &c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + { + ASSERT_OK_AND_ASSIGN(auto new_reader, + ImportDeviceRecordBatchReader( + &c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + // stream was moved + ASSERT_TRUE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_FALSE(weak_reader.expired()); + + check_func(new_reader); + } + // Stream was released when `new_reader` was destroyed + ASSERT_TRUE(weak_reader.expired()); + } + + void Roundtrip(std::shared_ptr src, + std::function&)> check_func) { + ArrowDeviceArrayStream c_stream; + + // One original copy to compare the result, one copy held by the stream + std::weak_ptr weak_src(src); + int64_t initial_use_count = weak_src.use_count(); + + ASSERT_OK(ExportDeviceChunkedArray( + std::move(src), static_cast(kMyDeviceType), &c_stream)); + ASSERT_FALSE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + ASSERT_EQ(kMyDeviceType, c_stream.device_type); + + { + ASSERT_OK_AND_ASSIGN( + auto dst, + ImportDeviceChunkedArray(&c_stream, TestDeviceArrayRoundtrip::DeviceMapper)); + // Stream was moved, consumed, and released + ASSERT_TRUE(ArrowDeviceArrayStreamIsReleased(&c_stream)); + + // Stream was released by ImportDeviceChunkedArray but original copy remains + ASSERT_EQ(weak_src.use_count(), initial_use_count - 1); + + check_func(dst); + } + } + + void AssertReaderNext(const std::shared_ptr& reader, + const RecordBatch& expected) { + ASSERT_OK_AND_ASSIGN(auto batch, reader->Next()); + ASSERT_NE(batch, nullptr); + ASSERT_EQ(static_cast(kMyDeviceType), batch->device_type()); + AssertBatchesEqual(expected, *batch); + } + + void AssertReaderEnd(const std::shared_ptr& reader) { + ASSERT_OK_AND_ASSIGN(auto batch, reader->Next()); + ASSERT_EQ(batch, nullptr); + } + + void AssertReaderClosed(const std::shared_ptr& reader) { + ASSERT_THAT(reader->Next(), + Raises(StatusCode::Invalid, ::testing::HasSubstr("already been closed"))); + } + + void AssertReaderClose(const std::shared_ptr& reader) { + ASSERT_OK(reader->Close()); + AssertReaderClosed(reader); + } +}; + +TEST_F(TestArrayDeviceStreamRoundtrip, Simple) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto orig_schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(orig_schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN( + auto reader, RecordBatchReader::Make(batches, orig_schema, device->device_type())); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + AssertSchemaEqual(*orig_schema, *reader->schema(), /*check_metadata=*/true); + AssertReaderNext(reader, *batches[0]); + AssertReaderNext(reader, *batches[1]); + AssertReaderEnd(reader); + AssertReaderEnd(reader); + AssertReaderClose(reader); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, CloseEarly) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + 
ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + auto orig_schema = arrow::schema({field("ints", int32())}); + auto batches = MakeBatches(orig_schema, {arr1, arr2}); + ASSERT_OK_AND_ASSIGN( + auto reader, RecordBatchReader::Make(batches, orig_schema, device->device_type())); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + AssertReaderNext(reader, *batches[0]); + AssertReaderClose(reader); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, Errors) { + auto reader = std::make_shared( + Status::Invalid("roundtrip error example")); + + Roundtrip(std::move(reader), [&](const std::shared_ptr& reader) { + EXPECT_THAT(reader->Next(), Raises(StatusCode::Invalid, + ::testing::HasSubstr("roundtrip error example"))); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, SchemaError) { + struct ArrowDeviceArrayStream stream = {}; + stream.get_last_error = [](struct ArrowDeviceArrayStream* stream) { + return "Expected error"; + }; + stream.get_schema = [](struct ArrowDeviceArrayStream* stream, + struct ArrowSchema* schema) { return EIO; }; + stream.get_next = [](struct ArrowDeviceArrayStream* stream, + struct ArrowDeviceArray* array) { return EINVAL; }; + stream.release = [](struct ArrowDeviceArrayStream* stream) { + *static_cast(stream->private_data) = true; + std::memset(stream, 0, sizeof(*stream)); + }; + bool released = false; + stream.private_data = &released; + + EXPECT_RAISES_WITH_MESSAGE_THAT(IOError, ::testing::HasSubstr("Expected error"), + ImportDeviceRecordBatchReader(&stream)); + ASSERT_TRUE(released); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtrip) { + std::shared_ptr device = std::make_shared(1); + auto mm = device->default_memory_manager(); + + ASSERT_OK_AND_ASSIGN(auto arr1, + ToDevice(mm, *ArrayFromJSON(int32(), "[1, 2]")->data())); + ASSERT_EQ(device->device_type(), arr1->device_type()); + ASSERT_OK_AND_ASSIGN(auto arr2, + ToDevice(mm, *ArrayFromJSON(int32(), "[4, 5, null]")->data())); + ASSERT_EQ(device->device_type(), arr2->device_type()); + + ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({arr1, arr2})); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + +TEST_F(TestArrayDeviceStreamRoundtrip, ChunkedArrayRoundtripEmpty) { + ASSERT_OK_AND_ASSIGN(auto src, ChunkedArray::Make({}, int32())); + + Roundtrip(src, [&](const std::shared_ptr& dst) { + AssertTypeEqual(*dst->type(), *src->type()); + AssertChunkedEqual(*dst, *src); + }); +} + } // namespace arrow diff --git a/cpp/src/arrow/c/helpers.h b/cpp/src/arrow/c/helpers.h index a24f272feac81..6e4df17f43ebf 100644 --- a/cpp/src/arrow/c/helpers.h +++ b/cpp/src/arrow/c/helpers.h @@ -17,6 +17,7 @@ #pragma once +#include #include #include #include @@ -70,9 +71,17 @@ inline int ArrowArrayIsReleased(const struct ArrowArray* array) { return array->release == NULL; } +inline int ArrowDeviceArrayIsReleased(const struct ArrowDeviceArray* array) { + return ArrowArrayIsReleased(&array->array); +} + /// Mark the C array released (for use in release callbacks) inline void ArrowArrayMarkReleased(struct ArrowArray* array) { array->release = NULL; } +inline void ArrowDeviceArrayMarkReleased(struct ArrowDeviceArray* array) { + ArrowArrayMarkReleased(&array->array); +} + /// Move the C array from `src` to `dest` /// /// Note `dest` must *not* point to a valid array already, otherwise there @@ -84,6 +93,14 @@ inline 
void ArrowArrayMove(struct ArrowArray* src, struct ArrowArray* dest) { ArrowArrayMarkReleased(src); } +inline void ArrowDeviceArrayMove(struct ArrowDeviceArray* src, + struct ArrowDeviceArray* dest) { + assert(dest != src); + assert(!ArrowDeviceArrayIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowDeviceArray)); + ArrowDeviceArrayMarkReleased(src); +} + /// Release the C array, if necessary, by calling its release callback inline void ArrowArrayRelease(struct ArrowArray* array) { if (!ArrowArrayIsReleased(array)) { @@ -93,16 +110,32 @@ inline void ArrowArrayRelease(struct ArrowArray* array) { } } +inline void ArrowDeviceArrayRelease(struct ArrowDeviceArray* array) { + if (!ArrowDeviceArrayIsReleased(array)) { + array->array.release(&array->array); + ARROW_C_ASSERT(ArrowDeviceArrayIsReleased(array), + "ArrowDeviceArrayRelease did not cleanup release callback"); + } +} + /// Query whether the C array stream is released inline int ArrowArrayStreamIsReleased(const struct ArrowArrayStream* stream) { return stream->release == NULL; } +inline int ArrowDeviceArrayStreamIsReleased(const struct ArrowDeviceArrayStream* stream) { + return stream->release == NULL; +} + /// Mark the C array stream released (for use in release callbacks) inline void ArrowArrayStreamMarkReleased(struct ArrowArrayStream* stream) { stream->release = NULL; } +inline void ArrowDeviceArrayStreamMarkReleased(struct ArrowDeviceArrayStream* stream) { + stream->release = NULL; +} + /// Move the C array stream from `src` to `dest` /// /// Note `dest` must *not* point to a valid stream already, otherwise there @@ -115,6 +148,14 @@ inline void ArrowArrayStreamMove(struct ArrowArrayStream* src, ArrowArrayStreamMarkReleased(src); } +inline void ArrowDeviceArrayStreamMove(struct ArrowDeviceArrayStream* src, + struct ArrowDeviceArrayStream* dest) { + assert(dest != src); + assert(!ArrowDeviceArrayStreamIsReleased(src)); + memcpy(dest, src, sizeof(struct ArrowDeviceArrayStream)); + ArrowDeviceArrayStreamMarkReleased(src); +} + /// Release the C array stream, if necessary, by calling its release callback inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { if (!ArrowArrayStreamIsReleased(stream)) { @@ -124,6 +165,14 @@ inline void ArrowArrayStreamRelease(struct ArrowArrayStream* stream) { } } +inline void ArrowDeviceArrayStreamRelease(struct ArrowDeviceArrayStream* stream) { + if (!ArrowDeviceArrayStreamIsReleased(stream)) { + stream->release(stream); + ARROW_C_ASSERT(ArrowDeviceArrayStreamIsReleased(stream), + "ArrowDeviceArrayStreamRelease did not cleanup release callback"); + } +} + #ifdef __cplusplus } #endif diff --git a/cpp/src/arrow/c/util_internal.h b/cpp/src/arrow/c/util_internal.h index 6a33be9b0da8e..dc0e25710e987 100644 --- a/cpp/src/arrow/c/util_internal.h +++ b/cpp/src/arrow/c/util_internal.h @@ -32,12 +32,32 @@ struct ArrayExportTraits { typedef struct ArrowArray CType; static constexpr auto IsReleasedFunc = &ArrowArrayIsReleased; static constexpr auto ReleaseFunc = &ArrowArrayRelease; + static constexpr auto MoveFunc = &ArrowArrayMove; + static constexpr auto MarkReleased = &ArrowArrayMarkReleased; +}; + +struct ArrayDeviceExportTraits { + typedef struct ArrowDeviceArray CType; + static constexpr auto IsReleasedFunc = &ArrowDeviceArrayIsReleased; + static constexpr auto ReleaseFunc = &ArrowDeviceArrayRelease; + static constexpr auto MoveFunc = &ArrowDeviceArrayMove; + static constexpr auto MarkReleased = &ArrowDeviceArrayMarkReleased; }; struct ArrayStreamExportTraits { typedef struct 
ArrowArrayStream CType; static constexpr auto IsReleasedFunc = &ArrowArrayStreamIsReleased; static constexpr auto ReleaseFunc = &ArrowArrayStreamRelease; + static constexpr auto MoveFunc = &ArrowArrayStreamMove; + static constexpr auto MarkReleased = &ArrowArrayStreamMarkReleased; +}; + +struct ArrayDeviceStreamExportTraits { + typedef struct ArrowDeviceArrayStream CType; + static constexpr auto IsReleasedFunc = &ArrowDeviceArrayStreamIsReleased; + static constexpr auto ReleaseFunc = &ArrowDeviceArrayStreamRelease; + static constexpr auto MoveFunc = &ArrowDeviceArrayStreamMove; + static constexpr auto MarkReleased = &ArrowDeviceArrayStreamMarkReleased; }; // A RAII-style object to release a C Array / Schema struct at block scope exit. @@ -79,7 +99,9 @@ class ExportGuard { using SchemaExportGuard = ExportGuard; using ArrayExportGuard = ExportGuard; +using DeviceArrayExportGuard = ExportGuard; using ArrayStreamExportGuard = ExportGuard; +using DeviceArrayStreamExportGuard = ExportGuard; } // namespace internal } // namespace arrow diff --git a/cpp/src/arrow/chunk_resolver.cc b/cpp/src/arrow/chunk_resolver.cc index 29bccb52658f8..55eec53ced1c7 100644 --- a/cpp/src/arrow/chunk_resolver.cc +++ b/cpp/src/arrow/chunk_resolver.cc @@ -19,14 +19,14 @@ #include #include +#include #include #include #include "arrow/array.h" #include "arrow/record_batch.h" -namespace arrow { -namespace internal { +namespace arrow::internal { namespace { template @@ -54,6 +54,51 @@ inline std::vector MakeChunksOffsets(const std::vector& chunks) { offsets[chunks.size()] = offset; return offsets; } + +/// \pre all the pre-conditions of ChunkResolver::ResolveMany() +/// \pre num_offsets - 1 <= std::numeric_limits::max() +template +void ResolveManyInline(size_t num_offsets, const int64_t* signed_offsets, + int64_t n_indices, const IndexType* logical_index_vec, + IndexType* out_chunk_index_vec, IndexType chunk_hint, + IndexType* out_index_in_chunk_vec) { + auto* offsets = reinterpret_cast(signed_offsets); + const auto num_chunks = static_cast(num_offsets - 1); + // chunk_hint in [0, num_offsets) per the precondition. + for (int64_t i = 0; i < n_indices; i++) { + const auto index = static_cast(logical_index_vec[i]); + if (index >= offsets[chunk_hint] && + (chunk_hint == num_chunks || index < offsets[chunk_hint + 1])) { + out_chunk_index_vec[i] = chunk_hint; // hint is correct! + continue; + } + // lo < hi is guaranteed by `num_offsets = chunks.size() + 1` + auto chunk_index = + ChunkResolver::Bisect(index, offsets, /*lo=*/0, /*hi=*/num_offsets); + chunk_hint = static_cast(chunk_index); + out_chunk_index_vec[i] = chunk_hint; + } + if (out_index_in_chunk_vec != NULLPTR) { + for (int64_t i = 0; i < n_indices; i++) { + auto logical_index = logical_index_vec[i]; + auto chunk_index = out_chunk_index_vec[i]; + // chunk_index is in [0, chunks.size()] no matter what the + // value of logical_index is, so it's always safe to dereference + // offset_ as it contains chunks.size()+1 values. + out_index_in_chunk_vec[i] = + logical_index - static_cast(offsets[chunk_index]); +#if defined(ARROW_VALGRIND) || defined(ADDRESS_SANITIZER) + // Make it more likely that Valgrind/ASAN can catch an invalid memory + // access by poisoning out_index_in_chunk_vec[i] when the logical + // index is out-of-bounds. 
+ if (chunk_index == num_chunks) { + out_index_in_chunk_vec[i] = std::numeric_limits::max(); + } +#endif + } + } +} + } // namespace ChunkResolver::ChunkResolver(const ArrayVector& chunks) noexcept @@ -84,5 +129,32 @@ ChunkResolver& ChunkResolver::operator=(const ChunkResolver& other) noexcept { return *this; } -} // namespace internal -} // namespace arrow +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint8_t* logical_index_vec, + uint8_t* out_chunk_index_vec, uint8_t chunk_hint, + uint8_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint32_t* logical_index_vec, + uint32_t* out_chunk_index_vec, uint32_t chunk_hint, + uint32_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint16_t* logical_index_vec, + uint16_t* out_chunk_index_vec, uint16_t chunk_hint, + uint16_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +void ChunkResolver::ResolveManyImpl(int64_t n_indices, const uint64_t* logical_index_vec, + uint64_t* out_chunk_index_vec, uint64_t chunk_hint, + uint64_t* out_index_in_chunk_vec) const { + ResolveManyInline(offsets_.size(), offsets_.data(), n_indices, logical_index_vec, + out_chunk_index_vec, chunk_hint, out_index_in_chunk_vec); +} + +} // namespace arrow::internal diff --git a/cpp/src/arrow/chunk_resolver.h b/cpp/src/arrow/chunk_resolver.h index c5dad1a17b18e..a2a3d5a864243 100644 --- a/cpp/src/arrow/chunk_resolver.h +++ b/cpp/src/arrow/chunk_resolver.h @@ -20,6 +20,8 @@ #include #include #include +#include +#include #include #include "arrow/type_fwd.h" @@ -27,6 +29,8 @@ namespace arrow::internal { +struct ChunkResolver; + struct ChunkLocation { /// \brief Index of the chunk in the array of chunks /// @@ -36,8 +40,17 @@ struct ChunkLocation { /// \brief Index of the value in the chunk /// - /// The value is undefined if chunk_index >= chunks.size() + /// The value is UNDEFINED if chunk_index >= chunks.size() int64_t index_in_chunk = 0; + + ChunkLocation() = default; + + ChunkLocation(int64_t chunk_index, int64_t index_in_chunk) + : chunk_index(chunk_index), index_in_chunk(index_in_chunk) {} + + bool operator==(ChunkLocation other) const { + return chunk_index == other.chunk_index && index_in_chunk == other.index_in_chunk; + } }; /// \brief An utility that incrementally resolves logical indices into @@ -60,12 +73,35 @@ struct ARROW_EXPORT ChunkResolver { explicit ChunkResolver(const std::vector& chunks) noexcept; explicit ChunkResolver(const RecordBatchVector& batches) noexcept; + /// \brief Construct a ChunkResolver from a vector of chunks.size() + 1 offsets. + /// + /// The first offset must be 0 and the last offset must be the logical length of the + /// chunked array. Each offset before the last represents the starting logical index of + /// the corresponding chunk. 
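To make that offsets convention concrete, a small illustrative sketch (the values mirror the `{0, 2, 3, 10}` resolver used by the new tests in chunked_array_test.cc below; this is commentary, not part of the header, and assumes <array> plus the declarations in this file):

  // Offsets {0, 2, 3, 10} describe three chunks of lengths 2, 1 and 7.
  ChunkResolver resolver(std::vector<int64_t>({0, 2, 3, 10}));
  ChunkLocation loc = resolver.Resolve(4);  // -> {chunk_index=2, index_in_chunk=1}
  loc = resolver.Resolve(1);                // -> {chunk_index=0, index_in_chunk=1}
  loc = resolver.Resolve(10);               // out of bounds: chunk_index == 3 == num_chunks()

  // The batched ResolveMany() declared further down fills parallel outputs in one pass:
  std::array<uint32_t, 3> logical = {9, 2, 0};
  std::array<uint32_t, 3> chunk_index, index_in_chunk;
  bool fits = resolver.ResolveMany<uint32_t>(3, logical.data(), chunk_index.data(),
                                             /*chunk_hint=*/0, index_in_chunk.data());
  // fits is true; chunk_index == {2, 1, 0} and index_in_chunk == {6, 0, 0}.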
+ explicit ChunkResolver(std::vector offsets) noexcept + : offsets_(std::move(offsets)), cached_chunk_(0) { +#ifndef NDEBUG + assert(offsets_.size() >= 1); + assert(offsets_[0] == 0); + for (size_t i = 1; i < offsets_.size(); i++) { + assert(offsets_[i] >= offsets_[i - 1]); + } +#endif + } + ChunkResolver(ChunkResolver&& other) noexcept; ChunkResolver& operator=(ChunkResolver&& other) noexcept; ChunkResolver(const ChunkResolver& other) noexcept; ChunkResolver& operator=(const ChunkResolver& other) noexcept; + int64_t logical_array_length() const { return offsets_.back(); } + int64_t num_chunks() const { return static_cast(offsets_.size()) - 1; } + + int64_t chunk_length(int64_t chunk_index) const { + return offsets_[chunk_index + 1] - offsets_[chunk_index]; + } + /// \brief Resolve a logical index to a ChunkLocation. /// /// The returned ChunkLocation contains the chunk index and the within-chunk index @@ -81,7 +117,7 @@ struct ARROW_EXPORT ChunkResolver { const auto cached_chunk = cached_chunk_.load(std::memory_order_relaxed); const auto chunk_index = ResolveChunkIndex(index, cached_chunk); - return {chunk_index, index - offsets_[chunk_index]}; + return ChunkLocation{chunk_index, index - offsets_[chunk_index]}; } /// \brief Resolve a logical index to a ChunkLocation. @@ -97,12 +133,70 @@ struct ARROW_EXPORT ChunkResolver { /// \return ChunkLocation with a valid chunk_index if index is within /// bounds, or with chunk_index == chunks.size() if logical index is /// `>= chunked_array.length()`. - inline ChunkLocation ResolveWithChunkIndexHint(int64_t index, - ChunkLocation hint) const { + inline ChunkLocation ResolveWithHint(int64_t index, ChunkLocation hint) const { assert(hint.chunk_index < static_cast(offsets_.size())); const auto chunk_index = ResolveChunkIndex(index, hint.chunk_index); - return {chunk_index, index - offsets_[chunk_index]}; + return ChunkLocation{chunk_index, index - offsets_[chunk_index]}; + } + + /// \brief Resolve `n_indices` logical indices to chunk indices. + /// + /// \pre 0 <= logical_index_vec[i] < logical_array_length() + /// (for well-defined and valid chunk index results) + /// \pre out_chunk_index_vec has space for `n_indices` + /// \pre chunk_hint in [0, chunks.size()] + /// \post out_chunk_index_vec[i] in [0, chunks.size()] for i in [0, n) + /// \post if logical_index_vec[i] >= chunked_array.length(), then + /// out_chunk_index_vec[i] == chunks.size() + /// and out_index_in_chunk_vec[i] is UNDEFINED (can be out-of-bounds) + /// \post if logical_index_vec[i] < 0, then both out_chunk_index_vec[i] and + /// out_index_in_chunk_vec[i] are UNDEFINED + /// + /// \param n_indices The number of logical indices to resolve + /// \param logical_index_vec The logical indices to resolve + /// \param out_chunk_index_vec The output array where the chunk indices will be written + /// \param chunk_hint 0 or the last chunk_index produced by ResolveMany + /// \param out_index_in_chunk_vec If not NULLPTR, the output array where the + /// within-chunk indices will be written + /// \return false iff chunks.size() > std::numeric_limits::max() + template + [[nodiscard]] bool ResolveMany(int64_t n_indices, const IndexType* logical_index_vec, + IndexType* out_chunk_index_vec, IndexType chunk_hint = 0, + IndexType* out_index_in_chunk_vec = NULLPTR) const { + if constexpr (sizeof(IndexType) < sizeof(uint64_t)) { + // The max value returned by Bisect is `offsets.size() - 1` (= chunks.size()). 
+ constexpr uint64_t kMaxIndexTypeValue = std::numeric_limits::max(); + // A ChunkedArray with enough empty chunks can make the index of a chunk + // exceed the logical index and thus the maximum value of IndexType. + const bool chunk_index_fits_on_type = + static_cast(offsets_.size() - 1) <= kMaxIndexTypeValue; + if (ARROW_PREDICT_FALSE(!chunk_index_fits_on_type)) { + return false; + } + // Since an index-in-chunk cannot possibly exceed the logical index being + // queried, we don't have to worry about these values not fitting on IndexType. + } + if constexpr (std::is_signed_v) { + // We interpret signed integers as unsigned and avoid having to generate double + // the amount of binary code to handle each integer width. + // + // Negative logical indices can become large values when cast to unsigned, and + // they are gracefully handled by ResolveManyImpl, but both the chunk index + // and the index in chunk values will be undefined in these cases. This + // happend because int8_t(-1) == uint8_t(255) and 255 could be a valid + // logical index in the chunked array. + using U = std::make_unsigned_t; + ResolveManyImpl(n_indices, reinterpret_cast(logical_index_vec), + reinterpret_cast(out_chunk_index_vec), + static_cast(chunk_hint), + reinterpret_cast(out_index_in_chunk_vec)); + } else { + static_assert(std::is_unsigned_v); + ResolveManyImpl(n_indices, logical_index_vec, out_chunk_index_vec, chunk_hint, + out_index_in_chunk_vec); + } + return true; } private: @@ -130,17 +224,33 @@ struct ARROW_EXPORT ChunkResolver { return chunk_index; } + /// \pre all the pre-conditions of ChunkResolver::ResolveMany() + /// \pre num_offsets - 1 <= std::numeric_limits::max() + void ResolveManyImpl(int64_t, const uint8_t*, uint8_t*, uint8_t, uint8_t*) const; + void ResolveManyImpl(int64_t, const uint16_t*, uint16_t*, uint16_t, uint16_t*) const; + void ResolveManyImpl(int64_t, const uint32_t*, uint32_t*, uint32_t, uint32_t*) const; + void ResolveManyImpl(int64_t, const uint64_t*, uint64_t*, uint64_t, uint64_t*) const; + + public: /// \brief Find the index of the chunk that contains the logical index. /// /// Any non-negative index is accepted. When `hi=num_offsets`, the largest /// possible return value is `num_offsets-1` which is equal to - /// `chunks.size()`. The is returned when the logical index is out-of-bounds. + /// `chunks.size()`. Which is returned when the logical index is greater or + /// equal the logical length of the chunked array. /// - /// \pre index >= 0 + /// \pre index >= 0 (otherwise, when index is negative, hi-1 is returned) /// \pre lo < hi /// \pre lo >= 0 && hi <= offsets_.size() static inline int64_t Bisect(int64_t index, const int64_t* offsets, int64_t lo, int64_t hi) { + return Bisect(static_cast(index), + reinterpret_cast(offsets), static_cast(lo), + static_cast(hi)); + } + + static inline int64_t Bisect(uint64_t index, const uint64_t* offsets, uint64_t lo, + uint64_t hi) { // Similar to std::upper_bound(), but slightly different as our offsets // array always starts with 0. auto n = hi - lo; @@ -148,8 +258,8 @@ struct ARROW_EXPORT ChunkResolver { // (lo < hi is guaranteed by the precondition). 
assert(n > 1 && "lo < hi is a precondition of Bisect"); do { - const int64_t m = n >> 1; - const int64_t mid = lo + m; + const uint64_t m = n >> 1; + const uint64_t mid = lo + m; if (index >= offsets[mid]) { lo = mid; n -= m; diff --git a/cpp/src/arrow/chunked_array_test.cc b/cpp/src/arrow/chunked_array_test.cc index 6ca52ab46ca68..e9cc283b53cd5 100644 --- a/cpp/src/arrow/chunked_array_test.cc +++ b/cpp/src/arrow/chunked_array_test.cc @@ -23,6 +23,7 @@ #include #include +#include "arrow/chunk_resolver.h" #include "arrow/scalar.h" #include "arrow/status.h" #include "arrow/testing/builder.h" @@ -34,6 +35,9 @@ namespace arrow { +using internal::ChunkLocation; +using internal::ChunkResolver; + class TestChunkedArray : public ::testing::Test { protected: virtual void Construct() { @@ -310,4 +314,200 @@ TEST_F(TestChunkedArray, GetScalar) { ASSERT_RAISES(IndexError, carr.GetScalar(7)); } +// ChunkResolver tests + +using IndexTypes = ::testing::Types; + +TEST(TestChunkResolver, Resolve) { + ChunkResolver empty(std::vector({0})); // [] + // ChunkLocation::index_in_chunk is undefined when chunk_index==chunks.size(), + // so only chunk_index is compared in these cases. + ASSERT_EQ(empty.Resolve(0).chunk_index, 0); + ASSERT_EQ(empty.Resolve(0).chunk_index, 0); + + ChunkResolver one(std::vector({0, 1})); // [[0]] + ASSERT_EQ(one.Resolve(1).chunk_index, 1); + ASSERT_EQ(one.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(one.Resolve(1).chunk_index, 1); + + ChunkResolver one_and_empty(std::vector({0, 1, 1, 1})); // [[0], [], []] + ASSERT_EQ(one_and_empty.Resolve(3).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(2).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(1).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(one_and_empty.Resolve(1).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(2).chunk_index, 3); + ASSERT_EQ(one_and_empty.Resolve(3).chunk_index, 3); + + ChunkResolver one_one_one(std::vector({0, 1, 2, 3})); // [[0], [1], [2]] + ASSERT_EQ(one_one_one.Resolve(3).chunk_index, 3); + ASSERT_EQ(one_one_one.Resolve(2), (ChunkLocation(2, 0))); + ASSERT_EQ(one_one_one.Resolve(1), (ChunkLocation(1, 0))); + ASSERT_EQ(one_one_one.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(one_one_one.Resolve(1), (ChunkLocation(1, 0))); + ASSERT_EQ(one_one_one.Resolve(2), (ChunkLocation(2, 0))); + ASSERT_EQ(one_one_one.Resolve(3).chunk_index, 3); + + ChunkResolver resolver(std::vector({0, 2, 3, 10})); // [[0, 1], [2], [3..9]] + ASSERT_EQ(resolver.Resolve(10).chunk_index, 3); + ASSERT_EQ(resolver.Resolve(9), (ChunkLocation(2, 6))); + ASSERT_EQ(resolver.Resolve(8), (ChunkLocation(2, 5))); + ASSERT_EQ(resolver.Resolve(4), (ChunkLocation(2, 1))); + ASSERT_EQ(resolver.Resolve(3), (ChunkLocation(2, 0))); + ASSERT_EQ(resolver.Resolve(2), (ChunkLocation(1, 0))); + ASSERT_EQ(resolver.Resolve(1), (ChunkLocation(0, 1))); + ASSERT_EQ(resolver.Resolve(0), (ChunkLocation(0, 0))); + ASSERT_EQ(resolver.Resolve(1), (ChunkLocation(0, 1))); + ASSERT_EQ(resolver.Resolve(2), (ChunkLocation(1, 0))); + ASSERT_EQ(resolver.Resolve(3), (ChunkLocation(2, 0))); + ASSERT_EQ(resolver.Resolve(4), (ChunkLocation(2, 1))); + ASSERT_EQ(resolver.Resolve(8), (ChunkLocation(2, 5))); + ASSERT_EQ(resolver.Resolve(9), (ChunkLocation(2, 6))); + ASSERT_EQ(resolver.Resolve(10).chunk_index, 3); +} + +template +class TestChunkResolverMany : public ::testing::Test { + public: + using IndexType = T; + + Result> ResolveMany( + const ChunkResolver& resolver, const std::vector& logical_index_vec) { + const size_t n = 
logical_index_vec.size(); + std::vector chunk_index_vec; + chunk_index_vec.resize(n); + std::vector index_in_chunk_vec; + index_in_chunk_vec.resize(n); + bool valid = resolver.ResolveMany( + static_cast(n), logical_index_vec.data(), chunk_index_vec.data(), 0, + index_in_chunk_vec.data()); + if (ARROW_PREDICT_FALSE(!valid)) { + return Status::Invalid("index type doesn't fit possible chunk indexes"); + } + std::vector locations; + locations.reserve(n); + for (size_t i = 0; i < n; i++) { + auto chunk_index = static_cast(chunk_index_vec[i]); + auto index_in_chunk = static_cast(index_in_chunk_vec[i]); + locations.emplace_back(chunk_index, index_in_chunk); + } + return locations; + } + + void CheckResolveMany(const ChunkResolver& resolver, + const std::vector& logical_index_vec) { + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + IndexType logical_index = logical_index_vec[i]; + const auto expected = resolver.Resolve(logical_index); + ASSERT_LE(expected.chunk_index, resolver.num_chunks()); + if (expected.chunk_index == resolver.num_chunks()) { + // index_in_chunk is undefined in this case + ASSERT_EQ(locations[i].chunk_index, expected.chunk_index); + } else { + ASSERT_EQ(locations[i], expected); + } + } + } + + void TestBasics() { + std::vector logical_index_vec; + + ChunkResolver empty(std::vector({0})); // [] + logical_index_vec = {0, 0}; + CheckResolveMany(empty, logical_index_vec); + + ChunkResolver one(std::vector({0, 1})); // [[0]] + logical_index_vec = {1, 0, 1}; + CheckResolveMany(one, logical_index_vec); + + ChunkResolver one_and_empty(std::vector({0, 1, 1, 1})); // [[0], [], []] + logical_index_vec = {3, 2, 1, 0, 1, 2, 3}; + CheckResolveMany(one_and_empty, logical_index_vec); + + ChunkResolver one_one_one(std::vector({0, 1, 2, 3})); // [[0], [1], [2]] + logical_index_vec = {3, 2, 1, 0, 1, 2, 3}; + CheckResolveMany(one_one_one, logical_index_vec); + + ChunkResolver resolver(std::vector({0, 2, 3, 10})); // [[0, 1], [2], [3..9]] + logical_index_vec = {10, 9, 8, 4, 3, 2, 1, 0, 1, 2, 3, 4, 8, 9, 10}; + CheckResolveMany(resolver, logical_index_vec); + } + + void TestOutOfBounds() { + ChunkResolver resolver(std::vector({0, 2, 3, 10})); // [[0, 1], [2], [3..9]] + + std::vector logical_index_vec = {10, 11, 12, 13, 14, 13, 11, 10}; + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks()); + } + + if constexpr (std::is_signed_v) { + std::vector logical_index_vec = {-1, -2, -3, -4, INT8_MIN}; + + ChunkResolver resolver(std::vector({0, 2, 128})); // [[0, 1], [2..127]] + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + // All the negative indices are greater than resolver.logical_array_length()-1 + // when cast to uint8_t. 
+ ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks()); + } + + if constexpr (sizeof(IndexType) == 1) { + ChunkResolver resolver(std::vector( + {0, 2, 128, 129, 256})); // [[0, 1], [2..127], [128], [129, 255]] + ASSERT_OK_AND_ASSIGN(auto locations, ResolveMany(resolver, logical_index_vec)); + EXPECT_EQ(logical_index_vec.size(), locations.size()); + for (size_t i = 0; i < logical_index_vec.size(); i++) { + if constexpr (sizeof(IndexType) == 1) { + // All the negative 8-bit indices are SMALLER than + // resolver.logical_array_length()=256 when cast to 8-bit unsigned integers. + // So the resolved locations might look valid, but they should not be trusted. + ASSERT_LT(locations[i].chunk_index, resolver.num_chunks()); + } else { + // All the negative indices are greater than resolver.logical_array_length() + // when cast to 16/32/64-bit unsigned integers. + ASSERT_EQ(locations[i].chunk_index, resolver.num_chunks()); + } + } + } + } + } + + void TestOverflow() { + const int64_t kMaxIndex = std::is_signed_v ? 127 : 255; + std::vector logical_index_vec = {0, 1, 2, + static_cast(kMaxIndex)}; + + // Overflows are rare because to make them possible, we need more chunks + // than logical elements in the ChunkedArray. That requires at least one + // empty chunk. + std::vector offsets; + for (int64_t i = 0; i <= kMaxIndex; i++) { + offsets.push_back(i); + } + ChunkResolver resolver{offsets}; + ASSERT_OK(ResolveMany(resolver, logical_index_vec)); + + offsets.push_back(kMaxIndex); // adding an empty chunk + ChunkResolver resolver_with_empty{offsets}; + if (sizeof(IndexType) == 1) { + ASSERT_NOT_OK(ResolveMany(resolver_with_empty, logical_index_vec)); + } else { + ASSERT_OK(ResolveMany(resolver_with_empty, logical_index_vec)); + } + } +}; + +TYPED_TEST_SUITE(TestChunkResolverMany, IndexTypes); + +TYPED_TEST(TestChunkResolverMany, Basics) { this->TestBasics(); } +TYPED_TEST(TestChunkResolverMany, OutOfBounds) { this->TestOutOfBounds(); } +TYPED_TEST(TestChunkResolverMany, Overflow) { this->TestOverflow(); } + } // namespace arrow diff --git a/cpp/src/arrow/compute/CMakeLists.txt b/cpp/src/arrow/compute/CMakeLists.txt index fb778be113029..0a8018cd580cf 100644 --- a/cpp/src/arrow/compute/CMakeLists.txt +++ b/cpp/src/arrow/compute/CMakeLists.txt @@ -91,7 +91,8 @@ add_arrow_test(internals_test registry_test.cc key_hash_test.cc row/compare_test.cc - row/grouper_test.cc) + row/grouper_test.cc + util_internal_test.cc) add_arrow_compute_test(expression_test SOURCES expression_test.cc) diff --git a/cpp/src/arrow/compute/kernels/test_util.cc b/cpp/src/arrow/compute/kernels/test_util.cc index 23d0fd18d578a..2217787663a63 100644 --- a/cpp/src/arrow/compute/kernels/test_util.cc +++ b/cpp/src/arrow/compute/kernels/test_util.cc @@ -31,6 +31,7 @@ #include "arrow/datum.h" #include "arrow/result.h" #include "arrow/table.h" +#include "arrow/testing/fixed_width_test_util.h" #include "arrow/testing/gtest_util.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc index 8d43c65668d4b..5e24331fe96f2 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_filter_internal.cc @@ -164,7 +164,7 @@ class PrimitiveFilterImpl { values_is_valid_(values.buffers[0].data), // No offset applied for boolean because it's a bitmap values_data_(kIsBoolean ? 
values.buffers[1].data - : util::OffsetPointerOfFixedWidthValues(values)), + : util::OffsetPointerOfFixedByteWidthValues(values)), values_null_count_(values.null_count), values_offset_(values.offset), values_length_(values.length), @@ -470,7 +470,7 @@ Status PrimitiveFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult // validity bitmap. const bool allocate_validity = values.null_count != 0 || !filter_null_count_is_zero; - DCHECK(util::IsFixedWidthLike(values, /*force_null_count=*/false)); + DCHECK(util::IsFixedWidthLike(values)); const int64_t bit_width = util::FixedWidthInBits(*values.type); RETURN_NOT_OK(util::internal::PreallocateFixedWidthArrayData( ctx, output_length, /*source=*/values, allocate_validity, out_arr)); diff --git a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc index 93cd5060348db..2ba660e49ac38 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_internal.cc @@ -898,7 +898,7 @@ Status FSLFilterExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) // PrimitiveFilterExec for a fixed-size list array. if (util::IsFixedWidthLike(values, /*force_null_count=*/true, - /*exclude_dictionary=*/true)) { + /*exclude_bool_and_dictionary=*/true)) { const auto byte_width = util::FixedWidthInBytes(*values.type); // 0 is a valid byte width for FixedSizeList, but PrimitiveFilterExec // might not handle it correctly. @@ -971,7 +971,7 @@ Status FSLTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* out) { // PrimitiveTakeExec for a fixed-size list array. if (util::IsFixedWidthLike(values, /*force_null_count=*/true, - /*exclude_dictionary=*/true)) { + /*exclude_bool_and_dictionary=*/true)) { const auto byte_width = util::FixedWidthInBytes(*values.type); // Additionally, PrimitiveTakeExec is only implemented for specific byte widths. // TODO(GH-41301): Extend PrimitiveTakeExec for any fixed-width type. 
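Both hunks above follow the same shape: gate the fast path on util::IsFixedWidthLike() and then treat the values as a flat run of fixed-size byte elements. A rough sketch of that pattern, assuming the arrow::util helpers touched by this PR (IsFixedWidthLike, FixedWidthInBytes, OffsetPointerOfFixedByteWidthValues); `GenericExec` is a hypothetical fallback, not a real kernel:

  Status SelectionFastPath(KernelContext* ctx, const ArraySpan& values, ExecResult* out) {
    if (util::IsFixedWidthLike(values, /*force_null_count=*/true,
                               /*exclude_bool_and_dictionary=*/true)) {
      const auto byte_width = util::FixedWidthInBytes(*values.type);
      const auto* data = util::OffsetPointerOfFixedByteWidthValues(values);
      // ... gather/copy `byte_width`-sized elements straight out of `data` ...
      return Status::OK();
    }
    return GenericExec(ctx, values, out);  // hypothetical slow path
  }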
diff --git a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc index 48a2de9936cd4..1a9af0efcd700 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_take_internal.cc @@ -347,7 +347,7 @@ struct PrimitiveTakeImpl { static void Exec(const ArraySpan& values, const ArraySpan& indices, ArrayData* out_arr) { DCHECK_EQ(util::FixedWidthInBytes(*values.type), kValueWidth); - const auto* values_data = util::OffsetPointerOfFixedWidthValues(values); + const auto* values_data = util::OffsetPointerOfFixedByteWidthValues(values); const uint8_t* values_is_valid = values.buffers[0].data; auto values_offset = values.offset; @@ -588,8 +588,7 @@ Status PrimitiveTakeExec(KernelContext* ctx, const ExecSpan& batch, ExecResult* ArrayData* out_arr = out->array_data().get(); - DCHECK(util::IsFixedWidthLike(values, /*force_null_count=*/false, - /*exclude_dictionary=*/true)); + DCHECK(util::IsFixedWidthLike(values)); const int64_t bit_width = util::FixedWidthInBits(*values.type); // TODO: When neither values nor indices contain nulls, we can skip diff --git a/cpp/src/arrow/compute/kernels/vector_selection_test.cc b/cpp/src/arrow/compute/kernels/vector_selection_test.cc index 4c7d85b103f36..6261fa2daec5f 100644 --- a/cpp/src/arrow/compute/kernels/vector_selection_test.cc +++ b/cpp/src/arrow/compute/kernels/vector_selection_test.cc @@ -30,10 +30,10 @@ #include "arrow/compute/kernels/test_util.h" #include "arrow/table.h" #include "arrow/testing/builder.h" +#include "arrow/testing/fixed_width_test_util.h" #include "arrow/testing/gtest_util.h" #include "arrow/testing/random.h" #include "arrow/testing/util.h" -#include "arrow/util/fixed_width_test_util.h" #include "arrow/util/logging.h" namespace arrow { diff --git a/cpp/src/arrow/compute/kernels/vector_sort.cc b/cpp/src/arrow/compute/kernels/vector_sort.cc index db2023ef04cad..ad22fa8d365c4 100644 --- a/cpp/src/arrow/compute/kernels/vector_sort.cc +++ b/cpp/src/arrow/compute/kernels/vector_sort.cc @@ -747,15 +747,13 @@ class TableSorter { auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; - ChunkLocation left_loc{0, 0}; - ChunkLocation right_loc{0, 0}; + ChunkLocation left_loc; + ChunkLocation right_loc; std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices, [&](uint64_t left, uint64_t right) { // First column is either null or nan - left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); auto chunk_left = first_sort_key.GetChunk(left_loc); auto chunk_right = first_sort_key.GetChunk(right_loc); const auto left_is_null = chunk_left.IsNull(); @@ -786,15 +784,13 @@ class TableSorter { // Untyped implementation auto& comparator = comparator_; - ChunkLocation left_loc{0, 0}; - ChunkLocation right_loc{0, 0}; + ChunkLocation left_loc; + ChunkLocation right_loc; std::merge(nulls_begin, nulls_middle, nulls_middle, nulls_end, temp_indices, [&](uint64_t left, uint64_t right) { // First column is always null - left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, 
/*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); return comparator.Compare(left_loc, right_loc, 1); }); // Copy back temp area into main buffer @@ -812,15 +808,13 @@ class TableSorter { auto& comparator = comparator_; const auto& first_sort_key = sort_keys_[0]; - ChunkLocation left_loc{0, 0}; - ChunkLocation right_loc{0, 0}; + ChunkLocation left_loc; + ChunkLocation right_loc; std::merge(range_begin, range_middle, range_middle, range_end, temp_indices, [&](uint64_t left, uint64_t right) { // Both values are never null nor NaN. - left_loc = - left_resolver_.ResolveWithChunkIndexHint(left, /*hint=*/left_loc); - right_loc = - right_resolver_.ResolveWithChunkIndexHint(right, /*hint=*/right_loc); + left_loc = left_resolver_.ResolveWithHint(left, /*hint=*/left_loc); + right_loc = right_resolver_.ResolveWithHint(right, /*hint=*/right_loc); auto chunk_left = first_sort_key.GetChunk(left_loc); auto chunk_right = first_sort_key.GetChunk(right_loc); DCHECK(!chunk_left.IsNull()); diff --git a/cpp/src/arrow/compute/key_hash_internal.h b/cpp/src/arrow/compute/key_hash_internal.h index 7d226f52086b1..1f25beb0e1622 100644 --- a/cpp/src/arrow/compute/key_hash_internal.h +++ b/cpp/src/arrow/compute/key_hash_internal.h @@ -48,6 +48,16 @@ class ARROW_EXPORT Hashing32 { static void HashMultiColumn(const std::vector& cols, LightContext* ctx, uint32_t* out_hash); + // Clarify the max temp stack usage for HashBatch, which might be necessary for the + // caller to be aware of at compile time to reserve enough stack size in advance. The + // HashBatch implementation uses one uint32 temp vector as a buffer for hash, one uint16 + // temp vector as a buffer for null indices and one uint32 temp vector as a buffer for + // null hash, all are of size kMiniBatchLength. Plus extra kMiniBatchLength to cope with + // stack padding and aligning. + static constexpr auto kHashBatchTempStackUsage = + (sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) + /*extra=*/1) * + util::MiniBatch::kMiniBatchLength; + static Status HashBatch(const ExecBatch& key_batch, uint32_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, @@ -161,6 +171,15 @@ class ARROW_EXPORT Hashing64 { static void HashMultiColumn(const std::vector& cols, LightContext* ctx, uint64_t* hashes); + // Clarify the max temp stack usage for HashBatch, which might be necessary for the + // caller to be aware of at compile time to reserve enough stack size in advance. The + // HashBatch implementation uses one uint16 temp vector as a buffer for null indices and + // one uint64 temp vector as a buffer for null hash, all are of size kMiniBatchLength. + // Plus extra kMiniBatchLength to cope with stack padding and aligning. 
+ static constexpr auto kHashBatchTempStackUsage = + (sizeof(uint16_t) + sizeof(uint64_t) + /*extra=*/1) * + util::MiniBatch::kMiniBatchLength; + static Status HashBatch(const ExecBatch& key_batch, uint64_t* hashes, std::vector& column_arrays, int64_t hardware_flags, util::TempVectorStack* temp_stack, diff --git a/cpp/src/arrow/compute/key_hash_test.cc b/cpp/src/arrow/compute/key_hash_test.cc index 4e5d869cb7db6..fdf6d2125850a 100644 --- a/cpp/src/arrow/compute/key_hash_test.cc +++ b/cpp/src/arrow/compute/key_hash_test.cc @@ -25,12 +25,16 @@ #include "arrow/array/builder_binary.h" #include "arrow/compute/key_hash_internal.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" #include "arrow/testing/util.h" #include "arrow/util/cpu_info.h" #include "arrow/util/pcg_random.h" namespace arrow { +using arrow::random::RandomArrayGenerator; +using arrow::util::MiniBatch; +using arrow::util::TempVectorStack; using internal::checked_pointer_cast; using internal::CpuInfo; @@ -156,7 +160,7 @@ class TestVectorHash { std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; if (use_32bit_hash) { if (!use_varlen_input) { @@ -192,7 +196,7 @@ class TestVectorHash { // Verify that all implementations (scalar, SIMD) give the same hashes // const auto& hashes_scalar64 = hashes64[0]; - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { for (int j = 0; j < num_rows; ++j) { ASSERT_EQ(hashes64[i][j], hashes_scalar64[j]) << "scalar and simd approaches yielded different hashes"; @@ -280,7 +284,7 @@ void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { std::vector temp_buffer; temp_buffer.resize(mini_batch_size * 4); - for (int i = 0; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { const auto hardware_flags = hardware_flags_for_testing[i]; Hashing32::HashFixed(hardware_flags, /*combine_hashes=*/false, num_rows_to_hash, key_length, @@ -292,7 +296,7 @@ void HashFixedLengthFrom(int key_length, int num_rows, int start_row) { } // Verify that all implementations (scalar, SIMD) give the same hashes. - for (int i = 1; i < static_cast(hardware_flags_for_testing.size()); ++i) { + for (size_t i = 1; i < hardware_flags_for_testing.size(); ++i) { for (int j = 0; j < num_rows_to_hash; ++j) { ASSERT_EQ(hashes32[i][j], hashes32[0][j]) << "scalar and simd approaches yielded different 32-bit hashes"; @@ -311,5 +315,52 @@ TEST(VectorHash, FixedLengthTailByteSafety) { HashFixedLengthFrom(/*key_length=*/19, /*num_rows=*/64, /*start_row=*/63); } +// Make sure that Hashing32/64::HashBatch uses no more stack space than declared in +// Hashing32/64::kHashBatchTempStackUsage. 
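Before the test itself, the arithmetic behind those constants spelled out, plus the caller-side pattern they enable. This assumes kMiniBatchLength is 1 << 10 = 1024 (which matches the literal it replaces in grouper.cc later in this diff); `pool`, `batch`, `hashes`, `column_arrays`, `hardware_flags` and `num_rows` are set up as in the test below:

  // Hashing32: (sizeof(uint32_t) + sizeof(uint16_t) + sizeof(uint32_t) + 1) * 1024
  //          = 11 * 1024 = 11264 bytes
  // Hashing64: (sizeof(uint16_t) + sizeof(uint64_t) + 1) * 1024
  //          = 11 * 1024 = 11264 bytes
  TempVectorStack stack;
  RETURN_NOT_OK(stack.Init(pool, Hashing32::kHashBatchTempStackUsage));
  RETURN_NOT_OK(Hashing32::HashBatch(batch, hashes.data(), column_arrays,
                                     hardware_flags, &stack,
                                     /*start_rows=*/0, num_rows));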
+TEST(VectorHash, HashBatchTempStackUsage) { + for (auto num_rows : + {0, 1, MiniBatch::kMiniBatchLength, MiniBatch::kMiniBatchLength * 64}) { + SCOPED_TRACE("num_rows = " + std::to_string(num_rows)); + + MemoryPool* pool = default_memory_pool(); + RandomArrayGenerator gen(42); + + auto column = gen.Int8(num_rows, 0, 127); + ExecBatch batch({column}, num_rows); + + std::vector column_arrays; + ASSERT_OK(ColumnArraysFromExecBatch(batch, &column_arrays)); + + const auto hardware_flags_for_testing = HardwareFlagsForTesting(); + ASSERT_GT(hardware_flags_for_testing.size(), 0); + + { + std::vector hashes(num_rows); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, Hashing32::kHashBatchTempStackUsage)); + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { + SCOPED_TRACE("hashing32 for hardware flags = " + + std::to_string(hardware_flags_for_testing[i])); + ASSERT_OK(Hashing32::HashBatch(batch, hashes.data(), column_arrays, + hardware_flags_for_testing[i], &stack, + /*start_rows=*/0, num_rows)); + } + } + + { + std::vector hashes(num_rows); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, Hashing64::kHashBatchTempStackUsage)); + for (size_t i = 0; i < hardware_flags_for_testing.size(); ++i) { + SCOPED_TRACE("hashing64 for hardware flags = " + + std::to_string(hardware_flags_for_testing[i])); + ASSERT_OK(Hashing64::HashBatch(batch, hashes.data(), column_arrays, + hardware_flags_for_testing[i], &stack, + /*start_rows=*/0, num_rows)); + } + } + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/key_map_internal.h b/cpp/src/arrow/compute/key_map_internal.h index 8e06dc83483aa..a5e784a9e4463 100644 --- a/cpp/src/arrow/compute/key_map_internal.h +++ b/cpp/src/arrow/compute/key_map_internal.h @@ -21,6 +21,7 @@ #include #include "arrow/compute/util.h" +#include "arrow/compute/util_internal.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" diff --git a/cpp/src/arrow/compute/light_array_internal.h b/cpp/src/arrow/compute/light_array_internal.h index 67de71bf56c92..995c4211998e0 100644 --- a/cpp/src/arrow/compute/light_array_internal.h +++ b/cpp/src/arrow/compute/light_array_internal.h @@ -22,6 +22,7 @@ #include "arrow/array.h" #include "arrow/compute/exec.h" #include "arrow/compute/util.h" +#include "arrow/compute/util_internal.h" #include "arrow/type.h" #include "arrow/util/cpu_info.h" #include "arrow/util/logging.h" diff --git a/cpp/src/arrow/compute/light_array_test.cc b/cpp/src/arrow/compute/light_array_test.cc index 08f36ee606025..cc02d489d138f 100644 --- a/cpp/src/arrow/compute/light_array_test.cc +++ b/cpp/src/arrow/compute/light_array_test.cc @@ -20,6 +20,7 @@ #include #include +#include "arrow/memory_pool.h" #include "arrow/testing/generator.h" #include "arrow/testing/gtest_util.h" #include "arrow/type.h" diff --git a/cpp/src/arrow/compute/row/CMakeLists.txt b/cpp/src/arrow/compute/row/CMakeLists.txt index 6ae982dbaf3a7..ef03c767f974e 100644 --- a/cpp/src/arrow/compute/row/CMakeLists.txt +++ b/cpp/src/arrow/compute/row/CMakeLists.txt @@ -19,3 +19,5 @@ # in a row-major order. 
arrow_install_all_headers("arrow/compute/row") + +add_arrow_benchmark(grouper_benchmark PREFIX "arrow-compute") diff --git a/cpp/src/arrow/compute/row/compare_internal.h b/cpp/src/arrow/compute/row/compare_internal.h index 16002ee5184e9..a5a109b0b516a 100644 --- a/cpp/src/arrow/compute/row/compare_internal.h +++ b/cpp/src/arrow/compute/row/compare_internal.h @@ -32,6 +32,16 @@ namespace compute { class ARROW_EXPORT KeyCompare { public: + // Clarify the max temp stack usage for CompareColumnsToRows, which might be necessary + // for the caller to be aware of (possibly at compile time) to reserve enough stack size + // in advance. The CompareColumnsToRows implementation uses three uint8 temp vectors as + // buffers for match vectors, all are of size num_rows. Plus extra kMiniBatchLength to + // cope with stack padding and aligning. + constexpr static int64_t CompareColumnsToRowsTempStackUsage(int64_t num_rows) { + return (sizeof(uint8_t) + sizeof(uint8_t) + sizeof(uint8_t)) * num_rows + + /*extra=*/util::MiniBatch::kMiniBatchLength; + } + // Returns a single 16-bit selection vector of rows that failed comparison. // If there is input selection on the left, the resulting selection is a filtered image // of input selection. diff --git a/cpp/src/arrow/compute/row/compare_test.cc b/cpp/src/arrow/compute/row/compare_test.cc index 1d8562cd56d3c..4044049b10863 100644 --- a/cpp/src/arrow/compute/row/compare_test.cc +++ b/cpp/src/arrow/compute/row/compare_test.cc @@ -19,23 +19,26 @@ #include "arrow/compute/row/compare_internal.h" #include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" namespace arrow { namespace compute { using arrow::bit_util::BytesForBits; using arrow::internal::CpuInfo; +using arrow::random::RandomArrayGenerator; using arrow::util::MiniBatch; using arrow::util::TempVectorStack; // Specialized case for GH-39577. TEST(KeyCompare, CompareColumnsToRowsCuriousFSB) { int fsb_length = 9; + int num_rows = 7; + MemoryPool* pool = default_memory_pool(); TempVectorStack stack; - ASSERT_OK(stack.Init(pool, 8 * MiniBatch::kMiniBatchLength * sizeof(uint64_t))); + ASSERT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows))); - int num_rows = 7; auto column_right = ArrayFromJSON(fixed_size_binary(fsb_length), R"([ "000000000", "111111111", @@ -106,5 +109,60 @@ TEST(KeyCompare, CompareColumnsToRowsCuriousFSB) { } } +// Make sure that KeyCompare::CompareColumnsToRows uses no more stack space than declared +// in KeyCompare::CompareColumnsToRowsTempStackUsage(). 
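Same idea here, written out (again assuming kMiniBatchLength == 1024; `num_rows` is whatever the caller will pass to CompareColumnsToRows):

  // Three uint8 match vectors of num_rows bytes each, plus one mini-batch of slack:
  //   CompareColumnsToRowsTempStackUsage(num_rows) == 3 * num_rows + 1024
  TempVectorStack stack;
  RETURN_NOT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows)));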
+TEST(KeyCompare, CompareColumnsToRowsTempStackUsage) { + for (auto num_rows : + {0, 1, MiniBatch::kMiniBatchLength, MiniBatch::kMiniBatchLength * 64}) { + SCOPED_TRACE("num_rows = " + std::to_string(num_rows)); + + MemoryPool* pool = default_memory_pool(); + TempVectorStack stack; + ASSERT_OK(stack.Init(pool, KeyCompare::CompareColumnsToRowsTempStackUsage(num_rows))); + + RandomArrayGenerator gen(42); + + auto column_right = gen.Int8(num_rows, 0, 127); + ExecBatch batch_right({column_right}, num_rows); + + std::vector column_metadatas_right; + ASSERT_OK(ColumnMetadatasFromExecBatch(batch_right, &column_metadatas_right)); + + RowTableMetadata table_metadata_right; + table_metadata_right.FromColumnMetadataVector(column_metadatas_right, + sizeof(uint64_t), sizeof(uint64_t)); + + std::vector column_arrays_right; + ASSERT_OK(ColumnArraysFromExecBatch(batch_right, &column_arrays_right)); + + RowTableImpl row_table; + ASSERT_OK(row_table.Init(pool, table_metadata_right)); + + RowTableEncoder row_encoder; + row_encoder.Init(column_metadatas_right, sizeof(uint64_t), sizeof(uint64_t)); + row_encoder.PrepareEncodeSelected(0, num_rows, column_arrays_right); + + std::vector row_ids_right(num_rows); + std::iota(row_ids_right.begin(), row_ids_right.end(), 0); + ASSERT_OK(row_encoder.EncodeSelected(&row_table, num_rows, row_ids_right.data())); + + auto column_left = gen.Int8(num_rows, 0, 127); + ExecBatch batch_left({column_left}, num_rows); + std::vector column_arrays_left; + ASSERT_OK(ColumnArraysFromExecBatch(batch_left, &column_arrays_left)); + + std::vector row_ids_left(num_rows); + std::iota(row_ids_left.begin(), row_ids_left.end(), 0); + + LightContext ctx{CpuInfo::GetInstance()->hardware_flags(), &stack}; + + uint32_t num_rows_no_match; + std::vector row_ids_out(num_rows); + KeyCompare::CompareColumnsToRows(num_rows, NULLPTR, row_ids_left.data(), &ctx, + &num_rows_no_match, row_ids_out.data(), + column_arrays_left, row_table, true, NULLPTR); + } +} + } // namespace compute } // namespace arrow diff --git a/cpp/src/arrow/compute/row/grouper.cc b/cpp/src/arrow/compute/row/grouper.cc index 756c70967ac6f..3ed5411d0ba02 100644 --- a/cpp/src/arrow/compute/row/grouper.cc +++ b/cpp/src/arrow/compute/row/grouper.cc @@ -217,18 +217,18 @@ struct SimpleKeySegmenter : public BaseRowSegmenter { struct AnyKeysSegmenter : public BaseRowSegmenter { static Result> Make( const std::vector& key_types, ExecContext* ctx) { - ARROW_RETURN_NOT_OK(Grouper::Make(key_types, ctx)); // check types - return std::make_unique(key_types, ctx); + ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_types, ctx)); // check types + return std::make_unique(key_types, ctx, std::move(grouper)); } - AnyKeysSegmenter(const std::vector& key_types, ExecContext* ctx) + AnyKeysSegmenter(const std::vector& key_types, ExecContext* ctx, + std::unique_ptr grouper) : BaseRowSegmenter(key_types), - ctx_(ctx), - grouper_(nullptr), + grouper_(std::move(grouper)), save_group_id_(kNoGroupId) {} Status Reset() override { - grouper_ = nullptr; + ARROW_RETURN_NOT_OK(grouper_->Reset()); save_group_id_ = kNoGroupId; return Status::OK(); } @@ -245,7 +245,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { // first row of a new segment to see if it extends the previous segment. 
template Result MapGroupIdAt(const Batch& batch, int64_t offset) { - if (!grouper_) return kNoGroupId; ARROW_ASSIGN_OR_RAISE(auto datum, grouper_->Consume(batch, offset, /*length=*/1)); if (!datum.is_array()) { @@ -264,9 +263,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { if (offset == batch.length) { return MakeSegment(batch.length, offset, 0, kEmptyExtends); } - // ARROW-18311: make Grouper support Reset() - // so it can be reset instead of recreated below - // // the group id must be computed prior to resetting the grouper, since it is compared // to save_group_id_, and after resetting the grouper produces incomparable group ids ARROW_ASSIGN_OR_RAISE(auto group_id, MapGroupIdAt(batch, offset)); @@ -276,7 +272,7 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { return extends; }; // resetting drops grouper's group-ids, freeing-up memory for the next segment - ARROW_ASSIGN_OR_RAISE(grouper_, Grouper::Make(key_types_, ctx_)); // TODO: reset it + ARROW_RETURN_NOT_OK(grouper_->Reset()); // GH-34475: cache the grouper-consume result across invocations of GetNextSegment ARROW_ASSIGN_OR_RAISE(auto datum, grouper_->Consume(batch, offset)); if (datum.is_array()) { @@ -299,7 +295,6 @@ struct AnyKeysSegmenter : public BaseRowSegmenter { } private: - ExecContext* const ctx_; std::unique_ptr grouper_; group_id_t save_group_id_; }; @@ -354,6 +349,7 @@ struct GrouperNoKeysImpl : Grouper { RETURN_NOT_OK(builder->Finish(&array)); return std::move(array); } + Status Reset() override { return Status::OK(); } Result Consume(const ExecSpan& batch, int64_t offset, int64_t length) override { ARROW_ASSIGN_OR_RAISE(auto array, MakeConstantGroupIdArray(length, 0)); return Datum(array); @@ -419,6 +415,14 @@ struct GrouperImpl : public Grouper { return std::move(impl); } + Status Reset() override { + map_.clear(); + offsets_.clear(); + key_bytes_.clear(); + num_groups_ = 0; + return Status::OK(); + } + Result Consume(const ExecSpan& batch, int64_t offset, int64_t length) override { ARROW_RETURN_NOT_OK(CheckAndCapLengthForConsume(batch.length, offset, &length)); if (offset != 0 || length != batch.length) { @@ -595,7 +599,18 @@ struct GrouperFastImpl : public Grouper { return std::move(impl); } - ~GrouperFastImpl() { map_.cleanup(); } + Status Reset() override { + ARROW_DCHECK_EQ(temp_stack_.AllocatedSize(), 0); + rows_.Clean(); + rows_minibatch_.Clean(); + map_.cleanup(); + RETURN_NOT_OK(map_.init(encode_ctx_.hardware_flags, ctx_->memory_pool())); + // TODO: It is now assumed that the dictionaries_ are identical to the first batch + // throughout the grouper's lifespan so no resetting is needed. But if we want to + // support different dictionaries for different batches, we need to reset the + // dictionaries_ here. 
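Stepping out of the implementation for a moment: the payoff of the new Reset() contract (documented in grouper.h below) is that a caller such as AnyKeysSegmenter can recycle one Grouper across segments instead of re-running Grouper::Make. A condensed sketch of that lifecycle, not the actual segmenter code; `segments` is a hypothetical range of ExecSpan values:

  ARROW_ASSIGN_OR_RAISE(auto grouper, Grouper::Make(key_types, ctx));
  for (const ExecSpan& segment : segments) {
    ARROW_ASSIGN_OR_RAISE(Datum group_ids, grouper->Consume(segment));
    // ... aggregate `segment` using group_ids and grouper->num_groups() ...
    ARROW_RETURN_NOT_OK(grouper->Reset());  // drop group ids, keep the instance
  }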
+ return Status::OK(); + } Result Consume(const ExecSpan& batch, int64_t offset, int64_t length) override { ARROW_RETURN_NOT_OK(CheckAndCapLengthForConsume(batch.length, offset, &length)); @@ -838,8 +853,7 @@ struct GrouperFastImpl : public Grouper { return out; } - static constexpr int log_minibatch_max_ = 10; - static constexpr int minibatch_size_max_ = 1 << log_minibatch_max_; + static constexpr int minibatch_size_max_ = arrow::util::MiniBatch::kMiniBatchLength; static constexpr int minibatch_size_min_ = 128; int minibatch_size_; diff --git a/cpp/src/arrow/compute/row/grouper.h b/cpp/src/arrow/compute/row/grouper.h index 628a9c14f3e44..a883fb938ddaf 100644 --- a/cpp/src/arrow/compute/row/grouper.h +++ b/cpp/src/arrow/compute/row/grouper.h @@ -109,6 +109,10 @@ class ARROW_EXPORT Grouper { static Result> Make(const std::vector& key_types, ExecContext* ctx = default_exec_context()); + /// Reset all intermediate state, make the grouper logically as just `Make`ed. + /// The underlying buffers, if any, may or may not be released though. + virtual Status Reset() = 0; + /// Consume a batch of keys, producing the corresponding group ids as an integer array, /// over a slice defined by an offset and length, which defaults to the batch length. /// Currently only uint32 indices will be produced, eventually the bit width will only diff --git a/cpp/src/arrow/compute/row/grouper_benchmark.cc b/cpp/src/arrow/compute/row/grouper_benchmark.cc new file mode 100644 index 0000000000000..1e1a16d579009 --- /dev/null +++ b/cpp/src/arrow/compute/row/grouper_benchmark.cc @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/util/key_value_metadata.h" +#include "arrow/util/string.h" + +#include "arrow/compute/row/grouper.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/testing/random.h" +#include "arrow/util/benchmark_util.h" + +namespace arrow { +namespace compute { + +constexpr auto kSeed = 0x0ff1ce; +constexpr int64_t kRound = 16; +constexpr double true_and_unique_probability = 0.2; + +static ExecBatch MakeRandomExecBatch(const DataTypeVector& types, int64_t num_rows, + double null_probability, + int64_t alignment = kDefaultBufferAlignment, + MemoryPool* memory_pool = nullptr) { + random::RandomArrayGenerator rng(kSeed); + auto num_types = static_cast(types.size()); + + // clang-format off + // For unique probability: + // The proportion of Unique determines the number of groups. + // 1. In most scenarios, unique has a small proportion and exists + // 2. 
In GroupBy/HashJoin, keys are sometimes used for deduplication, and
+  //    in that use case the key is mostly unique
+  auto metadata = key_value_metadata(
+      {
+          "null_probability",
+          "true_probability",  // for boolean type
+          "unique"             // for string type
+      },
+      {
+          internal::ToChars(null_probability),
+          internal::ToChars(true_and_unique_probability),
+          internal::ToChars(static_cast(num_rows *
+                                        true_and_unique_probability))
+      });
+  // clang-format on
+
+  std::vector values;
+  values.resize(num_types);
+  for (int i = 0; i < num_types; ++i) {
+    auto field = ::arrow::field("", types[i], metadata);
+    values[i] = rng.ArrayOf(*field, num_rows, alignment, memory_pool);
+  }
+
+  return ExecBatch(std::move(values), num_rows);
+}
+
+static void GrouperBenchmark(benchmark::State& state, const ExecSpan& span,
+                             ExecContext* ctx = nullptr) {
+  uint32_t num_groups = 0;
+  for (auto _ : state) {
+    ASSIGN_OR_ABORT(auto grouper, Grouper::Make(span.GetTypes(), ctx));
+    for (int i = 0; i < kRound; ++i) {
+      ASSIGN_OR_ABORT(auto group_ids, grouper->Consume(span));
+    }
+    num_groups = grouper->num_groups();
+  }
+
+  state.SetItemsProcessed(state.iterations() * kRound * span.length);
+  state.counters["num_groups"] = num_groups;
+  state.counters["uniqueness"] = static_cast(num_groups) / (kRound * span.length);
+}
+
+static void GrouperWithMultiTypes(benchmark::State& state, const DataTypeVector& types) {
+  auto ctx = default_exec_context();
+
+  RegressionArgs args(state, false);
+  const int64_t num_rows = args.size;
+  const double null_proportion = args.null_proportion;
+
+  auto exec_batch = MakeRandomExecBatch(types, num_rows, null_proportion,
+                                        kDefaultBufferAlignment, ctx->memory_pool());
+  ExecSpan exec_span(exec_batch);
+  ASSIGN_OR_ABORT(auto grouper, Grouper::Make(exec_span.GetTypes(), ctx));
+  GrouperBenchmark(state, exec_span, ctx);
+}
+
+void SetArgs(benchmark::internal::Benchmark* bench) {
+  BenchmarkSetArgsWithSizes(bench, {1 << 10, 1 << 12});
+}
+
+// This benchmark mainly ensures that the construction of our underlying
+// RowTable and the performance of the comparison operations in the lower-level
+// compare_internal can be tracked (we have not systematically tested these
+// underlying operations before).
+//
+// It mainly covers:
+// 1. Basic types, including the impact of the null ratio on performance (comparison
+//    operations compare null values separately).
+//
+// 2. Combination types that break the CPU pipeline in column comparison.
+//    Examples: https://github.com/apache/arrow/pull/41036#issuecomment-2048721547
+//
+// 3. Combination types that require columns to be resorted.
These combinations are +// essentially to test the impact of RowTableEncoder's sorting function on +// input columns on the performance of CompareColumnsToRows +// Examples: https://github.com/apache/arrow/pull/40998#issuecomment-2039204161 + +// basic types +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean}", {boolean()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32}", {int32()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int64}", {int64()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{utf8}", {utf8()})->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{fixed_size_binary(32)}", + {fixed_size_binary(32)}) + ->Apply(SetArgs); + +// combination types +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean, utf8}", {boolean(), utf8()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, int32}", {int32(), int32()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int64, int32}", {int64(), int32()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{boolean, int64, utf8}", + {boolean(), int64(), utf8()}) + ->Apply(SetArgs); + +// combination types requiring column resorted +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, boolean, utf8}", + {int32(), boolean(), utf8()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, "{int32, int64, boolean, utf8}", + {int32(), int64(), boolean(), utf8()}) + ->Apply(SetArgs); +BENCHMARK_CAPTURE(GrouperWithMultiTypes, + "{utf8, int32, int64, fixed_size_binary(32), boolean}", + {utf8(), int32(), int64(), fixed_size_binary(32), boolean()}) + ->Apply(SetArgs); + +} // namespace compute +} // namespace arrow diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc index b0c863b26a062..b90b3a64056bd 100644 --- a/cpp/src/arrow/compute/util.cc +++ b/cpp/src/arrow/compute/util.cc @@ -17,11 +17,7 @@ #include "arrow/compute/util.h" -#include "arrow/table.h" -#include "arrow/util/bit_util.h" -#include "arrow/util/bitmap_ops.h" #include "arrow/util/logging.h" -#include "arrow/util/tracing_internal.h" #include "arrow/util/ubsan.h" namespace arrow { @@ -31,33 +27,6 @@ using internal::CpuInfo; namespace util { -void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { - int64_t new_top = top_ + EstimatedAllocationSize(num_bytes); - // Stack overflow check (see GH-39582). - // XXX cannot return a regular Status because most consumers do not either. - ARROW_CHECK_LE(new_top, buffer_size_) << "TempVectorStack::alloc overflow"; - *data = buffer_->mutable_data() + top_ + sizeof(uint64_t); - // We set 8 bytes before the beginning of the allocated range and - // 8 bytes after the end to check for stack overflow (which would - // result in those known bytes being corrupted). 
- reinterpret_cast(buffer_->mutable_data() + top_)[0] = kGuard1; - reinterpret_cast(buffer_->mutable_data() + new_top)[-1] = kGuard2; - *id = num_vectors_++; - top_ = new_top; -} - -void TempVectorStack::release(int id, uint32_t num_bytes) { - ARROW_DCHECK(num_vectors_ == id + 1); - int64_t size = EstimatedAllocationSize(num_bytes); - ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[-1] == - kGuard2); - ARROW_DCHECK(top_ >= size); - top_ -= size; - ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[0] == - kGuard1); - --num_vectors_; -} - namespace bit_util { inline uint64_t SafeLoadUpTo8Bytes(const uint8_t* bytes, int num_bytes) { diff --git a/cpp/src/arrow/compute/util.h b/cpp/src/arrow/compute/util.h index 88dce160ce936..d56e398667f66 100644 --- a/cpp/src/arrow/compute/util.h +++ b/cpp/src/arrow/compute/util.h @@ -24,17 +24,10 @@ #include #include -#include "arrow/buffer.h" #include "arrow/compute/expression.h" #include "arrow/compute/type_fwd.h" -#include "arrow/memory_pool.h" #include "arrow/result.h" -#include "arrow/status.h" -#include "arrow/util/bit_util.h" #include "arrow/util/cpu_info.h" -#include "arrow/util/mutex.h" -#include "arrow/util/thread_pool.h" -#include "arrow/util/type_fwd.h" #if defined(__clang__) || defined(__GNUC__) #define BYTESWAP(x) __builtin_bswap64(x) @@ -77,72 +70,6 @@ class MiniBatch { static constexpr int kMiniBatchLength = 1 << kLogMiniBatchLength; }; -/// Storage used to allocate temporary vectors of a batch size. -/// Temporary vectors should resemble allocating temporary variables on the stack -/// but in the context of vectorized processing where we need to store a vector of -/// temporaries instead of a single value. -class ARROW_EXPORT TempVectorStack { - template - friend class TempVectorHolder; - - public: - Status Init(MemoryPool* pool, int64_t size) { - num_vectors_ = 0; - top_ = 0; - buffer_size_ = EstimatedAllocationSize(size); - ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(size, pool)); - // Ensure later operations don't accidentally read uninitialized memory. - std::memset(buffer->mutable_data(), 0xFF, size); - buffer_ = std::move(buffer); - return Status::OK(); - } - - private: - static int64_t EstimatedAllocationSize(int64_t size) { - return PaddedAllocationSize(size) + 2 * sizeof(uint64_t); - } - - static int64_t PaddedAllocationSize(int64_t num_bytes) { - // Round up allocation size to multiple of 8 bytes - // to avoid returning temp vectors with unaligned address. - // - // Also add padding at the end to facilitate loads and stores - // using SIMD when number of vector elements is not divisible - // by the number of SIMD lanes. 
- // - return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding; - } - void alloc(uint32_t num_bytes, uint8_t** data, int* id); - void release(int id, uint32_t num_bytes); - static constexpr uint64_t kGuard1 = 0x3141592653589793ULL; - static constexpr uint64_t kGuard2 = 0x0577215664901532ULL; - static constexpr int64_t kPadding = 64; - int num_vectors_; - int64_t top_; - std::unique_ptr buffer_; - int64_t buffer_size_; -}; - -template -class TempVectorHolder { - friend class TempVectorStack; - - public: - ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); } - T* mutable_data() { return reinterpret_cast(data_); } - TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) { - stack_ = stack; - num_elements_ = num_elements; - stack_->alloc(num_elements * sizeof(T), &data_, &id_); - } - - private: - TempVectorStack* stack_; - uint8_t* data_; - int id_; - uint32_t num_elements_; -}; - namespace bit_util { ARROW_EXPORT void bits_to_indexes(int bit_to_search, int64_t hardware_flags, diff --git a/cpp/src/arrow/compute/util_internal.cc b/cpp/src/arrow/compute/util_internal.cc new file mode 100644 index 0000000000000..7a7875162c434 --- /dev/null +++ b/cpp/src/arrow/compute/util_internal.cc @@ -0,0 +1,100 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/compute/util_internal.h" + +#include "arrow/compute/util.h" +#include "arrow/memory_pool.h" + +#ifdef ADDRESS_SANITIZER +#include +#endif + +namespace arrow { +namespace util { + +TempVectorStack::~TempVectorStack() { +#ifdef ADDRESS_SANITIZER + if (buffer_) { + ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data(), buffer_size_); + } +#endif +} + +Status TempVectorStack::Init(MemoryPool* pool, int64_t size) { + num_vectors_ = 0; + top_ = 0; + buffer_size_ = EstimatedAllocationSize(size); + ARROW_ASSIGN_OR_RAISE(auto buffer, AllocateResizableBuffer(buffer_size_, pool)); +#ifdef ADDRESS_SANITIZER + ASAN_POISON_MEMORY_REGION(buffer->mutable_data(), buffer_size_); +#endif + buffer_ = std::move(buffer); + return Status::OK(); +} + +int64_t TempVectorStack::PaddedAllocationSize(int64_t num_bytes) { + // Round up allocation size to multiple of 8 bytes + // to avoid returning temp vectors with unaligned address. + // + // Also add padding at the end to facilitate loads and stores + // using SIMD when number of vector elements is not divisible + // by the number of SIMD lanes. + // + return ::arrow::bit_util::RoundUp(num_bytes, sizeof(int64_t)) + kPadding; +} + +void TempVectorStack::alloc(uint32_t num_bytes, uint8_t** data, int* id) { + int64_t estimated_alloc_size = EstimatedAllocationSize(num_bytes); + int64_t new_top = top_ + estimated_alloc_size; + // Stack overflow check (see GH-39582). 
+ // XXX cannot return a regular Status because most consumers do not either. + ARROW_CHECK_LE(new_top, buffer_size_) + << "TempVectorStack::alloc overflow: allocating " << estimated_alloc_size + << " on top of " << top_ << " in stack of size " << buffer_size_; +#ifdef ADDRESS_SANITIZER + ASAN_UNPOISON_MEMORY_REGION(buffer_->mutable_data() + top_, estimated_alloc_size); +#endif + *data = buffer_->mutable_data() + top_ + /*one guard*/ sizeof(uint64_t); +#ifndef NDEBUG + // We set 8 bytes before the beginning of the allocated range and + // 8 bytes after the end to check for stack overflow (which would + // result in those known bytes being corrupted). + reinterpret_cast(buffer_->mutable_data() + top_)[0] = kGuard1; + reinterpret_cast(buffer_->mutable_data() + new_top)[-1] = kGuard2; +#endif + *id = num_vectors_++; + top_ = new_top; +} + +void TempVectorStack::release(int id, uint32_t num_bytes) { + ARROW_DCHECK(num_vectors_ == id + 1); + int64_t size = EstimatedAllocationSize(num_bytes); + ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[-1] == + kGuard2); + ARROW_DCHECK(top_ >= size); + top_ -= size; + ARROW_DCHECK(reinterpret_cast(buffer_->mutable_data() + top_)[0] == + kGuard1); +#ifdef ADDRESS_SANITIZER + ASAN_POISON_MEMORY_REGION(buffer_->mutable_data() + top_, size); +#endif + --num_vectors_; +} + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/compute/util_internal.h b/cpp/src/arrow/compute/util_internal.h index 87e89a3350721..5e5b15a5ff600 100644 --- a/cpp/src/arrow/compute/util_internal.h +++ b/cpp/src/arrow/compute/util_internal.h @@ -17,7 +17,10 @@ #pragma once +#include "arrow/status.h" +#include "arrow/type_fwd.h" #include "arrow/util/logging.h" +#include "arrow/util/macros.h" namespace arrow { namespace util { @@ -27,5 +30,65 @@ void CheckAlignment(const void* ptr) { ARROW_DCHECK(reinterpret_cast(ptr) % sizeof(T) == 0); } +/// Storage used to allocate temporary vectors of a batch size. +/// Temporary vectors should resemble allocating temporary variables on the stack +/// but in the context of vectorized processing where we need to store a vector of +/// temporaries instead of a single value. 
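+///
+/// A minimal usage sketch (illustrative only; the stack size and element count
+/// below are arbitrary placeholder values, not requirements of the API):
+///
+/// \code
+/// TempVectorStack stack;
+/// ARROW_RETURN_NOT_OK(stack.Init(default_memory_pool(), /*size=*/64 * 1024));
+/// {
+///   // Borrow a temporary vector of 1024 uint32_t values from the stack.
+///   TempVectorHolder<uint32_t> holder(&stack, /*num_elements=*/1024);
+///   uint32_t* data = holder.mutable_data();
+///   // ... fill and use data ...
+/// }  // ~TempVectorHolder returns the slice to the stack (allocations are LIFO)
+/// \endcode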
+class ARROW_EXPORT TempVectorStack { + template + friend class TempVectorHolder; + + public: + TempVectorStack() = default; + ~TempVectorStack(); + + ARROW_DISALLOW_COPY_AND_ASSIGN(TempVectorStack); + + ARROW_DEFAULT_MOVE_AND_ASSIGN(TempVectorStack); + + Status Init(MemoryPool* pool, int64_t size); + + int64_t AllocatedSize() const { return top_; } + + private: + static int64_t EstimatedAllocationSize(int64_t size) { + return PaddedAllocationSize(size) + /*two guards*/ 2 * sizeof(uint64_t); + } + + static int64_t PaddedAllocationSize(int64_t num_bytes); + + void alloc(uint32_t num_bytes, uint8_t** data, int* id); + void release(int id, uint32_t num_bytes); + static constexpr uint64_t kGuard1 = 0x3141592653589793ULL; + static constexpr uint64_t kGuard2 = 0x0577215664901532ULL; + static constexpr int64_t kPadding = 64; + int num_vectors_; + int64_t top_; + std::unique_ptr buffer_; + int64_t buffer_size_; + + friend class TempVectorStackTest; +}; + +template +class TempVectorHolder { + friend class TempVectorStack; + + public: + ~TempVectorHolder() { stack_->release(id_, num_elements_ * sizeof(T)); } + T* mutable_data() { return reinterpret_cast(data_); } + TempVectorHolder(TempVectorStack* stack, uint32_t num_elements) { + stack_ = stack; + num_elements_ = num_elements; + stack_->alloc(num_elements * sizeof(T), &data_, &id_); + } + + private: + TempVectorStack* stack_; + uint8_t* data_; + int id_; + uint32_t num_elements_; +}; + } // namespace util } // namespace arrow diff --git a/cpp/src/arrow/compute/util_internal_test.cc b/cpp/src/arrow/compute/util_internal_test.cc new file mode 100644 index 0000000000000..fbf34f2228488 --- /dev/null +++ b/cpp/src/arrow/compute/util_internal_test.cc @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "arrow/buffer.h" +#include "arrow/compute/util_internal.h" +#include "arrow/testing/gtest_util.h" + +namespace arrow { +namespace util { + +class TempVectorStackTest : public ::testing::Test { + protected: + static const uint8_t* BufferData(const TempVectorStack& stack) { + return stack.buffer_->data(); + } + + static int64_t BufferCapacity(const TempVectorStack& stack) { + return stack.buffer_->capacity(); + } +}; + +// GH-41738: Test the underlying buffer capacity is sufficient to hold the requested +// vector. 
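+//
+// (Before this fix, Init() allocated only the nominal `size` bytes while the
+// overflow check in alloc() compared against the larger
+// EstimatedAllocationSize(size), so a request close to the stack size could run
+// past the end of the actual allocation.)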
+TEST_F(TempVectorStackTest, BufferCapacitySufficiency) { + for (uint32_t stack_size : {1, 7, 8, 63, 64, 65535, 65536}) { + ARROW_SCOPED_TRACE("stack_size = ", stack_size); + TempVectorStack stack; + ASSERT_OK(stack.Init(default_memory_pool(), stack_size)); + + TempVectorHolder v(&stack, stack_size); + ASSERT_LE(v.mutable_data() + stack_size, BufferData(stack) + BufferCapacity(stack)); + } +} + +} // namespace util +} // namespace arrow diff --git a/cpp/src/arrow/config.cc b/cpp/src/arrow/config.cc index 9e32e5437325f..a0e3a079b3157 100644 --- a/cpp/src/arrow/config.cc +++ b/cpp/src/arrow/config.cc @@ -20,6 +20,7 @@ #include #include "arrow/util/config.h" +#include "arrow/util/config_internal.h" #include "arrow/util/cpu_info.h" #include "arrow/vendored/datetime.h" diff --git a/cpp/src/arrow/dataset/discovery_test.cc b/cpp/src/arrow/dataset/discovery_test.cc index 92cec7f324963..981146b7999ef 100644 --- a/cpp/src/arrow/dataset/discovery_test.cc +++ b/cpp/src/arrow/dataset/discovery_test.cc @@ -144,7 +144,8 @@ class FileSystemDatasetFactoryTest : public DatasetFactoryTest { } options_ = std::make_shared(); options_->dataset_schema = schema; - ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default(*schema)); + ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::Default( + *schema, options_->add_augmented_fields)); SetProjection(options_.get(), std::move(projection)); ASSERT_OK_AND_ASSIGN(dataset_, factory_->Finish(schema)); ASSERT_OK_AND_ASSIGN(auto fragment_it, dataset_->GetFragments()); diff --git a/cpp/src/arrow/dataset/file_parquet_test.cc b/cpp/src/arrow/dataset/file_parquet_test.cc index 76cd0af3b835f..bf626826d4d1b 100644 --- a/cpp/src/arrow/dataset/file_parquet_test.cc +++ b/cpp/src/arrow/dataset/file_parquet_test.cc @@ -330,8 +330,9 @@ TEST_F(TestParquetFileFormat, CachedMetadata) { // Read the file the first time, will read metadata auto options = std::make_shared(); options->filter = literal(true); - ASSERT_OK_AND_ASSIGN(auto projection_descr, - ProjectionDescr::FromNames({"x"}, *test_schema)); + ASSERT_OK_AND_ASSIGN( + auto projection_descr, + ProjectionDescr::FromNames({"x"}, *test_schema, options->add_augmented_fields)); options->projected_schema = projection_descr.schema; options->projection = projection_descr.expression; ASSERT_OK_AND_ASSIGN(auto generator, fragment->ScanBatchesAsync(options)); diff --git a/cpp/src/arrow/dataset/scanner.cc b/cpp/src/arrow/dataset/scanner.cc index 18981d1451980..a856a792a264f 100644 --- a/cpp/src/arrow/dataset/scanner.cc +++ b/cpp/src/arrow/dataset/scanner.cc @@ -211,7 +211,8 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, // create the projected schema only if the provided expressions // produces valid set of fields. ARROW_ASSIGN_OR_RAISE(auto projection_descr, - ProjectionDescr::Default(*projected_schema)); + ProjectionDescr::Default( + *projected_schema, scan_options->add_augmented_fields)); scan_options->projected_schema = std::move(projection_descr.schema); scan_options->projection = projection_descr.expression; ARROW_ASSIGN_OR_RAISE(scan_options->projection, @@ -220,7 +221,8 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, // if projected_fields are not found, we default to creating the projected_schema // and projection from the dataset_schema. 
ARROW_ASSIGN_OR_RAISE(auto projection_descr, - ProjectionDescr::Default(*dataset_schema)); + ProjectionDescr::Default( + *dataset_schema, scan_options->add_augmented_fields)); scan_options->projected_schema = std::move(projection_descr.schema); scan_options->projection = projection_descr.expression; } @@ -231,7 +233,7 @@ Status NormalizeScanOptions(const std::shared_ptr& scan_options, ARROW_ASSIGN_OR_RAISE( auto projection_descr, ProjectionDescr::FromNames(scan_options->projected_schema->field_names(), - *dataset_schema)); + *dataset_schema, scan_options->add_augmented_fields)); scan_options->projection = projection_descr.expression; } @@ -730,7 +732,8 @@ Future AsyncScanner::CountRowsAsync(Executor* executor) { const auto options = std::make_shared(*scan_options_); ARROW_ASSIGN_OR_RAISE(auto empty_projection, ProjectionDescr::FromNames(std::vector(), - *scan_options_->dataset_schema)); + *scan_options_->dataset_schema, + scan_options_->add_augmented_fields)); SetProjection(options.get(), empty_projection); auto total = std::make_shared>(0); @@ -828,7 +831,8 @@ Result ProjectionDescr::FromExpressions( } Result ProjectionDescr::FromNames(std::vector names, - const Schema& dataset_schema) { + const Schema& dataset_schema, + bool add_augmented_fields) { std::vector exprs(names.size()); for (size_t i = 0; i < exprs.size(); ++i) { // If name isn't in schema, try finding it by dotted path. @@ -846,15 +850,19 @@ Result ProjectionDescr::FromNames(std::vector name } } auto fields = dataset_schema.fields(); - for (const auto& aug_field : kAugmentedFields) { - fields.push_back(aug_field); + if (add_augmented_fields) { + for (const auto& aug_field : kAugmentedFields) { + fields.push_back(aug_field); + } } return ProjectionDescr::FromExpressions(std::move(exprs), std::move(names), Schema(fields, dataset_schema.metadata())); } -Result ProjectionDescr::Default(const Schema& dataset_schema) { - return ProjectionDescr::FromNames(dataset_schema.field_names(), dataset_schema); +Result ProjectionDescr::Default(const Schema& dataset_schema, + bool add_augmented_fields) { + return ProjectionDescr::FromNames(dataset_schema.field_names(), dataset_schema, + add_augmented_fields); } void SetProjection(ScanOptions* options, ProjectionDescr projection) { @@ -899,7 +907,8 @@ const std::shared_ptr& ScannerBuilder::projected_schema() const { Status ScannerBuilder::Project(std::vector columns) { ARROW_ASSIGN_OR_RAISE( auto projection, - ProjectionDescr::FromNames(std::move(columns), *scan_options_->dataset_schema)); + ProjectionDescr::FromNames(std::move(columns), *scan_options_->dataset_schema, + scan_options_->add_augmented_fields)); SetProjection(scan_options_.get(), std::move(projection)); return Status::OK(); } @@ -1052,8 +1061,10 @@ Result MakeScanNode(acero::ExecPlan* plan, }); auto fields = scan_options->dataset_schema->fields(); - for (const auto& aug_field : kAugmentedFields) { - fields.push_back(aug_field); + if (scan_options->add_augmented_fields) { + for (const auto& aug_field : kAugmentedFields) { + fields.push_back(aug_field); + } } return acero::MakeExecNode( diff --git a/cpp/src/arrow/dataset/scanner.h b/cpp/src/arrow/dataset/scanner.h index 4479158ff20cc..d2de267897180 100644 --- a/cpp/src/arrow/dataset/scanner.h +++ b/cpp/src/arrow/dataset/scanner.h @@ -114,6 +114,9 @@ struct ARROW_DS_EXPORT ScanOptions { /// Note: This must be true in order for any readahead to happen bool use_threads = false; + /// If true the scanner will add augmented fields to the output schema. 
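+ /// Augmented fields are the synthetic columns the dataset layer can attach to
+ /// each batch (see `kAugmentedFields`; for example the fragment and batch
+ /// indices and the source filename).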
+ bool add_augmented_fields = true; + /// Fragment-specific scan options. std::shared_ptr fragment_scan_options; @@ -287,10 +290,12 @@ struct ARROW_DS_EXPORT ProjectionDescr { /// \brief Create a default projection referencing fields in the dataset schema static Result FromNames(std::vector names, - const Schema& dataset_schema); + const Schema& dataset_schema, + bool add_augmented_fields = true); /// \brief Make a projection that projects every field in the dataset schema - static Result Default(const Schema& dataset_schema); + static Result Default(const Schema& dataset_schema, + bool add_augmented_fields = true); }; /// \brief Utility method to set the projection expression and schema diff --git a/cpp/src/arrow/dataset/scanner_test.cc b/cpp/src/arrow/dataset/scanner_test.cc index fccfc80032d31..58bc9c8c0ea6b 100644 --- a/cpp/src/arrow/dataset/scanner_test.cc +++ b/cpp/src/arrow/dataset/scanner_test.cc @@ -1103,7 +1103,8 @@ TEST_P(TestScanner, ProjectionDefaults) { } // If we only specify a projection expression then infer the projected schema // from the projection expression - auto projection_desc = ProjectionDescr::FromNames({"i32"}, *schema_); + auto projection_desc = + ProjectionDescr::FromNames({"i32"}, *schema_, /*add_augmented_fields=*/true); { ARROW_SCOPED_TRACE("User only specifies projection"); options_->projection = projection_desc->expression; @@ -1148,7 +1149,8 @@ TEST_P(TestScanner, ProjectedScanNestedFromNames) { }); ASSERT_OK_AND_ASSIGN(auto descr, ProjectionDescr::FromNames({".struct.i32", "nested.right.f64"}, - *options_->dataset_schema)) + *options_->dataset_schema, + options_->add_augmented_fields)) SetProjection(options_.get(), std::move(descr)); auto batch_in = ConstantArrayGenerator::Zeroes(GetParam().items_per_batch, schema_); auto batch_out = ConstantArrayGenerator::Zeroes( @@ -2106,7 +2108,8 @@ TEST(ScanOptions, TestMaterializedFields) { auto set_projection_from_names = [&opts](std::vector names) { ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::FromNames( - std::move(names), *opts->dataset_schema)); + std::move(names), *opts->dataset_schema, + opts->add_augmented_fields)); SetProjection(opts.get(), std::move(projection)); }; @@ -2160,7 +2163,8 @@ TEST(ScanOptions, TestMaterializedFields) { // project top-level field, filter nothing opts->filter = literal(true); ASSERT_OK_AND_ASSIGN(projection, - ProjectionDescr::FromNames({"nested"}, *opts->dataset_schema)); + ProjectionDescr::FromNames({"nested"}, *opts->dataset_schema, + opts->add_augmented_fields)); SetProjection(opts.get(), std::move(projection)); EXPECT_THAT(opts->MaterializedFields(), ElementsAre(FieldRef("nested"))); diff --git a/cpp/src/arrow/dataset/test_util_internal.h b/cpp/src/arrow/dataset/test_util_internal.h index de0519afac9e1..8195218b0cfe8 100644 --- a/cpp/src/arrow/dataset/test_util_internal.h +++ b/cpp/src/arrow/dataset/test_util_internal.h @@ -386,7 +386,8 @@ class DatasetFixtureMixin : public ::testing::Test { options_ = std::make_shared(); options_->dataset_schema = schema_; ASSERT_OK_AND_ASSIGN(auto projection, - ProjectionDescr::FromNames(schema_->field_names(), *schema_)); + ProjectionDescr::FromNames(schema_->field_names(), *schema_, + options_->add_augmented_fields)); SetProjection(options_.get(), std::move(projection)); SetFilter(literal(true)); } @@ -398,7 +399,8 @@ class DatasetFixtureMixin : public ::testing::Test { void SetProjectedColumns(std::vector column_names) { ASSERT_OK_AND_ASSIGN( auto projection, - ProjectionDescr::FromNames(std::move(column_names), 
*options_->dataset_schema)); + ProjectionDescr::FromNames(std::move(column_names), *options_->dataset_schema, + /*add_augmented_fields=*/true)); SetProjection(options_.get(), std::move(projection)); } @@ -502,7 +504,8 @@ class FileFormatFixtureMixin : public ::testing::Test { void SetSchema(std::vector> fields) { opts_->dataset_schema = schema(std::move(fields)); ASSERT_OK_AND_ASSIGN(auto projection, - ProjectionDescr::Default(*opts_->dataset_schema)); + ProjectionDescr::Default(*opts_->dataset_schema, + /*add_augmented_fields=*/true)); SetProjection(opts_.get(), std::move(projection)); } @@ -512,7 +515,8 @@ class FileFormatFixtureMixin : public ::testing::Test { void Project(std::vector names) { ASSERT_OK_AND_ASSIGN(auto projection, ProjectionDescr::FromNames( - std::move(names), *opts_->dataset_schema)); + std::move(names), *opts_->dataset_schema, + /*add_augmented_fields=*/true)); SetProjection(opts_.get(), std::move(projection)); } @@ -993,7 +997,8 @@ class FileFormatScanMixin : public FileFormatFixtureMixin, auto i64 = field("i64", int64()); this->opts_->dataset_schema = schema({i32, i32, i64}); ASSERT_RAISES(Invalid, - ProjectionDescr::FromNames({"i32"}, *this->opts_->dataset_schema)); + ProjectionDescr::FromNames({"i32"}, *this->opts_->dataset_schema, + /*add_augmented_fields=*/true)); } void TestScanWithPushdownNulls() { // Regression test for ARROW-15312 @@ -1933,7 +1938,8 @@ class WriteFileSystemDatasetMixin : public MakeFileSystemDatasetMixin { scan_options_->dataset_schema = dataset_->schema(); ASSERT_OK_AND_ASSIGN( auto projection, - ProjectionDescr::FromNames(source_schema_->field_names(), *dataset_->schema())); + ProjectionDescr::FromNames(source_schema_->field_names(), *dataset_->schema(), + scan_options_->add_augmented_fields)); SetProjection(scan_options_.get(), std::move(projection)); } diff --git a/cpp/src/arrow/device.cc b/cpp/src/arrow/device.cc index 98b8f7b30397e..01a2b8df5398d 100644 --- a/cpp/src/arrow/device.cc +++ b/cpp/src/arrow/device.cc @@ -116,6 +116,32 @@ Result> MemoryManager::ViewBuffer( " on ", to->device()->ToString(), " not supported"); } +Status MemoryManager::CopyBufferSliceToCPU(const std::shared_ptr& buf, + int64_t offset, int64_t length, + uint8_t* out_data) { + if (ARROW_PREDICT_TRUE(buf->is_cpu())) { + memcpy(out_data, buf->data() + offset, static_cast(length)); + return Status::OK(); + } + + auto& from = buf->memory_manager(); + auto cpu_mm = default_cpu_memory_manager(); + // Try a view first + auto maybe_buffer_result = from->ViewBufferTo(buf, cpu_mm); + if (!COPY_BUFFER_SUCCESS(maybe_buffer_result)) { + // View failed, try a copy instead + maybe_buffer_result = from->CopyBufferTo(buf, cpu_mm); + } + ARROW_ASSIGN_OR_RAISE(auto maybe_buffer, std::move(maybe_buffer_result)); + if (maybe_buffer != nullptr) { + memcpy(out_data, maybe_buffer->data() + offset, static_cast(length)); + return Status::OK(); + } + + return Status::NotImplemented("Copying buffer slice from ", from->device()->ToString(), + " to CPU not supported"); +} + #undef COPY_BUFFER_RETURN #undef COPY_BUFFER_SUCCESS diff --git a/cpp/src/arrow/device.h b/cpp/src/arrow/device.h index a591167ef9a45..f5cca0d27d7b2 100644 --- a/cpp/src/arrow/device.h +++ b/cpp/src/arrow/device.h @@ -249,6 +249,10 @@ class ARROW_EXPORT MemoryManager : public std::enable_shared_from_this> ViewBuffer( const std::shared_ptr& source, const std::shared_ptr& to); + /// \brief Copy a slice of a buffer into a CPU pointer + static Status CopyBufferSliceToCPU(const std::shared_ptr& buf, int64_t offset, + int64_t 
length, uint8_t* out_data); + /// \brief Create a new SyncEvent. /// /// This version should construct the appropriate event for the device and diff --git a/cpp/src/arrow/engine/substrait/relation_internal.cc b/cpp/src/arrow/engine/substrait/relation_internal.cc index f15f1a5527b7b..7c462c418f81b 100644 --- a/cpp/src/arrow/engine/substrait/relation_internal.cc +++ b/cpp/src/arrow/engine/substrait/relation_internal.cc @@ -393,6 +393,7 @@ Result FromProto(const substrait::Rel& rel, const ExtensionSet& auto scan_options = std::make_shared(); scan_options->use_threads = true; + scan_options->add_augmented_fields = false; if (read.has_filter()) { ARROW_ASSIGN_OR_RAISE(scan_options->filter, diff --git a/cpp/src/arrow/engine/substrait/serde_test.cc b/cpp/src/arrow/engine/substrait/serde_test.cc index 3e80192377937..6762d1e045450 100644 --- a/cpp/src/arrow/engine/substrait/serde_test.cc +++ b/cpp/src/arrow/engine/substrait/serde_test.cc @@ -1064,6 +1064,86 @@ NamedTableProvider AlwaysProvideSameTable(std::shared_ptr table) { }; } +TEST(Substrait, ExecReadRelWithLocalFiles) { + ASSERT_OK_AND_ASSIGN(std::string dir_string, + arrow::internal::GetEnvVar("PARQUET_TEST_DATA")); + + std::string substrait_json = R"({ + "relations": [ + { + "root": { + "input": { + "read": { + "common": { + "direct": {} + }, + "baseSchema": { + "names": [ + "f32", + "f64" + ], + "struct": { + "types": [ + { + "fp32": { + "nullability": "NULLABILITY_REQUIRED" + } + }, + { + "fp64": { + "nullability": "NULLABILITY_REQUIRED" + } + } + ], + "nullability": "NULLABILITY_REQUIRED" + } + }, + "localFiles": { + "items": [ + { + "uriFile": "file://[DIRECTORY_PLACEHOLDER]/byte_stream_split.zstd.parquet", + "parquet": {} + } + ] + } + } + }, + "names": [ + "f32", + "f64" + ] + } + } + ], + "version": { + "minorNumber": 42, + "producer": "my-producer" + } + })"; + const char* placeholder = "[DIRECTORY_PLACEHOLDER]"; + substrait_json.replace(substrait_json.find(placeholder), strlen(placeholder), + dir_string); + + ASSERT_OK_AND_ASSIGN(auto buf, + internal::SubstraitFromJSON("Plan", substrait_json, + /*ignore_unknown_fields=*/false)); + + ASSERT_OK_AND_ASSIGN(auto declarations, + DeserializePlans(*buf, acero::NullSinkNodeConsumer::Make)); + ASSERT_EQ(declarations.size(), 1); + acero::Declaration* decl = &declarations[0]; + ASSERT_EQ(decl->factory_name, "consuming_sink"); + ASSERT_OK_AND_ASSIGN(auto plan, acero::ExecPlan::Make()); + ASSERT_OK_AND_ASSIGN(auto sink_node, declarations[0].AddToPlan(plan.get())); + ASSERT_STREQ(sink_node->kind_name(), "ConsumingSinkNode"); + ASSERT_EQ(sink_node->num_inputs(), 1); + auto& prev_node = sink_node->inputs()[0]; + ASSERT_STREQ(prev_node->kind_name(), "SourceNode"); + + plan->StartProducing(); + ASSERT_FINISHES_OK(plan->finished()); +} + TEST(Substrait, RelWithHint) { ASSERT_OK_AND_ASSIGN(auto buf, internal::SubstraitFromJSON("Rel", R"({ @@ -2443,6 +2523,7 @@ TEST(SubstraitRoundTrip, BasicPlanEndToEnd) { auto scan_options = std::make_shared(); scan_options->projection = compute::project({}, {}); + scan_options->add_augmented_fields = false; const std::string filter_col_left = "shared"; const std::string filter_col_right = "distinct"; auto comp_left_value = compute::field_ref(filter_col_left); diff --git a/cpp/src/arrow/flight/client.h b/cpp/src/arrow/flight/client.h index 330fa8bad730d..613903108949e 100644 --- a/cpp/src/arrow/flight/client.h +++ b/cpp/src/arrow/flight/client.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. 
-/// \brief Implementation of Flight RPC client. API should be -/// considered experimental for now +/// \brief Implementation of Flight RPC client. #pragma once @@ -177,7 +176,6 @@ class ARROW_FLIGHT_EXPORT FlightMetadataReader { }; /// \brief Client class for Arrow Flight RPC services. -/// API experimental for now class ARROW_FLIGHT_EXPORT FlightClient { public: ~FlightClient(); @@ -275,8 +273,6 @@ class ARROW_FLIGHT_EXPORT FlightClient { /// \param[in] options Per-RPC options /// \param[in] descriptor the dataset request /// \param[in] listener Callbacks for response and RPC completion - /// - /// This API is EXPERIMENTAL. void GetFlightInfoAsync(const FlightCallOptions& options, const FlightDescriptor& descriptor, std::shared_ptr> listener); @@ -288,8 +284,6 @@ class ARROW_FLIGHT_EXPORT FlightClient { /// \brief Asynchronous GetFlightInfo returning a Future. /// \param[in] options Per-RPC options /// \param[in] descriptor the dataset request - /// - /// This API is EXPERIMENTAL. arrow::Future GetFlightInfoAsync(const FlightCallOptions& options, const FlightDescriptor& descriptor); arrow::Future GetFlightInfoAsync(const FlightDescriptor& descriptor) { diff --git a/cpp/src/arrow/flight/cookie_internal.cc b/cpp/src/arrow/flight/cookie_internal.cc index 8f41106ebce5c..75a10d148bf47 100644 --- a/cpp/src/arrow/flight/cookie_internal.cc +++ b/cpp/src/arrow/flight/cookie_internal.cc @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces for defining middleware for Flight clients. Currently -// experimental. +// Interfaces for defining middleware for Flight clients. #include "arrow/flight/cookie_internal.h" #include "arrow/flight/client.h" diff --git a/cpp/src/arrow/flight/flight_benchmark.cc b/cpp/src/arrow/flight/flight_benchmark.cc index f53b1c6dcea30..057ef15c3c7ae 100644 --- a/cpp/src/arrow/flight/flight_benchmark.cc +++ b/cpp/src/arrow/flight/flight_benchmark.cc @@ -131,7 +131,8 @@ struct PerformanceStats { Status WaitForReady(FlightClient* client, const FlightCallOptions& call_options) { Action action{"ping", nullptr}; for (int attempt = 0; attempt < 10; attempt++) { - if (client->DoAction(call_options, action).ok()) { + auto result_stream_result = client->DoAction(call_options, action); + if (result_stream_result.ok() && (*result_stream_result)->Drain().ok()) { return Status::OK(); } std::this_thread::sleep_for(std::chrono::milliseconds(1000)); diff --git a/cpp/src/arrow/flight/middleware.h b/cpp/src/arrow/flight/middleware.h index 84448097ff019..d717e396a8b68 100644 --- a/cpp/src/arrow/flight/middleware.h +++ b/cpp/src/arrow/flight/middleware.h @@ -16,7 +16,7 @@ // under the License. // Interfaces for defining middleware for Flight clients and -// servers. Currently experimental. +// servers. #pragma once diff --git a/cpp/src/arrow/flight/server.h b/cpp/src/arrow/flight/server.h index ffcffe12e3c78..8d73353ab16c1 100644 --- a/cpp/src/arrow/flight/server.h +++ b/cpp/src/arrow/flight/server.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. 
#pragma once diff --git a/cpp/src/arrow/flight/server_middleware.h b/cpp/src/arrow/flight/server_middleware.h index 030f1a17c2100..3a3e6f8616ed6 100644 --- a/cpp/src/arrow/flight/server_middleware.h +++ b/cpp/src/arrow/flight/server_middleware.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces for defining middleware for Flight servers. Currently -// experimental. +// Interfaces for defining middleware for Flight servers. #pragma once diff --git a/cpp/src/arrow/flight/sql/server.cc b/cpp/src/arrow/flight/sql/server.cc index cae3542b4faf8..63d1f5c5225fa 100644 --- a/cpp/src/arrow/flight/sql/server.cc +++ b/cpp/src/arrow/flight/sql/server.cc @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. // Platform-specific defines #include "arrow/flight/platform.h" diff --git a/cpp/src/arrow/flight/sql/server.h b/cpp/src/arrow/flight/sql/server.h index 7b5d71678f3de..7130e96987b89 100644 --- a/cpp/src/arrow/flight/sql/server.h +++ b/cpp/src/arrow/flight/sql/server.h @@ -15,8 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Interfaces to use for defining Flight RPC servers. API should be considered -// experimental for now +// Interfaces to use for defining Flight RPC servers. #pragma once diff --git a/cpp/src/arrow/flight/sql/server_session_middleware.h b/cpp/src/arrow/flight/sql/server_session_middleware.h index 021793de3de32..6eb11041a08bd 100644 --- a/cpp/src/arrow/flight/sql/server_session_middleware.h +++ b/cpp/src/arrow/flight/sql/server_session_middleware.h @@ -16,7 +16,6 @@ // under the License. // Middleware for handling Flight SQL Sessions including session cookie handling. -// Currently experimental. #pragma once diff --git a/cpp/src/arrow/flight/transport.h b/cpp/src/arrow/flight/transport.h index 4029aa5223deb..4ce50534023fc 100644 --- a/cpp/src/arrow/flight/transport.h +++ b/cpp/src/arrow/flight/transport.h @@ -19,8 +19,6 @@ /// Internal (but not private) interface for implementing /// alternate network transports in Flight. /// -/// \warning EXPERIMENTAL. Subject to change. -/// /// To implement a transport, implement ServerTransport and /// ClientTransport, and register the desired URI schemes with /// TransportRegistry. Flight takes care of most of the per-RPC @@ -248,8 +246,6 @@ TransportRegistry* GetDefaultTransportRegistry(); /// Transport implementations may subclass this to store their own /// state, and stash an instance in a user-supplied AsyncListener via /// ClientTransport::GetAsyncRpc and ClientTransport::SetAsyncRpc. -/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT AsyncRpc { public: virtual ~AsyncRpc() = default; diff --git a/cpp/src/arrow/flight/types.h b/cpp/src/arrow/flight/types.h index b3df8377b8ffd..cdf03f21041ee 100644 --- a/cpp/src/arrow/flight/types.h +++ b/cpp/src/arrow/flight/types.h @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -// Data structure for Flight RPC. API should be considered experimental for now +// Data structure for Flight RPC. #pragma once @@ -1115,8 +1115,6 @@ std::string ToString(TransportStatusCode code); /// instead of trying to translate to Arrow Status. /// /// Currently, only attached to the Status passed to AsyncListener::OnFinish. 
-/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT TransportStatusDetail : public StatusDetail { public: constexpr static const char* kTypeId = "flight::TransportStatusDetail"; diff --git a/cpp/src/arrow/flight/types_async.h b/cpp/src/arrow/flight/types_async.h index a241e64fb4e49..d5ed48d8a6438 100644 --- a/cpp/src/arrow/flight/types_async.h +++ b/cpp/src/arrow/flight/types_async.h @@ -31,8 +31,6 @@ namespace arrow::flight { /// @{ /// \brief Non-templated state for an async RPC. -/// -/// This API is EXPERIMENTAL. class ARROW_FLIGHT_EXPORT AsyncListenerBase { public: AsyncListenerBase(); @@ -57,8 +55,6 @@ class ARROW_FLIGHT_EXPORT AsyncListenerBase { /// A single listener may not be used for multiple concurrent RPC /// calls. The application MUST hold the listener alive until /// OnFinish() is called and has finished. -/// -/// This API is EXPERIMENTAL. template class ARROW_FLIGHT_EXPORT AsyncListener : public AsyncListenerBase { public: diff --git a/cpp/src/arrow/ipc/CMakeLists.txt b/cpp/src/arrow/ipc/CMakeLists.txt index 9fd71361d9b76..2fc9b145ccc98 100644 --- a/cpp/src/arrow/ipc/CMakeLists.txt +++ b/cpp/src/arrow/ipc/CMakeLists.txt @@ -39,6 +39,7 @@ endfunction() add_arrow_test(feather_test) add_arrow_ipc_test(json_simple_test) +add_arrow_ipc_test(message_internal_test) add_arrow_ipc_test(read_write_test) add_arrow_ipc_test(tensor_test) @@ -56,6 +57,7 @@ if(ARROW_BUILD_UTILITIES OR ARROW_BUILD_INTEGRATION) target_link_libraries(arrow-file-to-stream ${ARROW_UTIL_LIB}) add_executable(arrow-stream-to-file stream_to_file.cc) target_link_libraries(arrow-stream-to-file ${ARROW_UTIL_LIB}) + if(ARROW_BUILD_UTILITIES) install(TARGETS arrow-file-to-stream arrow-stream-to-file ${INSTALL_IS_OPTIONAL} DESTINATION ${CMAKE_INSTALL_BINDIR}) diff --git a/cpp/src/arrow/ipc/message_internal_test.cc b/cpp/src/arrow/ipc/message_internal_test.cc new file mode 100644 index 0000000000000..112240f08d552 --- /dev/null +++ b/cpp/src/arrow/ipc/message_internal_test.cc @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include +#include + +#include "arrow/buffer.h" +#include "arrow/ipc/dictionary.h" +#include "arrow/ipc/metadata_internal.h" +#include "arrow/ipc/options.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/key_value_metadata.h" + +namespace arrow::ipc::internal { + +using FBB = flatbuffers::FlatBufferBuilder; + +// GH-40361: Test that Flatbuffer serialization matches a known output +// byte-for-byte. +// +// Our Flatbuffers code should not depend on argument evaluation order as it's +// undefined (https://en.cppreference.com/w/cpp/language/eval_order) and may +// lead to unnecessary platform- or toolchain-specific differences in +// serialization. 
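+//
+// Note: since the comparison is byte-for-byte, any intentional change to the
+// serialized schema layout will require regenerating the expected bytes below.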
+TEST(TestMessageInternal, TestByteIdentical) { + FBB fbb; + flatbuffers::Offset fb_schema; + DictionaryFieldMapper mapper; + + // Create a simple Schema with just two metadata KVPs + auto f0 = field("f0", int64()); + auto f1 = field("f1", int64()); + std::vector> fields = {f0, f1}; + std::shared_ptr metadata = + KeyValueMetadata::Make({"key_1", "key_2"}, {"key_1_value", "key_2_value"}); + auto schema = ::arrow::schema({f0}, metadata); + + // Serialize the Schema to a Buffer + std::shared_ptr out_buffer; + ASSERT_OK( + WriteSchemaMessage(*schema, mapper, IpcWriteOptions::Defaults(), &out_buffer)); + + // This is example output from macOS+ARM+LLVM + const uint8_t expected[] = { + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x0A, 0x00, 0x0E, 0x00, 0x06, 0x00, 0x05, 0x00, + 0x08, 0x00, 0x0A, 0x00, 0x00, 0x00, 0x00, 0x01, 0x04, 0x00, 0x10, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x0A, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x08, 0x00, 0x0A, 0x00, + 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, + 0x38, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xD8, 0xFF, 0xFF, 0xFF, 0x18, 0x00, + 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, + 0x32, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, + 0x79, 0x5F, 0x32, 0x00, 0x00, 0x00, 0x08, 0x00, 0x0C, 0x00, 0x04, 0x00, 0x08, 0x00, + 0x08, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x0B, 0x00, + 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x5F, 0x76, 0x61, 0x6C, 0x75, 0x65, 0x00, + 0x05, 0x00, 0x00, 0x00, 0x6B, 0x65, 0x79, 0x5F, 0x31, 0x00, 0x00, 0x00, 0x01, 0x00, + 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x10, 0x00, 0x14, 0x00, 0x08, 0x00, 0x06, 0x00, + 0x07, 0x00, 0x0C, 0x00, 0x00, 0x00, 0x10, 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x02, 0x10, 0x00, 0x00, 0x00, 0x1C, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x66, 0x30, 0x00, 0x00, 0x08, 0x00, + 0x0C, 0x00, 0x08, 0x00, 0x07, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x40, 0x00, 0x00, 0x00}; + Buffer expected_buffer(expected, sizeof(expected)); + + AssertBufferEqual(expected_buffer, *out_buffer); +} +} // namespace arrow::ipc::internal diff --git a/cpp/src/arrow/ipc/metadata_internal.cc b/cpp/src/arrow/ipc/metadata_internal.cc index e20b352d18d95..eed426d9337dd 100644 --- a/cpp/src/arrow/ipc/metadata_internal.cc +++ b/cpp/src/arrow/ipc/metadata_internal.cc @@ -478,7 +478,9 @@ static Status GetDictionaryEncoding(FBB& fbb, const std::shared_ptr& fiel static KeyValueOffset AppendKeyValue(FBB& fbb, const std::string& key, const std::string& value) { - return flatbuf::CreateKeyValue(fbb, fbb.CreateString(key), fbb.CreateString(value)); + auto fbb_key = fbb.CreateString(key); + auto fbb_value = fbb.CreateString(value); + return flatbuf::CreateKeyValue(fbb, fbb_key, fbb_value); } static void AppendKeyValueMetadata(FBB& fbb, const KeyValueMetadata& metadata, diff --git a/cpp/src/arrow/record_batch.cc b/cpp/src/arrow/record_batch.cc index 8521d500f5c05..351f72f52365b 100644 --- a/cpp/src/arrow/record_batch.cc +++ b/cpp/src/arrow/record_batch.cc @@ -59,17 +59,31 @@ int RecordBatch::num_columns() const { return schema_->num_fields(); } class SimpleRecordBatch : public RecordBatch { public: SimpleRecordBatch(std::shared_ptr schema, int64_t num_rows, - std::vector> columns) - : RecordBatch(std::move(schema), num_rows), boxed_columns_(std::move(columns)) { + std::vector> columns, + std::shared_ptr sync_event = nullptr) + : 
RecordBatch(std::move(schema), num_rows), + boxed_columns_(std::move(columns)), + device_type_(DeviceAllocationType::kCPU), + sync_event_(std::move(sync_event)) { + if (boxed_columns_.size() > 0) { + device_type_ = boxed_columns_[0]->device_type(); + } + columns_.resize(boxed_columns_.size()); for (size_t i = 0; i < columns_.size(); ++i) { columns_[i] = boxed_columns_[i]->data(); + DCHECK_EQ(device_type_, columns_[i]->device_type()); } } SimpleRecordBatch(const std::shared_ptr& schema, int64_t num_rows, - std::vector> columns) - : RecordBatch(std::move(schema), num_rows), columns_(std::move(columns)) { + std::vector> columns, + DeviceAllocationType device_type = DeviceAllocationType::kCPU, + std::shared_ptr sync_event = nullptr) + : RecordBatch(std::move(schema), num_rows), + columns_(std::move(columns)), + device_type_(device_type), + sync_event_(std::move(sync_event)) { boxed_columns_.resize(schema_->num_fields()); } @@ -99,6 +113,7 @@ class SimpleRecordBatch : public RecordBatch { const std::shared_ptr& column) const override { ARROW_CHECK(field != nullptr); ARROW_CHECK(column != nullptr); + ARROW_CHECK(column->device_type() == device_type_); if (!field->type()->Equals(column->type())) { return Status::TypeError("Column data type ", field->type()->name(), @@ -113,7 +128,8 @@ class SimpleRecordBatch : public RecordBatch { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->AddField(i, field)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::AddVectorElement(columns_, i, column->data())); + internal::AddVectorElement(columns_, i, column->data()), + device_type_, sync_event_); } Result> SetColumn( @@ -121,6 +137,7 @@ class SimpleRecordBatch : public RecordBatch { const std::shared_ptr& column) const override { ARROW_CHECK(field != nullptr); ARROW_CHECK(column != nullptr); + ARROW_CHECK(column->device_type() == device_type_); if (!field->type()->Equals(column->type())) { return Status::TypeError("Column data type ", field->type()->name(), @@ -135,19 +152,22 @@ class SimpleRecordBatch : public RecordBatch { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->SetField(i, field)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::ReplaceVectorElement(columns_, i, column->data())); + internal::ReplaceVectorElement(columns_, i, column->data()), + device_type_, sync_event_); } Result> RemoveColumn(int i) const override { ARROW_ASSIGN_OR_RAISE(auto new_schema, schema_->RemoveField(i)); return RecordBatch::Make(std::move(new_schema), num_rows_, - internal::DeleteVectorElement(columns_, i)); + internal::DeleteVectorElement(columns_, i), device_type_, + sync_event_); } std::shared_ptr ReplaceSchemaMetadata( const std::shared_ptr& metadata) const override { auto new_schema = schema_->WithMetadata(metadata); - return RecordBatch::Make(std::move(new_schema), num_rows_, columns_); + return RecordBatch::Make(std::move(new_schema), num_rows_, columns_, device_type_, + sync_event_); } std::shared_ptr Slice(int64_t offset, int64_t length) const override { @@ -157,7 +177,8 @@ class SimpleRecordBatch : public RecordBatch { arrays.emplace_back(field->Slice(offset, length)); } int64_t num_rows = std::min(num_rows_ - offset, length); - return std::make_shared(schema_, num_rows, std::move(arrays)); + return std::make_shared(schema_, num_rows, std::move(arrays), + device_type_, sync_event_); } Status Validate() const override { @@ -167,11 +188,22 @@ class SimpleRecordBatch : public RecordBatch { return RecordBatch::Validate(); } + const std::shared_ptr& GetSyncEvent() const 
override { + return sync_event_; + } + + DeviceAllocationType device_type() const override { return device_type_; } + private: std::vector> columns_; // Caching boxed array data mutable std::vector> boxed_columns_; + + // the type of device that the buffers for columns are allocated on. + // all columns should be on the same type of device. + DeviceAllocationType device_type_; + std::shared_ptr sync_event_; }; RecordBatch::RecordBatch(const std::shared_ptr& schema, int64_t num_rows) @@ -179,18 +211,21 @@ RecordBatch::RecordBatch(const std::shared_ptr& schema, int64_t num_rows std::shared_ptr RecordBatch::Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns) { + std::vector> columns, + std::shared_ptr sync_event) { DCHECK_EQ(schema->num_fields(), static_cast(columns.size())); return std::make_shared(std::move(schema), num_rows, - std::move(columns)); + std::move(columns), std::move(sync_event)); } std::shared_ptr RecordBatch::Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns) { + std::vector> columns, DeviceAllocationType device_type, + std::shared_ptr sync_event) { DCHECK_EQ(schema->num_fields(), static_cast(columns.size())); return std::make_shared(std::move(schema), num_rows, - std::move(columns)); + std::move(columns), device_type, + std::move(sync_event)); } Result> RecordBatch::MakeEmpty( @@ -466,6 +501,10 @@ bool RecordBatch::Equals(const RecordBatch& other, bool check_metadata, return false; } + if (device_type() != other.device_type()) { + return false; + } + for (int i = 0; i < num_columns(); ++i) { if (!column(i)->Equals(other.column(i), opts)) { return false; @@ -480,6 +519,10 @@ bool RecordBatch::ApproxEquals(const RecordBatch& other, const EqualOptions& opt return false; } + if (device_type() != other.device_type()) { + return false; + } + for (int i = 0; i < num_columns(); ++i) { if (!column(i)->ApproxEquals(other.column(i), opts)) { return false; @@ -505,7 +548,7 @@ Result> RecordBatch::ReplaceSchema( ", did not match new schema field type: ", replace_type->ToString()); } } - return RecordBatch::Make(std::move(schema), num_rows(), columns()); + return RecordBatch::Make(std::move(schema), num_rows(), columns(), GetSyncEvent()); } std::vector RecordBatch::ColumnNames() const { @@ -534,7 +577,7 @@ Result> RecordBatch::RenameColumns( } return RecordBatch::Make(::arrow::schema(std::move(fields)), num_rows(), - std::move(columns)); + std::move(columns), GetSyncEvent()); } Result> RecordBatch::SelectColumns( @@ -555,7 +598,8 @@ Result> RecordBatch::SelectColumns( auto new_schema = std::make_shared(std::move(fields), schema()->metadata()); - return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns)); + return RecordBatch::Make(std::move(new_schema), num_rows(), std::move(columns), + GetSyncEvent()); } std::shared_ptr RecordBatch::Slice(int64_t offset) const { @@ -647,12 +691,16 @@ Result> RecordBatchReader::ToTable() { class SimpleRecordBatchReader : public RecordBatchReader { public: SimpleRecordBatchReader(Iterator> it, - std::shared_ptr schema) - : schema_(std::move(schema)), it_(std::move(it)) {} + std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU) + : schema_(std::move(schema)), it_(std::move(it)), device_type_(device_type) {} SimpleRecordBatchReader(std::vector> batches, - std::shared_ptr schema) - : schema_(std::move(schema)), it_(MakeVectorIterator(std::move(batches))) {} + std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU) + : 
schema_(std::move(schema)), + it_(MakeVectorIterator(std::move(batches))), + device_type_(device_type) {} Status ReadNext(std::shared_ptr* batch) override { return it_.Next().Value(batch); @@ -660,13 +708,17 @@ class SimpleRecordBatchReader : public RecordBatchReader { std::shared_ptr schema() const override { return schema_; } + DeviceAllocationType device_type() const override { return device_type_; } + protected: std::shared_ptr schema_; Iterator> it_; + DeviceAllocationType device_type_; }; Result> RecordBatchReader::Make( - std::vector> batches, std::shared_ptr schema) { + std::vector> batches, std::shared_ptr schema, + DeviceAllocationType device_type) { if (schema == nullptr) { if (batches.size() == 0 || batches[0] == nullptr) { return Status::Invalid("Cannot infer schema from empty vector or nullptr"); @@ -675,16 +727,19 @@ Result> RecordBatchReader::Make( schema = batches[0]->schema(); } - return std::make_shared(std::move(batches), std::move(schema)); + return std::make_shared(std::move(batches), std::move(schema), + device_type); } Result> RecordBatchReader::MakeFromIterator( - Iterator> batches, std::shared_ptr schema) { + Iterator> batches, std::shared_ptr schema, + DeviceAllocationType device_type) { if (schema == nullptr) { return Status::Invalid("Schema cannot be nullptr"); } - return std::make_shared(std::move(batches), std::move(schema)); + return std::make_shared(std::move(batches), std::move(schema), + device_type); } RecordBatchReader::~RecordBatchReader() { @@ -701,6 +756,10 @@ Result> ConcatenateRecordBatches( int cols = batches[0]->num_columns(); auto schema = batches[0]->schema(); for (size_t i = 0; i < batches.size(); ++i) { + if (auto sync = batches[i]->GetSyncEvent()) { + ARROW_RETURN_NOT_OK(sync->Wait()); + } + length += batches[i]->num_rows(); if (!schema->Equals(batches[i]->schema())) { return Status::Invalid( diff --git a/cpp/src/arrow/record_batch.h b/cpp/src/arrow/record_batch.h index cd647a88abd97..b03cbf2251f47 100644 --- a/cpp/src/arrow/record_batch.h +++ b/cpp/src/arrow/record_batch.h @@ -23,6 +23,7 @@ #include #include "arrow/compare.h" +#include "arrow/device.h" #include "arrow/result.h" #include "arrow/status.h" #include "arrow/type_fwd.h" @@ -45,9 +46,12 @@ class ARROW_EXPORT RecordBatch { /// \param[in] num_rows length of fields in the record batch. Each array /// should have the same length as num_rows /// \param[in] columns the record batch fields as vector of arrays - static std::shared_ptr Make(std::shared_ptr schema, - int64_t num_rows, - std::vector> columns); + /// \param[in] sync_event optional synchronization event for non-CPU device + /// memory used by buffers + static std::shared_ptr Make( + std::shared_ptr schema, int64_t num_rows, + std::vector> columns, + std::shared_ptr sync_event = NULLPTR); /// \brief Construct record batch from vector of internal data structures /// \since 0.5.0 @@ -58,9 +62,15 @@ class ARROW_EXPORT RecordBatch { /// \param num_rows the number of semantic rows in the record batch. 
This /// should be equal to the length of each field /// \param columns the data for the batch's columns + /// \param device_type the type of the device that the Arrow columns are + /// allocated on + /// \param sync_event optional synchronization event for non-CPU device + /// memory used by buffers static std::shared_ptr Make( std::shared_ptr schema, int64_t num_rows, - std::vector> columns); + std::vector> columns, + DeviceAllocationType device_type = DeviceAllocationType::kCPU, + std::shared_ptr sync_event = NULLPTR); /// \brief Create an empty RecordBatch of a given schema /// @@ -260,6 +270,18 @@ class ARROW_EXPORT RecordBatch { /// \return Status virtual Status ValidateFull() const; + /// \brief EXPERIMENTAL: Return a top-level sync event object for this record batch + /// + /// If all of the data for this record batch is in CPU memory, then this + /// will return null. If the data for this batch is + /// on a device, then if synchronization is needed before accessing the + /// data the returned sync event will allow for it. + /// + /// \return null or a Device::SyncEvent + virtual const std::shared_ptr& GetSyncEvent() const = 0; + + virtual DeviceAllocationType device_type() const = 0; + protected: RecordBatch(const std::shared_ptr& schema, int64_t num_rows); @@ -306,6 +328,11 @@ class ARROW_EXPORT RecordBatchReader { /// \brief finalize reader virtual Status Close() { return Status::OK(); } + /// \brief EXPERIMENTAL: Get the device type for record batches this reader produces + /// + /// default implementation is to return DeviceAllocationType::kCPU + virtual DeviceAllocationType device_type() const { return DeviceAllocationType::kCPU; } + class RecordBatchReaderIterator { public: using iterator_category = std::input_iterator_tag; @@ -379,15 +406,19 @@ class ARROW_EXPORT RecordBatchReader { /// \param[in] batches the vector of RecordBatch to read from /// \param[in] schema schema to conform to. Will be inferred from the first /// element if not provided. + /// \param[in] device_type the type of device that the batches are allocated on static Result> Make( - RecordBatchVector batches, std::shared_ptr schema = NULLPTR); + RecordBatchVector batches, std::shared_ptr schema = NULLPTR, + DeviceAllocationType device_type = DeviceAllocationType::kCPU); /// \brief Create a RecordBatchReader from an Iterator of RecordBatch. /// /// \param[in] batches an iterator of RecordBatch to read from. /// \param[in] schema schema that each record batch in iterator will conform to. 
+ /// \param[in] device_type the type of device that the batches are allocated on static Result> MakeFromIterator( - Iterator> batches, std::shared_ptr schema); + Iterator> batches, std::shared_ptr schema, + DeviceAllocationType device_type = DeviceAllocationType::kCPU); }; /// \brief Concatenate record batches diff --git a/cpp/src/arrow/testing/extension_type.h b/cpp/src/arrow/testing/extension_type.h index 846e3c7a16578..6515631f202ae 100644 --- a/cpp/src/arrow/testing/extension_type.h +++ b/cpp/src/arrow/testing/extension_type.h @@ -132,6 +132,25 @@ class ARROW_TESTING_EXPORT DictExtensionType : public ExtensionType { std::string Serialize() const override { return "dict-extension-serialized"; } }; +// A minimal extension type that does not error when passed blank extension information +class ARROW_TESTING_EXPORT MetadataOptionalExtensionType : public ExtensionType { + public: + MetadataOptionalExtensionType() : ExtensionType(null()) {} + std::string extension_name() const override { return "metadata.optional"; } + std::string Serialize() const override { return ""; } + std::shared_ptr MakeArray(std::shared_ptr data) const override { + return nullptr; + } + bool ExtensionEquals(const ExtensionType& other) const override { + return other.extension_name() == extension_name(); + } + Result> Deserialize( + std::shared_ptr storage_type, + const std::string& serialized_data) const override { + return std::make_shared(); + } +}; + class ARROW_TESTING_EXPORT Complex128Array : public ExtensionArray { public: using ExtensionArray::ExtensionArray; diff --git a/cpp/src/arrow/testing/fixed_width_test_util.cc b/cpp/src/arrow/testing/fixed_width_test_util.cc new file mode 100644 index 0000000000000..9c305ed1df97c --- /dev/null +++ b/cpp/src/arrow/testing/fixed_width_test_util.cc @@ -0,0 +1,181 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
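Illustrative sketch (not part of this diff): how the device-aware RecordBatch/RecordBatchReader additions in record_batch.h above could be used once the change lands. The builder, schema, and DeviceAllocationType calls are existing Arrow APIs; GetSyncEvent(), device_type(), and the extra device_type/sync_event parameters are the members declared in the hunk above, and the sketch's function name is hypothetical.

#include <iostream>
#include <memory>

#include "arrow/api.h"
#include "arrow/device.h"

arrow::Status DeviceAwareReaderSketch() {
  // Build a small CPU-resident column with the existing builder API.
  arrow::Int64Builder builder;
  ARROW_RETURN_NOT_OK(builder.AppendValues({1, 2, 3}));
  ARROW_ASSIGN_OR_RAISE(std::shared_ptr<arrow::Array> column, builder.Finish());

  auto schema = arrow::schema({arrow::field("x", arrow::int64())});
  // The new sync_event parameter defaults to NULLPTR, which is what
  // CPU-resident data should pass.
  auto batch = arrow::RecordBatch::Make(schema, /*num_rows=*/3, {column});

  // device_type defaults to DeviceAllocationType::kCPU, so existing callers
  // keep compiling; a non-CPU producer would pass its own device type here.
  ARROW_ASSIGN_OR_RAISE(
      auto reader, arrow::RecordBatchReader::Make({batch}, schema,
                                                  arrow::DeviceAllocationType::kCPU));
  std::cout << (reader->device_type() == arrow::DeviceAllocationType::kCPU) << "\n";

  // For CPU data the per-batch sync event is null; when it is set,
  // ConcatenateRecordBatches() now waits on it before touching the buffers.
  std::cout << (batch->GetSyncEvent() == nullptr) << "\n";
  return arrow::Status::OK();
}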
+ +#include +#include +#include +#include + +#include "arrow/array/builder_base.h" +#include "arrow/array/builder_nested.h" +#include "arrow/array/builder_primitive.h" +#include "arrow/testing/fixed_width_test_util.h" +#include "arrow/type.h" +#include "arrow/util/checked_cast.h" + +namespace arrow::util::internal { + +namespace { +template +inline Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) { + using NumericBuilder = ::arrow::NumericBuilder; + using value_type = typename NumericBuilder::value_type; + auto* numeric_builder = ::arrow::internal::checked_cast(builder); + auto cast_next_value = + static_cast(*next_value % std::numeric_limits::max()); + RETURN_NOT_OK(numeric_builder->Append(cast_next_value)); + *next_value += 1; + return Status::OK(); +} +} // namespace + +std::shared_ptr NestedListGenerator::NestedFSLType( + const std::shared_ptr& inner_type, const std::vector& sizes) { + auto type = inner_type; + for (auto it = sizes.rbegin(); it != sizes.rend(); it++) { + type = fixed_size_list(type, *it); + } + return type; +} + +std::shared_ptr NestedListGenerator::NestedListType( + const std::shared_ptr& inner_type, size_t depth) { + auto list_type = list(inner_type); + for (size_t i = 1; i < depth; i++) { + list_type = list(std::move(list_type)); + } + return list_type; +} + +Result> NestedListGenerator::NestedFSLArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length) { + auto nested_type = NestedFSLType(inner_type, list_sizes); + ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); + return NestedListArray(builder.get(), list_sizes, length); +} + +Result> NestedListGenerator::NestedListArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length) { + auto nested_type = NestedListType(inner_type, list_sizes.size()); + ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); + return NestedListArray(builder.get(), list_sizes, length); +} + +void NestedListGenerator::VisitAllNestedListConfigurations( + const std::vector>& inner_value_types, + const std::function&, const std::vector&)>& + visit, + int max_depth, int max_power_of_2_size) { + for (int depth = 1; depth <= max_depth; depth++) { + for (auto& type : inner_value_types) { + assert(is_fixed_width(*type)); + int value_width = type->byte_width(); + + std::vector list_sizes; // stack of list sizes + auto pop = [&]() { // pop the list_sizes stack + assert(!list_sizes.empty()); + value_width /= list_sizes.back(); + list_sizes.pop_back(); + }; + auto next = [&]() { // double the top of the stack + assert(!list_sizes.empty()); + value_width *= 2; + list_sizes.back() *= 2; + return value_width; + }; + auto push_1s = [&]() { // fill the stack with 1s + while (list_sizes.size() < static_cast(depth)) { + list_sizes.push_back(1); + } + }; + + // Loop invariants: + // value_width == product(list_sizes) * type->byte_width() + // value_width is a power-of-2 (1, 2, 4, 8, 16, max_power_of_2_size=32) + push_1s(); + do { + // for (auto x : list_sizes) printf("%d * ", x); + // printf("(%s) %d = %2d\n", type->name().c_str(), type->byte_width(), + // value_width); + visit(type, list_sizes); + while (!list_sizes.empty()) { + if (next() <= max_power_of_2_size) { + push_1s(); + break; + } + pop(); + } + } while (!list_sizes.empty()); + } + } +} + +Status NestedListGenerator::AppendNestedList(ArrayBuilder* nested_builder, + const int* list_sizes, + int64_t* next_inner_value) { + using ::arrow::internal::checked_cast; + ArrayBuilder* builder = 
nested_builder; + auto type = builder->type(); + if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) { + const int list_size = *list_sizes; + if (type->id() == Type::FIXED_SIZE_LIST) { + auto* fsl_builder = checked_cast(builder); + assert(list_size == checked_cast(*type).list_size()); + RETURN_NOT_OK(fsl_builder->Append()); + builder = fsl_builder->value_builder(); + } else { // type->id() == Type::LIST) + auto* list_builder = checked_cast(builder); + RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size)); + builder = list_builder->value_builder(); + } + list_sizes++; + for (int i = 0; i < list_size; i++) { + RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value)); + } + } else { + switch (type->id()) { + case Type::INT8: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT16: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT32: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + case Type::INT64: + RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); + break; + default: + return Status::NotImplemented("Unsupported type: ", *type); + } + } + return Status::OK(); +} + +Result> NestedListGenerator::NestedListArray( + ArrayBuilder* nested_builder, const std::vector& list_sizes, int64_t length) { + int64_t next_inner_value = 0; + for (int64_t i = 0; i < length; i++) { + RETURN_NOT_OK(AppendNestedList(nested_builder, list_sizes.data(), &next_inner_value)); + } + return nested_builder->Finish(); +} + +} // namespace arrow::util::internal diff --git a/cpp/src/arrow/testing/fixed_width_test_util.h b/cpp/src/arrow/testing/fixed_width_test_util.h new file mode 100644 index 0000000000000..9e5e6fa68509e --- /dev/null +++ b/cpp/src/arrow/testing/fixed_width_test_util.h @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include +#include +#include +#include + +#include "arrow/testing/visibility.h" +#include "arrow/type.h" +#include "arrow/type_fwd.h" + +namespace arrow::util::internal { + +class ARROW_TESTING_EXPORT NestedListGenerator { + public: + /// \brief Create a nested FixedSizeListType. + /// + /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])` + static std::shared_ptr NestedFSLType( + const std::shared_ptr& inner_type, const std::vector& sizes); + + /// \brief Create a nested FixedListType. 
+ /// + /// \return `list(list(...))` + static std::shared_ptr NestedListType( + const std::shared_ptr& inner_type, size_t depth); + + static Result> NestedFSLArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length); + + static Result> NestedListArray( + const std::shared_ptr& inner_type, const std::vector& list_sizes, + int64_t length); + + /// \brief Generate all possible nested list configurations of depth 1 to max_depth. + /// + /// Each configuration consists of a single inner value type and a list of sizes. + /// Both can be used with NestedFSLArray and NestedListArray to generate test data. + /// + /// The product of the list sizes and the size of the inner value type is always a power + /// of 2 no greater than max_power_of_2_size. For max_depth=3 and + /// max_power_of_2_size=32, this generates 108 configurations. + static void VisitAllNestedListConfigurations( + const std::vector>& inner_value_types, + const std::function&, + const std::vector&)>& visit, + int max_depth = 3, int max_power_of_2_size = 32); + + private: + // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...]) + static Status AppendNestedList(ArrayBuilder* nested_builder, const int* list_sizes, + int64_t* next_inner_value); + + static Result> NestedListArray( + ArrayBuilder* nested_builder, const std::vector& list_sizes, int64_t length); +}; + +} // namespace arrow::util::internal diff --git a/cpp/src/arrow/util/config.h.cmake b/cpp/src/arrow/util/config.h.cmake index 9fbd685084fd5..08c2ae173601b 100644 --- a/cpp/src/arrow/util/config.h.cmake +++ b/cpp/src/arrow/util/config.h.cmake @@ -31,9 +31,6 @@ #define ARROW_BUILD_TYPE "@UPPERCASE_BUILD_TYPE@" -#define ARROW_GIT_ID "@ARROW_GIT_ID@" -#define ARROW_GIT_DESCRIPTION "@ARROW_GIT_DESCRIPTION@" - #define ARROW_PACKAGE_KIND "@ARROW_PACKAGE_KIND@" #cmakedefine ARROW_COMPUTE diff --git a/cpp/src/arrow/util/config_internal.h.cmake b/cpp/src/arrow/util/config_internal.h.cmake new file mode 100644 index 0000000000000..e90f7ee12da4d --- /dev/null +++ b/cpp/src/arrow/util/config_internal.h.cmake @@ -0,0 +1,22 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +// These variables are not exposed as they can make compilation caching +// and increment builds less efficient. 
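Illustrative sketch (not part of this diff) of the NestedListGenerator helper exposed by the new arrow/testing/fixed_width_test_util.{h,cc} files above; the concrete inner types and list sizes are arbitrary and the sketch's function name is hypothetical.

#include <iostream>
#include <memory>
#include <vector>

#include "arrow/api.h"
#include "arrow/testing/fixed_width_test_util.h"

arrow::Status NestedListGeneratorSketch() {
  using arrow::util::internal::NestedListGenerator;

  // Sizes are applied outer-to-inner:
  // fixed_size_list(fixed_size_list(int32, 2), 3)
  auto type = NestedListGenerator::NestedFSLType(arrow::int32(), {3, 2});
  std::cout << type->ToString() << "\n";

  // Build 5 rows of test data; the innermost values are a running counter.
  ARROW_ASSIGN_OR_RAISE(auto fsl_array,
                        NestedListGenerator::NestedFSLArray(arrow::int32(), {3, 2}, 5));
  std::cout << fsl_array->ToString() << "\n";

  // Enumerate every (inner type, list sizes) combination whose flat byte width
  // is a power of two no greater than the default max_power_of_2_size of 32.
  NestedListGenerator::VisitAllNestedListConfigurations(
      {arrow::int8(), arrow::int32()},
      [](const std::shared_ptr<arrow::DataType>& inner,
         const std::vector<int>& sizes) {
        std::cout << inner->ToString() << " nested in " << sizes.size()
                  << " list level(s)\n";
      });
  return arrow::Status::OK();
}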
+ +#define ARROW_GIT_ID "@ARROW_GIT_ID@" +#define ARROW_GIT_DESCRIPTION "@ARROW_GIT_DESCRIPTION@" diff --git a/cpp/src/arrow/util/fixed_width_internal.cc b/cpp/src/arrow/util/fixed_width_internal.cc index 164af3cff66b3..3f12fafb54f0f 100644 --- a/cpp/src/arrow/util/fixed_width_internal.cc +++ b/cpp/src/arrow/util/fixed_width_internal.cc @@ -33,11 +33,12 @@ namespace arrow::util { using ::arrow::internal::checked_cast; bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count, - bool exclude_dictionary) { - return IsFixedWidthLike(source, force_null_count, - [exclude_dictionary](const DataType& type) { - return !exclude_dictionary || type.id() != Type::DICTIONARY; - }); + bool exclude_bool_and_dictionary) { + return IsFixedWidthLike( + source, force_null_count, [exclude_bool_and_dictionary](const DataType& type) { + return !exclude_bool_and_dictionary || + (type.id() != Type::DICTIONARY && type.id() != Type::BOOL); + }); } static int64_t FixedWidthInBytesFallback(const FixedSizeListType& fixed_size_list_type) { @@ -73,16 +74,37 @@ int64_t FixedWidthInBytes(const DataType& type) { return -1; } +static int64_t FixedWidthInBitsFallback(const FixedSizeListType& fixed_size_list_type) { + auto* fsl = &fixed_size_list_type; + int64_t list_size = fsl->list_size(); + for (auto type = fsl->value_type().get();;) { + auto type_id = type->id(); + if (type_id == Type::FIXED_SIZE_LIST) { + fsl = checked_cast(type); + list_size *= fsl->list_size(); + type = fsl->value_type().get(); + continue; + } + if (is_fixed_width(type_id)) { + const int64_t flat_bit_width = list_size * type->bit_width(); + DCHECK_GE(flat_bit_width, 0); + return flat_bit_width; + } + break; + } + return -1; +} + int64_t FixedWidthInBits(const DataType& type) { auto type_id = type.id(); if (is_fixed_width(type_id)) { return type.bit_width(); } - const int64_t byte_width = FixedWidthInBytes(type); - if (ARROW_PREDICT_FALSE(byte_width < 0)) { - return -1; + if (type_id == Type::FIXED_SIZE_LIST) { + auto& fsl = ::arrow::internal::checked_cast(type); + return FixedWidthInBitsFallback(fsl); } - return byte_width * 8; + return -1; } namespace internal { @@ -121,9 +143,6 @@ Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx, if (type->id() == Type::FIXED_SIZE_LIST) { auto& fsl_type = checked_cast(*type); auto& value_type = fsl_type.value_type(); - if (ARROW_PREDICT_FALSE(value_type->id() == Type::BOOL)) { - return Status::Invalid("PreallocateFixedWidthArrayData: Invalid type: ", fsl_type); - } if (ARROW_PREDICT_FALSE(value_type->id() == Type::DICTIONARY)) { return Status::NotImplemented( "PreallocateFixedWidthArrayData: DICTIONARY type allocation: ", *type); @@ -146,16 +165,13 @@ Status PreallocateFixedWidthArrayData(::arrow::compute::KernelContext* ctx, } // namespace internal -/// \pre same as OffsetPointerOfFixedWidthValues -/// \pre source.type->id() != Type::BOOL -static const uint8_t* OffsetPointerOfFixedWidthValuesFallback(const ArraySpan& source) { +std::pair OffsetPointerOfFixedBitWidthValues( + const ArraySpan& source) { using OffsetAndListSize = std::pair; auto get_offset = [](auto pair) { return pair.first; }; auto get_list_size = [](auto pair) { return pair.second; }; ::arrow::internal::SmallVector stack; - DCHECK_NE(source.type->id(), Type::BOOL); - int64_t list_size = 1; auto* array = &source; while (array->type->id() == Type::FIXED_SIZE_LIST) { @@ -166,31 +182,25 @@ static const uint8_t* OffsetPointerOfFixedWidthValuesFallback(const ArraySpan& s // Now that innermost values were reached, pop 
the stack and calculate the offset // in bytes of the innermost values buffer by considering the offset at each // level of nesting. - DCHECK(array->type->id() != Type::BOOL && is_fixed_width(*array->type)); + DCHECK(is_fixed_width(*array->type)); DCHECK(array == &source || !array->MayHaveNulls()) << "OffsetPointerOfFixedWidthValues: array is expected to be flat or have no " "nulls in the arrays nested by FIXED_SIZE_LIST."; - int64_t value_width = array->type->byte_width(); - int64_t offset_in_bytes = array->offset * value_width; + int64_t value_width_in_bits = array->type->bit_width(); + int64_t offset_in_bits = array->offset * value_width_in_bits; for (auto it = stack.rbegin(); it != stack.rend(); ++it) { - value_width *= get_list_size(*it); - offset_in_bytes += get_offset(*it) * value_width; + value_width_in_bits *= get_list_size(*it); + offset_in_bits += get_offset(*it) * value_width_in_bits; } - return value_width < 0 ? nullptr : array->GetValues(1, offset_in_bytes); + DCHECK_GE(value_width_in_bits, 0); + const auto* values_ptr = array->GetValues(1, 0); + return {static_cast(offset_in_bits % 8), values_ptr + (offset_in_bits / 8)}; } -const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source) { - auto type_id = source.type->id(); - if (is_fixed_width(type_id)) { - if (ARROW_PREDICT_FALSE(type_id == Type::BOOL)) { - // BOOL arrays are bit-packed, thus a byte-aligned pointer cannot be produced in the - // general case. Returning something for BOOL arrays that happen to byte-align - // because offset=0 would create too much confusion. - return nullptr; - } - return source.GetValues(1, 0) + source.offset * source.type->byte_width(); - } - return OffsetPointerOfFixedWidthValuesFallback(source); +const uint8_t* OffsetPointerOfFixedByteWidthValues(const ArraySpan& source) { + DCHECK(IsFixedWidthLike(source, /*force_null_count=*/false, + [](const DataType& type) { return type.id() != Type::BOOL; })); + return OffsetPointerOfFixedBitWidthValues(source).second; } /// \brief Get the mutable pointer to the fixed-width values of an array @@ -203,24 +213,20 @@ const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source) { /// \return The mutable pointer to the fixed-width byte blocks of the array. If /// pre-conditions are not satisfied, the return values is undefined. uint8_t* MutableFixedWidthValuesPointer(ArrayData* mutable_array) { - auto type_id = mutable_array->type->id(); - if (type_id == Type::FIXED_SIZE_LIST) { - auto* array = mutable_array; - do { - DCHECK_EQ(array->offset, 0); - DCHECK_EQ(array->child_data.size(), 1) << array->type->ToString(true) << " part of " - << mutable_array->type->ToString(true); - array = array->child_data[0].get(); - } while (array->type->id() == Type::FIXED_SIZE_LIST); + auto* array = mutable_array; + auto type_id = array->type->id(); + while (type_id == Type::FIXED_SIZE_LIST) { DCHECK_EQ(array->offset, 0); - DCHECK(array->type->id() != Type::BOOL && is_fixed_width(*array->type)); - return array->GetMutableValues(1, 0); + DCHECK_EQ(array->child_data.size(), 1) << array->type->ToString(true) << " part of " + << mutable_array->type->ToString(true); + array = array->child_data[0].get(); + type_id = array->type->id(); } DCHECK_EQ(mutable_array->offset, 0); // BOOL is allowed here only because the offset is expected to be 0, // so the byte-aligned pointer also points to the first *bit* of the buffer. 
DCHECK(is_fixed_width(type_id)); - return mutable_array->GetMutableValues(1, 0); + return array->GetMutableValues(1, 0); } } // namespace arrow::util diff --git a/cpp/src/arrow/util/fixed_width_internal.h b/cpp/src/arrow/util/fixed_width_internal.h index f6959485fbd01..232411f4c4a56 100644 --- a/cpp/src/arrow/util/fixed_width_internal.h +++ b/cpp/src/arrow/util/fixed_width_internal.h @@ -56,146 +56,140 @@ namespace arrow::util { /// Additionally, we say that a type is "fixed-width like" if it's a fixed-width as /// defined above, or if it's a fixed-size list (or nested fixed-size lists) and /// the innermost type is fixed-width and the following restrictions also apply: -/// - The value type of the innermost fixed-size list is not BOOL (it has to be excluded -/// because a 1-bit type doesn't byte-align) /// - Only the top-level array may have nulls, all the inner array have to be completely /// free of nulls so we don't need to manage internal validity bitmaps. /// -/// Take the following `fixed_size_list, 3>` array as an -/// example: -/// -/// [ -/// [[1, 2], [3, 4], [ 5, 6]], -/// null, -/// [[7, 8], [9, 10], [11, 12]] -/// ] -/// -/// in memory, it would look like: -/// -/// { -/// type: fixed_size_list, 3>, -/// length: 3, -/// null_count: 1, -/// offset: 0, -/// buffers: [ -/// 0: [0b00000101] -/// ], -/// child_data: [ -/// 0: { -/// type: fixed_size_list, -/// length: 9, -/// null_count: 0, -/// offset: 0, -/// buffers: [0: NULL], -/// child_data: [ -/// 0: { -/// type: int32, -/// length: 18, -/// null_count: 0, -/// offset: 0, -/// buffers: [ -/// 0: NULL, -/// 1: [ 1, 2, 3, 4, 5, 6, -/// 0, 0, 0, 0, 0, 0 -/// 7, 8, 9, 10, 11, 12 ] -/// ], -/// child_data: [] -/// } -/// ] -/// } -/// ] -/// } -/// -/// This layout fits the fixed-width like definition because the innermost type -/// is byte-aligned fixed-width (int32 = 4 bytes) and the internal arrays don't -/// have nulls. The validity bitmap is only needed at the top-level array. -/// -/// Writing to this array can be done in the same way writing to a flat fixed-width -/// array is done, by: -/// 1. Updating the validity bitmap at the top-level array if nulls are present. -/// 2. Updating a continuous fixed-width block of memory through a single pointer. -/// -/// The length of this block of memory is the product of the list sizes in the -/// `FixedSizeList` types and the byte width of the innermost fixed-width type: -/// -/// 3 * 2 * 4 = 24 bytes -/// -/// Writing the `[[1, 2], [3, 4], [5, 6]]` value at a given index can be done by -/// simply setting the validity bit to 1 and writing the 24-byte sequence of -/// integers `[1, 2, 3, 4, 5, 6]` to the memory block at `byte_ptr + index * 24`. -/// -/// The length of the top-level array fully defines the lengths that all the nested -/// arrays must have, which makes defining all the lengths as easy as defining the -/// length of the top-level array. -/// -/// length = 3 -/// child_data[0].length == 3 * 3 == 9 -/// child_data[0].child_data[0].length == 3 * 3 * 2 == 18 -/// -/// child_data[0].child_data[0].buffers[1].size() >= -/// (3 * (3 * 2 * sizeof(int32)) == 3 * 24 == 72) -/// -/// Dealing with offsets is a bit involved. Let's say the array described above has -/// the offsets 2, 5, and 7: -/// -/// { -/// type: fixed_size_list, 3>, -/// offset: 2, -/// ... -/// child_data: [ -/// 0: { -/// type: fixed_size_list, -/// offset: 5, -/// ... 
-/// child_data: [ -/// 0: { -/// type: int32, -/// offset: 7, -/// buffers: [ -/// 0: NULL, -/// 1: [ 1, 1, 1, 1, 1, 1, 1, // 7 values skipped -/// 0,1, 0,1, 0,1, 0,1, 0,1, // 5 [x,x] values skipped -/// -/// 0,0,0,0,0,1, // -/// 0,0,0,0,0,1, // 2 [[x,x], [x,x], [x,x]] values skipped -/// -/// 1, 2, 3, 4, 5, 6, // -/// 0, 0, 0, 0, 0, 0 // the actual values -/// 7, 8, 9, 10, 11, 12 // -/// ] -/// ], -/// } -/// ] -/// } -/// ] -/// } -/// -/// The offset of the innermost values buffer, in bytes, is calculated as: -/// -/// ((2 * 3) + (5 * 2) + 7) * sizeof(int32) = 29 * 4 bytes = 116 bytes -/// -/// In general, the formula to calculate the offset of the innermost values buffer is: -/// -/// ((off_0 * fsl_size_0) + (off_1 * fsl_size_1) + ... + innermost_off) -/// * sizeof(innermost_type) -/// -/// `OffsetPointerOfFixedWidthValues()` can calculate this byte offset and return the -/// pointer to the first relevant byte of the innermost values buffer. -/// /// \param source The array to check /// \param force_null_count If true, GetNullCount() is used instead of null_count -/// \param exclude_dictionary If true, DICTIONARY is excluded from the -/// is_fixed_width() types. Default: false. +/// \param exclude_bool_and_dictionary If true, BOOL and DICTIONARY are excluded from +/// the is_fixed_width() types. Default: false. ARROW_EXPORT bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count = false, - bool exclude_dictionary = false); + bool exclude_bool_and_dictionary = false); + +// Take the following `fixed_size_list, 3>` array as an +// example: +// +// [ +// [[1, 2], [3, 4], [ 5, 6]], +// null, +// [[7, 8], [9, 10], [11, 12]] +// ] +// +// in memory, it would look like: +// +// { +// type: fixed_size_list, 3>, +// length: 3, +// null_count: 1, +// offset: 0, +// buffers: [ +// 0: [0b00000101] +// ], +// child_data: [ +// 0: { +// type: fixed_size_list, +// length: 9, +// null_count: 0, +// offset: 0, +// buffers: [0: NULL], +// child_data: [ +// 0: { +// type: int32, +// length: 18, +// null_count: 0, +// offset: 0, +// buffers: [ +// 0: NULL, +// 1: [ 1, 2, 3, 4, 5, 6, +// 0, 0, 0, 0, 0, 0 +// 7, 8, 9, 10, 11, 12 ] +// ], +// child_data: [] +// } +// ] +// } +// ] +// } +// +// This layout fits the fixed-width like definition because the innermost type +// is byte-aligned fixed-width (int32 = 4 bytes) and the internal arrays don't +// have nulls. The validity bitmap is only needed at the top-level array. +// +// Writing to this array can be done in the same way writing to a flat fixed-width +// array is done, by: +// 1. Updating the validity bitmap at the top-level array if nulls are present. +// 2. Updating a continuous fixed-width block of memory through a single pointer. +// +// The length of this block of memory is the product of the list sizes in the +// `FixedSizeList` types and the byte width of the innermost fixed-width type: +// +// 3 * 2 * 4 = 24 bytes +// +// Writing the `[[1, 2], [3, 4], [5, 6]]` value at a given index can be done by +// simply setting the validity bit to 1 and writing the 24-byte sequence of +// integers `[1, 2, 3, 4, 5, 6]` to the memory block at `byte_ptr + index * 24`. +// +// The length of the top-level array fully defines the lengths that all the nested +// arrays must have, which makes defining all the lengths as easy as defining the +// length of the top-level array. 
+// +// length = 3 +// child_data[0].length == 3 * 3 == 9 +// child_data[0].child_data[0].length == 3 * 3 * 2 == 18 +// +// child_data[0].child_data[0].buffers[1].size() >= +// (3 * (3 * 2 * sizeof(int32)) == 3 * 24 == 72) +// +// Dealing with offsets is a bit involved. Let's say the array described above has +// the offsets 2, 5, and 7: +// +// { +// type: fixed_size_list, 3>, +// offset: 2, +// ... +// child_data: [ +// 0: { +// type: fixed_size_list, +// offset: 5, +// ... +// child_data: [ +// 0: { +// type: int32, +// offset: 7, +// buffers: [ +// 0: NULL, +// 1: [ 1, 1, 1, 1, 1, 1, 1, // 7 values skipped +// 0,1, 0,1, 0,1, 0,1, 0,1, // 5 [x,x] values skipped +// +// 0,0,0,0,0,1, // +// 0,0,0,0,0,1, // 2 [[x,x], [x,x], [x,x]] values skipped +// +// 1, 2, 3, 4, 5, 6, // +// 0, 0, 0, 0, 0, 0 // the actual values +// 7, 8, 9, 10, 11, 12 // +// ] +// ], +// } +// ] +// } +// ] +// } +// +// The offset of the innermost values buffer, in bytes, is calculated as: +// +// ((2 * 3) + (5 * 2) + 7) * sizeof(int32) = 29 * 4 bytes = 116 bytes +// +// In general, the formula to calculate the offset of the innermost values buffer is: +// +// ((off_0 * fsl_size_0) + (off_1 * fsl_size_1) + ... + innermost_off) +// * sizeof(innermost_type) +// +// `OffsetPointerOfFixedByteWidthValues()` can calculate this byte offset and return +// the pointer to the first relevant byte of the innermost values buffer. /// \brief Checks if the given array has a fixed-width type or if it's an array of /// fixed-size list that can be flattened to an array of fixed-width values. /// -/// This function is a more general version of -/// `IsFixedWidthLike(const ArraySpan&, bool)` that allows the caller to further -/// restrict the inner value types that should be considered fixed-width. -/// /// \param source The array to check /// \param force_null_count If true, GetNullCount() is used instead of null_count /// \param extra_predicate A DataType predicate that can be used to further @@ -217,9 +211,7 @@ inline bool IsFixedWidthLike(const ArraySpan& source, bool force_null_count, values = &values->child_data[0]; continue; } - // BOOL has to be excluded because it's not byte-aligned. - return type->id() != Type::BOOL && is_fixed_width(type->id()) && - extra_predicate(*type); + return is_fixed_width(type->id()) && extra_predicate(*type); } } return false; @@ -251,6 +243,10 @@ ARROW_EXPORT int64_t FixedWidthInBytes(const DataType& type); /// \brief Get the fixed-width in bits of a type if it is a fixed-width like /// type. /// +/// If the array is a FixedSizeList (of any level of nesting), the bit width of +/// the values is the product of all fixed-list sizes and the bit width of the +/// innermost fixed-width value type. +/// /// \return The bit-width of the values or -1 /// \see FixedWidthInBytes ARROW_EXPORT int64_t FixedWidthInBits(const DataType& type); @@ -260,7 +256,7 @@ namespace internal { /// \brief Allocate an ArrayData for a type that is fixed-width like. /// /// This function performs the same checks performed by -/// `IsFixedWidthLike(source, false)`. If `source.type` is not a simple +/// `IsFixedWidthLike(source, false, false)`. If `source.type` is not a simple /// fixed-width type, caller should make sure it passes the /// `IsFixedWidthLike(source)` checks. 
That guarantees that it's possible to /// allocate an array that can serve as a destination for a kernel that writes values @@ -280,18 +276,24 @@ ARROW_EXPORT Status PreallocateFixedWidthArrayData(::arrow::compute::KernelConte } // namespace internal -/// \brief Get the pointer to the fixed-width values of a fixed-width like array. +/// \brief Get the 0-7 residual offset in bits and the pointer to the fixed-width +/// values of a fixed-width like array. /// -/// This function might return NULLPTR if the type of the array is BOOL or -/// if the pre-conditions listed are not satisfied. The converse is not true -/// (i.e. not getting NULLPTR doesn't guarantee that source is a fixed-width -/// like array). +/// For byte-aligned types, the offset is always 0. /// /// \pre `IsFixedWidthLike(source)` or the more restrictive /// is_fixed_width(*mutable_array->type) SHOULD be true -/// \return The pointer to the fixed-width values of an array or NULLPTR -/// if pre-conditions are not satisfied. -ARROW_EXPORT const uint8_t* OffsetPointerOfFixedWidthValues(const ArraySpan& source); +/// \return A pair with the residual offset in bits (0-7) and the pointer +/// to the fixed-width values. +ARROW_EXPORT std::pair OffsetPointerOfFixedBitWidthValues( + const ArraySpan& source); + +/// \brief Get the pointer to the fixed-width values of a fixed-width like array. +/// +/// \pre `IsFixedWidthLike(source)` should be true and BOOL should be excluded +/// as each bool is 1-bit width making it impossible to produce a +/// byte-aligned pointer to the values in the general case. +ARROW_EXPORT const uint8_t* OffsetPointerOfFixedByteWidthValues(const ArraySpan& source); /// \brief Get the mutable pointer to the fixed-width values of an array /// allocated by PreallocateFixedWidthArrayData. diff --git a/cpp/src/arrow/util/fixed_width_test.cc b/cpp/src/arrow/util/fixed_width_test.cc index 2f05221ed6535..3b35de1b6bbeb 100644 --- a/cpp/src/arrow/util/fixed_width_test.cc +++ b/cpp/src/arrow/util/fixed_width_test.cc @@ -80,10 +80,7 @@ TEST_F(TestFixedWidth, IsFixedWidth) { TEST_F(TestFixedWidth, IsFixedWidthLike) { auto arr = ArraySpan{*fsl_bool_array_->data()}; - // bools wrapped by fixed-size-list are not fixed-width because the - // innermost data buffer is a bitmap and won't byte-align. - ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false)); - ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/true)); + ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false)); arr = ArraySpan{*fsl_int_array_->data()}; ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false)); @@ -114,12 +111,12 @@ TEST_F(TestFixedWidth, IsFixedWidthLike) { arr = ArraySpan{*dict_string_array_->data()}; // Dictionaries are considered fixed-width by is_fixed_width(), but excluded - // by IsFixedWidthLike if exclude_dictionary=true. + // by IsFixedWidthLike if exclude_bool_and_dictionary=true. 
ASSERT_TRUE(IsFixedWidthLike(arr)); - ASSERT_TRUE( - IsFixedWidthLike(arr, /*force_null_count=*/false, /*exclude_dictionary=*/false)); - ASSERT_FALSE( - IsFixedWidthLike(arr, /*force_null_count=*/false, /*exclude_dictionary=*/true)); + ASSERT_TRUE(IsFixedWidthLike(arr, /*force_null_count=*/false, + /*exclude_bool_and_dictionary=*/false)); + ASSERT_FALSE(IsFixedWidthLike(arr, /*force_null_count=*/false, + /*exclude_bool_and_dictionary=*/true)); } TEST_F(TestFixedWidth, MeasureWidthInBytes) { @@ -184,9 +181,9 @@ TEST_F(TestFixedWidth, MeasureWidthInBits) { ASSERT_EQ(FixedWidthInBits(*varlen), -1); ASSERT_EQ(FixedWidthInBits(*varlen), -1); - ASSERT_EQ(FixedWidthInBits(*fsl(0, b)), -1); - ASSERT_EQ(FixedWidthInBits(*fsl(3, b)), -1); - ASSERT_EQ(FixedWidthInBits(*fsl(5, b)), -1); + ASSERT_EQ(FixedWidthInBits(*fsl(0, b)), 0); + ASSERT_EQ(FixedWidthInBits(*fsl(3, b)), 3); + ASSERT_EQ(FixedWidthInBits(*fsl(5, b)), 5); ASSERT_EQ(FixedWidthInBits(*fsl(0, i8)), 0); ASSERT_EQ(FixedWidthInBits(*fsl(3, i8)), 3 * 8); diff --git a/cpp/src/arrow/util/fixed_width_test_util.h b/cpp/src/arrow/util/fixed_width_test_util.h deleted file mode 100644 index ca141b7ca2c4d..0000000000000 --- a/cpp/src/arrow/util/fixed_width_test_util.h +++ /dev/null @@ -1,203 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#pragma once - -#include -#include -#include -#include - -#include "arrow/array/builder_primitive.h" -#include "arrow/builder.h" -#include "arrow/type.h" -#include "arrow/util/checked_cast.h" - -namespace arrow::util::internal { - -class NestedListGenerator { - public: - /// \brief Create a nested FixedSizeListType. - /// - /// \return `fixed_size_list(fixed_size_list(..., sizes[1]), sizes[0])` - static std::shared_ptr NestedFSLType( - const std::shared_ptr& inner_type, const std::vector& sizes) { - auto type = inner_type; - for (auto it = sizes.rbegin(); it != sizes.rend(); it++) { - type = fixed_size_list(std::move(type), *it); - } - return type; - } - - /// \brief Create a nested FixedListType. 
- /// - /// \return `list(list(...))` - static std::shared_ptr NestedListType( - const std::shared_ptr& inner_type, size_t depth) { - auto list_type = list(inner_type); - for (size_t i = 1; i < depth; i++) { - list_type = list(std::move(list_type)); - } - return list_type; - } - - private: - template - static Status AppendNumeric(ArrayBuilder* builder, int64_t* next_value) { - using NumericBuilder = ::arrow::NumericBuilder; - using value_type = typename NumericBuilder::value_type; - auto* numeric_builder = ::arrow::internal::checked_cast(builder); - auto cast_next_value = - static_cast(*next_value % std::numeric_limits::max()); - RETURN_NOT_OK(numeric_builder->Append(cast_next_value)); - *next_value += 1; - return Status::OK(); - } - - // Append([...[[*next_inner_value++, *next_inner_value++, ...]]...]) - static Status AppendNestedList(ArrayBuilder* nested_builder, const int* list_sizes, - int64_t* next_inner_value) { - using ::arrow::internal::checked_cast; - ArrayBuilder* builder = nested_builder; - auto type = builder->type(); - if (type->id() == Type::FIXED_SIZE_LIST || type->id() == Type::LIST) { - const int list_size = *list_sizes; - if (type->id() == Type::FIXED_SIZE_LIST) { - auto* fsl_builder = checked_cast(builder); - assert(list_size == checked_cast(*type).list_size()); - RETURN_NOT_OK(fsl_builder->Append()); - builder = fsl_builder->value_builder(); - } else { // type->id() == Type::LIST) - auto* list_builder = checked_cast(builder); - RETURN_NOT_OK(list_builder->Append(/*is_valid=*/true, list_size)); - builder = list_builder->value_builder(); - } - list_sizes++; - for (int i = 0; i < list_size; i++) { - RETURN_NOT_OK(AppendNestedList(builder, list_sizes, next_inner_value)); - } - } else { - switch (type->id()) { - case Type::INT8: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - case Type::INT16: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - case Type::INT32: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - case Type::INT64: - RETURN_NOT_OK(AppendNumeric(builder, next_inner_value)); - break; - default: - return Status::NotImplemented("Unsupported type: ", *type); - } - } - return Status::OK(); - } - - static Result> NestedListArray( - ArrayBuilder* nested_builder, const std::vector& list_sizes, int64_t length) { - int64_t next_inner_value = 0; - for (int64_t i = 0; i < length; i++) { - RETURN_NOT_OK( - AppendNestedList(nested_builder, list_sizes.data(), &next_inner_value)); - } - return nested_builder->Finish(); - } - - public: - static Result> NestedFSLArray( - const std::shared_ptr& inner_type, const std::vector& list_sizes, - int64_t length) { - auto nested_type = NestedFSLType(inner_type, list_sizes); - ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); - return NestedListArray(builder.get(), list_sizes, length); - } - - static Result> NestedListArray( - const std::shared_ptr& inner_type, const std::vector& list_sizes, - int64_t length) { - auto nested_type = NestedListType(inner_type, list_sizes.size()); - ARROW_ASSIGN_OR_RAISE(auto builder, MakeBuilder(nested_type)); - return NestedListArray(builder.get(), list_sizes, length); - } - - /// \brief Generate all possible nested list configurations of depth 1 to max_depth. - /// - /// Each configuration consists of a single inner value type and a list of sizes. - /// Both can be used with NestedFSLArray and NestedListArray to generate test data. 
- /// - /// The product of the list sizes and the size of the inner value type is always a power - /// of 2 no greater than max_power_of_2_size. For max_depth=3 and - /// max_power_of_2_size=32, this generates 108 configurations. - /// - /// \tparam Visit a function type with signature - /// void(const std::shared_ptr& inner_type, - /// const std::vector& list_sizes) - template - static void VisitAllNestedListConfigurations( - const std::vector>& inner_value_types, Visit&& visit, - int max_depth = 3, int max_power_of_2_size = 32) { - for (int depth = 1; depth <= max_depth; depth++) { - for (auto& type : inner_value_types) { - assert(is_fixed_width(*type)); - int value_width = type->byte_width(); - - std::vector list_sizes; // stack of list sizes - auto pop = [&]() { // pop the list_sizes stack - assert(!list_sizes.empty()); - value_width /= list_sizes.back(); - list_sizes.pop_back(); - }; - auto next = [&]() { // double the top of the stack - assert(!list_sizes.empty()); - value_width *= 2; - list_sizes.back() *= 2; - return value_width; - }; - auto push_1s = [&]() { // fill the stack with 1s - while (list_sizes.size() < static_cast(depth)) { - list_sizes.push_back(1); - } - }; - - // Loop invariants: - // value_width == product(list_sizes) * type->byte_width() - // value_width is a power-of-2 (1, 2, 4, 8, 16, max_power_of_2_size=32) - push_1s(); - do { - // for (auto x : list_sizes) printf("%d * ", x); - // printf("(%s) %d = %2d\n", type->name().c_str(), type->byte_width(), - // value_width); - visit(type, list_sizes); - // Advance to the next test case - while (!list_sizes.empty()) { - if (next() <= max_power_of_2_size) { - push_1s(); - break; - } - pop(); - } - } while (!list_sizes.empty()); - } - } - } -}; - -} // namespace arrow::util::internal diff --git a/cpp/src/arrow/util/macros.h b/cpp/src/arrow/util/macros.h index d0c05a7908256..484df3400d92d 100644 --- a/cpp/src/arrow/util/macros.h +++ b/cpp/src/arrow/util/macros.h @@ -102,7 +102,7 @@ #elif defined(_MSC_VER) // MSVC #define ARROW_NORETURN __declspec(noreturn) #define ARROW_NOINLINE __declspec(noinline) -#define ARROW_FORCE_INLINE __declspec(forceinline) +#define ARROW_FORCE_INLINE __forceinline #define ARROW_PREDICT_FALSE(x) (x) #define ARROW_PREDICT_TRUE(x) (x) #define ARROW_PREFETCH(addr) diff --git a/cpp/src/gandiva/cache.cc b/cpp/src/gandiva/cache.cc index a1333ccdc5d43..2358b08c82424 100644 --- a/cpp/src/gandiva/cache.cc +++ b/cpp/src/gandiva/cache.cc @@ -20,26 +20,41 @@ #include "arrow/result.h" #include "arrow/util/io_util.h" #include "arrow/util/logging.h" +#include "arrow/util/value_parsing.h" namespace gandiva { -static const size_t DEFAULT_CACHE_SIZE = 5000; - -int GetCapacity() { - size_t capacity = DEFAULT_CACHE_SIZE; - auto maybe_env_cache_size = ::arrow::internal::GetEnvVar("GANDIVA_CACHE_SIZE"); - if (maybe_env_cache_size.ok()) { - const auto env_cache_size = *std::move(maybe_env_cache_size); - if (!env_cache_size.empty()) { - capacity = std::atol(env_cache_size.c_str()); - if (capacity <= 0) { - ARROW_LOG(WARNING) << "Invalid cache size provided in GANDIVA_CACHE_SIZE. 
" - << "Using default cache size: " << DEFAULT_CACHE_SIZE; - capacity = DEFAULT_CACHE_SIZE; - } - } +constexpr auto kCacheCapacityEnvVar = "GANDIVA_CACHE_SIZE"; +constexpr auto kDefaultCacheSize = 5000; + +namespace internal { +int GetCacheCapacityFromEnvVar() { + auto maybe_env_value = ::arrow::internal::GetEnvVar(kCacheCapacityEnvVar); + if (!maybe_env_value.ok()) { + return kDefaultCacheSize; + } + const auto env_value = *std::move(maybe_env_value); + if (env_value.empty()) { + return kDefaultCacheSize; + } + int capacity = 0; + bool ok = ::arrow::internal::ParseValue<::arrow::Int32Type>( + env_value.c_str(), env_value.size(), &capacity); + if (!ok || capacity <= 0) { + ARROW_LOG(WARNING) << "Invalid cache size provided in " << kCacheCapacityEnvVar + << ". Using default cache size: " << kDefaultCacheSize; + return kDefaultCacheSize; } - return static_cast(capacity); + return capacity; +} +} // namespace internal + +// Deprecated in 17.0.0. Use GetCacheCapacity instead. +int GetCapacity() { return GetCacheCapacity(); } + +int GetCacheCapacity() { + static const int capacity = internal::GetCacheCapacityFromEnvVar(); + return capacity; } void LogCacheSize(size_t capacity) { diff --git a/cpp/src/gandiva/cache.h b/cpp/src/gandiva/cache.h index 7cff9b02692ae..c19dbb7a0e30e 100644 --- a/cpp/src/gandiva/cache.h +++ b/cpp/src/gandiva/cache.h @@ -20,14 +20,27 @@ #include #include +#include "arrow/util/macros.h" #include "gandiva/lru_cache.h" #include "gandiva/visibility.h" namespace gandiva { +namespace internal { +// Only called once by GetCacheCapacity(). +// Do the actual work of getting the cache capacity from env var. +// Also makes the testing easier. +GANDIVA_EXPORT +int GetCacheCapacityFromEnvVar(); +} // namespace internal + +ARROW_DEPRECATED("Deprecated in 17.0.0. Use GetCacheCapacity instead.") GANDIVA_EXPORT int GetCapacity(); +GANDIVA_EXPORT +int GetCacheCapacity(); + GANDIVA_EXPORT void LogCacheSize(size_t capacity); @@ -36,7 +49,7 @@ class Cache { public: explicit Cache(size_t capacity) : cache_(capacity) { LogCacheSize(capacity); } - Cache() : Cache(GetCapacity()) {} + Cache() : Cache(GetCacheCapacity()) {} ValueType GetObjectCode(const KeyType& cache_key) { std::optional result; diff --git a/cpp/src/gandiva/cache_test.cc b/cpp/src/gandiva/cache_test.cc index a146707079fa6..96cf4a12e587a 100644 --- a/cpp/src/gandiva/cache_test.cc +++ b/cpp/src/gandiva/cache_test.cc @@ -16,10 +16,14 @@ // under the License. #include "gandiva/cache.h" +#include "arrow/testing/gtest_util.h" +#include "arrow/util/io_util.h" +#include "arrow/util/logging.h" #include namespace gandiva { + class TestCacheKey { public: explicit TestCacheKey(int value) : value_(value) {} @@ -38,5 +42,67 @@ TEST(TestCache, TestGetPut) { ASSERT_EQ(cache.GetObjectCode(TestCacheKey(2)), "world"); } -TEST(TestCache, TestGetCacheCapacity) { ASSERT_EQ(GetCapacity(), 5000); } +namespace { +constexpr auto cache_capacity_env_var = "GANDIVA_CACHE_SIZE"; +constexpr auto default_cache_capacity = 5000; +} // namespace + +TEST(TestCache, TestGetCacheCapacityDefault) { + ASSERT_EQ(GetCacheCapacity(), default_cache_capacity); +} + +TEST(TestCache, TestGetCacheCapacityEnvVar) { + using ::arrow::EnvVarGuard; + + // Empty. + { + EnvVarGuard guard(cache_capacity_env_var, ""); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Non-number. 
+ { + EnvVarGuard guard(cache_capacity_env_var, "invalid"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Number with invalid suffix. + { + EnvVarGuard guard(cache_capacity_env_var, "42MB"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Valid positive number. + { + EnvVarGuard guard(cache_capacity_env_var, "42"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), 42); + } + + // Int max. + { + auto str = std::to_string(std::numeric_limits::max()); + EnvVarGuard guard(cache_capacity_env_var, str.c_str()); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), std::numeric_limits::max()); + } + + // Zero. + { + EnvVarGuard guard(cache_capacity_env_var, "0"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Negative number. + { + EnvVarGuard guard(cache_capacity_env_var, "-1"); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } + + // Over int max. + { + auto str = std::to_string(static_cast(std::numeric_limits::max()) + 1); + EnvVarGuard guard(cache_capacity_env_var, str.c_str()); + ASSERT_EQ(internal::GetCacheCapacityFromEnvVar(), default_cache_capacity); + } +} + } // namespace gandiva diff --git a/cpp/src/generated/parquet_types.cpp b/cpp/src/generated/parquet_types.cpp index 8932c4a4f8d19..1ba0c4626233f 100644 --- a/cpp/src/generated/parquet_types.cpp +++ b/cpp/src/generated/parquet_types.cpp @@ -640,128 +640,6 @@ std::ostream& operator<<(std::ostream& out, const SizeStatistics& obj) } -uint32_t SizeStatistics::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes); - this->__isset.unencoded_byte_array_data_bytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->repetition_level_histogram.clear(); - uint32_t _size0; - ::apache::thrift::protocol::TType _etype3; - xfer += iprot->readListBegin(_etype3, _size0); - this->repetition_level_histogram.resize(_size0); - uint32_t _i4; - for (_i4 = 0; _i4 < _size0; ++_i4) - { - xfer += iprot->readI64(this->repetition_level_histogram[_i4]); - } - xfer += iprot->readListEnd(); - } - this->__isset.repetition_level_histogram = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->definition_level_histogram.clear(); - uint32_t _size5; - ::apache::thrift::protocol::TType _etype8; - xfer += iprot->readListBegin(_etype8, _size5); - this->definition_level_histogram.resize(_size5); - uint32_t _i9; - for (_i9 = 0; _i9 < _size5; ++_i9) - { - xfer += iprot->readI64(this->definition_level_histogram[_i9]); - } - xfer += iprot->readListEnd(); - } - this->__isset.definition_level_histogram = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t 
SizeStatistics::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SizeStatistics"); - - if (this->__isset.unencoded_byte_array_data_bytes) { - xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->unencoded_byte_array_data_bytes); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_level_histogram) { - xfer += oprot->writeFieldBegin("repetition_level_histogram", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histogram.size())); - std::vector ::const_iterator _iter10; - for (_iter10 = this->repetition_level_histogram.begin(); _iter10 != this->repetition_level_histogram.end(); ++_iter10) - { - xfer += oprot->writeI64((*_iter10)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.definition_level_histogram) { - xfer += oprot->writeFieldBegin("definition_level_histogram", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histogram.size())); - std::vector ::const_iterator _iter11; - for (_iter11 = this->definition_level_histogram.begin(); _iter11 != this->definition_level_histogram.end(); ++_iter11) - { - xfer += oprot->writeI64((*_iter11)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SizeStatistics &a, SizeStatistics &b) { using ::std::swap; swap(a.unencoded_byte_array_data_bytes, b.unencoded_byte_array_data_bytes); @@ -856,153 +734,6 @@ std::ostream& operator<<(std::ostream& out, const Statistics& obj) } -uint32_t Statistics::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max); - this->__isset.max = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min); - this->__isset.min = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->null_count); - this->__isset.null_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->distinct_count); - this->__isset.distinct_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->max_value); - this->__isset.max_value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->min_value); - this->__isset.min_value 
= true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_max_value_exact); - this->__isset.is_max_value_exact = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_min_value_exact); - this->__isset.is_min_value_exact = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Statistics::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Statistics"); - - if (this->__isset.max) { - xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->max); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min) { - xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->min); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.null_count) { - xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->null_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.distinct_count) { - xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->distinct_count); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.max_value) { - xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5); - xfer += oprot->writeBinary(this->max_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.min_value) { - xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeBinary(this->min_value); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.is_max_value_exact) { - xfer += oprot->writeFieldBegin("is_max_value_exact", ::apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_max_value_exact); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.is_min_value_exact) { - xfer += oprot->writeFieldBegin("is_min_value_exact", ::apache::thrift::protocol::T_BOOL, 8); - xfer += oprot->writeBool(this->is_min_value_exact); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(Statistics &a, Statistics &b) { using ::std::swap; swap(a.max, b.max); @@ -1087,44 +818,6 @@ std::ostream& operator<<(std::ostream& out, const StringType& obj) } -uint32_t StringType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t StringType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - 
::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("StringType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(StringType &a, StringType &b) { using ::std::swap; (void) a; @@ -1162,44 +855,6 @@ std::ostream& operator<<(std::ostream& out, const UUIDType& obj) } -uint32_t UUIDType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t UUIDType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("UUIDType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(UUIDType &a, UUIDType &b) { using ::std::swap; (void) a; @@ -1237,44 +892,6 @@ std::ostream& operator<<(std::ostream& out, const MapType& obj) } -uint32_t MapType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MapType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MapType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MapType &a, MapType &b) { using ::std::swap; (void) a; @@ -1312,44 +929,6 @@ std::ostream& operator<<(std::ostream& out, const ListType& obj) } -uint32_t ListType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ListType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ListType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ListType &a, ListType &b) { using ::std::swap; (void) a; @@ -1387,44 +966,6 @@ std::ostream& 
operator<<(std::ostream& out, const EnumType& obj) } -uint32_t EnumType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EnumType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EnumType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EnumType &a, EnumType &b) { using ::std::swap; (void) a; @@ -1462,44 +1003,6 @@ std::ostream& operator<<(std::ostream& out, const DateType& obj) } -uint32_t DateType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t DateType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DateType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DateType &a, DateType &b) { using ::std::swap; (void) a; @@ -1537,44 +1040,6 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj) } -uint32_t Float16Type::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t Float16Type::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Float16Type"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(Float16Type &a, Float16Type &b) { using ::std::swap; (void) a; @@ -1612,44 +1077,6 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj) } -uint32_t NullType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t 
fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NullType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NullType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(NullType &a, NullType &b) { using ::std::swap; (void) a; @@ -1695,79 +1122,6 @@ std::ostream& operator<<(std::ostream& out, const DecimalType& obj) } -uint32_t DecimalType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_scale = false; - bool isset_precision = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - isset_scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - isset_precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_scale) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_precision) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DecimalType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DecimalType"); - - xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DecimalType &a, DecimalType &b) { using ::std::swap; swap(a.scale, b.scale); @@ -1811,44 +1165,6 @@ std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj) } -uint32_t MilliSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t 
MilliSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MilliSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MilliSeconds &a, MilliSeconds &b) { using ::std::swap; (void) a; @@ -1886,44 +1202,6 @@ std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj) } -uint32_t MicroSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t MicroSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("MicroSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(MicroSeconds &a, MicroSeconds &b) { using ::std::swap; (void) a; @@ -1961,44 +1239,6 @@ std::ostream& operator<<(std::ostream& out, const NanoSeconds& obj) } -uint32_t NanoSeconds::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t NanoSeconds::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("NanoSeconds"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(NanoSeconds &a, NanoSeconds &b) { using ::std::swap; (void) a; @@ -2051,88 +1291,6 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj) } -uint32_t TimeUnit::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MILLIS.read(iprot); - this->__isset.MILLIS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->MICROS.read(iprot); - this->__isset.MICROS = true; - } else { - xfer += iprot->skip(ftype); - } - break; 
- case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->NANOS.read(iprot); - this->__isset.NANOS = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TimeUnit::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeUnit"); - - if (this->__isset.MILLIS) { - xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->MILLIS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MICROS) { - xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MICROS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.NANOS) { - xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->NANOS.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimeUnit &a, TimeUnit &b) { using ::std::swap; swap(a.MILLIS, b.MILLIS); @@ -2195,79 +1353,6 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj) } -uint32_t TimestampType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimestampType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimestampType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimestampType &a, TimestampType &b) { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); @@ -2319,79 +1404,6 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj) } -uint32_t 
TimeType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_isAdjustedToUTC = false; - bool isset_unit = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isAdjustedToUTC); - isset_isAdjustedToUTC = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->unit.read(iprot); - isset_unit = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_isAdjustedToUTC) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_unit) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t TimeType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TimeType"); - - xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); - xfer += oprot->writeBool(this->isAdjustedToUTC); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->unit.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TimeType &a, TimeType &b) { using ::std::swap; swap(a.isAdjustedToUTC, b.isAdjustedToUTC); @@ -2443,79 +1455,6 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj) } -uint32_t IntType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_bitWidth = false; - bool isset_isSigned = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_BYTE) { - xfer += iprot->readByte(this->bitWidth); - isset_bitWidth = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->isSigned); - isset_isSigned = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_bitWidth) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_isSigned) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t IntType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += 
oprot->writeStructBegin("IntType"); - - xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1); - xfer += oprot->writeByte(this->bitWidth); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->isSigned); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(IntType &a, IntType &b) { using ::std::swap; swap(a.bitWidth, b.bitWidth); @@ -2559,44 +1498,6 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj) } -uint32_t JsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t JsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("JsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(JsonType &a, JsonType &b) { using ::std::swap; (void) a; @@ -2634,44 +1535,6 @@ std::ostream& operator<<(std::ostream& out, const BsonType& obj) } -uint32_t BsonType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BsonType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BsonType"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BsonType &a, BsonType &b) { using ::std::swap; (void) a; @@ -2779,231 +1642,6 @@ std::ostream& operator<<(std::ostream& out, const LogicalType& obj) } -uint32_t LogicalType::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->STRING.read(iprot); - this->__isset.STRING = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) 
{ - xfer += this->MAP.read(iprot); - this->__isset.MAP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->LIST.read(iprot); - this->__isset.LIST = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENUM.read(iprot); - this->__isset.ENUM = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DECIMAL.read(iprot); - this->__isset.DECIMAL = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->DATE.read(iprot); - this->__isset.DATE = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIME.read(iprot); - this->__isset.TIME = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TIMESTAMP.read(iprot); - this->__isset.TIMESTAMP = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->INTEGER.read(iprot); - this->__isset.INTEGER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNKNOWN.read(iprot); - this->__isset.UNKNOWN = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->JSON.read(iprot); - this->__isset.JSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BSON.read(iprot); - this->__isset.BSON = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UUID.read(iprot); - this->__isset.UUID = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 15: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->FLOAT16.read(iprot); - this->__isset.FLOAT16 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t LogicalType::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("LogicalType"); - - if (this->__isset.STRING) { - xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->STRING.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.MAP) { - xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->MAP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.LIST) { - xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->LIST.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENUM) { - xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->ENUM.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DECIMAL) { - xfer += oprot->writeFieldBegin("DECIMAL", 
::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->DECIMAL.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.DATE) { - xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->DATE.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIME) { - xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->TIME.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.TIMESTAMP) { - xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->TIMESTAMP.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.INTEGER) { - xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->INTEGER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UNKNOWN) { - xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11); - xfer += this->UNKNOWN.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.JSON) { - xfer += oprot->writeFieldBegin("JSON", ::apache::thrift::protocol::T_STRUCT, 12); - xfer += this->JSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.BSON) { - xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13); - xfer += this->BSON.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.UUID) { - xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14); - xfer += this->UUID.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.FLOAT16) { - xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15); - xfer += this->FLOAT16.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(LogicalType &a, LogicalType &b) { using ::std::swap; swap(a.STRING, b.STRING); @@ -3173,187 +1811,6 @@ std::ostream& operator<<(std::ostream& out, const SchemaElement& obj) } -uint32_t SchemaElement::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_name = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast96; - xfer += iprot->readI32(ecast96); - this->type = static_cast(ecast96); - this->__isset.type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->type_length); - this->__isset.type_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast97; - xfer += iprot->readI32(ecast97); - this->repetition_type = static_cast(ecast97); - this->__isset.repetition_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->name); - isset_name = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == 
::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_children); - this->__isset.num_children = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast98; - xfer += iprot->readI32(ecast98); - this->converted_type = static_cast(ecast98); - this->__isset.converted_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->scale); - this->__isset.scale = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->precision); - this->__isset.precision = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->field_id); - this->__isset.field_id = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->logicalType.read(iprot); - this->__isset.logicalType = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_name) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SchemaElement::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SchemaElement"); - - if (this->__isset.type) { - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.type_length) { - xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->type_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_type) { - xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(static_cast(this->repetition_type)); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4); - xfer += oprot->writeString(this->name); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.num_children) { - xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->num_children); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.converted_type) { - xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(static_cast(this->converted_type)); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.scale) { - xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->scale); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.precision) { - xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8); - xfer += oprot->writeI32(this->precision); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.field_id) { - xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9); - xfer += oprot->writeI32(this->field_id); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.logicalType) { - xfer += 
oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10); - xfer += this->logicalType.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SchemaElement &a, SchemaElement &b) { using ::std::swap; swap(a.type, b.type); @@ -3471,128 +1928,6 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj) } -uint32_t DataPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - bool isset_definition_level_encoding = false; - bool isset_repetition_level_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast103; - xfer += iprot->readI32(ecast103); - this->encoding = static_cast(ecast103); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast104; - xfer += iprot->readI32(ecast104); - this->definition_level_encoding = static_cast(ecast104); - isset_definition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast105; - xfer += iprot->readI32(ecast105); - this->repetition_level_encoding = static_cast(ecast105); - isset_repetition_level_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_level_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(static_cast(this->definition_level_encoding)); - 
xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->repetition_level_encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DataPageHeader &a, DataPageHeader &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -3659,44 +1994,6 @@ std::ostream& operator<<(std::ostream& out, const IndexPageHeader& obj) } -uint32_t IndexPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t IndexPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("IndexPageHeader"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(IndexPageHeader &a, IndexPageHeader &b) { using ::std::swap; (void) a; @@ -3747,94 +2044,6 @@ std::ostream& operator<<(std::ostream& out, const DictionaryPageHeader& obj) } -uint32_t DictionaryPageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_encoding = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast114; - xfer += iprot->readI32(ecast114); - this->encoding = static_cast(ecast114); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_sorted); - this->__isset.is_sorted = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DictionaryPageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker 
tracker(*oprot); - xfer += oprot->writeStructBegin("DictionaryPageHeader"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_sorted) { - xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->is_sorted); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DictionaryPageHeader &a, DictionaryPageHeader &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -3923,167 +2132,6 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj) } -uint32_t DataPageHeaderV2::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_num_values = false; - bool isset_num_nulls = false; - bool isset_num_rows = false; - bool isset_encoding = false; - bool isset_definition_levels_byte_length = false; - bool isset_repetition_levels_byte_length = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_nulls); - isset_num_nulls = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast119; - xfer += iprot->readI32(ecast119); - this->encoding = static_cast(ecast119); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->definition_levels_byte_length); - isset_definition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->repetition_levels_byte_length); - isset_repetition_levels_byte_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->is_compressed); - this->__isset.is_compressed = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_nulls) - 
throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_definition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_repetition_levels_byte_length) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t DataPageHeaderV2::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("DataPageHeaderV2"); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->num_nulls); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->definition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6); - xfer += oprot->writeI32(this->repetition_levels_byte_length); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.is_compressed) { - xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7); - xfer += oprot->writeBool(this->is_compressed); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(DataPageHeaderV2 &a, DataPageHeaderV2 &b) { using ::std::swap; swap(a.num_values, b.num_values); @@ -4168,44 +2216,6 @@ std::ostream& operator<<(std::ostream& out, const SplitBlockAlgorithm& obj) } -uint32_t SplitBlockAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t SplitBlockAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SplitBlockAlgorithm"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SplitBlockAlgorithm &a, SplitBlockAlgorithm &b) { using ::std::swap; (void) a; @@ -4248,62 
+2258,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj) } -uint32_t BloomFilterAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->BLOCK.read(iprot); - this->__isset.BLOCK = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterAlgorithm"); - - if (this->__isset.BLOCK) { - xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->BLOCK.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterAlgorithm &a, BloomFilterAlgorithm &b) { using ::std::swap; swap(a.BLOCK, b.BLOCK); @@ -4346,44 +2300,6 @@ std::ostream& operator<<(std::ostream& out, const XxHash& obj) } -uint32_t XxHash::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t XxHash::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("XxHash"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(XxHash &a, XxHash &b) { using ::std::swap; (void) a; @@ -4426,62 +2342,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj) } -uint32_t BloomFilterHash::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->XXHASH.read(iprot); - this->__isset.XXHASH = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += 
iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterHash::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHash"); - - if (this->__isset.XXHASH) { - xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->XXHASH.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterHash &a, BloomFilterHash &b) { using ::std::swap; swap(a.XXHASH, b.XXHASH); @@ -4511,57 +2371,19 @@ void BloomFilterHash::printTo(std::ostream& out) const { out << "BloomFilterHash("; out << "XXHASH="; (__isset.XXHASH ? (out << to_string(XXHASH)) : (out << "")); out << ")"; -} - - -Uncompressed::~Uncompressed() noexcept { -} - -std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) -{ - obj.printTo(out); - return out; -} - - -uint32_t Uncompressed::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } +} - xfer += iprot->readStructEnd(); - return xfer; +Uncompressed::~Uncompressed() noexcept { } -uint32_t Uncompressed::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("Uncompressed"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; +std::ostream& operator<<(std::ostream& out, const Uncompressed& obj) +{ + obj.printTo(out); + return out; } + void swap(Uncompressed &a, Uncompressed &b) { using ::std::swap; (void) a; @@ -4604,62 +2426,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj) } -uint32_t BloomFilterCompression::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->UNCOMPRESSED.read(iprot); - this->__isset.UNCOMPRESSED = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t BloomFilterCompression::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterCompression"); - - if (this->__isset.UNCOMPRESSED) { - xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += 
this->UNCOMPRESSED.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterCompression &a, BloomFilterCompression &b) { using ::std::swap; swap(a.UNCOMPRESSED, b.UNCOMPRESSED); @@ -4718,109 +2484,6 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHeader& obj) } -uint32_t BloomFilterHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_numBytes = false; - bool isset_algorithm = false; - bool isset_hash = false; - bool isset_compression = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->numBytes); - isset_numBytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->algorithm.read(iprot); - isset_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->hash.read(iprot); - isset_hash = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->compression.read(iprot); - isset_compression = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_numBytes) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_hash) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compression) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t BloomFilterHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("BloomFilterHeader"); - - xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->numBytes); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->hash.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4); - xfer += this->compression.write(oprot); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(BloomFilterHeader &a, BloomFilterHeader &b) { using ::std::swap; swap(a.numBytes, b.numBytes); @@ -4913,161 +2576,6 @@ std::ostream& operator<<(std::ostream& out, const PageHeader& obj) } -uint32_t PageHeader::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker 
tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_uncompressed_page_size = false; - bool isset_compressed_page_size = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast152; - xfer += iprot->readI32(ecast152); - this->type = static_cast(ecast152); - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->uncompressed_page_size); - isset_uncompressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->crc); - this->__isset.crc = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header.read(iprot); - this->__isset.data_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->index_page_header.read(iprot); - this->__isset.index_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->dictionary_page_header.read(iprot); - this->__isset.dictionary_page_header = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->data_page_header_v2.read(iprot); - this->__isset.data_page_header_v2 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_uncompressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageHeader::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageHeader"); - - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->uncompressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.crc) { - xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(this->crc); - xfer += oprot->writeFieldEnd(); - } - if 
(this->__isset.data_page_header) { - xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5); - xfer += this->data_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.index_page_header) { - xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6); - xfer += this->index_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.dictionary_page_header) { - xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7); - xfer += this->dictionary_page_header.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.data_page_header_v2) { - xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->data_page_header_v2.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageHeader &a, PageHeader &b) { using ::std::swap; swap(a.type, b.type); @@ -5161,77 +2669,6 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj) } -uint32_t KeyValue::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_key = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->key); - isset_key = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->value); - this->__isset.value = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_key) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t KeyValue::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("KeyValue"); - - xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->key); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.value) { - xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeString(this->value); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(KeyValue &a, KeyValue &b) { using ::std::swap; swap(a.key, b.key); @@ -5292,94 +2729,6 @@ std::ostream& operator<<(std::ostream& out, const SortingColumn& obj) } -uint32_t SortingColumn::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_column_idx = false; - bool isset_descending = false; 
- bool isset_nulls_first = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_idx); - isset_column_idx = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->descending); - isset_descending = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->nulls_first); - isset_nulls_first = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_column_idx) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_descending) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_nulls_first) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t SortingColumn::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("SortingColumn"); - - xfer += oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->column_idx); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2); - xfer += oprot->writeBool(this->descending); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->nulls_first); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(SortingColumn &a, SortingColumn &b) { using ::std::swap; swap(a.column_idx, b.column_idx); @@ -5441,98 +2790,6 @@ std::ostream& operator<<(std::ostream& out, const PageEncodingStats& obj) } -uint32_t PageEncodingStats::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_page_type = false; - bool isset_encoding = false; - bool isset_count = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast165; - xfer += iprot->readI32(ecast165); - this->page_type = static_cast(ecast165); - isset_page_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast166; - xfer += iprot->readI32(ecast166); - this->encoding = static_cast(ecast166); - isset_encoding = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->count); - isset_count = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - 
- xfer += iprot->readStructEnd(); - - if (!isset_page_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encoding) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_count) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageEncodingStats::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageEncodingStats"); - - xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->page_type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(static_cast(this->encoding)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3); - xfer += oprot->writeI32(this->count); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageEncodingStats &a, PageEncodingStats &b) { using ::std::swap; swap(a.page_type, b.page_type); @@ -5654,359 +2911,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj) } -uint32_t ColumnMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_type = false; - bool isset_encodings = false; - bool isset_path_in_schema = false; - bool isset_codec = false; - bool isset_num_values = false; - bool isset_total_uncompressed_size = false; - bool isset_total_compressed_size = false; - bool isset_data_page_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast171; - xfer += iprot->readI32(ecast171); - this->type = static_cast(ecast171); - isset_type = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->encodings.clear(); - uint32_t _size172; - ::apache::thrift::protocol::TType _etype175; - xfer += iprot->readListBegin(_etype175, _size172); - this->encodings.resize(_size172); - uint32_t _i176; - for (_i176 = 0; _i176 < _size172; ++_i176) - { - int32_t ecast177; - xfer += iprot->readI32(ecast177); - this->encodings[_i176] = static_cast(ecast177); - } - xfer += iprot->readListEnd(); - } - isset_encodings = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size178; - ::apache::thrift::protocol::TType _etype181; - xfer += iprot->readListBegin(_etype181, _size178); - this->path_in_schema.resize(_size178); - uint32_t _i182; - for (_i182 = 0; _i182 < _size178; ++_i182) - { - xfer += iprot->readString(this->path_in_schema[_i182]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast183; - xfer += iprot->readI32(ecast183); 
- this->codec = static_cast(ecast183); - isset_codec = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_values); - isset_num_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_uncompressed_size); - isset_total_uncompressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - isset_total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size184; - ::apache::thrift::protocol::TType _etype187; - xfer += iprot->readListBegin(_etype187, _size184); - this->key_value_metadata.resize(_size184); - uint32_t _i188; - for (_i188 = 0; _i188 < _size184; ++_i188) - { - xfer += this->key_value_metadata[_i188].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->data_page_offset); - isset_data_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 10: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->index_page_offset); - this->__isset.index_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 11: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->dictionary_page_offset); - this->__isset.dictionary_page_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 12: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->statistics.read(iprot); - this->__isset.statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 13: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->encoding_stats.clear(); - uint32_t _size189; - ::apache::thrift::protocol::TType _etype192; - xfer += iprot->readListBegin(_etype192, _size189); - this->encoding_stats.resize(_size189); - uint32_t _i193; - for (_i193 = 0; _i193 < _size189; ++_i193) - { - xfer += this->encoding_stats[_i193].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.encoding_stats = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 14: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->bloom_filter_offset); - this->__isset.bloom_filter_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 15: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->bloom_filter_length); - this->__isset.bloom_filter_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 16: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->size_statistics.read(iprot); - this->__isset.size_statistics = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_type) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_encodings) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_path_in_schema) - throw 
TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_codec) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_uncompressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_compressed_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_data_page_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnMetaData"); - - xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(static_cast(this->type)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); - std::vector ::const_iterator _iter194; - for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194) - { - xfer += oprot->writeI32(static_cast((*_iter194))); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter195; - for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195) - { - xfer += oprot->writeString((*_iter195)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->codec)); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->num_values); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_uncompressed_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter196; - for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196) - { - xfer += (*_iter196).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9); - xfer += oprot->writeI64(this->data_page_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.index_page_offset) { - xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10); - xfer += oprot->writeI64(this->index_page_offset); - xfer += oprot->writeFieldEnd(); - } - if 
(this->__isset.dictionary_page_offset) { - xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11); - xfer += oprot->writeI64(this->dictionary_page_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.statistics) { - xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12); - xfer += this->statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encoding_stats) { - xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); - std::vector ::const_iterator _iter197; - for (_iter197 = this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197) - { - xfer += (*_iter197).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.bloom_filter_offset) { - xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14); - xfer += oprot->writeI64(this->bloom_filter_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.bloom_filter_length) { - xfer += oprot->writeFieldBegin("bloom_filter_length", ::apache::thrift::protocol::T_I32, 15); - xfer += oprot->writeI32(this->bloom_filter_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.size_statistics) { - xfer += oprot->writeFieldBegin("size_statistics", ::apache::thrift::protocol::T_STRUCT, 16); - xfer += this->size_statistics.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnMetaData &a, ColumnMetaData &b) { using ::std::swap; swap(a.type, b.type); @@ -6139,44 +3043,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithFooterKey& obj) } -uint32_t EncryptionWithFooterKey::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionWithFooterKey::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EncryptionWithFooterKey &a, EncryptionWithFooterKey &b) { using ::std::swap; (void) a; @@ -6223,97 +3089,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionWithColumnKey& obj) } -uint32_t EncryptionWithColumnKey::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_path_in_schema = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); 
- if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->path_in_schema.clear(); - uint32_t _size206; - ::apache::thrift::protocol::TType _etype209; - xfer += iprot->readListBegin(_etype209, _size206); - this->path_in_schema.resize(_size206); - uint32_t _i210; - for (_i210 = 0; _i210 < _size206; ++_i210) - { - xfer += iprot->readString(this->path_in_schema[_i210]); - } - xfer += iprot->readListEnd(); - } - isset_path_in_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_path_in_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t EncryptionWithColumnKey::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); - - xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); - std::vector ::const_iterator _iter211; - for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211) - { - xfer += oprot->writeString((*_iter211)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EncryptionWithColumnKey &a, EncryptionWithColumnKey &b) { using ::std::swap; swap(a.path_in_schema, b.path_in_schema); @@ -6372,75 +3147,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnCryptoMetaData& obj) } -uint32_t ColumnCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); - this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - 
::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); - - if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { - xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnCryptoMetaData &a, ColumnCryptoMetaData &b) { using ::std::swap; swap(a.ENCRYPTION_WITH_FOOTER_KEY, b.ENCRYPTION_WITH_FOOTER_KEY); @@ -6533,168 +3239,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnChunk& obj) } -uint32_t ColumnChunk::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_file_offset = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->file_path); - this->__isset.file_path = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - isset_file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->meta_data.read(iprot); - this->__isset.meta_data = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset_index_offset); - this->__isset.offset_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->offset_index_length); - this->__isset.offset_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->column_index_offset); - this->__isset.column_index_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->column_index_length); - this->__isset.column_index_length = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->crypto_metadata.read(iprot); - this->__isset.crypto_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->encrypted_column_metadata); - this->__isset.encrypted_column_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_file_offset) - throw 
TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t ColumnChunk::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnChunk"); - - if (this->__isset.file_path) { - xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeString(this->file_path); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.meta_data) { - xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3); - xfer += this->meta_data.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_offset) { - xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4); - xfer += oprot->writeI64(this->offset_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.offset_index_length) { - xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5); - xfer += oprot->writeI32(this->offset_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_offset) { - xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->column_index_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_index_length) { - xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7); - xfer += oprot->writeI32(this->column_index_length); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.crypto_metadata) { - xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->crypto_metadata.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encrypted_column_metadata) { - xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->encrypted_column_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnChunk &a, ColumnChunk &b) { using ::std::swap; swap(a.file_path, b.file_path); @@ -6817,186 +3361,6 @@ std::ostream& operator<<(std::ostream& out, const RowGroup& obj) } -uint32_t RowGroup::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_columns = false; - bool isset_total_byte_size = false; - bool isset_num_rows = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->columns.clear(); - uint32_t _size224; - ::apache::thrift::protocol::TType _etype227; - xfer += iprot->readListBegin(_etype227, _size224); - this->columns.resize(_size224); - uint32_t _i228; - for (_i228 = 0; _i228 < _size224; ++_i228) - { - xfer += this->columns[_i228].read(iprot); - } - xfer += iprot->readListEnd(); - } - 
isset_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_byte_size); - isset_total_byte_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->sorting_columns.clear(); - uint32_t _size229; - ::apache::thrift::protocol::TType _etype232; - xfer += iprot->readListBegin(_etype232, _size229); - this->sorting_columns.resize(_size229); - uint32_t _i233; - for (_i233 = 0; _i233 < _size229; ++_i233) - { - xfer += this->sorting_columns[_i233].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.sorting_columns = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->file_offset); - this->__isset.file_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->total_compressed_size); - this->__isset.total_compressed_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_I16) { - xfer += iprot->readI16(this->ordinal); - this->__isset.ordinal = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_columns) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_total_byte_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t RowGroup::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("RowGroup"); - - xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); - std::vector ::const_iterator _iter234; - for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234) - { - xfer += (*_iter234).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2); - xfer += oprot->writeI64(this->total_byte_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.sorting_columns) { - xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); - std::vector ::const_iterator _iter235; - for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235) - { - xfer += (*_iter235).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.file_offset) { - xfer += 
oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5); - xfer += oprot->writeI64(this->file_offset); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.total_compressed_size) { - xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6); - xfer += oprot->writeI64(this->total_compressed_size); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.ordinal) { - xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7); - xfer += oprot->writeI16(this->ordinal); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(RowGroup &a, RowGroup &b) { using ::std::swap; swap(a.columns, b.columns); @@ -7075,44 +3439,6 @@ std::ostream& operator<<(std::ostream& out, const TypeDefinedOrder& obj) } -uint32_t TypeDefinedOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - xfer += iprot->skip(ftype); - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t TypeDefinedOrder::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("TypeDefinedOrder"); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(TypeDefinedOrder &a, TypeDefinedOrder &b) { using ::std::swap; (void) a; @@ -7155,62 +3481,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj) } -uint32_t ColumnOrder::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->TYPE_ORDER.read(iprot); - this->__isset.TYPE_ORDER = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t ColumnOrder::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnOrder"); - - if (this->__isset.TYPE_ORDER) { - xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->TYPE_ORDER.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnOrder &a, ColumnOrder &b) { using ::std::swap; swap(a.TYPE_ORDER, b.TYPE_ORDER); @@ -7265,94 +3535,6 @@ std::ostream& operator<<(std::ostream& out, const PageLocation& obj) } 
-uint32_t PageLocation::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_offset = false; - bool isset_compressed_page_size = false; - bool isset_first_row_index = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->offset); - isset_offset = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->compressed_page_size); - isset_compressed_page_size = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->first_row_index); - isset_first_row_index = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_offset) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_compressed_page_size) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_first_row_index) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t PageLocation::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("PageLocation"); - - xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1); - xfer += oprot->writeI64(this->offset); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2); - xfer += oprot->writeI32(this->compressed_page_size); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->first_row_index); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(PageLocation &a, PageLocation &b) { using ::std::swap; swap(a.offset, b.offset); @@ -7411,117 +3593,6 @@ std::ostream& operator<<(std::ostream& out, const OffsetIndex& obj) } -uint32_t OffsetIndex::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_page_locations = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->page_locations.clear(); - uint32_t _size252; - ::apache::thrift::protocol::TType _etype255; - xfer += iprot->readListBegin(_etype255, _size252); - this->page_locations.resize(_size252); - uint32_t _i256; - for (_i256 = 0; _i256 < _size252; ++_i256) - { - xfer += 
this->page_locations[_i256].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_page_locations = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->unencoded_byte_array_data_bytes.clear(); - uint32_t _size257; - ::apache::thrift::protocol::TType _etype260; - xfer += iprot->readListBegin(_etype260, _size257); - this->unencoded_byte_array_data_bytes.resize(_size257); - uint32_t _i261; - for (_i261 = 0; _i261 < _size257; ++_i261) - { - xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]); - } - xfer += iprot->readListEnd(); - } - this->__isset.unencoded_byte_array_data_bytes = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_page_locations) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t OffsetIndex::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("OffsetIndex"); - - xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); - std::vector ::const_iterator _iter262; - for (_iter262 = this->page_locations.begin(); _iter262 != this->page_locations.end(); ++_iter262) - { - xfer += (*_iter262).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.unencoded_byte_array_data_bytes) { - xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); - std::vector ::const_iterator _iter263; - for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263) - { - xfer += oprot->writeI64((*_iter263)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(OffsetIndex &a, OffsetIndex &b) { using ::std::swap; swap(a.page_locations, b.page_locations); @@ -7601,270 +3672,6 @@ std::ostream& operator<<(std::ostream& out, const ColumnIndex& obj) } -uint32_t ColumnIndex::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_null_pages = false; - bool isset_min_values = false; - bool isset_max_values = false; - bool isset_boundary_order = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->null_pages.clear(); - uint32_t _size268; - ::apache::thrift::protocol::TType _etype271; - xfer += iprot->readListBegin(_etype271, _size268); - this->null_pages.resize(_size268); - uint32_t _i272; - for (_i272 = 0; _i272 < _size268; ++_i272) - { - xfer += 
iprot->readBool(this->null_pages[_i272]); - } - xfer += iprot->readListEnd(); - } - isset_null_pages = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->min_values.clear(); - uint32_t _size273; - ::apache::thrift::protocol::TType _etype276; - xfer += iprot->readListBegin(_etype276, _size273); - this->min_values.resize(_size273); - uint32_t _i277; - for (_i277 = 0; _i277 < _size273; ++_i277) - { - xfer += iprot->readBinary(this->min_values[_i277]); - } - xfer += iprot->readListEnd(); - } - isset_min_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->max_values.clear(); - uint32_t _size278; - ::apache::thrift::protocol::TType _etype281; - xfer += iprot->readListBegin(_etype281, _size278); - this->max_values.resize(_size278); - uint32_t _i282; - for (_i282 = 0; _i282 < _size278; ++_i282) - { - xfer += iprot->readBinary(this->max_values[_i282]); - } - xfer += iprot->readListEnd(); - } - isset_max_values = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_I32) { - int32_t ecast283; - xfer += iprot->readI32(ecast283); - this->boundary_order = static_cast(ecast283); - isset_boundary_order = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->null_counts.clear(); - uint32_t _size284; - ::apache::thrift::protocol::TType _etype287; - xfer += iprot->readListBegin(_etype287, _size284); - this->null_counts.resize(_size284); - uint32_t _i288; - for (_i288 = 0; _i288 < _size284; ++_i288) - { - xfer += iprot->readI64(this->null_counts[_i288]); - } - xfer += iprot->readListEnd(); - } - this->__isset.null_counts = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->repetition_level_histograms.clear(); - uint32_t _size289; - ::apache::thrift::protocol::TType _etype292; - xfer += iprot->readListBegin(_etype292, _size289); - this->repetition_level_histograms.resize(_size289); - uint32_t _i293; - for (_i293 = 0; _i293 < _size289; ++_i293) - { - xfer += iprot->readI64(this->repetition_level_histograms[_i293]); - } - xfer += iprot->readListEnd(); - } - this->__isset.repetition_level_histograms = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->definition_level_histograms.clear(); - uint32_t _size294; - ::apache::thrift::protocol::TType _etype297; - xfer += iprot->readListBegin(_etype297, _size294); - this->definition_level_histograms.resize(_size294); - uint32_t _i298; - for (_i298 = 0; _i298 < _size294; ++_i298) - { - xfer += iprot->readI64(this->definition_level_histograms[_i298]); - } - xfer += iprot->readListEnd(); - } - this->__isset.definition_level_histograms = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_null_pages) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_min_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_max_values) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_boundary_order) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t 
ColumnIndex::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("ColumnIndex"); - - xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); - std::vector ::const_iterator _iter299; - for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299) - { - xfer += oprot->writeBool((*_iter299)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("min_values", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); - std::vector ::const_iterator _iter300; - for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300) - { - xfer += oprot->writeBinary((*_iter300)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); - std::vector ::const_iterator _iter301; - for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301) - { - xfer += oprot->writeBinary((*_iter301)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4); - xfer += oprot->writeI32(static_cast(this->boundary_order)); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.null_counts) { - xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); - std::vector ::const_iterator _iter302; - for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302) - { - xfer += oprot->writeI64((*_iter302)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.repetition_level_histograms) { - xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); - std::vector ::const_iterator _iter303; - for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303) - { - xfer += oprot->writeI64((*_iter303)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.definition_level_histograms) { - xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); - std::vector ::const_iterator _iter304; - for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304) - { - xfer += oprot->writeI64((*_iter304)); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(ColumnIndex &a, ColumnIndex &b) { using 
::std::swap; swap(a.null_pages, b.null_pages); @@ -7958,88 +3765,6 @@ std::ostream& operator<<(std::ostream& out, const AesGcmV1& obj) } -uint32_t AesGcmV1::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmV1::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(AesGcmV1 &a, AesGcmV1 &b) { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); @@ -8109,88 +3834,6 @@ std::ostream& operator<<(std::ostream& out, const AesGcmCtrV1& obj) } -uint32_t AesGcmCtrV1::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_prefix); - this->__isset.aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->aad_file_unique); - this->__isset.aad_file_unique = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_BOOL) { - 
xfer += iprot->readBool(this->supply_aad_prefix); - this->__isset.supply_aad_prefix = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t AesGcmCtrV1::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("AesGcmCtrV1"); - - if (this->__isset.aad_prefix) { - xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); - xfer += oprot->writeBinary(this->aad_prefix); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.aad_file_unique) { - xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->aad_file_unique); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.supply_aad_prefix) { - xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); - xfer += oprot->writeBool(this->supply_aad_prefix); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(AesGcmCtrV1 &a, AesGcmCtrV1 &b) { using ::std::swap; swap(a.aad_prefix, b.aad_prefix); @@ -8255,75 +3898,6 @@ std::ostream& operator<<(std::ostream& out, const EncryptionAlgorithm& obj) } -uint32_t EncryptionAlgorithm::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_V1.read(iprot); - this->__isset.AES_GCM_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->AES_GCM_CTR_V1.read(iprot); - this->__isset.AES_GCM_CTR_V1 = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - return xfer; -} - -uint32_t EncryptionAlgorithm::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("EncryptionAlgorithm"); - - if (this->__isset.AES_GCM_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->AES_GCM_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.AES_GCM_CTR_V1) { - xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2); - xfer += this->AES_GCM_CTR_V1.write(oprot); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(EncryptionAlgorithm &a, EncryptionAlgorithm &b) { using ::std::swap; swap(a.AES_GCM_V1, b.AES_GCM_V1); @@ -8413,254 +3987,6 @@ std::ostream& operator<<(std::ostream& out, const FileMetaData& obj) } -uint32_t 
FileMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_version = false; - bool isset_schema = false; - bool isset_num_rows = false; - bool isset_row_groups = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_I32) { - xfer += iprot->readI32(this->version); - isset_version = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->schema.clear(); - uint32_t _size321; - ::apache::thrift::protocol::TType _etype324; - xfer += iprot->readListBegin(_etype324, _size321); - this->schema.resize(_size321); - uint32_t _i325; - for (_i325 = 0; _i325 < _size321; ++_i325) - { - xfer += this->schema[_i325].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_schema = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 3: - if (ftype == ::apache::thrift::protocol::T_I64) { - xfer += iprot->readI64(this->num_rows); - isset_num_rows = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 4: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->row_groups.clear(); - uint32_t _size326; - ::apache::thrift::protocol::TType _etype329; - xfer += iprot->readListBegin(_etype329, _size326); - this->row_groups.resize(_size326); - uint32_t _i330; - for (_i330 = 0; _i330 < _size326; ++_i330) - { - xfer += this->row_groups[_i330].read(iprot); - } - xfer += iprot->readListEnd(); - } - isset_row_groups = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 5: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->key_value_metadata.clear(); - uint32_t _size331; - ::apache::thrift::protocol::TType _etype334; - xfer += iprot->readListBegin(_etype334, _size331); - this->key_value_metadata.resize(_size331); - uint32_t _i335; - for (_i335 = 0; _i335 < _size331; ++_i335) - { - xfer += this->key_value_metadata[_i335].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.key_value_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 6: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readString(this->created_by); - this->__isset.created_by = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 7: - if (ftype == ::apache::thrift::protocol::T_LIST) { - { - this->column_orders.clear(); - uint32_t _size336; - ::apache::thrift::protocol::TType _etype339; - xfer += iprot->readListBegin(_etype339, _size336); - this->column_orders.resize(_size336); - uint32_t _i340; - for (_i340 = 0; _i340 < _size336; ++_i340) - { - xfer += this->column_orders[_i340].read(iprot); - } - xfer += iprot->readListEnd(); - } - this->__isset.column_orders = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 8: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - this->__isset.encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 9: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->footer_signing_key_metadata); - 
this->__isset.footer_signing_key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_version) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_schema) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_num_rows) - throw TProtocolException(TProtocolException::INVALID_DATA); - if (!isset_row_groups) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileMetaData"); - - xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); - xfer += oprot->writeI32(this->version); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); - std::vector ::const_iterator _iter341; - for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) - { - xfer += (*_iter341).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); - xfer += oprot->writeI64(this->num_rows); - xfer += oprot->writeFieldEnd(); - - xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); - std::vector ::const_iterator _iter342; - for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) - { - xfer += (*_iter342).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_value_metadata) { - xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); - std::vector ::const_iterator _iter343; - for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) - { - xfer += (*_iter343).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.created_by) { - xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6); - xfer += oprot->writeString(this->created_by); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.column_orders) { - xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); - { - xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); - std::vector ::const_iterator _iter344; - for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) - { - xfer += (*_iter344).write(oprot); - } - xfer += oprot->writeListEnd(); - } - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.encryption_algorithm) { - xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - } - if (this->__isset.footer_signing_key_metadata) { - xfer 
+= oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9); - xfer += oprot->writeBinary(this->footer_signing_key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(FileMetaData &a, FileMetaData &b) { using ::std::swap; swap(a.version, b.version); @@ -8760,77 +4086,6 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj) } -uint32_t FileCryptoMetaData::read(::apache::thrift::protocol::TProtocol* iprot) { - - ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); - uint32_t xfer = 0; - std::string fname; - ::apache::thrift::protocol::TType ftype; - int16_t fid; - - xfer += iprot->readStructBegin(fname); - - using ::apache::thrift::protocol::TProtocolException; - - bool isset_encryption_algorithm = false; - - while (true) - { - xfer += iprot->readFieldBegin(fname, ftype, fid); - if (ftype == ::apache::thrift::protocol::T_STOP) { - break; - } - switch (fid) - { - case 1: - if (ftype == ::apache::thrift::protocol::T_STRUCT) { - xfer += this->encryption_algorithm.read(iprot); - isset_encryption_algorithm = true; - } else { - xfer += iprot->skip(ftype); - } - break; - case 2: - if (ftype == ::apache::thrift::protocol::T_STRING) { - xfer += iprot->readBinary(this->key_metadata); - this->__isset.key_metadata = true; - } else { - xfer += iprot->skip(ftype); - } - break; - default: - xfer += iprot->skip(ftype); - break; - } - xfer += iprot->readFieldEnd(); - } - - xfer += iprot->readStructEnd(); - - if (!isset_encryption_algorithm) - throw TProtocolException(TProtocolException::INVALID_DATA); - return xfer; -} - -uint32_t FileCryptoMetaData::write(::apache::thrift::protocol::TProtocol* oprot) const { - uint32_t xfer = 0; - ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); - xfer += oprot->writeStructBegin("FileCryptoMetaData"); - - xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1); - xfer += this->encryption_algorithm.write(oprot); - xfer += oprot->writeFieldEnd(); - - if (this->__isset.key_metadata) { - xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); - xfer += oprot->writeBinary(this->key_metadata); - xfer += oprot->writeFieldEnd(); - } - xfer += oprot->writeFieldStop(); - xfer += oprot->writeStructEnd(); - return xfer; -} - void swap(FileCryptoMetaData &a, FileCryptoMetaData &b) { using ::std::swap; swap(a.encryption_algorithm, b.encryption_algorithm); diff --git a/cpp/src/generated/parquet_types.h b/cpp/src/generated/parquet_types.h index 9dc6794c4030b..6cf85fe5e73cc 100644 --- a/cpp/src/generated/parquet_types.h +++ b/cpp/src/generated/parquet_types.h @@ -466,7 +466,7 @@ typedef struct _SizeStatistics__isset { * in this structure can help determine the number of nulls at a particular * nesting level and maximum length of lists). 
*/ -class SizeStatistics : public virtual ::apache::thrift::TBase { +class SizeStatistics { public: SizeStatistics(const SizeStatistics&); @@ -546,8 +546,10 @@ class SizeStatistics : public virtual ::apache::thrift::TBase { bool operator < (const SizeStatistics & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -572,7 +574,7 @@ typedef struct _Statistics__isset { * Statistics per row group and per page * All fields are optional. */ -class Statistics : public virtual ::apache::thrift::TBase { +class Statistics { public: Statistics(const Statistics&); @@ -697,8 +699,10 @@ class Statistics : public virtual ::apache::thrift::TBase { bool operator < (const Statistics & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -711,7 +715,7 @@ std::ostream& operator<<(std::ostream& out, const Statistics& obj); /** * Empty structs to use as logical type annotations */ -class StringType : public virtual ::apache::thrift::TBase { +class StringType { public: StringType(const StringType&) noexcept; @@ -733,8 +737,10 @@ class StringType : public virtual ::apache::thrift::TBase { bool operator < (const StringType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -744,7 +750,7 @@ void swap(StringType &a, StringType &b); std::ostream& operator<<(std::ostream& out, const StringType& obj); -class UUIDType : public virtual ::apache::thrift::TBase { +class UUIDType { public: UUIDType(const UUIDType&) noexcept; @@ -766,8 +772,10 @@ class UUIDType : public virtual ::apache::thrift::TBase { bool operator < (const UUIDType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -777,7 +785,7 @@ void swap(UUIDType &a, UUIDType &b); std::ostream& operator<<(std::ostream& out, const UUIDType& obj); -class MapType : public virtual ::apache::thrift::TBase { +class MapType { public: MapType(const MapType&) noexcept; @@ -799,8 +807,10 @@ class MapType : public virtual ::apache::thrift::TBase { bool operator < (const MapType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -810,7 +820,7 @@ void swap(MapType &a, MapType &b); std::ostream& operator<<(std::ostream& out, const MapType& obj); -class ListType : public virtual ::apache::thrift::TBase { +class ListType { public: ListType(const ListType&) noexcept; @@ -832,8 +842,10 @@ class ListType 
: public virtual ::apache::thrift::TBase { bool operator < (const ListType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -843,7 +855,7 @@ void swap(ListType &a, ListType &b); std::ostream& operator<<(std::ostream& out, const ListType& obj); -class EnumType : public virtual ::apache::thrift::TBase { +class EnumType { public: EnumType(const EnumType&) noexcept; @@ -865,8 +877,10 @@ class EnumType : public virtual ::apache::thrift::TBase { bool operator < (const EnumType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -876,7 +890,7 @@ void swap(EnumType &a, EnumType &b); std::ostream& operator<<(std::ostream& out, const EnumType& obj); -class DateType : public virtual ::apache::thrift::TBase { +class DateType { public: DateType(const DateType&) noexcept; @@ -898,8 +912,10 @@ class DateType : public virtual ::apache::thrift::TBase { bool operator < (const DateType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -909,7 +925,7 @@ void swap(DateType &a, DateType &b); std::ostream& operator<<(std::ostream& out, const DateType& obj); -class Float16Type : public virtual ::apache::thrift::TBase { +class Float16Type { public: Float16Type(const Float16Type&) noexcept; @@ -931,8 +947,10 @@ class Float16Type : public virtual ::apache::thrift::TBase { bool operator < (const Float16Type & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -949,7 +967,7 @@ std::ostream& operator<<(std::ostream& out, const Float16Type& obj); * null and the physical type can't be determined. This annotation signals * the case where the physical type was guessed from all null values. 
*/ -class NullType : public virtual ::apache::thrift::TBase { +class NullType { public: NullType(const NullType&) noexcept; @@ -971,8 +989,10 @@ class NullType : public virtual ::apache::thrift::TBase { bool operator < (const NullType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -993,7 +1013,7 @@ std::ostream& operator<<(std::ostream& out, const NullType& obj); * * Allowed for physical types: INT32, INT64, FIXED, and BINARY */ -class DecimalType : public virtual ::apache::thrift::TBase { +class DecimalType { public: DecimalType(const DecimalType&) noexcept; @@ -1027,8 +1047,10 @@ class DecimalType : public virtual ::apache::thrift::TBase { bool operator < (const DecimalType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1041,7 +1063,7 @@ std::ostream& operator<<(std::ostream& out, const DecimalType& obj); /** * Time units for logical types */ -class MilliSeconds : public virtual ::apache::thrift::TBase { +class MilliSeconds { public: MilliSeconds(const MilliSeconds&) noexcept; @@ -1063,8 +1085,10 @@ class MilliSeconds : public virtual ::apache::thrift::TBase { bool operator < (const MilliSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1074,7 +1098,7 @@ void swap(MilliSeconds &a, MilliSeconds &b); std::ostream& operator<<(std::ostream& out, const MilliSeconds& obj); -class MicroSeconds : public virtual ::apache::thrift::TBase { +class MicroSeconds { public: MicroSeconds(const MicroSeconds&) noexcept; @@ -1096,8 +1120,10 @@ class MicroSeconds : public virtual ::apache::thrift::TBase { bool operator < (const MicroSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1107,7 +1133,7 @@ void swap(MicroSeconds &a, MicroSeconds &b); std::ostream& operator<<(std::ostream& out, const MicroSeconds& obj); -class NanoSeconds : public virtual ::apache::thrift::TBase { +class NanoSeconds { public: NanoSeconds(const NanoSeconds&) noexcept; @@ -1129,8 +1155,10 @@ class NanoSeconds : public virtual ::apache::thrift::TBase { bool operator < (const NanoSeconds & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1146,7 +1174,7 @@ typedef struct _TimeUnit__isset { bool NANOS :1; } _TimeUnit__isset; -class TimeUnit : public virtual ::apache::thrift::TBase { +class TimeUnit { public: 
TimeUnit(const TimeUnit&) noexcept; @@ -1191,8 +1219,10 @@ class TimeUnit : public virtual ::apache::thrift::TBase { bool operator < (const TimeUnit & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1207,7 +1237,7 @@ std::ostream& operator<<(std::ostream& out, const TimeUnit& obj); * * Allowed for physical types: INT64 */ -class TimestampType : public virtual ::apache::thrift::TBase { +class TimestampType { public: TimestampType(const TimestampType&) noexcept; @@ -1240,8 +1270,10 @@ class TimestampType : public virtual ::apache::thrift::TBase { bool operator < (const TimestampType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1256,7 +1288,7 @@ std::ostream& operator<<(std::ostream& out, const TimestampType& obj); * * Allowed for physical types: INT32 (millis), INT64 (micros, nanos) */ -class TimeType : public virtual ::apache::thrift::TBase { +class TimeType { public: TimeType(const TimeType&) noexcept; @@ -1289,8 +1321,10 @@ class TimeType : public virtual ::apache::thrift::TBase { bool operator < (const TimeType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1307,7 +1341,7 @@ std::ostream& operator<<(std::ostream& out, const TimeType& obj); * * Allowed for physical types: INT32, INT64 */ -class IntType : public virtual ::apache::thrift::TBase { +class IntType { public: IntType(const IntType&) noexcept; @@ -1341,8 +1375,10 @@ class IntType : public virtual ::apache::thrift::TBase { bool operator < (const IntType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1357,7 +1393,7 @@ std::ostream& operator<<(std::ostream& out, const IntType& obj); * * Allowed for physical types: BINARY */ -class JsonType : public virtual ::apache::thrift::TBase { +class JsonType { public: JsonType(const JsonType&) noexcept; @@ -1379,8 +1415,10 @@ class JsonType : public virtual ::apache::thrift::TBase { bool operator < (const JsonType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1395,7 +1433,7 @@ std::ostream& operator<<(std::ostream& out, const JsonType& obj); * * Allowed for physical types: BINARY */ -class BsonType : public virtual ::apache::thrift::TBase { +class BsonType { public: BsonType(const BsonType&) noexcept; @@ -1417,8 +1455,10 @@ class BsonType : public virtual ::apache::thrift::TBase 
{ bool operator < (const BsonType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1452,7 +1492,7 @@ typedef struct _LogicalType__isset { * SchemaElement must also set the corresponding ConvertedType (if any) * from the following table. */ -class LogicalType : public virtual ::apache::thrift::TBase { +class LogicalType { public: LogicalType(const LogicalType&) noexcept; @@ -1574,8 +1614,10 @@ class LogicalType : public virtual ::apache::thrift::TBase { bool operator < (const LogicalType & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1603,7 +1645,7 @@ typedef struct _SchemaElement__isset { * - if it is a primitive type (leaf) then type is defined and num_children is undefined * the nodes are listed in depth first traversal order. */ -class SchemaElement : public virtual ::apache::thrift::TBase { +class SchemaElement { public: SchemaElement(const SchemaElement&); @@ -1754,8 +1796,10 @@ class SchemaElement : public virtual ::apache::thrift::TBase { bool operator < (const SchemaElement & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1772,7 +1816,7 @@ typedef struct _DataPageHeader__isset { /** * Data page header */ -class DataPageHeader : public virtual ::apache::thrift::TBase { +class DataPageHeader { public: DataPageHeader(const DataPageHeader&); @@ -1848,8 +1892,10 @@ class DataPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const DataPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1859,7 +1905,7 @@ void swap(DataPageHeader &a, DataPageHeader &b); std::ostream& operator<<(std::ostream& out, const DataPageHeader& obj); -class IndexPageHeader : public virtual ::apache::thrift::TBase { +class IndexPageHeader { public: IndexPageHeader(const IndexPageHeader&) noexcept; @@ -1881,8 +1927,10 @@ class IndexPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const IndexPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1902,7 +1950,7 @@ typedef struct _DictionaryPageHeader__isset { * can be placed in a column chunk. 
* */ -class DictionaryPageHeader : public virtual ::apache::thrift::TBase { +class DictionaryPageHeader { public: DictionaryPageHeader(const DictionaryPageHeader&) noexcept; @@ -1957,8 +2005,10 @@ class DictionaryPageHeader : public virtual ::apache::thrift::TBase { bool operator < (const DictionaryPageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -1979,7 +2029,7 @@ typedef struct _DataPageHeaderV2__isset { * The remaining section containing the data is compressed if is_compressed is true * */ -class DataPageHeaderV2 : public virtual ::apache::thrift::TBase { +class DataPageHeaderV2 { public: DataPageHeaderV2(const DataPageHeaderV2&); @@ -2085,8 +2135,10 @@ class DataPageHeaderV2 : public virtual ::apache::thrift::TBase { bool operator < (const DataPageHeaderV2 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2099,7 +2151,7 @@ std::ostream& operator<<(std::ostream& out, const DataPageHeaderV2& obj); /** * Block-based algorithm type annotation. * */ -class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase { +class SplitBlockAlgorithm { public: SplitBlockAlgorithm(const SplitBlockAlgorithm&) noexcept; @@ -2121,8 +2173,10 @@ class SplitBlockAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const SplitBlockAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2139,7 +2193,7 @@ typedef struct _BloomFilterAlgorithm__isset { /** * The algorithm used in Bloom filter. * */ -class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase { +class BloomFilterAlgorithm { public: BloomFilterAlgorithm(const BloomFilterAlgorithm&) noexcept; @@ -2173,8 +2227,10 @@ class BloomFilterAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2189,7 +2245,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterAlgorithm& obj); * algorithm. It uses 64 bits version of xxHash. 
* */ -class XxHash : public virtual ::apache::thrift::TBase { +class XxHash { public: XxHash(const XxHash&) noexcept; @@ -2211,8 +2267,10 @@ class XxHash : public virtual ::apache::thrift::TBase { bool operator < (const XxHash & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2231,7 +2289,7 @@ typedef struct _BloomFilterHash__isset { * using plain encoding. * */ -class BloomFilterHash : public virtual ::apache::thrift::TBase { +class BloomFilterHash { public: BloomFilterHash(const BloomFilterHash&) noexcept; @@ -2265,8 +2323,10 @@ class BloomFilterHash : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterHash & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2280,7 +2340,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterHash& obj); * The compression used in the Bloom filter. * */ -class Uncompressed : public virtual ::apache::thrift::TBase { +class Uncompressed { public: Uncompressed(const Uncompressed&) noexcept; @@ -2302,8 +2362,10 @@ class Uncompressed : public virtual ::apache::thrift::TBase { bool operator < (const Uncompressed & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2317,7 +2379,7 @@ typedef struct _BloomFilterCompression__isset { bool UNCOMPRESSED :1; } _BloomFilterCompression__isset; -class BloomFilterCompression : public virtual ::apache::thrift::TBase { +class BloomFilterCompression { public: BloomFilterCompression(const BloomFilterCompression&) noexcept; @@ -2348,8 +2410,10 @@ class BloomFilterCompression : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterCompression & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2364,7 +2428,7 @@ std::ostream& operator<<(std::ostream& out, const BloomFilterCompression& obj); * and followed by its bitset. 
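 *
 * Illustrative sketch (not part of the Thrift-generated code): the matching
 * write path under the same templated API serializes into an owning
 * TMemoryBuffer and takes the compact-protocol bytes out. The header is
 * default-constructed here purely to show the call pattern; the helper name
 * is illustrative only.
 *
 *   #include <memory>
 *   #include <string>
 *   #include <thrift/protocol/TCompactProtocol.h>
 *   #include <thrift/transport/TBufferTransports.h>
 *   #include "parquet_types.h"
 *
 *   std::string SerializeBloomFilterHeader() {
 *     using apache::thrift::protocol::TCompactProtocolT;
 *     using apache::thrift::transport::TMemoryBuffer;
 *     auto buffer = std::make_shared<TMemoryBuffer>();  // growable, owning
 *     TCompactProtocolT<TMemoryBuffer> protocol(buffer);
 *     parquet::format::BloomFilterHeader header;
 *     header.write(&protocol);                 // templated write, see below
 *     return buffer->getBufferAsString();      // serialized payload
 *   }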
* */ -class BloomFilterHeader : public virtual ::apache::thrift::TBase { +class BloomFilterHeader { public: BloomFilterHeader(const BloomFilterHeader&) noexcept; @@ -2419,8 +2483,10 @@ class BloomFilterHeader : public virtual ::apache::thrift::TBase { bool operator < (const BloomFilterHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2438,7 +2504,7 @@ typedef struct _PageHeader__isset { bool data_page_header_v2 :1; } _PageHeader__isset; -class PageHeader : public virtual ::apache::thrift::TBase { +class PageHeader { public: PageHeader(const PageHeader&); @@ -2545,8 +2611,10 @@ class PageHeader : public virtual ::apache::thrift::TBase { bool operator < (const PageHeader & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2563,7 +2631,7 @@ typedef struct _KeyValue__isset { /** * Wrapper struct to store key values */ -class KeyValue : public virtual ::apache::thrift::TBase { +class KeyValue { public: KeyValue(const KeyValue&); @@ -2601,8 +2669,10 @@ class KeyValue : public virtual ::apache::thrift::TBase { bool operator < (const KeyValue & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2615,7 +2685,7 @@ std::ostream& operator<<(std::ostream& out, const KeyValue& obj); /** * Wrapper struct to specify sort order */ -class SortingColumn : public virtual ::apache::thrift::TBase { +class SortingColumn { public: SortingColumn(const SortingColumn&) noexcept; @@ -2665,8 +2735,10 @@ class SortingColumn : public virtual ::apache::thrift::TBase { bool operator < (const SortingColumn & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2679,7 +2751,7 @@ std::ostream& operator<<(std::ostream& out, const SortingColumn& obj); /** * statistics of a given page type and encoding */ -class PageEncodingStats : public virtual ::apache::thrift::TBase { +class PageEncodingStats { public: PageEncodingStats(const PageEncodingStats&) noexcept; @@ -2732,8 +2804,10 @@ class PageEncodingStats : public virtual ::apache::thrift::TBase { bool operator < (const PageEncodingStats & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2757,7 +2831,7 @@ typedef struct _ColumnMetaData__isset { /** * Description for column metadata */ -class ColumnMetaData : public virtual ::apache::thrift::TBase { +class ColumnMetaData { public: 
ColumnMetaData(const ColumnMetaData&); @@ -2950,8 +3024,10 @@ class ColumnMetaData : public virtual ::apache::thrift::TBase { bool operator < (const ColumnMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2961,7 +3037,7 @@ void swap(ColumnMetaData &a, ColumnMetaData &b); std::ostream& operator<<(std::ostream& out, const ColumnMetaData& obj); -class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase { +class EncryptionWithFooterKey { public: EncryptionWithFooterKey(const EncryptionWithFooterKey&) noexcept; @@ -2983,8 +3059,10 @@ class EncryptionWithFooterKey : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionWithFooterKey & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -2998,7 +3076,7 @@ typedef struct _EncryptionWithColumnKey__isset { bool key_metadata :1; } _EncryptionWithColumnKey__isset; -class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase { +class EncryptionWithColumnKey { public: EncryptionWithColumnKey(const EncryptionWithColumnKey&); @@ -3041,8 +3119,10 @@ class EncryptionWithColumnKey : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionWithColumnKey & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3057,7 +3137,7 @@ typedef struct _ColumnCryptoMetaData__isset { bool ENCRYPTION_WITH_COLUMN_KEY :1; } _ColumnCryptoMetaData__isset; -class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase { +class ColumnCryptoMetaData { public: ColumnCryptoMetaData(const ColumnCryptoMetaData&); @@ -3095,8 +3175,10 @@ class ColumnCryptoMetaData : public virtual ::apache::thrift::TBase { bool operator < (const ColumnCryptoMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3117,7 +3199,7 @@ typedef struct _ColumnChunk__isset { bool encrypted_column_metadata :1; } _ColumnChunk__isset; -class ColumnChunk : public virtual ::apache::thrift::TBase { +class ColumnChunk { public: ColumnChunk(const ColumnChunk&); @@ -3241,8 +3323,10 @@ class ColumnChunk : public virtual ::apache::thrift::TBase { bool operator < (const ColumnChunk & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3259,7 +3343,7 @@ typedef struct _RowGroup__isset { bool ordinal :1; } _RowGroup__isset; -class RowGroup : public virtual 
::apache::thrift::TBase { +class RowGroup { public: RowGroup(const RowGroup&); @@ -3357,8 +3441,10 @@ class RowGroup : public virtual ::apache::thrift::TBase { bool operator < (const RowGroup & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3371,7 +3457,7 @@ std::ostream& operator<<(std::ostream& out, const RowGroup& obj); /** * Empty struct to signal the order defined by the physical or logical type */ -class TypeDefinedOrder : public virtual ::apache::thrift::TBase { +class TypeDefinedOrder { public: TypeDefinedOrder(const TypeDefinedOrder&) noexcept; @@ -3393,8 +3479,10 @@ class TypeDefinedOrder : public virtual ::apache::thrift::TBase { bool operator < (const TypeDefinedOrder & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3420,7 +3508,7 @@ typedef struct _ColumnOrder__isset { * If the reader does not support the value of this union, min and max stats * for this column should be ignored. */ -class ColumnOrder : public virtual ::apache::thrift::TBase { +class ColumnOrder { public: ColumnOrder(const ColumnOrder&) noexcept; @@ -3501,8 +3589,10 @@ class ColumnOrder : public virtual ::apache::thrift::TBase { bool operator < (const ColumnOrder & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3512,7 +3602,7 @@ void swap(ColumnOrder &a, ColumnOrder &b); std::ostream& operator<<(std::ostream& out, const ColumnOrder& obj); -class PageLocation : public virtual ::apache::thrift::TBase { +class PageLocation { public: PageLocation(const PageLocation&) noexcept; @@ -3563,8 +3653,10 @@ class PageLocation : public virtual ::apache::thrift::TBase { bool operator < (const PageLocation & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3578,7 +3670,7 @@ typedef struct _OffsetIndex__isset { bool unencoded_byte_array_data_bytes :1; } _OffsetIndex__isset; -class OffsetIndex : public virtual ::apache::thrift::TBase { +class OffsetIndex { public: OffsetIndex(const OffsetIndex&); @@ -3624,8 +3716,10 @@ class OffsetIndex : public virtual ::apache::thrift::TBase { bool operator < (const OffsetIndex & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3645,7 +3739,7 @@ typedef struct _ColumnIndex__isset { * Description for ColumnIndex. 
* Each [i] refers to the page at OffsetIndex.page_locations[i] */ -class ColumnIndex : public virtual ::apache::thrift::TBase { +class ColumnIndex { public: ColumnIndex(const ColumnIndex&); @@ -3756,8 +3850,10 @@ class ColumnIndex : public virtual ::apache::thrift::TBase { bool operator < (const ColumnIndex & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3773,7 +3869,7 @@ typedef struct _AesGcmV1__isset { bool supply_aad_prefix :1; } _AesGcmV1__isset; -class AesGcmV1 : public virtual ::apache::thrift::TBase { +class AesGcmV1 { public: AesGcmV1(const AesGcmV1&); @@ -3831,8 +3927,10 @@ class AesGcmV1 : public virtual ::apache::thrift::TBase { bool operator < (const AesGcmV1 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3848,7 +3946,7 @@ typedef struct _AesGcmCtrV1__isset { bool supply_aad_prefix :1; } _AesGcmCtrV1__isset; -class AesGcmCtrV1 : public virtual ::apache::thrift::TBase { +class AesGcmCtrV1 { public: AesGcmCtrV1(const AesGcmCtrV1&); @@ -3906,8 +4004,10 @@ class AesGcmCtrV1 : public virtual ::apache::thrift::TBase { bool operator < (const AesGcmCtrV1 & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3922,7 +4022,7 @@ typedef struct _EncryptionAlgorithm__isset { bool AES_GCM_CTR_V1 :1; } _EncryptionAlgorithm__isset; -class EncryptionAlgorithm : public virtual ::apache::thrift::TBase { +class EncryptionAlgorithm { public: EncryptionAlgorithm(const EncryptionAlgorithm&); @@ -3960,8 +4060,10 @@ class EncryptionAlgorithm : public virtual ::apache::thrift::TBase { bool operator < (const EncryptionAlgorithm & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -3982,7 +4084,7 @@ typedef struct _FileMetaData__isset { /** * Description for file metadata */ -class FileMetaData : public virtual ::apache::thrift::TBase { +class FileMetaData { public: FileMetaData(const FileMetaData&); @@ -4116,8 +4218,10 @@ class FileMetaData : public virtual ::apache::thrift::TBase { bool operator < (const FileMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -4134,7 +4238,7 @@ typedef struct _FileCryptoMetaData__isset { /** * Crypto metadata for files with encrypted footer * */ -class FileCryptoMetaData : public virtual ::apache::thrift::TBase { +class FileCryptoMetaData { public: 
FileCryptoMetaData(const FileCryptoMetaData&); @@ -4180,8 +4284,10 @@ class FileCryptoMetaData : public virtual ::apache::thrift::TBase { bool operator < (const FileCryptoMetaData & ) const; - uint32_t read(::apache::thrift::protocol::TProtocol* iprot) override; - uint32_t write(::apache::thrift::protocol::TProtocol* oprot) const override; + template + uint32_t read(Protocol_* iprot); + template + uint32_t write(Protocol_* oprot) const; virtual void printTo(std::ostream& out) const; }; @@ -4192,4 +4298,6 @@ std::ostream& operator<<(std::ostream& out, const FileCryptoMetaData& obj); }} // namespace +#include "parquet_types.tcc" + #endif diff --git a/cpp/src/generated/parquet_types.tcc b/cpp/src/generated/parquet_types.tcc new file mode 100644 index 0000000000000..ee02d7f0139fc --- /dev/null +++ b/cpp/src/generated/parquet_types.tcc @@ -0,0 +1,4867 @@ +/** + * Autogenerated by Thrift Compiler (0.19.0) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +#ifndef parquet_TYPES_TCC +#define parquet_TYPES_TCC + +#include "parquet_types.h" + +namespace parquet { namespace format { + +template +uint32_t SizeStatistics::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes); + this->__isset.unencoded_byte_array_data_bytes = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->repetition_level_histogram.clear(); + uint32_t _size0; + ::apache::thrift::protocol::TType _etype3; + xfer += iprot->readListBegin(_etype3, _size0); + this->repetition_level_histogram.resize(_size0); + uint32_t _i4; + for (_i4 = 0; _i4 < _size0; ++_i4) + { + xfer += iprot->readI64(this->repetition_level_histogram[_i4]); + } + xfer += iprot->readListEnd(); + } + this->__isset.repetition_level_histogram = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->definition_level_histogram.clear(); + uint32_t _size5; + ::apache::thrift::protocol::TType _etype8; + xfer += iprot->readListBegin(_etype8, _size5); + this->definition_level_histogram.resize(_size5); + uint32_t _i9; + for (_i9 = 0; _i9 < _size5; ++_i9) + { + xfer += iprot->readI64(this->definition_level_histogram[_i9]); + } + xfer += iprot->readListEnd(); + } + this->__isset.definition_level_histogram = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t SizeStatistics::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SizeStatistics"); + + if (this->__isset.unencoded_byte_array_data_bytes) { + xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_I64, 1); + xfer += oprot->writeI64(this->unencoded_byte_array_data_bytes); + xfer += 
oprot->writeFieldEnd(); + } + if (this->__isset.repetition_level_histogram) { + xfer += oprot->writeFieldBegin("repetition_level_histogram", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histogram.size())); + std::vector ::const_iterator _iter10; + for (_iter10 = this->repetition_level_histogram.begin(); _iter10 != this->repetition_level_histogram.end(); ++_iter10) + { + xfer += oprot->writeI64((*_iter10)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.definition_level_histogram) { + xfer += oprot->writeFieldBegin("definition_level_histogram", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histogram.size())); + std::vector ::const_iterator _iter11; + for (_iter11 = this->definition_level_histogram.begin(); _iter11 != this->definition_level_histogram.end(); ++_iter11) + { + xfer += oprot->writeI64((*_iter11)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t Statistics::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->max); + this->__isset.max = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->min); + this->__isset.min = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->null_count); + this->__isset.null_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->distinct_count); + this->__isset.distinct_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->max_value); + this->__isset.max_value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->min_value); + this->__isset.min_value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_max_value_exact); + this->__isset.is_max_value_exact = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_min_value_exact); + this->__isset.is_min_value_exact = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t Statistics::write(Protocol_* oprot) const { + uint32_t 
xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Statistics"); + + if (this->__isset.max) { + xfer += oprot->writeFieldBegin("max", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->max); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.min) { + xfer += oprot->writeFieldBegin("min", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->min); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.null_count) { + xfer += oprot->writeFieldBegin("null_count", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->null_count); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.distinct_count) { + xfer += oprot->writeFieldBegin("distinct_count", ::apache::thrift::protocol::T_I64, 4); + xfer += oprot->writeI64(this->distinct_count); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.max_value) { + xfer += oprot->writeFieldBegin("max_value", ::apache::thrift::protocol::T_STRING, 5); + xfer += oprot->writeBinary(this->max_value); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.min_value) { + xfer += oprot->writeFieldBegin("min_value", ::apache::thrift::protocol::T_STRING, 6); + xfer += oprot->writeBinary(this->min_value); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.is_max_value_exact) { + xfer += oprot->writeFieldBegin("is_max_value_exact", ::apache::thrift::protocol::T_BOOL, 7); + xfer += oprot->writeBool(this->is_max_value_exact); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.is_min_value_exact) { + xfer += oprot->writeFieldBegin("is_min_value_exact", ::apache::thrift::protocol::T_BOOL, 8); + xfer += oprot->writeBool(this->is_min_value_exact); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t StringType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t StringType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("StringType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t UUIDType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t UUIDType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("UUIDType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t MapType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t MapType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MapType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ListType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ListType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ListType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EnumType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EnumType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EnumType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DateType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t DateType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("DateType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t Float16Type::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t Float16Type::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Float16Type"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t NullType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t NullType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("NullType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DecimalType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_scale = false; + bool isset_precision = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->scale); + isset_scale = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->precision); + isset_precision = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_scale) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_precision) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DecimalType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DecimalType"); + + xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->scale); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("precision", 
::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->precision); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t MilliSeconds::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t MilliSeconds::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MilliSeconds"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t MicroSeconds::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t MicroSeconds::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("MicroSeconds"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t NanoSeconds::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t NanoSeconds::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("NanoSeconds"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TimeUnit::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->MILLIS.read(iprot); + this->__isset.MILLIS = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->MICROS.read(iprot); + this->__isset.MICROS = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->NANOS.read(iprot); + this->__isset.NANOS = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t TimeUnit::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimeUnit"); + + if (this->__isset.MILLIS) { + xfer += oprot->writeFieldBegin("MILLIS", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->MILLIS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.MICROS) { + xfer += oprot->writeFieldBegin("MICROS", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->MICROS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.NANOS) { + xfer += oprot->writeFieldBegin("NANOS", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->NANOS.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TimestampType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_isAdjustedToUTC = false; + bool isset_unit = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isAdjustedToUTC); + isset_isAdjustedToUTC = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->unit.read(iprot); + isset_unit = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_isAdjustedToUTC) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_unit) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t TimestampType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimestampType"); + + xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); + xfer += oprot->writeBool(this->isAdjustedToUTC); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->unit.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TimeType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += 
iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_isAdjustedToUTC = false; + bool isset_unit = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isAdjustedToUTC); + isset_isAdjustedToUTC = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->unit.read(iprot); + isset_unit = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_isAdjustedToUTC) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_unit) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t TimeType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TimeType"); + + xfer += oprot->writeFieldBegin("isAdjustedToUTC", ::apache::thrift::protocol::T_BOOL, 1); + xfer += oprot->writeBool(this->isAdjustedToUTC); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("unit", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->unit.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t IntType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_bitWidth = false; + bool isset_isSigned = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_BYTE) { + xfer += iprot->readByte(this->bitWidth); + isset_bitWidth = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->isSigned); + isset_isSigned = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_bitWidth) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_isSigned) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t IntType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("IntType"); + + xfer += oprot->writeFieldBegin("bitWidth", ::apache::thrift::protocol::T_BYTE, 1); + xfer += oprot->writeByte(this->bitWidth); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("isSigned", ::apache::thrift::protocol::T_BOOL, 2); + xfer += oprot->writeBool(this->isSigned); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t JsonType::read(Protocol_* iprot) { + + 
::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t JsonType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("JsonType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BsonType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BsonType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BsonType"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t LogicalType::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->STRING.read(iprot); + this->__isset.STRING = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->MAP.read(iprot); + this->__isset.MAP = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->LIST.read(iprot); + this->__isset.LIST = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENUM.read(iprot); + this->__isset.ENUM = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->DECIMAL.read(iprot); + this->__isset.DECIMAL = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->DATE.read(iprot); + this->__isset.DATE = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->TIME.read(iprot); + this->__isset.TIME = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + 
xfer += this->TIMESTAMP.read(iprot); + this->__isset.TIMESTAMP = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->INTEGER.read(iprot); + this->__isset.INTEGER = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 11: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->UNKNOWN.read(iprot); + this->__isset.UNKNOWN = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 12: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->JSON.read(iprot); + this->__isset.JSON = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 13: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->BSON.read(iprot); + this->__isset.BSON = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 14: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->UUID.read(iprot); + this->__isset.UUID = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 15: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->FLOAT16.read(iprot); + this->__isset.FLOAT16 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t LogicalType::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("LogicalType"); + + if (this->__isset.STRING) { + xfer += oprot->writeFieldBegin("STRING", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->STRING.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.MAP) { + xfer += oprot->writeFieldBegin("MAP", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->MAP.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.LIST) { + xfer += oprot->writeFieldBegin("LIST", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->LIST.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ENUM) { + xfer += oprot->writeFieldBegin("ENUM", ::apache::thrift::protocol::T_STRUCT, 4); + xfer += this->ENUM.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.DECIMAL) { + xfer += oprot->writeFieldBegin("DECIMAL", ::apache::thrift::protocol::T_STRUCT, 5); + xfer += this->DECIMAL.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.DATE) { + xfer += oprot->writeFieldBegin("DATE", ::apache::thrift::protocol::T_STRUCT, 6); + xfer += this->DATE.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.TIME) { + xfer += oprot->writeFieldBegin("TIME", ::apache::thrift::protocol::T_STRUCT, 7); + xfer += this->TIME.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.TIMESTAMP) { + xfer += oprot->writeFieldBegin("TIMESTAMP", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->TIMESTAMP.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.INTEGER) { + xfer += oprot->writeFieldBegin("INTEGER", ::apache::thrift::protocol::T_STRUCT, 10); + xfer += this->INTEGER.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.UNKNOWN) { + xfer += oprot->writeFieldBegin("UNKNOWN", ::apache::thrift::protocol::T_STRUCT, 11); + xfer += this->UNKNOWN.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.JSON) { + xfer += oprot->writeFieldBegin("JSON", 
::apache::thrift::protocol::T_STRUCT, 12); + xfer += this->JSON.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.BSON) { + xfer += oprot->writeFieldBegin("BSON", ::apache::thrift::protocol::T_STRUCT, 13); + xfer += this->BSON.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.UUID) { + xfer += oprot->writeFieldBegin("UUID", ::apache::thrift::protocol::T_STRUCT, 14); + xfer += this->UUID.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.FLOAT16) { + xfer += oprot->writeFieldBegin("FLOAT16", ::apache::thrift::protocol::T_STRUCT, 15); + xfer += this->FLOAT16.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t SchemaElement::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_name = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast96; + xfer += iprot->readI32(ecast96); + this->type = static_cast(ecast96); + this->__isset.type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->type_length); + this->__isset.type_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast97; + xfer += iprot->readI32(ecast97); + this->repetition_type = static_cast(ecast97); + this->__isset.repetition_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->name); + isset_name = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_children); + this->__isset.num_children = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast98; + xfer += iprot->readI32(ecast98); + this->converted_type = static_cast(ecast98); + this->__isset.converted_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->scale); + this->__isset.scale = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->precision); + this->__isset.precision = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->field_id); + this->__isset.field_id = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->logicalType.read(iprot); + this->__isset.logicalType = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_name) + throw 
TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t SchemaElement::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SchemaElement"); + + if (this->__isset.type) { + xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->type)); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.type_length) { + xfer += oprot->writeFieldBegin("type_length", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->type_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.repetition_type) { + xfer += oprot->writeFieldBegin("repetition_type", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(static_cast(this->repetition_type)); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("name", ::apache::thrift::protocol::T_STRING, 4); + xfer += oprot->writeString(this->name); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.num_children) { + xfer += oprot->writeFieldBegin("num_children", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->num_children); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.converted_type) { + xfer += oprot->writeFieldBegin("converted_type", ::apache::thrift::protocol::T_I32, 6); + xfer += oprot->writeI32(static_cast(this->converted_type)); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.scale) { + xfer += oprot->writeFieldBegin("scale", ::apache::thrift::protocol::T_I32, 7); + xfer += oprot->writeI32(this->scale); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.precision) { + xfer += oprot->writeFieldBegin("precision", ::apache::thrift::protocol::T_I32, 8); + xfer += oprot->writeI32(this->precision); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.field_id) { + xfer += oprot->writeFieldBegin("field_id", ::apache::thrift::protocol::T_I32, 9); + xfer += oprot->writeI32(this->field_id); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.logicalType) { + xfer += oprot->writeFieldBegin("logicalType", ::apache::thrift::protocol::T_STRUCT, 10); + xfer += this->logicalType.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DataPageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_num_values = false; + bool isset_encoding = false; + bool isset_definition_level_encoding = false; + bool isset_repetition_level_encoding = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast103; + xfer += iprot->readI32(ecast103); + this->encoding = static_cast(ecast103); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t 
ecast104; + xfer += iprot->readI32(ecast104); + this->definition_level_encoding = static_cast(ecast104); + isset_definition_level_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast105; + xfer += iprot->readI32(ecast105); + this->repetition_level_encoding = static_cast(ecast105); + isset_repetition_level_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_definition_level_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_repetition_level_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DataPageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DataPageHeader"); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("definition_level_encoding", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(static_cast(this->definition_level_encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("repetition_level_encoding", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->repetition_level_encoding)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 5); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t IndexPageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t IndexPageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("IndexPageHeader"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DictionaryPageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + 
::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_num_values = false; + bool isset_encoding = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast114; + xfer += iprot->readI32(ecast114); + this->encoding = static_cast(ecast114); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_sorted); + this->__isset.is_sorted = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DictionaryPageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DictionaryPageHeader"); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.is_sorted) { + xfer += oprot->writeFieldBegin("is_sorted", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->is_sorted); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t DataPageHeaderV2::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_num_values = false; + bool isset_num_nulls = false; + bool isset_num_rows = false; + bool isset_encoding = false; + bool isset_definition_levels_byte_length = false; + bool isset_repetition_levels_byte_length = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_nulls); + isset_num_nulls = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast119; + 
xfer += iprot->readI32(ecast119); + this->encoding = static_cast(ecast119); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->definition_levels_byte_length); + isset_definition_levels_byte_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->repetition_levels_byte_length); + isset_repetition_levels_byte_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->is_compressed); + this->__isset.is_compressed = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_nulls) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_definition_levels_byte_length) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_repetition_levels_byte_length) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t DataPageHeaderV2::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("DataPageHeaderV2"); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_nulls", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->num_nulls); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->num_rows); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("definition_levels_byte_length", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->definition_levels_byte_length); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("repetition_levels_byte_length", ::apache::thrift::protocol::T_I32, 6); + xfer += oprot->writeI32(this->repetition_levels_byte_length); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.is_compressed) { + xfer += oprot->writeFieldBegin("is_compressed", ::apache::thrift::protocol::T_BOOL, 7); + xfer += oprot->writeBool(this->is_compressed); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t 
SplitBlockAlgorithm::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t SplitBlockAlgorithm::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SplitBlockAlgorithm"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterAlgorithm::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->BLOCK.read(iprot); + this->__isset.BLOCK = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BloomFilterAlgorithm::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterAlgorithm"); + + if (this->__isset.BLOCK) { + xfer += oprot->writeFieldBegin("BLOCK", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->BLOCK.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t XxHash::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t XxHash::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("XxHash"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterHash::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == 
::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->XXHASH.read(iprot); + this->__isset.XXHASH = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BloomFilterHash::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterHash"); + + if (this->__isset.XXHASH) { + xfer += oprot->writeFieldBegin("XXHASH", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->XXHASH.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t Uncompressed::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t Uncompressed::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("Uncompressed"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterCompression::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->UNCOMPRESSED.read(iprot); + this->__isset.UNCOMPRESSED = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t BloomFilterCompression::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterCompression"); + + if (this->__isset.UNCOMPRESSED) { + xfer += oprot->writeFieldBegin("UNCOMPRESSED", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->UNCOMPRESSED.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t BloomFilterHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_numBytes = false; + 
bool isset_algorithm = false; + bool isset_hash = false; + bool isset_compression = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->numBytes); + isset_numBytes = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->algorithm.read(iprot); + isset_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->hash.read(iprot); + isset_hash = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->compression.read(iprot); + isset_compression = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_numBytes) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_algorithm) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_hash) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compression) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t BloomFilterHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("BloomFilterHeader"); + + xfer += oprot->writeFieldBegin("numBytes", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->numBytes); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("algorithm", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("hash", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->hash.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("compression", ::apache::thrift::protocol::T_STRUCT, 4); + xfer += this->compression.write(oprot); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t PageHeader::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_type = false; + bool isset_uncompressed_page_size = false; + bool isset_compressed_page_size = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast152; + xfer += iprot->readI32(ecast152); + this->type = static_cast(ecast152); + isset_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->uncompressed_page_size); + isset_uncompressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += 
iprot->readI32(this->compressed_page_size); + isset_compressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->crc); + this->__isset.crc = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->data_page_header.read(iprot); + this->__isset.data_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->index_page_header.read(iprot); + this->__isset.index_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->dictionary_page_header.read(iprot); + this->__isset.dictionary_page_header = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->data_page_header_v2.read(iprot); + this->__isset.data_page_header_v2 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_uncompressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t PageHeader::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageHeader"); + + xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->type)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("uncompressed_page_size", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->uncompressed_page_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->compressed_page_size); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.crc) { + xfer += oprot->writeFieldBegin("crc", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(this->crc); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.data_page_header) { + xfer += oprot->writeFieldBegin("data_page_header", ::apache::thrift::protocol::T_STRUCT, 5); + xfer += this->data_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.index_page_header) { + xfer += oprot->writeFieldBegin("index_page_header", ::apache::thrift::protocol::T_STRUCT, 6); + xfer += this->index_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.dictionary_page_header) { + xfer += oprot->writeFieldBegin("dictionary_page_header", ::apache::thrift::protocol::T_STRUCT, 7); + xfer += this->dictionary_page_header.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.data_page_header_v2) { + xfer += oprot->writeFieldBegin("data_page_header_v2", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->data_page_header_v2.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t 
KeyValue::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_key = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->key); + isset_key = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->value); + this->__isset.value = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_key) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t KeyValue::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("KeyValue"); + + xfer += oprot->writeFieldBegin("key", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->key); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.value) { + xfer += oprot->writeFieldBegin("value", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeString(this->value); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t SortingColumn::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_column_idx = false; + bool isset_descending = false; + bool isset_nulls_first = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->column_idx); + isset_column_idx = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->descending); + isset_descending = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->nulls_first); + isset_nulls_first = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_column_idx) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_descending) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_nulls_first) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t SortingColumn::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("SortingColumn"); + + xfer += 
oprot->writeFieldBegin("column_idx", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->column_idx); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("descending", ::apache::thrift::protocol::T_BOOL, 2); + xfer += oprot->writeBool(this->descending); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("nulls_first", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->nulls_first); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t PageEncodingStats::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_page_type = false; + bool isset_encoding = false; + bool isset_count = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast165; + xfer += iprot->readI32(ecast165); + this->page_type = static_cast(ecast165); + isset_page_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast166; + xfer += iprot->readI32(ecast166); + this->encoding = static_cast(ecast166); + isset_encoding = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->count); + isset_count = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_page_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encoding) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_count) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t PageEncodingStats::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageEncodingStats"); + + xfer += oprot->writeFieldBegin("page_type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->page_type)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encoding", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(static_cast(this->encoding)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("count", ::apache::thrift::protocol::T_I32, 3); + xfer += oprot->writeI32(this->count); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_type = false; + bool isset_encodings = false; + bool isset_path_in_schema = false; + bool isset_codec = false; + bool isset_num_values = false; + 
bool isset_total_uncompressed_size = false; + bool isset_total_compressed_size = false; + bool isset_data_page_offset = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast171; + xfer += iprot->readI32(ecast171); + this->type = static_cast(ecast171); + isset_type = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->encodings.clear(); + uint32_t _size172; + ::apache::thrift::protocol::TType _etype175; + xfer += iprot->readListBegin(_etype175, _size172); + this->encodings.resize(_size172); + uint32_t _i176; + for (_i176 = 0; _i176 < _size172; ++_i176) + { + int32_t ecast177; + xfer += iprot->readI32(ecast177); + this->encodings[_i176] = static_cast(ecast177); + } + xfer += iprot->readListEnd(); + } + isset_encodings = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->path_in_schema.clear(); + uint32_t _size178; + ::apache::thrift::protocol::TType _etype181; + xfer += iprot->readListBegin(_etype181, _size178); + this->path_in_schema.resize(_size178); + uint32_t _i182; + for (_i182 = 0; _i182 < _size178; ++_i182) + { + xfer += iprot->readString(this->path_in_schema[_i182]); + } + xfer += iprot->readListEnd(); + } + isset_path_in_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast183; + xfer += iprot->readI32(ecast183); + this->codec = static_cast(ecast183); + isset_codec = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_values); + isset_num_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_uncompressed_size); + isset_total_uncompressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_compressed_size); + isset_total_compressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->key_value_metadata.clear(); + uint32_t _size184; + ::apache::thrift::protocol::TType _etype187; + xfer += iprot->readListBegin(_etype187, _size184); + this->key_value_metadata.resize(_size184); + uint32_t _i188; + for (_i188 = 0; _i188 < _size184; ++_i188) + { + xfer += this->key_value_metadata[_i188].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.key_value_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->data_page_offset); + isset_data_page_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 10: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->index_page_offset); + this->__isset.index_page_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 11: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->dictionary_page_offset); + this->__isset.dictionary_page_offset = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 12: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->statistics.read(iprot); + this->__isset.statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 13: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->encoding_stats.clear(); + uint32_t _size189; + ::apache::thrift::protocol::TType _etype192; + xfer += iprot->readListBegin(_etype192, _size189); + this->encoding_stats.resize(_size189); + uint32_t _i193; + for (_i193 = 0; _i193 < _size189; ++_i193) + { + xfer += this->encoding_stats[_i193].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.encoding_stats = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 14: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->bloom_filter_offset); + this->__isset.bloom_filter_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 15: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->bloom_filter_length); + this->__isset.bloom_filter_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 16: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->size_statistics.read(iprot); + this->__isset.size_statistics = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_type) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_encodings) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_path_in_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_codec) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_uncompressed_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_compressed_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_data_page_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnMetaData"); + + xfer += oprot->writeFieldBegin("type", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(static_cast(this->type)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("encodings", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I32, static_cast(this->encodings.size())); + std::vector ::const_iterator _iter194; + for (_iter194 = this->encodings.begin(); _iter194 != this->encodings.end(); ++_iter194) + { + xfer += oprot->writeI32(static_cast((*_iter194))); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); + std::vector ::const_iterator _iter195; + for (_iter195 = this->path_in_schema.begin(); _iter195 != this->path_in_schema.end(); ++_iter195) + { + xfer += oprot->writeString((*_iter195)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); 
+ + xfer += oprot->writeFieldBegin("codec", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->codec)); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_values", ::apache::thrift::protocol::T_I64, 5); + xfer += oprot->writeI64(this->num_values); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("total_uncompressed_size", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->total_uncompressed_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 7); + xfer += oprot->writeI64(this->total_compressed_size); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_value_metadata) { + xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 8); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); + std::vector ::const_iterator _iter196; + for (_iter196 = this->key_value_metadata.begin(); _iter196 != this->key_value_metadata.end(); ++_iter196) + { + xfer += (*_iter196).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("data_page_offset", ::apache::thrift::protocol::T_I64, 9); + xfer += oprot->writeI64(this->data_page_offset); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.index_page_offset) { + xfer += oprot->writeFieldBegin("index_page_offset", ::apache::thrift::protocol::T_I64, 10); + xfer += oprot->writeI64(this->index_page_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.dictionary_page_offset) { + xfer += oprot->writeFieldBegin("dictionary_page_offset", ::apache::thrift::protocol::T_I64, 11); + xfer += oprot->writeI64(this->dictionary_page_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.statistics) { + xfer += oprot->writeFieldBegin("statistics", ::apache::thrift::protocol::T_STRUCT, 12); + xfer += this->statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encoding_stats) { + xfer += oprot->writeFieldBegin("encoding_stats", ::apache::thrift::protocol::T_LIST, 13); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->encoding_stats.size())); + std::vector ::const_iterator _iter197; + for (_iter197 = this->encoding_stats.begin(); _iter197 != this->encoding_stats.end(); ++_iter197) + { + xfer += (*_iter197).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.bloom_filter_offset) { + xfer += oprot->writeFieldBegin("bloom_filter_offset", ::apache::thrift::protocol::T_I64, 14); + xfer += oprot->writeI64(this->bloom_filter_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.bloom_filter_length) { + xfer += oprot->writeFieldBegin("bloom_filter_length", ::apache::thrift::protocol::T_I32, 15); + xfer += oprot->writeI32(this->bloom_filter_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.size_statistics) { + xfer += oprot->writeFieldBegin("size_statistics", ::apache::thrift::protocol::T_STRUCT, 16); + xfer += this->size_statistics.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionWithFooterKey::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; 
+ ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EncryptionWithFooterKey::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithFooterKey"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionWithColumnKey::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_path_in_schema = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->path_in_schema.clear(); + uint32_t _size206; + ::apache::thrift::protocol::TType _etype209; + xfer += iprot->readListBegin(_etype209, _size206); + this->path_in_schema.resize(_size206); + uint32_t _i210; + for (_i210 = 0; _i210 < _size206; ++_i210) + { + xfer += iprot->readString(this->path_in_schema[_i210]); + } + xfer += iprot->readListEnd(); + } + isset_path_in_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_path_in_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t EncryptionWithColumnKey::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionWithColumnKey"); + + xfer += oprot->writeFieldBegin("path_in_schema", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->path_in_schema.size())); + std::vector ::const_iterator _iter211; + for (_iter211 = this->path_in_schema.begin(); _iter211 != this->path_in_schema.end(); ++_iter211) + { + xfer += oprot->writeString((*_iter211)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnCryptoMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using 
::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_FOOTER_KEY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.read(iprot); + this->__isset.ENCRYPTION_WITH_COLUMN_KEY = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ColumnCryptoMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnCryptoMetaData"); + + if (this->__isset.ENCRYPTION_WITH_FOOTER_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_FOOTER_KEY", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->ENCRYPTION_WITH_FOOTER_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ENCRYPTION_WITH_COLUMN_KEY) { + xfer += oprot->writeFieldBegin("ENCRYPTION_WITH_COLUMN_KEY", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->ENCRYPTION_WITH_COLUMN_KEY.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnChunk::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_file_offset = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->file_path); + this->__isset.file_path = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + isset_file_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->meta_data.read(iprot); + this->__isset.meta_data = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset_index_offset); + this->__isset.offset_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->offset_index_length); + this->__isset.offset_index_length = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->column_index_offset); + this->__isset.column_index_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->column_index_length); + this->__isset.column_index_length = true; + } else { + xfer += 
iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->crypto_metadata.read(iprot); + this->__isset.crypto_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->encrypted_column_metadata); + this->__isset.encrypted_column_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_file_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnChunk::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnChunk"); + + if (this->__isset.file_path) { + xfer += oprot->writeFieldBegin("file_path", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeString(this->file_path); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.meta_data) { + xfer += oprot->writeFieldBegin("meta_data", ::apache::thrift::protocol::T_STRUCT, 3); + xfer += this->meta_data.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_offset) { + xfer += oprot->writeFieldBegin("offset_index_offset", ::apache::thrift::protocol::T_I64, 4); + xfer += oprot->writeI64(this->offset_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.offset_index_length) { + xfer += oprot->writeFieldBegin("offset_index_length", ::apache::thrift::protocol::T_I32, 5); + xfer += oprot->writeI32(this->offset_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_offset) { + xfer += oprot->writeFieldBegin("column_index_offset", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->column_index_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_index_length) { + xfer += oprot->writeFieldBegin("column_index_length", ::apache::thrift::protocol::T_I32, 7); + xfer += oprot->writeI32(this->column_index_length); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.crypto_metadata) { + xfer += oprot->writeFieldBegin("crypto_metadata", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->crypto_metadata.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encrypted_column_metadata) { + xfer += oprot->writeFieldBegin("encrypted_column_metadata", ::apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->encrypted_column_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t RowGroup::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_columns = false; + bool isset_total_byte_size = false; + bool isset_num_rows = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + 
{ + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->columns.clear(); + uint32_t _size224; + ::apache::thrift::protocol::TType _etype227; + xfer += iprot->readListBegin(_etype227, _size224); + this->columns.resize(_size224); + uint32_t _i228; + for (_i228 = 0; _i228 < _size224; ++_i228) + { + xfer += this->columns[_i228].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_byte_size); + isset_total_byte_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->sorting_columns.clear(); + uint32_t _size229; + ::apache::thrift::protocol::TType _etype232; + xfer += iprot->readListBegin(_etype232, _size229); + this->sorting_columns.resize(_size229); + uint32_t _i233; + for (_i233 = 0; _i233 < _size229; ++_i233) + { + xfer += this->sorting_columns[_i233].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.sorting_columns = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->file_offset); + this->__isset.file_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->total_compressed_size); + this->__isset.total_compressed_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_I16) { + xfer += iprot->readI16(this->ordinal); + this->__isset.ordinal = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_columns) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_total_byte_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t RowGroup::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("RowGroup"); + + xfer += oprot->writeFieldBegin("columns", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->columns.size())); + std::vector ::const_iterator _iter234; + for (_iter234 = this->columns.begin(); _iter234 != this->columns.end(); ++_iter234) + { + xfer += (*_iter234).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("total_byte_size", ::apache::thrift::protocol::T_I64, 2); + xfer += oprot->writeI64(this->total_byte_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.sorting_columns) { + xfer += oprot->writeFieldBegin("sorting_columns", ::apache::thrift::protocol::T_LIST, 4); + { + xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->sorting_columns.size())); + std::vector ::const_iterator _iter235; + for (_iter235 = this->sorting_columns.begin(); _iter235 != this->sorting_columns.end(); ++_iter235) + { + xfer += (*_iter235).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.file_offset) { + xfer += oprot->writeFieldBegin("file_offset", ::apache::thrift::protocol::T_I64, 5); + xfer += oprot->writeI64(this->file_offset); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.total_compressed_size) { + xfer += oprot->writeFieldBegin("total_compressed_size", ::apache::thrift::protocol::T_I64, 6); + xfer += oprot->writeI64(this->total_compressed_size); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.ordinal) { + xfer += oprot->writeFieldBegin("ordinal", ::apache::thrift::protocol::T_I16, 7); + xfer += oprot->writeI16(this->ordinal); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t TypeDefinedOrder::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + xfer += iprot->skip(ftype); + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t TypeDefinedOrder::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("TypeDefinedOrder"); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnOrder::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->TYPE_ORDER.read(iprot); + this->__isset.TYPE_ORDER = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t ColumnOrder::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnOrder"); + + if (this->__isset.TYPE_ORDER) { + xfer += oprot->writeFieldBegin("TYPE_ORDER", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->TYPE_ORDER.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t PageLocation::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; 
+ int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_offset = false; + bool isset_compressed_page_size = false; + bool isset_first_row_index = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->offset); + isset_offset = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->compressed_page_size); + isset_compressed_page_size = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->first_row_index); + isset_first_row_index = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_offset) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_compressed_page_size) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_first_row_index) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t PageLocation::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("PageLocation"); + + xfer += oprot->writeFieldBegin("offset", ::apache::thrift::protocol::T_I64, 1); + xfer += oprot->writeI64(this->offset); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("compressed_page_size", ::apache::thrift::protocol::T_I32, 2); + xfer += oprot->writeI32(this->compressed_page_size); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("first_row_index", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->first_row_index); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t OffsetIndex::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_page_locations = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->page_locations.clear(); + uint32_t _size252; + ::apache::thrift::protocol::TType _etype255; + xfer += iprot->readListBegin(_etype255, _size252); + this->page_locations.resize(_size252); + uint32_t _i256; + for (_i256 = 0; _i256 < _size252; ++_i256) + { + xfer += this->page_locations[_i256].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_page_locations = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->unencoded_byte_array_data_bytes.clear(); + uint32_t _size257; + ::apache::thrift::protocol::TType _etype260; + xfer += iprot->readListBegin(_etype260, _size257); + this->unencoded_byte_array_data_bytes.resize(_size257); + uint32_t 
_i261; + for (_i261 = 0; _i261 < _size257; ++_i261) + { + xfer += iprot->readI64(this->unencoded_byte_array_data_bytes[_i261]); + } + xfer += iprot->readListEnd(); + } + this->__isset.unencoded_byte_array_data_bytes = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_page_locations) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t OffsetIndex::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("OffsetIndex"); + + xfer += oprot->writeFieldBegin("page_locations", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->page_locations.size())); + std::vector ::const_iterator _iter262; + for (_iter262 = this->page_locations.begin(); _iter262 != this->page_locations.end(); ++_iter262) + { + xfer += (*_iter262).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.unencoded_byte_array_data_bytes) { + xfer += oprot->writeFieldBegin("unencoded_byte_array_data_bytes", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->unencoded_byte_array_data_bytes.size())); + std::vector ::const_iterator _iter263; + for (_iter263 = this->unencoded_byte_array_data_bytes.begin(); _iter263 != this->unencoded_byte_array_data_bytes.end(); ++_iter263) + { + xfer += oprot->writeI64((*_iter263)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t ColumnIndex::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_null_pages = false; + bool isset_min_values = false; + bool isset_max_values = false; + bool isset_boundary_order = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->null_pages.clear(); + uint32_t _size268; + ::apache::thrift::protocol::TType _etype271; + xfer += iprot->readListBegin(_etype271, _size268); + this->null_pages.resize(_size268); + uint32_t _i272; + for (_i272 = 0; _i272 < _size268; ++_i272) + { + xfer += iprot->readBool(this->null_pages[_i272]); + } + xfer += iprot->readListEnd(); + } + isset_null_pages = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->min_values.clear(); + uint32_t _size273; + ::apache::thrift::protocol::TType _etype276; + xfer += iprot->readListBegin(_etype276, _size273); + this->min_values.resize(_size273); + uint32_t _i277; + for (_i277 = 0; _i277 < _size273; ++_i277) + { + xfer += iprot->readBinary(this->min_values[_i277]); + } + xfer += iprot->readListEnd(); + } + isset_min_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_LIST) { + 
{ + this->max_values.clear(); + uint32_t _size278; + ::apache::thrift::protocol::TType _etype281; + xfer += iprot->readListBegin(_etype281, _size278); + this->max_values.resize(_size278); + uint32_t _i282; + for (_i282 = 0; _i282 < _size278; ++_i282) + { + xfer += iprot->readBinary(this->max_values[_i282]); + } + xfer += iprot->readListEnd(); + } + isset_max_values = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_I32) { + int32_t ecast283; + xfer += iprot->readI32(ecast283); + this->boundary_order = static_cast(ecast283); + isset_boundary_order = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->null_counts.clear(); + uint32_t _size284; + ::apache::thrift::protocol::TType _etype287; + xfer += iprot->readListBegin(_etype287, _size284); + this->null_counts.resize(_size284); + uint32_t _i288; + for (_i288 = 0; _i288 < _size284; ++_i288) + { + xfer += iprot->readI64(this->null_counts[_i288]); + } + xfer += iprot->readListEnd(); + } + this->__isset.null_counts = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->repetition_level_histograms.clear(); + uint32_t _size289; + ::apache::thrift::protocol::TType _etype292; + xfer += iprot->readListBegin(_etype292, _size289); + this->repetition_level_histograms.resize(_size289); + uint32_t _i293; + for (_i293 = 0; _i293 < _size289; ++_i293) + { + xfer += iprot->readI64(this->repetition_level_histograms[_i293]); + } + xfer += iprot->readListEnd(); + } + this->__isset.repetition_level_histograms = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->definition_level_histograms.clear(); + uint32_t _size294; + ::apache::thrift::protocol::TType _etype297; + xfer += iprot->readListBegin(_etype297, _size294); + this->definition_level_histograms.resize(_size294); + uint32_t _i298; + for (_i298 = 0; _i298 < _size294; ++_i298) + { + xfer += iprot->readI64(this->definition_level_histograms[_i298]); + } + xfer += iprot->readListEnd(); + } + this->__isset.definition_level_histograms = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_null_pages) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_min_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_max_values) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_boundary_order) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t ColumnIndex::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("ColumnIndex"); + + xfer += oprot->writeFieldBegin("null_pages", ::apache::thrift::protocol::T_LIST, 1); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_BOOL, static_cast(this->null_pages.size())); + std::vector ::const_iterator _iter299; + for (_iter299 = this->null_pages.begin(); _iter299 != this->null_pages.end(); ++_iter299) + { + xfer += oprot->writeBool((*_iter299)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("min_values", 
::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->min_values.size())); + std::vector ::const_iterator _iter300; + for (_iter300 = this->min_values.begin(); _iter300 != this->min_values.end(); ++_iter300) + { + xfer += oprot->writeBinary((*_iter300)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("max_values", ::apache::thrift::protocol::T_LIST, 3); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRING, static_cast(this->max_values.size())); + std::vector ::const_iterator _iter301; + for (_iter301 = this->max_values.begin(); _iter301 != this->max_values.end(); ++_iter301) + { + xfer += oprot->writeBinary((*_iter301)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("boundary_order", ::apache::thrift::protocol::T_I32, 4); + xfer += oprot->writeI32(static_cast(this->boundary_order)); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.null_counts) { + xfer += oprot->writeFieldBegin("null_counts", ::apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->null_counts.size())); + std::vector ::const_iterator _iter302; + for (_iter302 = this->null_counts.begin(); _iter302 != this->null_counts.end(); ++_iter302) + { + xfer += oprot->writeI64((*_iter302)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.repetition_level_histograms) { + xfer += oprot->writeFieldBegin("repetition_level_histograms", ::apache::thrift::protocol::T_LIST, 6); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->repetition_level_histograms.size())); + std::vector ::const_iterator _iter303; + for (_iter303 = this->repetition_level_histograms.begin(); _iter303 != this->repetition_level_histograms.end(); ++_iter303) + { + xfer += oprot->writeI64((*_iter303)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.definition_level_histograms) { + xfer += oprot->writeFieldBegin("definition_level_histograms", ::apache::thrift::protocol::T_LIST, 7); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_I64, static_cast(this->definition_level_histograms.size())); + std::vector ::const_iterator _iter304; + for (_iter304 = this->definition_level_histograms.begin(); _iter304 != this->definition_level_histograms.end(); ++_iter304) + { + xfer += oprot->writeI64((*_iter304)); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t AesGcmV1::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += 
iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t AesGcmV1::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmV1"); + + if (this->__isset.aad_prefix) { + xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t AesGcmCtrV1::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_prefix); + this->__isset.aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->aad_file_unique); + this->__isset.aad_file_unique = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_BOOL) { + xfer += iprot->readBool(this->supply_aad_prefix); + this->__isset.supply_aad_prefix = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t AesGcmCtrV1::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("AesGcmCtrV1"); + + if (this->__isset.aad_prefix) { + xfer += oprot->writeFieldBegin("aad_prefix", ::apache::thrift::protocol::T_STRING, 1); + xfer += oprot->writeBinary(this->aad_prefix); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.aad_file_unique) { + xfer += oprot->writeFieldBegin("aad_file_unique", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->aad_file_unique); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.supply_aad_prefix) { + xfer += oprot->writeFieldBegin("supply_aad_prefix", ::apache::thrift::protocol::T_BOOL, 3); + xfer += oprot->writeBool(this->supply_aad_prefix); + xfer += 
oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t EncryptionAlgorithm::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_V1.read(iprot); + this->__isset.AES_GCM_V1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->AES_GCM_CTR_V1.read(iprot); + this->__isset.AES_GCM_CTR_V1 = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + return xfer; +} + +template +uint32_t EncryptionAlgorithm::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("EncryptionAlgorithm"); + + if (this->__isset.AES_GCM_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_V1", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->AES_GCM_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.AES_GCM_CTR_V1) { + xfer += oprot->writeFieldBegin("AES_GCM_CTR_V1", ::apache::thrift::protocol::T_STRUCT, 2); + xfer += this->AES_GCM_CTR_V1.write(oprot); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t FileMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_version = false; + bool isset_schema = false; + bool isset_num_rows = false; + bool isset_row_groups = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_I32) { + xfer += iprot->readI32(this->version); + isset_version = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->schema.clear(); + uint32_t _size321; + ::apache::thrift::protocol::TType _etype324; + xfer += iprot->readListBegin(_etype324, _size321); + this->schema.resize(_size321); + uint32_t _i325; + for (_i325 = 0; _i325 < _size321; ++_i325) + { + xfer += this->schema[_i325].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_schema = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 3: + if (ftype == ::apache::thrift::protocol::T_I64) { + xfer += iprot->readI64(this->num_rows); + isset_num_rows = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 4: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->row_groups.clear(); + uint32_t _size326; + ::apache::thrift::protocol::TType _etype329; + xfer += iprot->readListBegin(_etype329, 
_size326); + this->row_groups.resize(_size326); + uint32_t _i330; + for (_i330 = 0; _i330 < _size326; ++_i330) + { + xfer += this->row_groups[_i330].read(iprot); + } + xfer += iprot->readListEnd(); + } + isset_row_groups = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 5: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->key_value_metadata.clear(); + uint32_t _size331; + ::apache::thrift::protocol::TType _etype334; + xfer += iprot->readListBegin(_etype334, _size331); + this->key_value_metadata.resize(_size331); + uint32_t _i335; + for (_i335 = 0; _i335 < _size331; ++_i335) + { + xfer += this->key_value_metadata[_i335].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.key_value_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 6: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readString(this->created_by); + this->__isset.created_by = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 7: + if (ftype == ::apache::thrift::protocol::T_LIST) { + { + this->column_orders.clear(); + uint32_t _size336; + ::apache::thrift::protocol::TType _etype339; + xfer += iprot->readListBegin(_etype339, _size336); + this->column_orders.resize(_size336); + uint32_t _i340; + for (_i340 = 0; _i340 < _size336; ++_i340) + { + xfer += this->column_orders[_i340].read(iprot); + } + xfer += iprot->readListEnd(); + } + this->__isset.column_orders = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 8: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + this->__isset.encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 9: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->footer_signing_key_metadata); + this->__isset.footer_signing_key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_version) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_schema) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_num_rows) + throw TProtocolException(TProtocolException::INVALID_DATA); + if (!isset_row_groups) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t FileMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += oprot->writeStructBegin("FileMetaData"); + + xfer += oprot->writeFieldBegin("version", ::apache::thrift::protocol::T_I32, 1); + xfer += oprot->writeI32(this->version); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("schema", ::apache::thrift::protocol::T_LIST, 2); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->schema.size())); + std::vector ::const_iterator _iter341; + for (_iter341 = this->schema.begin(); _iter341 != this->schema.end(); ++_iter341) + { + xfer += (*_iter341).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("num_rows", ::apache::thrift::protocol::T_I64, 3); + xfer += oprot->writeI64(this->num_rows); + xfer += oprot->writeFieldEnd(); + + xfer += oprot->writeFieldBegin("row_groups", ::apache::thrift::protocol::T_LIST, 4); + { + xfer += 
oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->row_groups.size())); + std::vector ::const_iterator _iter342; + for (_iter342 = this->row_groups.begin(); _iter342 != this->row_groups.end(); ++_iter342) + { + xfer += (*_iter342).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_value_metadata) { + xfer += oprot->writeFieldBegin("key_value_metadata", ::apache::thrift::protocol::T_LIST, 5); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->key_value_metadata.size())); + std::vector ::const_iterator _iter343; + for (_iter343 = this->key_value_metadata.begin(); _iter343 != this->key_value_metadata.end(); ++_iter343) + { + xfer += (*_iter343).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.created_by) { + xfer += oprot->writeFieldBegin("created_by", ::apache::thrift::protocol::T_STRING, 6); + xfer += oprot->writeString(this->created_by); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.column_orders) { + xfer += oprot->writeFieldBegin("column_orders", ::apache::thrift::protocol::T_LIST, 7); + { + xfer += oprot->writeListBegin(::apache::thrift::protocol::T_STRUCT, static_cast(this->column_orders.size())); + std::vector ::const_iterator _iter344; + for (_iter344 = this->column_orders.begin(); _iter344 != this->column_orders.end(); ++_iter344) + { + xfer += (*_iter344).write(oprot); + } + xfer += oprot->writeListEnd(); + } + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.encryption_algorithm) { + xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 8); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + } + if (this->__isset.footer_signing_key_metadata) { + xfer += oprot->writeFieldBegin("footer_signing_key_metadata", ::apache::thrift::protocol::T_STRING, 9); + xfer += oprot->writeBinary(this->footer_signing_key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +template +uint32_t FileCryptoMetaData::read(Protocol_* iprot) { + + ::apache::thrift::protocol::TInputRecursionTracker tracker(*iprot); + uint32_t xfer = 0; + std::string fname; + ::apache::thrift::protocol::TType ftype; + int16_t fid; + + xfer += iprot->readStructBegin(fname); + + using ::apache::thrift::protocol::TProtocolException; + + bool isset_encryption_algorithm = false; + + while (true) + { + xfer += iprot->readFieldBegin(fname, ftype, fid); + if (ftype == ::apache::thrift::protocol::T_STOP) { + break; + } + switch (fid) + { + case 1: + if (ftype == ::apache::thrift::protocol::T_STRUCT) { + xfer += this->encryption_algorithm.read(iprot); + isset_encryption_algorithm = true; + } else { + xfer += iprot->skip(ftype); + } + break; + case 2: + if (ftype == ::apache::thrift::protocol::T_STRING) { + xfer += iprot->readBinary(this->key_metadata); + this->__isset.key_metadata = true; + } else { + xfer += iprot->skip(ftype); + } + break; + default: + xfer += iprot->skip(ftype); + break; + } + xfer += iprot->readFieldEnd(); + } + + xfer += iprot->readStructEnd(); + + if (!isset_encryption_algorithm) + throw TProtocolException(TProtocolException::INVALID_DATA); + return xfer; +} + +template +uint32_t FileCryptoMetaData::write(Protocol_* oprot) const { + uint32_t xfer = 0; + ::apache::thrift::protocol::TOutputRecursionTracker tracker(*oprot); + xfer += 
oprot->writeStructBegin("FileCryptoMetaData"); + + xfer += oprot->writeFieldBegin("encryption_algorithm", ::apache::thrift::protocol::T_STRUCT, 1); + xfer += this->encryption_algorithm.write(oprot); + xfer += oprot->writeFieldEnd(); + + if (this->__isset.key_metadata) { + xfer += oprot->writeFieldBegin("key_metadata", ::apache::thrift::protocol::T_STRING, 2); + xfer += oprot->writeBinary(this->key_metadata); + xfer += oprot->writeFieldEnd(); + } + xfer += oprot->writeFieldStop(); + xfer += oprot->writeStructEnd(); + return xfer; +} + +}} // namespace + +#endif diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt index 93f2e72d8d661..5ac5085a694c8 100644 --- a/cpp/src/parquet/CMakeLists.txt +++ b/cpp/src/parquet/CMakeLists.txt @@ -432,6 +432,7 @@ add_parquet_benchmark(column_reader_benchmark) add_parquet_benchmark(column_io_benchmark) add_parquet_benchmark(encoding_benchmark) add_parquet_benchmark(level_conversion_benchmark) +add_parquet_benchmark(metadata_benchmark) add_parquet_benchmark(page_index_benchmark SOURCES page_index_benchmark.cc benchmark_util.cc) add_parquet_benchmark(arrow/reader_writer_benchmark PREFIX "parquet-arrow") diff --git a/cpp/src/parquet/arrow/writer.cc b/cpp/src/parquet/arrow/writer.cc index 5238986c428d3..bd6f542d11c72 100644 --- a/cpp/src/parquet/arrow/writer.cc +++ b/cpp/src/parquet/arrow/writer.cc @@ -547,8 +547,8 @@ Status GetSchemaMetadata(const ::arrow::Schema& schema, ::arrow::MemoryPool* poo // The serialized schema is not UTF-8, which is required for Thrift std::string schema_as_string = serialized->ToString(); std::string schema_base64 = ::arrow::util::base64_encode(schema_as_string); - result->Append(kArrowSchemaKey, schema_base64); - *out = result; + result->Append(kArrowSchemaKey, std::move(schema_base64)); + *out = std::move(result); return Status::OK(); } diff --git a/cpp/src/parquet/column_page.h b/cpp/src/parquet/column_page.h index 905f805b8c9cc..b389ffd98e6c7 100644 --- a/cpp/src/parquet/column_page.h +++ b/cpp/src/parquet/column_page.h @@ -75,13 +75,13 @@ class DataPage : public Page { protected: DataPage(PageType::type type, const std::shared_ptr& buffer, int32_t num_values, Encoding::type encoding, int64_t uncompressed_size, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : Page(buffer, type), num_values_(num_values), encoding_(encoding), uncompressed_size_(uncompressed_size), - statistics_(statistics), + statistics_(std::move(statistics)), first_row_index_(std::move(first_row_index)) {} int32_t num_values_; @@ -97,10 +97,10 @@ class DataPageV1 : public DataPage { DataPageV1(const std::shared_ptr& buffer, int32_t num_values, Encoding::type encoding, Encoding::type definition_level_encoding, Encoding::type repetition_level_encoding, int64_t uncompressed_size, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : DataPage(PageType::DATA_PAGE, buffer, num_values, encoding, uncompressed_size, - statistics, std::move(first_row_index)), + std::move(statistics), std::move(first_row_index)), definition_level_encoding_(definition_level_encoding), repetition_level_encoding_(repetition_level_encoding) {} @@ -119,10 +119,10 @@ class DataPageV2 : public DataPage { int32_t num_rows, Encoding::type encoding, int32_t definition_levels_byte_length, int32_t repetition_levels_byte_length, int64_t uncompressed_size, bool 
is_compressed = false, - const EncodedStatistics& statistics = EncodedStatistics(), + EncodedStatistics statistics = EncodedStatistics(), std::optional first_row_index = std::nullopt) : DataPage(PageType::DATA_PAGE_V2, buffer, num_values, encoding, uncompressed_size, - statistics, std::move(first_row_index)), + std::move(statistics), std::move(first_row_index)), num_nulls_(num_nulls), num_rows_(num_rows), definition_levels_byte_length_(definition_levels_byte_length), diff --git a/cpp/src/parquet/column_reader.cc b/cpp/src/parquet/column_reader.cc index eae7ac4252735..407201a89ef08 100644 --- a/cpp/src/parquet/column_reader.cc +++ b/cpp/src/parquet/column_reader.cc @@ -101,6 +101,10 @@ inline void CheckNumberDecoded(int64_t number_decoded, int64_t expected) { std::to_string(expected)); } } + +constexpr std::string_view kErrorRepDefLevelNotMatchesNumValues = + "Number of decoded rep / def levels do not match num_values in page header"; + } // namespace LevelDecoder::LevelDecoder() : num_values_remaining_(0) {} @@ -534,11 +538,11 @@ std::shared_ptr SerializedPageReader::NextPage() { page_buffer = DecompressIfNeeded(std::move(page_buffer), compressed_len, uncompressed_len); - return std::make_shared(page_buffer, header.num_values, - LoadEnumSafe(&header.encoding), - LoadEnumSafe(&header.definition_level_encoding), - LoadEnumSafe(&header.repetition_level_encoding), - uncompressed_len, data_page_statistics); + return std::make_shared( + page_buffer, header.num_values, LoadEnumSafe(&header.encoding), + LoadEnumSafe(&header.definition_level_encoding), + LoadEnumSafe(&header.repetition_level_encoding), uncompressed_len, + std::move(data_page_statistics)); } else if (page_type == PageType::DATA_PAGE_V2) { ++page_ordinal_; const format::DataPageHeaderV2& header = current_page_header_.data_page_header_v2; @@ -565,7 +569,7 @@ std::shared_ptr SerializedPageReader::NextPage() { page_buffer, header.num_values, header.num_nulls, header.num_rows, LoadEnumSafe(&header.encoding), header.definition_levels_byte_length, header.repetition_levels_byte_length, uncompressed_len, is_compressed, - data_page_statistics); + std::move(data_page_statistics)); } else { throw ParquetException( "Internal error, we have already skipped non-data pages in ShouldSkipPage()"); @@ -907,6 +911,8 @@ class ColumnReaderImplBase { static_cast(data_size)); } + // Available values in the current data page, value includes repeated values + // and nulls. int64_t available_values_current_page() const { return num_buffered_values_ - num_decoded_values_; } @@ -933,7 +939,7 @@ class ColumnReaderImplBase { int64_t num_buffered_values_; // The number of values from the current data page that have been decoded - // into memory + // into memory or skipped over. int64_t num_decoded_values_; ::arrow::MemoryPool* pool_; @@ -1026,28 +1032,36 @@ class TypedColumnReaderImpl : public TypedColumnReader, // Read definition and repetition levels. Also return the number of definition levels // and number of values to read. This function is called before reading values. + // + // ReadLevels will throw exception when any num-levels read is not equal to the number + // of the levels can be read. 
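The comment closing the hunk above states the new contract for ReadLevels, whose updated body follows: after batch_size is clamped to available_values_current_page(), any definition- or repetition-level decode that returns a different count is treated as a corrupt page and reported through the shared kErrorRepDefLevelNotMatchesNumValues message. A minimal, self-contained sketch of that invariant (CheckDecodedLevels is an illustrative helper, not part of the Parquet sources, and std::runtime_error stands in for ParquetException):

    #include <cstdint>
    #include <stdexcept>
    #include <string>
    #include <string_view>

    // Error text taken verbatim from the patch; def- and rep-level reads
    // reuse the same message so tests can match a single substring.
    constexpr std::string_view kErrorRepDefLevelNotMatchesNumValues =
        "Number of decoded rep / def levels do not match num_values in page header";

    // After clamping batch_size to the values still available in the current
    // data page, every level decode must return exactly batch_size entries.
    inline void CheckDecodedLevels(int64_t decoded, int64_t batch_size) {
      if (decoded != batch_size) {
        throw std::runtime_error(std::string(kErrorRepDefLevelNotMatchesNumValues));
      }
    }

The real code wraps the comparison in ARROW_PREDICT_FALSE and throws ParquetException; the sketch only restates the condition being enforced.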
void ReadLevels(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, - int64_t* num_def_levels, int64_t* values_to_read) { - batch_size = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t* num_def_levels, int64_t* non_null_values_to_read) { + batch_size = std::min(batch_size, this->available_values_current_page()); // If the field is required and non-repeated, there are no definition levels if (this->max_def_level_ > 0 && def_levels != nullptr) { *num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); + if (ARROW_PREDICT_FALSE(*num_def_levels != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } // TODO(wesm): this tallying of values-to-decode can be performed with better // cache-efficiency if fused with the level decoding. - *values_to_read += + *non_null_values_to_read += std::count(def_levels, def_levels + *num_def_levels, this->max_def_level_); } else { // Required field, read all values - *values_to_read = batch_size; + if (num_def_levels != nullptr) { + *num_def_levels = 0; + } + *non_null_values_to_read = batch_size; } // Not present for non-repeated fields if (this->max_rep_level_ > 0 && rep_levels != nullptr) { int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); - if (def_levels != nullptr && *num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (batch_size != num_rep_levels) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } } } @@ -1090,8 +1104,7 @@ int64_t TypedColumnReaderImpl::ReadBatchWithDictionary( *indices_read = ReadDictionaryIndices(indices_to_read, indices); int64_t total_indices = std::max(num_def_levels, *indices_read); // Some callers use a batch size of 0 just to get the dictionary. - int64_t expected_values = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t expected_values = std::min(batch_size, this->available_values_current_page()); if (total_indices == 0 && expected_values > 0) { std::stringstream ss; ss << "Read 0 values, expected " << expected_values; @@ -1106,7 +1119,8 @@ template int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, int64_t* values_read) { - // HasNext invokes ReadNewPage + // HasNext might invoke ReadNewPage until a data page with + // `available_values_current_page() > 0` is found. if (!HasNext()) { *values_read = 0; return 0; @@ -1115,20 +1129,31 @@ int64_t TypedColumnReaderImpl::ReadBatch(int64_t batch_size, int16_t* def // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished int64_t num_def_levels = 0; - int64_t values_to_read = 0; - ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, &values_to_read); - - *values_read = this->ReadValues(values_to_read, values); + // Number of non-null values to read within `num_def_levels`. + int64_t non_null_values_to_read = 0; + ReadLevels(batch_size, def_levels, rep_levels, &num_def_levels, + &non_null_values_to_read); + // Should not return more values than available in the current data page, + // since currently, ReadLevels would only consume level from current + // data page. 
+ if (ARROW_PREDICT_FALSE(num_def_levels > this->available_values_current_page())) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } + if (non_null_values_to_read != 0) { + *values_read = this->ReadValues(non_null_values_to_read, values); + } else { + *values_read = 0; + } + // Adjust total_values, since if max_def_level_ == 0, num_def_levels would + // be 0 and `values_read` would adjust to `available_values_current_page()`. int64_t total_values = std::max(num_def_levels, *values_read); - int64_t expected_values = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + int64_t expected_values = std::min(batch_size, this->available_values_current_page()); if (total_values == 0 && expected_values > 0) { std::stringstream ss; ss << "Read 0 values, expected " << expected_values; ParquetException::EofException(ss.str()); } this->ConsumeBufferedValues(total_values); - return total_values; } @@ -1137,7 +1162,8 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( int64_t batch_size, int16_t* def_levels, int16_t* rep_levels, T* values, uint8_t* valid_bits, int64_t valid_bits_offset, int64_t* levels_read, int64_t* values_read, int64_t* null_count_out) { - // HasNext invokes ReadNewPage + // HasNext might invoke ReadNewPage until a data page with + // `available_values_current_page() > 0` is found. if (!HasNext()) { *levels_read = 0; *values_read = 0; @@ -1145,21 +1171,24 @@ int64_t TypedColumnReaderImpl::ReadBatchSpaced( return 0; } + // Number of non-null values to read int64_t total_values; // TODO(wesm): keep reading data pages until batch_size is reached, or the // row group is finished - batch_size = - std::min(batch_size, this->num_buffered_values_ - this->num_decoded_values_); + batch_size = std::min(batch_size, this->available_values_current_page()); // If the field is required and non-repeated, there are no definition levels if (this->max_def_level_ > 0) { int64_t num_def_levels = this->ReadDefinitionLevels(batch_size, def_levels); + if (ARROW_PREDICT_FALSE(num_def_levels != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } // Not present for non-repeated fields if (this->max_rep_level_ > 0) { int64_t num_rep_levels = this->ReadRepetitionLevels(batch_size, rep_levels); - if (num_def_levels != num_rep_levels) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (ARROW_PREDICT_FALSE(num_def_levels != num_rep_levels)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } } @@ -1401,26 +1430,21 @@ class TypedRecordReader : public TypedColumnReaderImpl, int16_t* def_levels = this->def_levels() + levels_written_; int16_t* rep_levels = this->rep_levels() + levels_written_; - // Not present for non-repeated fields - int64_t levels_read = 0; + if (ARROW_PREDICT_FALSE(this->ReadDefinitionLevels(batch_size, def_levels) != + batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } if (this->max_rep_level_ > 0) { - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); + int64_t rep_levels_read = this->ReadRepetitionLevels(batch_size, rep_levels); + if (ARROW_PREDICT_FALSE(rep_levels_read != batch_size)) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } - } else if (this->max_def_level_ > 0) { - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - } - - // 
Exhausted column chunk - if (levels_read == 0) { - break; } - levels_written_ += levels_read; + levels_written_ += batch_size; records_read += ReadRecordData(num_records - records_read); } else { - // No repetition or definition levels + // No repetition and definition levels, we can read values directly batch_size = std::min(num_records - records_read, batch_size); records_read += ReadRecordData(batch_size); } @@ -1574,13 +1598,14 @@ class TypedRecordReader : public TypedColumnReaderImpl, int16_t* def_levels = this->def_levels() + levels_written_; int16_t* rep_levels = this->rep_levels() + levels_written_; - int64_t levels_read = 0; - levels_read = this->ReadDefinitionLevels(batch_size, def_levels); - if (this->ReadRepetitionLevels(batch_size, rep_levels) != levels_read) { - throw ParquetException("Number of decoded rep / def levels did not match"); + if (this->ReadDefinitionLevels(batch_size, def_levels) != batch_size) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); + } + if (this->ReadRepetitionLevels(batch_size, rep_levels) != batch_size) { + throw ParquetException(kErrorRepDefLevelNotMatchesNumValues); } - levels_written_ += levels_read; + levels_written_ += batch_size; int64_t remaining_records = num_records - skipped_records; // This updates at_record_start_. skipped_records += DelimitAndSkipRecordsInBuffer(remaining_records); @@ -1675,44 +1700,55 @@ class TypedRecordReader : public TypedColumnReaderImpl, // // \return Number of records delimited int64_t DelimitRecords(int64_t num_records, int64_t* values_seen) { - int64_t values_to_read = 0; + if (ARROW_PREDICT_FALSE(num_records == 0 || levels_position_ == levels_written_)) { + *values_seen = 0; + return 0; + } int64_t records_read = 0; - - const int16_t* def_levels = this->def_levels() + levels_position_; - const int16_t* rep_levels = this->rep_levels() + levels_position_; - + const int16_t* const rep_levels = this->rep_levels(); + const int16_t* const def_levels = this->def_levels(); ARROW_DCHECK_GT(this->max_rep_level_, 0); - - // Count logical records and number of values to read - while (levels_position_ < levels_written_) { - const int16_t rep_level = *rep_levels++; - if (rep_level == 0) { - // If at_record_start_ is true, we are seeing the start of a record - // for the second time, such as after repeated calls to - // DelimitRecords. In this case we must continue until we find - // another record start or exhausting the ColumnChunk - if (!at_record_start_) { - // We've reached the end of a record; increment the record count. - ++records_read; - if (records_read == num_records) { - // We've found the number of records we were looking for. Set - // at_record_start_ to true and break - at_record_start_ = true; - break; - } - } - } + // If at_record_start_ is true, we are seeing the start of a record + // for the second time, such as after repeated calls to + // DelimitRecords. 
In this case we must continue until we find + // another record start or exhausting the ColumnChunk + int64_t level = levels_position_; + if (at_record_start_) { + ARROW_DCHECK_EQ(0, rep_levels[levels_position_]); + ++levels_position_; // We have decided to consume the level at this position; therefore we // must advance until we find another record boundary at_record_start_ = false; + } - const int16_t def_level = *def_levels++; - if (def_level == this->max_def_level_) { - ++values_to_read; + // Count logical records and number of non-null values to read + ARROW_DCHECK(!at_record_start_); + // Scan repetition levels to find record end + while (levels_position_ < levels_written_) { + // We use an estimated batch size to simplify branching and + // improve performance in the common case. This might slow + // things down a bit if a single long record remains, though. + int64_t stride = + std::min(levels_written_ - levels_position_, num_records - records_read); + const int64_t position_end = levels_position_ + stride; + for (int64_t i = levels_position_; i < position_end; ++i) { + records_read += rep_levels[i] == 0; + } + levels_position_ = position_end; + if (records_read == num_records) { + // Check last rep_level reaches the boundary and + // pop the last level. + ARROW_CHECK_EQ(rep_levels[levels_position_ - 1], 0); + --levels_position_; + // We've found the number of records we were looking for. Set + // at_record_start_ to true and break + at_record_start_ = true; + break; } - ++levels_position_; } - *values_seen = values_to_read; + // Scan definition levels to find number of physical values + *values_seen = std::count(def_levels + level, def_levels + levels_position_, + this->max_def_level_); return records_read; } diff --git a/cpp/src/parquet/column_reader.h b/cpp/src/parquet/column_reader.h index 086f6c0e55806..29e1b2a25e437 100644 --- a/cpp/src/parquet/column_reader.h +++ b/cpp/src/parquet/column_reader.h @@ -197,7 +197,7 @@ class PARQUET_EXPORT ColumnReader { template class TypedColumnReader : public ColumnReader { public: - typedef typename DType::c_type T; + using T = typename DType::c_type; // Read a batch of repetition levels, definition levels, and values from the // column. diff --git a/cpp/src/parquet/column_reader_test.cc b/cpp/src/parquet/column_reader_test.cc index a48573966a905..9096f195687fb 100644 --- a/cpp/src/parquet/column_reader_test.cc +++ b/cpp/src/parquet/column_reader_test.cc @@ -415,7 +415,7 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { &descr, values, /*num_values=*/2, Encoding::PLAIN, /*indices=*/{}, /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, /*rep_levels=*/{}, - /*max_rep_level=*/0); + /*max_rep_level=*/max_rep_level_); pages_.push_back(data_page); InitReader(&descr); auto reader = static_cast(reader_.get()); @@ -431,6 +431,80 @@ TEST_F(TestPrimitiveReader, TestReadValuesMissing) { ParquetException); } +// GH-41321: When max_def_level > 0 or max_rep_level > 0, and +// Page has more or less levels than the `num_values` in +// PageHeader. We should detect and throw exception. 
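Before the regression test introduced by the GH-41321 comment above, it helps to spell out the counting scheme used by the rewritten DelimitRecords in the column_reader.cc hunk: record boundaries are found by scanning repetition levels for zeros in strides, and the number of physically stored values is recovered afterwards with a single std::count over the definition levels. A simplified, self-contained sketch of that scheme (it deliberately omits the at_record_start_ bookkeeping and the num_records cap handled by the real reader):

    #include <algorithm>
    #include <cstdint>
    #include <utility>
    #include <vector>

    // Returns {records, non_null_values} for a fully buffered run of levels.
    // A repetition level of 0 marks the start of a new record; a definition
    // level equal to max_def_level marks a physically stored (non-null) value.
    std::pair<int64_t, int64_t> CountRecordsAndValues(
        const std::vector<int16_t>& rep_levels,
        const std::vector<int16_t>& def_levels, int16_t max_def_level) {
      int64_t records = 0;
      for (int16_t rep : rep_levels) {
        records += (rep == 0);  // branch-free accumulation, as in the patch
      }
      int64_t values =
          std::count(def_levels.begin(), def_levels.end(), max_def_level);
      return {records, values};
    }

The production code additionally stops once num_records record boundaries have been counted, steps back over the final boundary level, and sets at_record_start_ so the next call resumes cleanly at a record start.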
+TEST_F(TestPrimitiveReader, DefRepLevelNotExpected) { + auto do_check = [&](const NodePtr& type, const std::vector& input_def_levels, + const std::vector& input_rep_levels, int num_values) { + std::vector values(num_values, false); + const ColumnDescriptor descr(type, max_def_level_, max_rep_level_); + + // The data page falls back to plain encoding + std::shared_ptr dummy = AllocateBuffer(); + std::shared_ptr data_page = MakeDataPage( + &descr, values, /*num_values=*/num_values, Encoding::PLAIN, /*indices=*/{}, + /*indices_size=*/0, /*def_levels=*/input_def_levels, max_def_level_, + /*rep_levels=*/input_rep_levels, + /*max_rep_level=*/max_rep_level_); + pages_.push_back(data_page); + InitReader(&descr); + auto reader = static_cast(reader_.get()); + ASSERT_TRUE(reader->HasNext()); + + constexpr int batch_size = 10; + std::vector def_levels(batch_size, 0); + std::vector rep_levels(batch_size, 0); + bool values_out[batch_size]; + int64_t values_read; + EXPECT_THROW_THAT( + [&]() { + reader->ReadBatch(batch_size, def_levels.data(), rep_levels.data(), values_out, + &values_read); + }, + ParquetException, + ::testing::Property(&ParquetException::what, + ::testing::HasSubstr("Number of decoded rep / def levels do " + "not match num_values in page header"))); + }; + // storing def-levels less than value in page-header + { + max_def_level_ = 1; + max_rep_level_ = 0; + NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); + std::vector input_def_levels(1, 1); + std::vector input_rep_levels{}; + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/3); + } + // storing def-levels more than value in page-header + { + max_def_level_ = 1; + max_rep_level_ = 0; + NodePtr type = schema::Boolean("a", Repetition::OPTIONAL); + std::vector input_def_levels(2, 1); + std::vector input_rep_levels{}; + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/1); + } + // storing rep-levels less than value in page-header + { + max_def_level_ = 0; + max_rep_level_ = 1; + NodePtr type = schema::Boolean("a", Repetition::REPEATED); + std::vector input_def_levels{}; + std::vector input_rep_levels(3, 0); + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/4); + } + // storing rep-levels more than value in page-header + { + max_def_level_ = 0; + max_rep_level_ = 1; + NodePtr type = schema::Boolean("a", Repetition::REPEATED); + std::vector input_def_levels{}; + std::vector input_rep_levels(2, 1); + do_check(type, input_def_levels, input_rep_levels, /*num_values=*/1); + } +} + // Repetition level byte length reported in Page but Max Repetition level // is zero for the column. TEST_F(TestPrimitiveReader, TestRepetitionLvlBytesWithMaxRepetitionZero) { diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc index eae8fc6125499..9059cd1641745 100644 --- a/cpp/src/parquet/column_writer.cc +++ b/cpp/src/parquet/column_writer.cc @@ -379,6 +379,11 @@ class SerializedPageWriter : public PageWriter { int64_t WriteDataPage(const DataPage& page) override { const int64_t uncompressed_size = page.uncompressed_size(); + if (uncompressed_size > std::numeric_limits::max()) { + throw ParquetException("Uncompressed data page size overflows INT32_MAX. 
Size:", + uncompressed_size); + } + std::shared_ptr compressed_data = page.buffer(); const uint8_t* output_data_buffer = compressed_data->data(); int64_t output_data_len = compressed_data->size(); @@ -399,11 +404,6 @@ class SerializedPageWriter : public PageWriter { } format::PageHeader page_header; - - if (uncompressed_size > std::numeric_limits::max()) { - throw ParquetException("Uncompressed data page size overflows INT32_MAX. Size:", - uncompressed_size); - } page_header.__set_uncompressed_page_size(static_cast(uncompressed_size)); page_header.__set_compressed_page_size(static_cast(output_data_len)); @@ -1018,13 +1018,13 @@ void ColumnWriterImpl::BuildDataPageV1(int64_t definition_levels_rle_size, compressed_data->CopySlice(0, compressed_data->size(), allocator_)); std::unique_ptr page_ptr = std::make_unique( compressed_data_copy, num_values, encoding_, Encoding::RLE, Encoding::RLE, - uncompressed_size, page_stats, first_row_index); + uncompressed_size, std::move(page_stats), first_row_index); total_compressed_bytes_ += page_ptr->size() + sizeof(format::PageHeader); data_pages_.push_back(std::move(page_ptr)); } else { // Eagerly write pages DataPageV1 page(compressed_data, num_values, encoding_, Encoding::RLE, Encoding::RLE, - uncompressed_size, page_stats, first_row_index); + uncompressed_size, std::move(page_stats), first_row_index); WriteDataPage(page); } } diff --git a/cpp/src/parquet/metadata_benchmark.cc b/cpp/src/parquet/metadata_benchmark.cc new file mode 100644 index 0000000000000..97a99be798cbb --- /dev/null +++ b/cpp/src/parquet/metadata_benchmark.cc @@ -0,0 +1,156 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include +#include + +#include + +#include "arrow/buffer.h" +#include "arrow/io/memory.h" +#include "arrow/util/logging.h" + +#include "parquet/column_writer.h" +#include "parquet/file_reader.h" +#include "parquet/file_writer.h" +#include "parquet/metadata.h" +#include "parquet/platform.h" +#include "parquet/schema.h" + +namespace parquet { + +using ::arrow::Buffer; +using ::arrow::io::BufferOutputStream; +using ::arrow::io::BufferReader; +using schema::GroupNode; +using schema::NodePtr; +using schema::NodeVector; + +class MetadataBenchmark { + public: + explicit MetadataBenchmark(benchmark::State* state) + : MetadataBenchmark(static_cast(state->range(0)), + static_cast(state->range(1))) {} + + MetadataBenchmark(int num_columns, int num_row_groups) + : num_columns_(num_columns), num_row_groups_(num_row_groups) { + NodeVector fields; + for (int i = 0; i < num_columns_; ++i) { + std::stringstream ss; + ss << "col" << i; + fields.push_back(parquet::schema::Int32(ss.str(), Repetition::REQUIRED)); + } + schema_root_ = std::static_pointer_cast( + GroupNode::Make("schema", Repetition::REQUIRED, fields)); + + WriterProperties::Builder prop_builder; + writer_properties_ = prop_builder.version(ParquetVersion::PARQUET_2_6) + ->disable_dictionary() + ->data_page_version(ParquetDataPageVersion::V2) + ->build(); + } + + std::shared_ptr WriteFile(benchmark::State* state) { + PARQUET_ASSIGN_OR_THROW(auto sink, BufferOutputStream::Create()); + + auto writer = ParquetFileWriter::Open(sink, schema_root_, writer_properties_); + std::vector int32_values(1, 42); + int64_t data_size = 0; + for (int rg = 0; rg < num_row_groups_; ++rg) { + auto row_group_writer = writer->AppendRowGroup(); + for (int col = 0; col < num_columns_; ++col) { + auto col_writer = row_group_writer->NextColumn(); + ARROW_CHECK_EQ(col_writer->type(), Type::INT32); + auto typed_col_writer = static_cast(col_writer); + typed_col_writer->WriteBatch( + /*num_values=*/static_cast(int32_values.size()), + /*def_levels=*/nullptr, /*rep_levels=*/nullptr, int32_values.data()); + typed_col_writer->Close(); + } + row_group_writer->Close(); + data_size += row_group_writer->total_compressed_bytes_written(); + } + writer->Close(); + PARQUET_ASSIGN_OR_THROW(auto buf, sink->Finish()); + state->counters["file_size"] = static_cast(buf->size()); + // Note that "data_size" includes the Thrift page headers + state->counters["data_size"] = static_cast(data_size); + return buf; + } + + void ReadFile(std::shared_ptr contents) { + auto source = std::make_shared(contents); + ReaderProperties props; + auto reader = ParquetFileReader::Open(source, props); + auto metadata = reader->metadata(); + ARROW_CHECK_EQ(metadata->num_columns(), num_columns_); + ARROW_CHECK_EQ(metadata->num_row_groups(), num_row_groups_); + // There should be one row per row group + ARROW_CHECK_EQ(metadata->num_rows(), num_row_groups_); + reader->Close(); + } + + private: + int num_columns_; + int num_row_groups_; + std::shared_ptr schema_root_; + std::shared_ptr writer_properties_; +}; + +void WriteMetadataSetArgs(benchmark::internal::Benchmark* bench) { + bench->ArgNames({"num_columns", "num_row_groups"}); + + for (int num_columns : {1, 10, 100}) { + for (int num_row_groups : {1, 100, 1000}) { + bench->Args({num_columns, num_row_groups}); + } + } + /* For larger num_columns, restrict num_row_groups to small values + * to avoid blowing up benchmark execution time. 
+ */ + for (int num_row_groups : {1, 100}) { + bench->Args({/*num_columns=*/1000, num_row_groups}); + } +} + +void ReadMetadataSetArgs(benchmark::internal::Benchmark* bench) { + WriteMetadataSetArgs(bench); +} + +void WriteFileMetadataAndData(benchmark::State& state) { + MetadataBenchmark benchmark(&state); + + for (auto _ : state) { + auto sink = benchmark.WriteFile(&state); + } + state.SetItemsProcessed(state.iterations()); +} + +void ReadFileMetadata(benchmark::State& state) { + MetadataBenchmark benchmark(&state); + auto contents = benchmark.WriteFile(&state); + + for (auto _ : state) { + benchmark.ReadFile(contents); + } + state.SetItemsProcessed(state.iterations()); +} + +BENCHMARK(WriteFileMetadataAndData)->Apply(WriteMetadataSetArgs); +BENCHMARK(ReadFileMetadata)->Apply(ReadMetadataSetArgs); + +} // namespace parquet diff --git a/cpp/src/parquet/thrift_internal.h b/cpp/src/parquet/thrift_internal.h index 7491f118d32a0..9e02331b44ba0 100644 --- a/cpp/src/parquet/thrift_internal.h +++ b/cpp/src/parquet/thrift_internal.h @@ -446,13 +446,12 @@ class ThriftDeserializer { T* deserialized_msg) { // Deserialize msg bytes into c++ thrift msg using memory transport. auto tmem_transport = CreateReadOnlyMemoryBuffer(const_cast(buf), *len); - apache::thrift::protocol::TCompactProtocolFactoryT tproto_factory; - // Protect against CPU and memory bombs - tproto_factory.setStringSizeLimit(string_size_limit_); - tproto_factory.setContainerSizeLimit(container_size_limit_); - auto tproto = tproto_factory.getProtocol(tmem_transport); + auto tproto = apache::thrift::protocol::TCompactProtocolT( + tmem_transport, string_size_limit_, container_size_limit_); try { - deserialized_msg->read(tproto.get()); + deserialized_msg + ->template read>( + &tproto); } catch (std::exception& e) { std::stringstream ss; ss << "Couldn't deserialize thrift: " << e.what() << "\n"; diff --git a/cpp/thirdparty/versions.txt b/cpp/thirdparty/versions.txt index 4983f3cee2c2d..06506d32bef7c 100644 --- a/cpp/thirdparty/versions.txt +++ b/cpp/thirdparty/versions.txt @@ -113,8 +113,8 @@ ARROW_UCX_BUILD_VERSION=1.12.1 ARROW_UCX_BUILD_SHA256_CHECKSUM=9bef31aed0e28bf1973d28d74d9ac4f8926c43ca3b7010bd22a084e164e31b71 ARROW_UTF8PROC_BUILD_VERSION=v2.7.0 ARROW_UTF8PROC_BUILD_SHA256_CHECKSUM=4bb121e297293c0fd55f08f83afab6d35d48f0af4ecc07523ad8ec99aa2b12a1 -ARROW_XSIMD_BUILD_VERSION=9.0.1 -ARROW_XSIMD_BUILD_SHA256_CHECKSUM=b1bb5f92167fd3a4f25749db0be7e61ed37e0a5d943490f3accdcd2cd2918cc0 +ARROW_XSIMD_BUILD_VERSION=13.0.0 +ARROW_XSIMD_BUILD_SHA256_CHECKSUM=8bdbbad0c3e7afa38d88d0d484d70a1671a1d8aefff03f4223ab2eb6a41110a3 ARROW_ZLIB_BUILD_VERSION=1.3.1 ARROW_ZLIB_BUILD_SHA256_CHECKSUM=9a93b2b7dfdac77ceba5a558a580e74667dd6fede4585b91eefb60f03b72df23 ARROW_ZSTD_BUILD_VERSION=1.5.6 diff --git a/csharp/Directory.Build.props b/csharp/Directory.Build.props index f6d42241f95cf..3c06d3cd31d90 100644 --- a/csharp/Directory.Build.props +++ b/csharp/Directory.Build.props @@ -37,12 +37,13 @@ latest true $(CSharpDir)ApacheArrow.snk + true The Apache Software Foundation - https://www.apache.org/images/feather.png + feather.png LICENSE.txt https://arrow.apache.org/ @@ -55,6 +56,7 @@ + diff --git a/csharp/feather.png b/csharp/feather.png new file mode 100644 index 0000000000000..7b596e6683ddb Binary files /dev/null and b/csharp/feather.png differ diff --git a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs index 1bd4035d5b9da..bd5d9315e9fc4 100644 --- a/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs +++ 
b/csharp/src/Apache.Arrow/Arrays/BinaryArray.cs @@ -22,7 +22,7 @@ namespace Apache.Arrow { - public class BinaryArray : Array, IReadOnlyList + public class BinaryArray : Array, IReadOnlyList, ICollection { public class Builder : BuilderBase { @@ -380,5 +380,30 @@ IEnumerator IEnumerable.GetEnumerator() } IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(byte[] item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(byte[] item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(byte[] item) + { + for (int index = 0; index < Length; index++) + { + if (GetBytes(index).SequenceEqual(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(byte[][] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetBytes(srcIndex).ToArray(); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs index e9c5f8979e48f..19d4d0b7ed564 100644 --- a/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/BooleanArray.cs @@ -21,7 +21,7 @@ namespace Apache.Arrow { - public class BooleanArray: Array, IReadOnlyList + public class BooleanArray: Array, IReadOnlyList, ICollection { public class Builder : IArrowArrayBuilder { @@ -188,7 +188,7 @@ public bool GetBoolean(int index) public bool? GetValue(int index) { return IsNull(index) - ? (bool?)null + ? null : BitUtility.GetBit(ValueBuffer.Span, index + Offset); } @@ -205,5 +205,30 @@ public bool GetBoolean(int index) } IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(bool? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(bool? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(bool? item) + { + for (int index = 0; index < Length; index++) + { + if (GetValue(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(bool?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetValue(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/Date32Array.cs b/csharp/src/Apache.Arrow/Arrays/Date32Array.cs index 6ab4986f573e2..55864e89e2eb3 100644 --- a/csharp/src/Apache.Arrow/Arrays/Date32Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Date32Array.cs @@ -23,9 +23,9 @@ namespace Apache.Arrow /// The class holds an array of dates in the Date32 format, where each date is /// stored as the number of days since the dawn of (UNIX) time. 
/// - public class Date32Array : PrimitiveArray, IReadOnlyList + public class Date32Array : PrimitiveArray, IReadOnlyList, ICollection #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { private static readonly DateTime _epochDate = new DateTime(1970, 1, 1, 0, 0, 0, DateTimeKind.Unspecified); @@ -40,10 +40,9 @@ public class Builder : DateArrayBuilder { private class DateBuilder : PrimitiveArrayBuilder { - protected override Date32Array Build( - ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, - int length, int nullCount, int offset) => - new Date32Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset); + protected override Date32Array Build(ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, int length, + int nullCount, int offset) => + new(valueBuffer, nullBitmapBuffer, length, nullCount, offset); } /// @@ -149,6 +148,31 @@ public Date32Array(ArrayData data) yield return GetDateOnly(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateOnly? item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateOnly(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateOnly(srcIndex); + } + } #endif int IReadOnlyCollection.Count => Length; @@ -160,7 +184,32 @@ public Date32Array(ArrayData data) for (int index = 0; index < Length; index++) { yield return GetDateTime(index); - }; + } + } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateTime? item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateTime?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateTime(srcIndex); + } } } } diff --git a/csharp/src/Apache.Arrow/Arrays/Date64Array.cs b/csharp/src/Apache.Arrow/Arrays/Date64Array.cs index 43e698e10b25c..77538ce59ffae 100644 --- a/csharp/src/Apache.Arrow/Arrays/Date64Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Date64Array.cs @@ -24,9 +24,9 @@ namespace Apache.Arrow /// stored as the number of milliseconds since the dawn of (UNIX) time, excluding leap seconds, in multiples of /// 86400000. 
/// - public class Date64Array : PrimitiveArray, IReadOnlyList + public class Date64Array : PrimitiveArray, IReadOnlyList, ICollection #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { private const long MillisecondsPerDay = 86400000; @@ -45,10 +45,9 @@ public class Builder : DateArrayBuilder { private class DateBuilder : PrimitiveArrayBuilder { - protected override Date64Array Build( - ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, - int length, int nullCount, int offset) => - new Date64Array(valueBuffer, nullBitmapBuffer, length, nullCount, offset); + protected override Date64Array Build(ArrowBuffer valueBuffer, ArrowBuffer nullBitmapBuffer, int length, + int nullCount, int offset) => + new(valueBuffer, nullBitmapBuffer, length, nullCount, offset); } /// @@ -151,6 +150,31 @@ public Date64Array(ArrayData data) yield return GetDateOnly(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateOnly? item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateOnly(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateOnly(srcIndex); + } + } #endif int IReadOnlyCollection.Count => Length; @@ -162,7 +186,32 @@ public Date64Array(ArrayData data) for (int index = 0; index < Length; index++) { yield return GetDateTime(index); - }; + } + } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateTime? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateTime? item) + { + for (int index = 0; index < Length; index++) + { + if (GetDateTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateTime?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetDateTime(srcIndex); + } } } } diff --git a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs index fa6f765475240..52bfb9eb20768 100644 --- a/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Decimal256Array.cs @@ -13,6 +13,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#nullable enable + using System; using System.Collections; using System.Collections.Generic; @@ -23,7 +25,7 @@ namespace Apache.Arrow { - public class Decimal256Array : FixedSizeBinaryArray, IReadOnlyList, IReadOnlyList + public class Decimal256Array : FixedSizeBinaryArray, IReadOnlyList, IReadOnlyList { public class Builder : BuilderBase { @@ -178,7 +180,7 @@ public Decimal256Array(ArrayData data) return list; } - public string GetString(int index) + public string? 
GetString(int index) { if (IsNull(index)) { @@ -230,10 +232,10 @@ public bool TryGetSqlDecimal(int index, out SqlDecimal? value) } } - int IReadOnlyCollection.Count => Length; - string? IReadOnlyList.this[int index] => GetString(index); + int IReadOnlyCollection.Count => Length; + string? IReadOnlyList.this[int index] => GetString(index); - IEnumerator IEnumerable.GetEnumerator() + IEnumerator IEnumerable.GetEnumerator() { for (int index = 0; index < Length; index++) { @@ -241,6 +243,6 @@ IEnumerator IEnumerable.GetEnumerator() } } - IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); } } diff --git a/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs b/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs index de4fc42b4cf92..3949af877b0c5 100644 --- a/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/IntervalArray.cs @@ -31,7 +31,7 @@ internal static class IntervalArray } public abstract class IntervalArray : PrimitiveArray - where T : struct + where T : struct, IEquatable { protected IntervalArray(ArrayData data) : base(data) diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs index 0456c5cc65ba4..05d659b5270ad 100644 --- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArray.cs @@ -20,8 +20,8 @@ namespace Apache.Arrow { - public abstract class PrimitiveArray : Array, IReadOnlyList - where T : struct + public abstract class PrimitiveArray : Array, IReadOnlyList, ICollection + where T : struct, IEquatable { protected PrimitiveArray(ArrayData data) : base(data) @@ -40,7 +40,7 @@ protected PrimitiveArray(ArrayData data) { throw new ArgumentOutOfRangeException(nameof(index)); } - return IsValid(index) ? Values[index] : (T?)null; + return IsValid(index) ? Values[index] : null; } public IList ToList(bool includeNulls = false) @@ -86,5 +86,36 @@ IEnumerator IEnumerable.GetEnumerator() yield return IsValid(index) ? Values[index] : null; } } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(T? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(T? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(T? 
item) + { + if (item == null) + { + return NullCount > 0; + } + + ReadOnlySpan values = Values; + while (values.Length > 0) + { + int index = Values.IndexOf(item.Value); + if (index < 0 || IsValid(index)) { return index >= 0; } + values = values.Slice(index + 1); + } + return false; + } + + void ICollection.CopyTo(T?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetValue(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs index 67fe46633c18f..ae02173fb0df4 100644 --- a/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs +++ b/csharp/src/Apache.Arrow/Arrays/PrimitiveArrayBuilder.cs @@ -20,7 +20,7 @@ namespace Apache.Arrow { - public abstract class PrimitiveArrayBuilder : IArrowArrayBuilder + public abstract class PrimitiveArrayBuilder : IArrowArrayBuilder where TTo : struct where TArray : IArrowArray where TBuilder : class, IArrowArrayBuilder diff --git a/csharp/src/Apache.Arrow/Arrays/StringArray.cs b/csharp/src/Apache.Arrow/Arrays/StringArray.cs index a3ec596adc7ba..ab44805d8d1e9 100644 --- a/csharp/src/Apache.Arrow/Arrays/StringArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/StringArray.cs @@ -22,7 +22,7 @@ namespace Apache.Arrow { - public class StringArray: BinaryArray, IReadOnlyList + public class StringArray: BinaryArray, IReadOnlyList, ICollection { public static readonly Encoding DefaultEncoding = Encoding.UTF8; @@ -164,5 +164,30 @@ IEnumerator IEnumerable.GetEnumerator() } IEnumerator IEnumerable.GetEnumerator() => ((IEnumerable)this).GetEnumerator(); + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(string item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(string item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(string item) + { + for (int index = 0; index < Length; index++) + { + if (GetString(index) == item) + return true; + } + + return false; + } + + void ICollection.CopyTo(string[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetString(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Arrays/Time32Array.cs b/csharp/src/Apache.Arrow/Arrays/Time32Array.cs index e9c2d7a4d9b28..63c0898935ba5 100644 --- a/csharp/src/Apache.Arrow/Arrays/Time32Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Time32Array.cs @@ -26,7 +26,7 @@ namespace Apache.Arrow /// public class Time32Array : PrimitiveArray #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { /// @@ -171,6 +171,31 @@ public Time32Array(ArrayData data) yield return GetTime(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(TimeOnly? 
item) + { + for (int index = 0; index < Length; index++) + { + if (GetTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(TimeOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetTime(srcIndex); + } + } #endif } } diff --git a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs index fc18dfb8bf726..5518462952050 100644 --- a/csharp/src/Apache.Arrow/Arrays/Time64Array.cs +++ b/csharp/src/Apache.Arrow/Arrays/Time64Array.cs @@ -26,7 +26,7 @@ namespace Apache.Arrow /// public class Time64Array : PrimitiveArray #if NET6_0_OR_GREATER - , IReadOnlyList + , IReadOnlyList, ICollection #endif { /// @@ -162,6 +162,31 @@ public Time64Array(ArrayData data) yield return GetTime(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(TimeOnly? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(TimeOnly? item) + { + for (int index = 0; index < Length; index++) + { + if (GetTime(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(TimeOnly?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetTime(srcIndex); + } + } #endif } } diff --git a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs index ccb656854a5df..b83860584707e 100644 --- a/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs +++ b/csharp/src/Apache.Arrow/Arrays/TimestampArray.cs @@ -21,7 +21,7 @@ namespace Apache.Arrow { - public class TimestampArray : PrimitiveArray, IReadOnlyList + public class TimestampArray : PrimitiveArray, IReadOnlyList, ICollection { private static readonly DateTimeOffset s_epoch = new DateTimeOffset(1970, 1, 1, 0, 0, 0, 0, TimeSpan.Zero); @@ -157,5 +157,30 @@ public DateTimeOffset GetTimestampUnchecked(int index) yield return GetTimestamp(index); }; } + + int ICollection.Count => Length; + bool ICollection.IsReadOnly => true; + void ICollection.Add(DateTimeOffset? item) => throw new NotSupportedException("Collection is read-only."); + bool ICollection.Remove(DateTimeOffset? item) => throw new NotSupportedException("Collection is read-only."); + void ICollection.Clear() => throw new NotSupportedException("Collection is read-only."); + + bool ICollection.Contains(DateTimeOffset? 
item) + { + for (int index = 0; index < Length; index++) + { + if (GetTimestamp(index).Equals(item)) + return true; + } + + return false; + } + + void ICollection.CopyTo(DateTimeOffset?[] array, int arrayIndex) + { + for (int srcIndex = 0, destIndex = arrayIndex; srcIndex < Length; srcIndex++, destIndex++) + { + array[destIndex] = GetTimestamp(srcIndex); + } + } } } diff --git a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs index b11479c0d4460..c66569afeba85 100644 --- a/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs +++ b/csharp/src/Apache.Arrow/Ipc/ArrowStreamWriter.cs @@ -148,7 +148,7 @@ public void VisitArray(IArrowArray array) public void Visit(MonthDayNanosecondIntervalArray array) => VisitPrimitiveArray(array); private void VisitPrimitiveArray(PrimitiveArray array) - where T : struct + where T : struct, IEquatable { _buffers.Add(CreateBitmapBuffer(array.NullBitmapBuffer, array.Offset, array.Length)); _buffers.Add(CreateSlicedBuffer(array.ValueBuffer, array.Offset, array.Length)); diff --git a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs index 8f0210b28240f..d42ee5279e795 100644 --- a/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs +++ b/csharp/src/Apache.Arrow/Memory/NativeMemoryManager.cs @@ -40,10 +40,12 @@ internal NativeMemoryManager(INativeAllocationOwner owner, IntPtr ptr, int offse _owner = owner; } +#pragma warning disable CA2015 // TODO: is this correct? ~NativeMemoryManager() { Dispose(false); } +#pragma warning restore CA2015 public override unsafe Span GetSpan() { diff --git a/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs b/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs index 4ad5bde0874a8..e5e64b073f799 100644 --- a/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs +++ b/csharp/test/Apache.Arrow.Flight.Sql.Tests/FlightSqlServerTests.cs @@ -14,6 +14,7 @@ // limitations under the License. 
#nullable enable + using System; using System.Collections.Generic; using System.Collections.ObjectModel; @@ -65,7 +66,7 @@ public async Task EnsureTheCorrectActionsAreGiven() var streamWriter = new MockServerStreamWriter(); //When - await producer.ListActions(streamWriter, new MockServerCallContext()).ConfigureAwait(false); + await producer.ListActions(streamWriter, new MockServerCallContext()); var actions = streamWriter.Messages.ToArray(); Assert.Equal(FlightSqlUtils.FlightSqlActions, actions); @@ -115,7 +116,7 @@ public void EnsureTableSchemaIsCorrectWithoutTableSchema(bool includeTableSchema [InlineData(typeof(CommandGetImportedKeys), "GetImportedKeysFlightInfo")] [InlineData(typeof(CommandGetCrossReference), "GetCrossReferenceFlightInfo")] [InlineData(typeof(CommandGetXdbcTypeInfo), "GetXdbcTypeFlightInfo")] - public async void EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandType, string expectedResult) + public async Task EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandType, string expectedResult) { //Given var command = (IMessage) Activator.CreateInstance(commandType)!; @@ -131,7 +132,7 @@ public async void EnsureGetFlightInfoIsCorrectlyRoutedForCommand(Type commandTyp [Fact] - public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupportedAndHasNoDescriptor() + public async Task EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupportedAndHasNoDescriptor() { //Given var producer = new TestFlightSqlSever(); @@ -145,7 +146,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupp } [Fact] - public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupported() + public async Task EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupported() { //Given var producer = new TestFlightSqlSever(); @@ -175,7 +176,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenACommandIsNotSupp [InlineData(typeof(CommandGetImportedKeys), "DoGetImportedKeys")] [InlineData(typeof(CommandGetCrossReference), "DoGetCrossReference")] [InlineData(typeof(CommandGetXdbcTypeInfo), "DoGetXbdcTypeInfo")] - public async void EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, string expectedResult) + public async Task EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, string expectedResult) { //Given var producer = new TestFlightSqlSever(); @@ -192,7 +193,7 @@ public async void EnsureDoGetIsCorrectlyRoutedForADoGetCommand(Type commandType, } [Fact] - public async void EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNotSupported() + public async Task EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNotSupported() { //Given var producer = new TestFlightSqlSever(); @@ -213,7 +214,7 @@ public async void EnsureAnInvalidOperationExceptionIsThrownWhenADoGetCommandIsNo [InlineData(SqlAction.CloseRequest, typeof(ActionClosePreparedStatementRequest), "ClosePreparedStatement")] [InlineData(SqlAction.CreateRequest, typeof(ActionCreatePreparedStatementRequest), "CreatePreparedStatement")] [InlineData("BadCommand", typeof(ActionCreatePreparedStatementRequest), "Action type BadCommand not supported", true)] - public async void EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actionType, Type actionBodyType, string expectedResponse, bool isException = false) + public async Task EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actionType, Type actionBodyType, string expectedResponse, bool isException = false) { //Given var producer = new 
TestFlightSqlSever(); @@ -237,19 +238,19 @@ public async void EnsureDoActionIsCorrectlyRoutedForAnActionRequest(string actio [InlineData(typeof(CommandPreparedStatementQuery), "PutPreparedStatementQuery")] [InlineData(typeof(CommandPreparedStatementUpdate), "PutPreparedStatementUpdate")] [InlineData(typeof(CommandGetXdbcTypeInfo), "Command CommandGetXdbcTypeInfo not supported", true)] - public async void EnsureDoPutIsCorrectlyRoutedForTheCommand(Type commandType, string expectedResponse, bool isException = false) + public async Task EnsureDoPutIsCorrectlyRoutedForTheCommand(Type commandType, string expectedResponse, bool isException = false) { //Given var command = (IMessage) Activator.CreateInstance(commandType)!; var producer = new TestFlightSqlSever(); var descriptor = FlightDescriptor.CreateCommandDescriptor(command.PackAndSerialize().ToArray()); var recordBatch = new RecordBatch(new Schema(new List(), null), System.Array.Empty(), 0); - var reader = new MockStreamReader(await recordBatch.ToFlightData(descriptor).ConfigureAwait(false)); + var reader = new MockStreamReader(await recordBatch.ToFlightData(descriptor)); var batchReader = new FlightServerRecordBatchStreamReader(reader); var mockStreamWriter = new MockServerStreamWriter(); //When - async Task Act() => await producer.DoPut(batchReader, mockStreamWriter, new MockServerCallContext()).ConfigureAwait(false); + async Task Act() => await producer.DoPut(batchReader, mockStreamWriter, new MockServerCallContext()); var exception = await Record.ExceptionAsync(Act); string? actualMessage = isException ? exception?.Message : mockStreamWriter.Messages[0].ApplicationMetadata.ToStringUtf8(); @@ -271,7 +272,7 @@ private class MockServerCallContext : ServerCallContext protected override CancellationToken CancellationTokenCore => default; protected override Metadata ResponseTrailersCore => new(); protected override Status StatusCore { get; set; } - protected override WriteOptions WriteOptionsCore { get; set; } = WriteOptions.Default; + protected override WriteOptions? WriteOptionsCore { get; set; } = WriteOptions.Default; protected override AuthContext AuthContextCore => new("", new Dictionary>()); } } @@ -325,7 +326,7 @@ public static async Task GetSchema(this IEnumerable flightDa public static async Task> ToFlightData(this RecordBatch recordBatch, FlightDescriptor? 
descriptor = null) { var responseStream = new MockFlightServerRecordBatchStreamWriter(); - await responseStream.WriteRecordBatchAsync(recordBatch).ConfigureAwait(false); + await responseStream.WriteRecordBatchAsync(recordBatch); if (descriptor == null) { return responseStream.FlightData; diff --git a/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs b/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs index ebc38354b5c28..aac4e4209240a 100644 --- a/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs +++ b/csharp/test/Apache.Arrow.Flight.Tests/FlightTests.cs @@ -288,9 +288,9 @@ public async Task TestHandshake() { var duplexStreamingCall = _flightClient.Handshake(); - await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.Empty)).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.Empty)); + await duplexStreamingCall.RequestStream.CompleteAsync(); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); Assert.Single(results); Assert.Equal("Done", results.First().Payload.ToStringUtf8()); @@ -303,10 +303,10 @@ public async Task TestSingleExchange() var duplexStreamingCall = _flightClient.DoExchange(flightDescriptor); var expectedBatch = CreateTestBatch(0, 100); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch); + await duplexStreamingCall.RequestStream.CompleteAsync(); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); Assert.Single(results); ArrowReaderVerifier.CompareBatches(expectedBatch, results.FirstOrDefault()); @@ -320,11 +320,11 @@ public async Task TestMultipleExchange() var expectedBatch1 = CreateTestBatch(0, 100); var expectedBatch2 = CreateTestBatch(100, 100); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch1).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch2).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch1); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch2); + await duplexStreamingCall.RequestStream.CompleteAsync(); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); ArrowReaderVerifier.CompareBatches(expectedBatch1, results[0]); ArrowReaderVerifier.CompareBatches(expectedBatch2, results[1]); @@ -338,8 +338,8 @@ public async Task TestExchangeWithMetadata() var expectedBatch = CreateTestBatch(0, 100); var expectedMetadata = ByteString.CopyFromUtf8("test metadata"); - await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch, expectedMetadata).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(expectedBatch, expectedMetadata); + await duplexStreamingCall.RequestStream.CompleteAsync(); List actualMetadata = new List(); List actualBatch = new List(); @@ -358,9 +358,9 @@ public 
async Task TestHandshakeWithSpecificMessage() { var duplexStreamingCall = _flightClient.Handshake(); - await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.CopyFromUtf8("Hello"))).ConfigureAwait(false); - await duplexStreamingCall.RequestStream.CompleteAsync().ConfigureAwait(false); - var results = await duplexStreamingCall.ResponseStream.ToListAsync().ConfigureAwait(false); + await duplexStreamingCall.RequestStream.WriteAsync(new FlightHandshakeRequest(ByteString.CopyFromUtf8("Hello"))); + await duplexStreamingCall.RequestStream.CompleteAsync(); + var results = await duplexStreamingCall.ResponseStream.ToListAsync(); Assert.Single(results); Assert.Equal("Hello handshake", results.First().Payload.ToStringUtf8()); diff --git a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs index 31a5676f01315..7232f74b8bec6 100644 --- a/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs +++ b/csharp/test/Apache.Arrow.IntegrationTest/JsonFile.cs @@ -908,8 +908,8 @@ private static byte[] ConvertHexStringToByteArray(string hexString) }; private void GenerateArray(Func createArray) + where T : struct, IEquatable where TArray : PrimitiveArray - where T : struct { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -929,8 +929,8 @@ private void GenerateArray(Func(Func createArray, Func parse) + where T : struct, IEquatable where TArray : PrimitiveArray - where T : struct { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); @@ -950,8 +950,8 @@ private void GenerateLongArray(Func(Func createArray, Func construct) + where T : struct, IEquatable where TArray : PrimitiveArray - where T : struct { ArrowBuffer validityBuffer = GetValidityBuffer(out int nullCount); diff --git a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs index 682ebec323dc0..c3c21c412d20d 100644 --- a/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/ArrowArrayTests.cs @@ -101,9 +101,9 @@ public void EnumerateArray() { var array = new Int64Array.Builder().Append(1).Append(2).Build(); - foreach(long? foo in (IEnumerable)array) + foreach(long? 
foo in array) { - Assert.InRange(foo.Value, 1, 2); + Assert.InRange(foo!.Value, 1, 2); } foreach (object foo in (IEnumerable)array) @@ -115,12 +115,145 @@ public void EnumerateArray() [Fact] public void ArrayAsReadOnlyList() { - Int64Array array = new Int64Array.Builder().Append(1).Append(2).Build(); - var readOnlyList = (IReadOnlyList)array; + TestArrayAsReadOnlyList([1, 2]); + TestArrayAsReadOnlyList([1, 2]); + TestArrayAsReadOnlyList([true, false]); + TestArrayAsReadOnlyList([DateTime.MinValue.Date, DateTime.MaxValue.Date]); + TestArrayAsReadOnlyList([DateTime.MinValue.Date, DateTime.MaxValue.Date]); + TestArrayAsReadOnlyList([DateTimeOffset.MinValue, DateTimeOffset.MinValue.AddYears(100)]); + +#if NET5_0_OR_GREATER + TestArrayAsReadOnlyList([DateOnly.MinValue, DateOnly.MaxValue]); + TestArrayAsReadOnlyList([DateOnly.MinValue, DateOnly.MaxValue]); + TestArrayAsReadOnlyList([TimeOnly.MinValue, TimeOnly.MinValue.AddHours(23)]); + TestArrayAsReadOnlyList([TimeOnly.MinValue, TimeOnly.MaxValue]); + TestArrayAsReadOnlyList([(Half)1.1, (Half)2.2f]); +#endif + } + + // Parameter 'values' must contain two distinct values + private static void TestArrayAsReadOnlyList(IReadOnlyList values) + where T : struct + where TArray : IArrowArray + where TArrayBuilder : IArrowArrayBuilder, new() + { + Assert.Equal(2, values.Count); + TArray array = new TArrayBuilder().Append(values[0]).AppendNull().Append(values[1]).Build(default); + Assert.NotNull(array); + var readOnlyList = (IReadOnlyList)array; Assert.Equal(array.Length, readOnlyList.Count); - Assert.Equal(readOnlyList[0], 1); - Assert.Equal(readOnlyList[1], 2); + Assert.Equal(3, readOnlyList.Count); + Assert.Equal(values[0], readOnlyList[0]); + Assert.Null(readOnlyList[1]); + Assert.Equal(values[1], readOnlyList[2]); + } + + [Fact] + public void ArrayAsCollection() + { + TestPrimitiveArrayAsCollection([1, 2, 3, 4]); + TestPrimitiveArrayAsCollection([1, 2, 3, 4]); + TestPrimitiveArrayAsCollection([true, true, true, false]); + TestPrimitiveArrayAsCollection([DateTime.MinValue.Date, DateTime.MaxValue.Date, DateTime.Today, DateTime.Today]); + TestPrimitiveArrayAsCollection([DateTime.MinValue.Date, DateTime.MaxValue.Date, DateTime.Today, DateTime.Today]); + TestPrimitiveArrayAsCollection([DateTimeOffset.MinValue, DateTimeOffset.MinValue.AddYears(100), DateTimeOffset.Now, DateTimeOffset.UtcNow]); + +#if NET5_0_OR_GREATER + TestPrimitiveArrayAsCollection([DateOnly.MinValue, DateOnly.MaxValue, DateOnly.FromDayNumber(1), DateOnly.FromDayNumber(2)]); + TestPrimitiveArrayAsCollection([DateOnly.MinValue, DateOnly.MaxValue, DateOnly.FromDayNumber(1), DateOnly.FromDayNumber(2)]); + TestPrimitiveArrayAsCollection([TimeOnly.MinValue, TimeOnly.MinValue.AddHours(23), TimeOnly.MinValue.AddHours(1), TimeOnly.MinValue.AddHours(2)]); + TestPrimitiveArrayAsCollection([TimeOnly.MinValue, TimeOnly.MaxValue, TimeOnly.MinValue.AddHours(1), TimeOnly.MinValue.AddHours(2)]); + TestPrimitiveArrayAsCollection([(Half)1.1, (Half)2.2f, (Half)3.3f, (Half)4.4f]); +#endif + + byte[][] byteArrs = [new byte[1], [], [255], new byte[2]]; + TestObjectArrayAsCollection(new BinaryArray.Builder().Append(byteArrs[0].AsEnumerable()).AppendNull().Append(byteArrs[1].AsEnumerable()).Append(byteArrs[0].AsEnumerable()).Build(), System.Array.Empty(), byteArrs); + + string[] strings = ["abc", "abd", "acd", "adc"]; + TestObjectArrayAsCollection(new StringArray.Builder().Append(strings[0]).AppendNull().Append(strings[1]).Append(strings[0]).Build(), null, strings); + } + + // Parameter 'values' must contain 
four values. The last value must be distinct from the rest. + private static void TestPrimitiveArrayAsCollection(IReadOnlyList values) + where T : struct + where TArray : IArrowArray, ICollection + where TArrayBuilder : IArrowArrayBuilder, new() + { + Assert.Equal(4, values.Count); + TArray array = new TArrayBuilder().Append(values[0]).AppendNull().Append(values[1]).Append(values[0]).Build(default); + Assert.NotNull(array); + var collection = (ICollection)array; + + Assert.Equal(array.Length, collection.Count); + Assert.Equal(4, collection.Count); + Assert.True(collection.IsReadOnly); + + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Add(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Remove(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(collection.Clear).Message); + + Assert.True(collection.Contains(values[0])); + Assert.True(collection.Contains(values[1])); + Assert.True(collection.Contains(default)); + Assert.False(collection.Contains(values[3])); + + T sentinel = values[2]; + T?[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel }; + collection.CopyTo(destArr, 1); + Assert.Equal(sentinel, destArr[0]); + Assert.Equal(values[0], destArr[1]); + Assert.Null(destArr[2]); + Assert.Equal(values[1], destArr[3]); + Assert.Equal(values[0], destArr[4]); + Assert.Equal(sentinel, destArr[0]); + } + + // Parameter 'values' must contain four values. The last value must be distinct from the rest. + private static void TestObjectArrayAsCollection(TArray array, T nullValue, IReadOnlyList values) + where T : class + where TArray : IArrowArray, ICollection + { + Assert.NotNull(array); + Assert.Equal(4, values.Count); + var collection = (ICollection)array; + + Assert.Equal(array.Length, collection.Count); + Assert.Equal(4, collection.Count); + Assert.True(collection.IsReadOnly); + + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Add(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(() => collection.Remove(values[3])).Message); + Assert.Equal("Collection is read-only.", Assert.Throws(collection.Clear).Message); + + Assert.True(collection.Contains(values[0])); + Assert.True(collection.Contains(values[1])); + Assert.True(collection.Contains(default)); + Assert.False(collection.Contains(values[3])); + + T sentinel = values[2]; + T[] destArr = { sentinel, sentinel, sentinel, sentinel, sentinel, sentinel }; + collection.CopyTo(destArr, 1); + Assert.Equal(sentinel, destArr[0]); + Assert.Equal(values[0], destArr[1]); + Assert.Equal(nullValue, destArr[2]); + Assert.Equal(values[1], destArr[3]); + Assert.Equal(values[0], destArr[4]); + Assert.Equal(sentinel, destArr[0]); + } + + [Fact] + public void ContainsDoesNotMatchDefaultValueInArrayWithNullValue() + { + Int64Array array = new Int64Array.Builder().Append(1).Append(2).AppendNull().Build(); + Assert.NotNull(array); + var collection = (ICollection)array; + + Assert.True(collection.Contains(1)); + Assert.True(collection.Contains(2)); + Assert.True(collection.Contains(default)); + // A null value is stored as a null bit in the null bitmap, and a default value in the value buffer. Check that we do not match the default value. 
+ Assert.False(collection.Contains(0)); } [Fact] diff --git a/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs b/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs index 2a674b942c17b..6e4742cad06f2 100644 --- a/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/Date32ArrayTests.cs @@ -131,7 +131,7 @@ public void AppendGivesUtcDate(DateTimeOffset dateTimeOffset) public class AppendDateOnly { [Theory] - [MemberData(nameof(GetDateOnlyData), MemberType = typeof(Date64ArrayTests))] + [MemberData(nameof(GetDateOnlyData), MemberType = typeof(Date32ArrayTests))] public void AppendDateGivesSameDate(DateOnly date) { // Arrange diff --git a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs index 59080d739b10b..412f67de5f0fb 100644 --- a/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/DurationArrayTests.cs @@ -115,7 +115,7 @@ public void AppendTimeSpanGivesSameTimeSpan(TimeSpan? timeSpan, DurationType typ Assert.Equal(timeSpan, array.GetTimeSpan(0)); IReadOnlyList asList = array; - Assert.Equal(1, asList.Count); + Assert.Single(asList); Assert.Equal(timeSpan, asList[0]); } } diff --git a/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs b/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs index 4375c39cdfaf6..01809735d14c9 100644 --- a/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs +++ b/csharp/test/Apache.Arrow.Tests/Extensions/DateTimeOffsetExtensions.cs @@ -14,8 +14,6 @@ // limitations under the License. using System; -using System.Collections.Generic; -using System.Text; namespace Apache.Arrow.Tests { diff --git a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs index 712a87a252b6c..c603ef63a4d3e 100644 --- a/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs +++ b/csharp/test/Apache.Arrow.Tests/UnionArrayTests.cs @@ -110,7 +110,7 @@ private static void CompareValue(UnionArray originalArray, int originalIndex, Un } private static void CompareFieldValue(byte typeId, UnionArray originalArray, int originalIndex, UnionArray slicedArray, int sliceIndex) - where T: struct + where T : struct, IEquatable where TArray : PrimitiveArray { if (originalArray is DenseUnionArray denseOriginalArray) diff --git a/dev/README.md b/dev/README.md index db9a10d527334..b04dd35b1c1ff 100644 --- a/dev/README.md +++ b/dev/README.md @@ -48,17 +48,32 @@ After installed, it runs the merge script. you'll have to install Python dependencies yourself and then run `dev/merge_arrow_pr.py` directly.) +The merge script requires tokens for access control. There are two options +for configuring your tokens: environment variables or a configuration file. + +> Note: Arrow only requires a GitHub token. Parquet can use GitHub or +JIRA tokens. + +#### Pass tokens via Environment Variables + The merge script uses the GitHub REST API. You must set a -`ARROW_GITHUB_API_TOKEN` environment variable to use a -[Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). +`ARROW_GITHUB_API_TOKEN` environment variable to use a +[Personal Access Token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/creating-a-personal-access-token). You need to add `workflow` scope to the Personal Access Token. 
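As a minimal sketch of the environment-variable route above (hypothetical helper code, not the actual `dev/merge_arrow_pr.py` implementation), a script would look the token up before talking to the GitHub API:

```python
import os

# Hypothetical sketch: read the GitHub token from the ARROW_GITHUB_API_TOKEN
# environment variable documented above; fail early if it is missing.
token = os.environ.get("ARROW_GITHUB_API_TOKEN")
if not token:
    raise SystemExit(
        "ARROW_GITHUB_API_TOKEN is not set; export a GitHub Personal Access "
        "Token with the 'workflow' scope before running the merge script."
    )
```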
-You can specify the +You can specify the [Personal Access Token](https://confluence.atlassian.com/enterprise/using-personal-access-tokens-1026032365.html) -of your JIRA account in the +of your JIRA account in the `APACHE_JIRA_TOKEN` environment variable. If the variable is not set, the script will ask you for it. +#### Pass tokens via configuration file + +``` +cp ./merge.conf.sample ~/.config/arrow/merge.conf +``` +Update your new `merge.conf` file with your Personal Access Tokens. + Example output: ```text diff --git a/dev/archery/archery/crossbow/core.py b/dev/archery/archery/crossbow/core.py index c85f1f754b997..0b5d242bbaccf 100644 --- a/dev/archery/archery/crossbow/core.py +++ b/dev/archery/archery/crossbow/core.py @@ -427,8 +427,14 @@ def create_branch(self, branch_name, files, parents=None, message='', return branch def create_tag(self, tag_name, commit_id, message=''): + git_object_commit = ( + pygit2.GIT_OBJECT_COMMIT + if getattr(pygit2, 'GIT_OBJECT_COMMIT') + else pygit2.GIT_OBJ_COMMIT + ) tag_id = self.repo.create_tag(tag_name, commit_id, - pygit2.GIT_OBJ_COMMIT, self.signature, + git_object_commit, + self.signature, message) # append to the pushable references diff --git a/dev/archery/archery/utils/lint.py b/dev/archery/archery/utils/lint.py index 108c9ded361e7..c9d05fffd9168 100644 --- a/dev/archery/archery/utils/lint.py +++ b/dev/archery/archery/utils/lint.py @@ -157,13 +157,13 @@ def cmake_linter(src, fix=False): 'go/**/CMakeLists.txt', 'java/**/CMakeLists.txt', 'matlab/**/CMakeLists.txt', - 'python/CMakeLists.txt', + 'python/**/CMakeLists.txt', ], exclude_patterns=[ 'cpp/cmake_modules/FindNumPy.cmake', 'cpp/cmake_modules/FindPythonLibsNew.cmake', 'cpp/cmake_modules/UseCython.cmake', - 'cpp/src/arrow/util/config.h.cmake', + 'cpp/src/arrow/util/*.h.cmake', ] ) method = cmake_format.fix if fix else cmake_format.check diff --git a/dev/conbench_envs/hooks.sh b/dev/conbench_envs/hooks.sh index a77189764aed3..0745357d2c0d3 100755 --- a/dev/conbench_envs/hooks.sh +++ b/dev/conbench_envs/hooks.sh @@ -59,7 +59,8 @@ build_arrow_cpp() { } build_arrow_python() { - ci/scripts/python_build.sh $(pwd) $(pwd) + mkdir -p /tmp/arrow + ci/scripts/python_build.sh $(pwd) /tmp/arrow } build_arrow_r() { @@ -69,7 +70,8 @@ build_arrow_r() { } build_arrow_java() { - ci/scripts/java_build.sh $(pwd) $(pwd) + mkdir -p /tmp/arrow + ci/scripts/java_build.sh $(pwd) /tmp/arrow } install_archery() { diff --git a/dev/release/01-prepare-test.rb b/dev/release/01-prepare-test.rb index 8fb23f45f0f3a..11e75612818ac 100644 --- a/dev/release/01-prepare-test.rb +++ b/dev/release/01-prepare-test.rb @@ -108,6 +108,13 @@ def test_version_pre_tag "+version = '#{@release_version}'"], ], }, + { + path: "c_glib/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@release_version}\","], + ], + }, { path: "ci/scripts/PKGBUILD", hunks: [ diff --git a/dev/release/post-11-bump-versions-test.rb b/dev/release/post-11-bump-versions-test.rb index 78d9320bfb312..5706b1303667a 100644 --- a/dev/release/post-11-bump-versions-test.rb +++ b/dev/release/post-11-bump-versions-test.rb @@ -86,6 +86,13 @@ def test_version_post_tag "+version = '#{@next_snapshot_version}'"], ], }, + { + path: "c_glib/vcpkg.json", + hunks: [ + ["- \"version-string\": \"#{@snapshot_version}\",", + "+ \"version-string\": \"#{@next_snapshot_version}\","], + ], + }, { path: "ci/scripts/PKGBUILD", hunks: [ @@ -197,6 +204,12 @@ def test_version_post_tag ] if release_type == :major expected_changes += [ + { + 
path: "c_glib/tool/generate-version-header.py", + hunks: [ + ["+ (#{@next_major_version}, 0),"], + ], + }, { path: "docs/source/index.rst", hunks: [ diff --git a/dev/release/utils-prepare.sh b/dev/release/utils-prepare.sh index 51367087228a4..015f7109cd251 100644 --- a/dev/release/utils-prepare.sh +++ b/dev/release/utils-prepare.sh @@ -40,6 +40,22 @@ update_versions() { meson.build rm -f meson.build.bak git add meson.build + + # Add a new version entry only when the next release is a new major release + if [ "${type}" = "snapshot" -a \ + "${next_version}" = "${major_version}.0.0" ]; then + sed -i.bak -E -e \ + "s/^ALL_VERSIONS = \[$/&\\n (${major_version}, 0),/" \ + tool/generate-version-header.py + rm -f tool/generate-version-header.py.bak + git add tool/generate-version-header.py + fi + + sed -i.bak -E -e \ + "s/\"version-string\": \".+\"/\"version-string\": \"${version}\"/" \ + vcpkg.json + rm -f vcpkg.json.bak + git add vcpkg.json popd pushd "${ARROW_DIR}/ci/scripts" diff --git a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog index 60e745301d9db..04aa586dc3c96 100644 --- a/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow-apt-source/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow-apt-source (16.1.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Thu, 09 May 2024 07:21:29 -0000 + apache-arrow-apt-source (16.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in index 676c9e0d16dea..f0eb785dd6bc7 100644 --- a/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in +++ b/dev/tasks/linux-packages/apache-arrow-release/yum/apache-arrow-release.spec.in @@ -102,6 +102,9 @@ else fi %changelog +* Thu May 09 2024 Raúl Cumplido - 16.1.0-1 +- New upstream release. + * Tue Apr 16 2024 Raúl Cumplido - 16.0.0-1 - New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/debian/changelog b/dev/tasks/linux-packages/apache-arrow/debian/changelog index e255e84096e4e..35cc598fe6f87 100644 --- a/dev/tasks/linux-packages/apache-arrow/debian/changelog +++ b/dev/tasks/linux-packages/apache-arrow/debian/changelog @@ -1,3 +1,9 @@ +apache-arrow (16.1.0-1) unstable; urgency=low + + * New upstream release. + + -- Raúl Cumplido Thu, 09 May 2024 07:21:29 -0000 + apache-arrow (16.0.0-1) unstable; urgency=low * New upstream release. diff --git a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in index 3ede1814b865d..c6148e9260586 100644 --- a/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in +++ b/dev/tasks/linux-packages/apache-arrow/yum/arrow.spec.in @@ -881,6 +881,9 @@ Documentation for Apache Parquet GLib. %endif %changelog +* Thu May 09 2024 Raúl Cumplido - 16.1.0-1 +- New upstream release. + * Tue Apr 16 2024 Raúl Cumplido - 16.0.0-1 - New upstream release. 
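The `update_versions` additions to `dev/release/utils-prepare.sh` above are what the new Ruby test expectations exercise: every bump rewrites the `version-string` entry in `c_glib/vcpkg.json`, and a new `(major, 0)` row is appended to `ALL_VERSIONS` in `tool/generate-version-header.py` only when the next snapshot opens a new major series. A rough Python equivalent of the two sed substitutions, for illustration only (the release tooling itself stays shell-based, and the exact indentation of the new entry is an assumption):

```python
import re

def bump_vcpkg_version(text: str, version: str) -> str:
    # Mirrors: sed -E 's/"version-string": ".+"/"version-string": "<version>"/'
    return re.sub(r'"version-string": ".+"',
                  f'"version-string": "{version}"', text)

def add_major_version_entry(text: str, major: int) -> str:
    # Mirrors: sed -E 's/^ALL_VERSIONS = \[$/&\n    (<major>, 0),/'
    return re.sub(r"^ALL_VERSIONS = \[$",
                  lambda m: m.group(0) + f"\n    ({major}, 0),",
                  text, flags=re.MULTILINE)

print(bump_vcpkg_version('  "version-string": "16.1.0-SNAPSHOT",', "16.1.0"))
print(add_major_version_entry("ALL_VERSIONS = [\n    (16, 1),\n]", 17))
```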
diff --git a/dev/tasks/r/github.packages.yml b/dev/tasks/r/github.packages.yml index 41d8b230f8bf4..0539eae6cc9d9 100644 --- a/dev/tasks/r/github.packages.yml +++ b/dev/tasks/r/github.packages.yml @@ -227,11 +227,20 @@ jobs: working-directory: 'arrow' extra-packages: cpp11 - name: Set CRAN like openssl - if: contains(matrix.platform.name, 'arm64') + if: contains(matrix.platform.name, 'macOS') + # The -E forwards the GITHUB_* envvars + shell: sudo -E Rscript {0} run: | - # The arm64 runners contain openssl 1.1.1t in this path that is always included first so we need to override the - # default setting of the brew --prefix as root dir to avoid version conflicts. - echo "OPENSSL_ROOT_DIR=/opt/R/arm64" >> $GITHUB_ENV + # get the mac-recipes version of openssl from CRAN + source("https://mac.R-project.org/bin/install.R") + install.libs("openssl") + + # override our cmakes default setting of the brew --prefix as root dir to avoid version conflicts. + if (Sys.info()[["machine"]] == "arm64"){ + cat("OPENSSL_ROOT_DIR=/opt/R/arm64\n", file=Sys.getenv("GITHUB_ENV"), append = TRUE) + } else { + cat("OPENSSL_ROOT_DIR=/opt/R/x86_64\n", file=Sys.getenv("GITHUB_ENV"), append = TRUE) + } - name: Build Binary id: build shell: Rscript {0} @@ -239,6 +248,7 @@ jobs: NOT_CRAN: "false" # actions/setup-r sets this implicitly ARROW_R_DEV: "true" LIBARROW_BINARY: "true" # has to be set as long as allowlist not updated + LIBARROW_BUILD: "false" ARROW_R_ENFORCE_CHECKSUM: "true" ARROW_R_CHECKSUM_PATH: "{{ '${{ github.workspace }}' }}/repo/libarrow/bin" run: | diff --git a/docker-compose.yml b/docker-compose.yml index a1d8f60a268d8..9f575e2030179 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -286,6 +286,7 @@ services: <<: [*common, *ccache, *sccache, *cpp] ARROW_BUILD_BENCHMARKS: "ON" ARROW_BUILD_EXAMPLES: "ON" + ARROW_BUILD_OPENMP_BENCHMARKS: "ON" ARROW_ENABLE_TIMING_TESTS: # inherit ARROW_EXTRA_ERROR_CONTEXT: "ON" ARROW_MIMALLOC: "ON" diff --git a/docs/source/_static/versions.json b/docs/source/_static/versions.json index f8ff19095b3fd..e879fc69138d0 100644 --- a/docs/source/_static/versions.json +++ b/docs/source/_static/versions.json @@ -5,11 +5,16 @@ "url": "https://arrow.apache.org/docs/dev/" }, { - "name": "16.0 (stable)", + "name": "16.1 (stable)", "version": "", "url": "https://arrow.apache.org/docs/", "preferred": true }, + { + "name": "16.0", + "version": "16.0/", + "url": "https://arrow.apache.org/docs/16.0/" + }, { "name": "15.0", "version": "15.0/", diff --git a/docs/source/conf.py b/docs/source/conf.py index b487200555a09..1e6c113e33188 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -535,7 +535,7 @@ # # latex_appendices = [] -# It false, will not define \strong, \code, itleref, \crossref ... but only +# It false, will not define \strong, \code, \titleref, \crossref ... but only # \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added # packages. # diff --git a/docs/source/cpp/acero/developer_guide.rst b/docs/source/cpp/acero/developer_guide.rst index 80ca68556fc40..7dd08fe3ce2ce 100644 --- a/docs/source/cpp/acero/developer_guide.rst +++ b/docs/source/cpp/acero/developer_guide.rst @@ -327,8 +327,8 @@ An engine could choose to create a thread task for every execution of a node. H this leads to problems with cache locality. For example, let's assume we have a basic plan consisting of three exec nodes, scan, project, and then filter (this is a very common use case). Now let's assume there are 100 batches. 
In a task-per-operator model we would have tasks like "Scan Batch 5", "Project Batch 5", and "Filter Batch 5". Each -of those tasks is potentially going to access the same data. For example, maybe the `project` and `filter` nodes need -to read the same column. A column which is intially created in a decode phase of the `scan` node. To maximize cache +of those tasks is potentially going to access the same data. For example, maybe the ``project`` and ``filter`` nodes need +to read the same column. A column which is intially created in a decode phase of the ``scan`` node. To maximize cache utilization we would need to carefully schedule our tasks to ensure that all three of those tasks are run consecutively and assigned to the same CPU core. @@ -412,7 +412,7 @@ Ordered Execution ================= Some nodes either establish an ordering to their outgoing batches or they need to be able to process batches in order. -Acero handles ordering using the `batch_index` property on an ExecBatch. If a node has a deterministic output order +Acero handles ordering using the ``batch_index`` property on an ExecBatch. If a node has a deterministic output order then it should apply a batch index on batches that it emits. For example, the OrderByNode applies a new ordering to batches (regardless of the incoming ordering). The scan node is able to attach an implicit ordering to batches which reflects the order of the rows in the files being scanned. @@ -461,8 +461,8 @@ Acero's tracing is currently half-implemented and there are major gaps in profil effort at tracing with open telemetry and most of the necessary pieces are in place. The main thing currently lacking is some kind of effective visualization of the tracing results. -In order to use the tracing that is present today you will need to build with Arrow with `ARROW_WITH_OPENTELEMETRY=ON`. -Then you will need to set the environment variable `ARROW_TRACING_BACKEND=otlp_http`. This will configure open telemetry +In order to use the tracing that is present today you will need to build with Arrow with ``ARROW_WITH_OPENTELEMETRY=ON``. +Then you will need to set the environment variable ``ARROW_TRACING_BACKEND=otlp_http``. This will configure open telemetry to export trace results (as OTLP) to the HTTP endpoint http://localhost:4318/v1/traces. You will need to configure an open telemetry collector to collect results on that endpoint and you will need to configure a trace viewer of some kind such as Jaeger: https://www.jaegertracing.io/docs/1.21/opentelemetry/ diff --git a/docs/source/cpp/acero/overview.rst b/docs/source/cpp/acero/overview.rst index 8be4cbc1b1772..34e0b143bc2ce 100644 --- a/docs/source/cpp/acero/overview.rst +++ b/docs/source/cpp/acero/overview.rst @@ -209,16 +209,16 @@ must have the same length. There are a few key differences from ExecBatch: Both the record batch and the exec batch have strong ownership of the arrays & buffers -* An `ExecBatch` does not have a schema. This is because an `ExecBatch` is assumed to be +* An ``ExecBatch`` does not have a schema. This is because an ``ExecBatch`` is assumed to be part of a stream of batches and the stream is assumed to have a consistent schema. So - the schema for an `ExecBatch` is typically stored in the ExecNode. -* Columns in an `ExecBatch` are either an `Array` or a `Scalar`. When a column is a `Scalar` - this means that the column has a single value for every row in the batch. An `ExecBatch` + the schema for an ``ExecBatch`` is typically stored in the ExecNode. 
+* Columns in an ``ExecBatch`` are either an ``Array`` or a ``Scalar``. When a column is a ``Scalar`` + this means that the column has a single value for every row in the batch. An ``ExecBatch`` also has a length property which describes how many rows are in a batch. So another way to - view a `Scalar` is a constant array with `length` elements. -* An `ExecBatch` contains additional information used by the exec plan. For example, an - `index` can be used to describe a batch's position in an ordered stream. We expect - that `ExecBatch` will also evolve to contain additional fields such as a selection vector. + view a ``Scalar`` is a constant array with ``length`` elements. +* An ``ExecBatch`` contains additional information used by the exec plan. For example, an + ``index`` can be used to describe a batch's position in an ordered stream. We expect + that ``ExecBatch`` will also evolve to contain additional fields such as a selection vector. .. figure:: scalar_vs_array.svg @@ -231,8 +231,8 @@ only zero copy if there are no scalars in the exec batch. .. note:: Both Acero and the compute module have "lightweight" versions of batches and arrays. - In the compute module these are called `BatchSpan`, `ArraySpan`, and `BufferSpan`. In - Acero the concept is called `KeyColumnArray`. These types were developed concurrently + In the compute module these are called ``BatchSpan``, ``ArraySpan``, and ``BufferSpan``. In + Acero the concept is called ``KeyColumnArray``. These types were developed concurrently and serve the same purpose. They aim to provide an array container that can be completely stack allocated (provided the data type is non-nested) in order to avoid heap allocation overhead. Ideally these two concepts will be merged someday. @@ -247,9 +247,9 @@ execution of the nodes. Both ExecPlan and ExecNode are tied to the lifecycle of They have state and are not expected to be restartable. .. warning:: - The structures within Acero, including `ExecBatch`, are still experimental. The `ExecBatch` - class should not be used outside of Acero. Instead, an `ExecBatch` should be converted to - a more standard structure such as a `RecordBatch`. + The structures within Acero, including ``ExecBatch``, are still experimental. The ``ExecBatch`` + class should not be used outside of Acero. Instead, an ``ExecBatch`` should be converted to + a more standard structure such as a ``RecordBatch``. Similarly, an ExecPlan is an internal concept. Users creating plans should be using Declaration objects. APIs for consuming and executing plans should abstract away the details of the underlying diff --git a/docs/source/cpp/acero/user_guide.rst b/docs/source/cpp/acero/user_guide.rst index adcc17216e5ae..0271be2180e99 100644 --- a/docs/source/cpp/acero/user_guide.rst +++ b/docs/source/cpp/acero/user_guide.rst @@ -455,8 +455,8 @@ can be selected from :ref:`this list of aggregation functions will be added which should alleviate this constraint. The aggregation can provide results as a group or scalar. For instances, -an operation like `hash_count` provides the counts per each unique record -as a grouped result while an operation like `sum` provides a single record. +an operation like ``hash_count`` provides the counts per each unique record +as a grouped result while an operation like ``sum`` provides a single record. Scalar Aggregation example: @@ -490,7 +490,7 @@ caller will repeatedly call this function until the generator function is exhaus will accumulate in memory. 
An execution plan should only have one "terminal" node (one sink node). An :class:`ExecPlan` can terminate early due to cancellation or an error, before the output is fully consumed. However, the plan can be safely destroyed independently -of the sink, which will hold the unconsumed batches by `exec_plan->finished()`. +of the sink, which will hold the unconsumed batches by ``exec_plan->finished()``. As a part of the Source Example, the Sink operation is also included; @@ -515,7 +515,7 @@ The consuming function may be called before a previous invocation has completed. function does not run quickly enough then many concurrent executions could pile up, blocking the CPU thread pool. The execution plan will not be marked finished until all consuming function callbacks have been completed. -Once all batches have been delivered the execution plan will wait for the `finish` future to complete +Once all batches have been delivered the execution plan will wait for the ``finish`` future to complete before marking the execution plan finished. This allows for workflows where the consumption function converts batches into async tasks (this is currently done internally for the dataset write node). diff --git a/docs/source/cpp/api/flightsql.rst b/docs/source/cpp/api/flightsql.rst index 565b605108d9f..0f49a76f20687 100644 --- a/docs/source/cpp/api/flightsql.rst +++ b/docs/source/cpp/api/flightsql.rst @@ -22,8 +22,6 @@ Arrow Flight SQL ================ -.. note:: Flight SQL is currently experimental and APIs are subject to change. - Common Types ============ diff --git a/docs/source/cpp/build_system.rst b/docs/source/cpp/build_system.rst index 0c94d7e5ce5dc..e80bca4c949dc 100644 --- a/docs/source/cpp/build_system.rst +++ b/docs/source/cpp/build_system.rst @@ -167,7 +167,7 @@ file into an executable linked with the Arrow C++ shared library: .. code-block:: makefile my_example: my_example.cc - $(CXX) -o $@ $(CXXFLAGS) $< $$(pkg-config --cflags --libs arrow) + $(CXX) -o $@ $(CXXFLAGS) $< $$(pkg-config --cflags --libs arrow) Many build systems support pkg-config. For example: diff --git a/docs/source/cpp/compute.rst b/docs/source/cpp/compute.rst index 546b6e5716df7..701c7d573ac0e 100644 --- a/docs/source/cpp/compute.rst +++ b/docs/source/cpp/compute.rst @@ -514,8 +514,8 @@ Mixed time resolution temporal inputs will be cast to finest input resolution. +------------+---------------------------------------------+ It's compatible with Redshift's decimal promotion rules. All decimal digits - are preserved for `add`, `subtract` and `multiply` operations. The result - precision of `divide` is at least the sum of precisions of both operands with + are preserved for ``add``, ``subtract`` and ``multiply`` operations. The result + precision of ``divide`` is at least the sum of precisions of both operands with enough scale kept. Error is returned if the result precision is beyond the decimal value range. @@ -1029,7 +1029,7 @@ These functions trim off characters on both sides (trim), or the left (ltrim) or +--------------------------+------------+-------------------------+---------------------+----------------------------------------+---------+ * \(1) Only characters specified in :member:`TrimOptions::characters` will be - trimmed off. Both the input string and the `characters` argument are + trimmed off. Both the input string and the ``characters`` argument are interpreted as ASCII characters. 
* \(2) Only trim off ASCII whitespace characters (``'\t'``, ``'\n'``, ``'\v'``, @@ -1570,7 +1570,7 @@ is the same, even though the UTC years would be different. Timezone handling ~~~~~~~~~~~~~~~~~ -`assume_timezone` function is meant to be used when an external system produces +``assume_timezone`` function is meant to be used when an external system produces "timezone-naive" timestamps which need to be converted to "timezone-aware" timestamps (see for example the `definition `__ @@ -1581,11 +1581,11 @@ Input timestamps are assumed to be relative to the timezone given in UTC-relative timestamps with the timezone metadata set to the above value. An error is returned if the timestamps already have the timezone metadata set. -`local_timestamp` function converts UTC-relative timestamps to local "timezone-naive" +``local_timestamp`` function converts UTC-relative timestamps to local "timezone-naive" timestamps. The timezone is taken from the timezone metadata of the input -timestamps. This function is the inverse of `assume_timezone`. Please note: +timestamps. This function is the inverse of ``assume_timezone``. Please note: **all temporal functions already operate on timestamps as if they were in local -time of the metadata provided timezone**. Using `local_timestamp` is only meant to be +time of the metadata provided timezone**. Using ``local_timestamp`` is only meant to be used when an external system expects local timestamps. +-----------------+-------+-------------+---------------+---------------------------------+-------+ @@ -1649,8 +1649,8 @@ overflow is detected. * \(1) CumulativeOptions has two optional parameters. The first parameter :member:`CumulativeOptions::start` is a starting value for the running - accumulation. It has a default value of 0 for `sum`, 1 for `prod`, min of - input type for `max`, and max of input type for `min`. Specified values of + accumulation. It has a default value of 0 for ``sum``, 1 for ``prod``, min of + input type for ``max``, and max of input type for ``min``. Specified values of ``start`` must be castable to the input type. The second parameter :member:`CumulativeOptions::skip_nulls` is a boolean. When set to false (the default), the first encountered null is propagated. When set to diff --git a/docs/source/cpp/env_vars.rst b/docs/source/cpp/env_vars.rst index 116c151824c75..0a082b0a5d859 100644 --- a/docs/source/cpp/env_vars.rst +++ b/docs/source/cpp/env_vars.rst @@ -181,6 +181,10 @@ that changing their value later will have an effect. The number of entries to keep in the Gandiva JIT compilation cache. The cache is in-memory and does not persist across processes. + The default cache size is 5000. The value of this environment variable + should be a positive integer and should not exceed the maximum value + of int32. Otherwise the default value is used. + .. envvar:: HADOOP_HOME The path to the Hadoop installation. diff --git a/docs/source/cpp/parquet.rst b/docs/source/cpp/parquet.rst index 96897d139b351..9d2a5d791fed0 100644 --- a/docs/source/cpp/parquet.rst +++ b/docs/source/cpp/parquet.rst @@ -522,8 +522,8 @@ An Arrow Dictionary type is written out as its value type. It can still be recreated at read time using Parquet metadata (see "Roundtripping Arrow types" below). -Roundtripping Arrow types -~~~~~~~~~~~~~~~~~~~~~~~~~ +Roundtripping Arrow types and schema +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ While there is no bijection between Arrow types and Parquet types, it is possible to serialize the Arrow schema as part of the Parquet file metadata. 
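The schema round trip described in the parquet.rst hunk above can also be observed from Python. A small sketch, assuming a pyarrow version whose `pyarrow.parquet.write_table` exposes the `store_schema` option (file names are placeholders):

```python
import pyarrow as pa
import pyarrow.parquet as pq

table = pa.table({
    "values": pa.array([[1, 2], None, [3]], type=pa.large_list(pa.int64())),
})

# With the Arrow schema stored under the ARROW:schema metadata key,
# the LargeList type is recreated on read.
pq.write_table(table, "with_schema.parquet", store_schema=True)
print(pq.read_table("with_schema.parquet").schema.field("values").type)

# Without the stored schema, the Parquet LIST column is decoded as a
# plain Arrow list instead.
pq.write_table(table, "without_schema.parquet", store_schema=False)
print(pq.read_table("without_schema.parquet").schema.field("values").type)
```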
@@ -531,8 +531,7 @@ This is enabled using :func:`ArrowWriterProperties::store_schema`. On the read path, the serialized schema will be automatically recognized and will recreate the original Arrow data, converting the Parquet data as -required (for example, a LargeList will be recreated from the Parquet LIST -type). +required. As an example, when serializing an Arrow LargeList to Parquet: @@ -542,6 +541,20 @@ As an example, when serializing an Arrow LargeList to Parquet: :func:`ArrowWriterProperties::store_schema` was enabled when writing the file; otherwise, it is decoded as an Arrow List. +Parquet field id +"""""""""""""""" + +The Parquet format supports an optional integer *field id* which can be assigned +to a given field. This is used for example in the +`Apache Iceberg specification `__. + +On the writer side, if ``PARQUET:field_id`` is present as a metadata key on an +Arrow field, then its value is parsed as a non-negative integer and is used as +the field id for the corresponding Parquet field. + +On the reader side, Arrow will convert such a field id to a metadata key named +``PARQUET:field_id`` on the corresponding Arrow field. + Serialization details """"""""""""""""""""" @@ -549,6 +562,7 @@ The Arrow schema is serialized as a :ref:`Arrow IPC ` schema message then base64-encoded and stored under the ``ARROW:schema`` metadata key in the Parquet file metadata. + Limitations ~~~~~~~~~~~ diff --git a/docs/source/developers/cpp/building.rst b/docs/source/developers/cpp/building.rst index 7b80d2138c33e..b052b856c9bd5 100644 --- a/docs/source/developers/cpp/building.rst +++ b/docs/source/developers/cpp/building.rst @@ -312,7 +312,7 @@ depends on ``python`` being available). On some Linux distributions, running the test suite might require setting an explicit locale. If you see any locale-related errors, try setting the -environment variable (which requires the `locales` package or equivalent): +environment variable (which requires the ``locales`` package or equivalent): .. code-block:: diff --git a/docs/source/developers/documentation.rst b/docs/source/developers/documentation.rst index 8b1ea28c0f54b..a479065f6297e 100644 --- a/docs/source/developers/documentation.rst +++ b/docs/source/developers/documentation.rst @@ -259,7 +259,7 @@ Build the docs in the target directory: sphinx-build ./source/developers ./source/developers/_build -c ./source -D master_doc=temp_index This builds everything in the target directory to a folder inside of it -called ``_build`` using the config file in the `source` directory. +called ``_build`` using the config file in the ``source`` directory. Once you have verified the HTML documents, you can remove temporary index file: diff --git a/docs/source/developers/guide/step_by_step/arrow_codebase.rst b/docs/source/developers/guide/step_by_step/arrow_codebase.rst index 0beece991b197..0c194ab3a3f70 100644 --- a/docs/source/developers/guide/step_by_step/arrow_codebase.rst +++ b/docs/source/developers/guide/step_by_step/arrow_codebase.rst @@ -99,8 +99,8 @@ can be called from a function in another language. After a function is defined C++ we must create the binding manually to use it in that implementation. .. note:: - There is much you can learn by checking **Pull Requests** - and **unit tests** for similar issues. + There is much you can learn by checking **Pull Requests** + and **unit tests** for similar issues. .. 
tab-set:: diff --git a/docs/source/developers/guide/step_by_step/set_up.rst b/docs/source/developers/guide/step_by_step/set_up.rst index 9a2177568d6f5..9c808ceee7be6 100644 --- a/docs/source/developers/guide/step_by_step/set_up.rst +++ b/docs/source/developers/guide/step_by_step/set_up.rst @@ -118,10 +118,10 @@ Should give you a result similar to this: .. code:: console - origin https://github.com//arrow.git (fetch) - origin https://github.com//arrow.git (push) - upstream https://github.com/apache/arrow (fetch) - upstream https://github.com/apache/arrow (push) + origin https://github.com//arrow.git (fetch) + origin https://github.com//arrow.git (push) + upstream https://github.com/apache/arrow (fetch) + upstream https://github.com/apache/arrow (push) If you did everything correctly, you should now have a copy of the code in the ``arrow`` directory and two remotes that refer to your own GitHub diff --git a/docs/source/developers/java/development.rst b/docs/source/developers/java/development.rst index 17d47c324ce12..3f0ff6cdd0103 100644 --- a/docs/source/developers/java/development.rst +++ b/docs/source/developers/java/development.rst @@ -118,7 +118,7 @@ This checks the code style of all source code under the current directory or fro $ mvn checkstyle:check -Maven `pom.xml` style is enforced with Spotless using `Apache Maven pom.xml guidelines`_ +Maven ``pom.xml`` style is enforced with Spotless using `Apache Maven pom.xml guidelines`_ You can also just check the style without building the project. This checks the style of all pom.xml files under the current directory or from within an individual module. diff --git a/docs/source/developers/python.rst b/docs/source/developers/python.rst index be9fac067cb52..e84cd25201e08 100644 --- a/docs/source/developers/python.rst +++ b/docs/source/developers/python.rst @@ -302,10 +302,24 @@ created above (stored in ``$ARROW_HOME``): .. code-block:: - $ mkdir arrow/cpp/build - $ pushd arrow/cpp/build - $ cmake -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ - -DCMAKE_INSTALL_LIBDIR=lib \ + $ cmake -S arrow/cpp -B arrow/cpp/build \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ + --preset ninja-release-python + $ cmake --build arrow/cpp/build --target install + +``ninja-release-python`` is not the only preset available - if you would like a +build with more features like CUDA, Flight and Gandiva support you may opt for +the ``ninja-release-python-maximal`` preset. If you wanted less features, (i.e. +removing ORC and dataset support) you could opt for +``ninja-release-python-minimal``. Changing the word ``release`` to ``debug`` +with any of the aforementioned presets will generate a debug build of Arrow. + +The presets are provided as a convenience, but you may instead opt to +specify the individual components: + +.. code-block:: + $ cmake -S arrow/cpp -B arrow/cpp/build \ + -DCMAKE_INSTALL_PREFIX=$ARROW_HOME \ -DCMAKE_BUILD_TYPE=Debug \ -DARROW_BUILD_TESTS=ON \ -DARROW_COMPUTE=ON \ @@ -321,11 +335,8 @@ created above (stored in ``$ARROW_HOME``): -DARROW_WITH_SNAPPY=ON \ -DARROW_WITH_ZLIB=ON \ -DARROW_WITH_ZSTD=ON \ - -DPARQUET_REQUIRE_ENCRYPTION=ON \ - .. 
- $ make -j4 - $ make install - $ popd + -DPARQUET_REQUIRE_ENCRYPTION=ON + $ cmake --build arrow/cpp/build --target install -j4 There are a number of optional components that can be switched ON by adding flags with ``ON``: diff --git a/docs/source/developers/release.rst b/docs/source/developers/release.rst index 0b3a83dc5aabe..d903cc71bd5c4 100644 --- a/docs/source/developers/release.rst +++ b/docs/source/developers/release.rst @@ -106,7 +106,7 @@ If there is consensus and there is a Release Manager willing to take the effort the release a patch release can be created. Committers can tag issues that should be included on the next patch release using the -`backport-candidate` label. Is the responsability of the author or the committer to add the +``backport-candidate`` label. Is the responsability of the author or the committer to add the label to the issue to help the Release Manager identify the issues that should be backported. If a specific issue is identified as the reason to create a patch release the Release Manager @@ -117,7 +117,7 @@ Be sure to go through on the following checklist: #. Create milestone #. Create maintenance branch #. Include issue that was requested as requiring new patch release -#. Add new milestone to issues with `backport-candidate` label +#. Add new milestone to issues with ``backport-candidate`` label #. cherry-pick issues into maintenance branch Creating a Release Candidate diff --git a/docs/source/format/CanonicalExtensions.rst b/docs/source/format/CanonicalExtensions.rst index c60f095dd354d..c258f889dc6ac 100644 --- a/docs/source/format/CanonicalExtensions.rst +++ b/docs/source/format/CanonicalExtensions.rst @@ -77,7 +77,7 @@ Official List Fixed shape tensor ================== -* Extension name: `arrow.fixed_shape_tensor`. +* Extension name: ``arrow.fixed_shape_tensor``. * The storage type of the extension: ``FixedSizeList`` where: @@ -153,7 +153,7 @@ Fixed shape tensor Variable shape tensor ===================== -* Extension name: `arrow.variable_shape_tensor`. +* Extension name: ``arrow.variable_shape_tensor``. * The storage type of the extension is: ``StructArray`` where struct is composed of **data** and **shape** fields describing a single diff --git a/docs/source/format/Columnar.rst b/docs/source/format/Columnar.rst index ec6a7fa5e334a..7c853de7829be 100644 --- a/docs/source/format/Columnar.rst +++ b/docs/source/format/Columnar.rst @@ -312,7 +312,7 @@ Each value in this layout consists of 0 or more bytes. While primitive arrays have a single values buffer, variable-size binary have an **offsets** buffer and **data** buffer. -The offsets buffer contains `length + 1` signed integers (either +The offsets buffer contains ``length + 1`` signed integers (either 32-bit or 64-bit, depending on the logical type), which encode the start position of each slot in the data buffer. The length of the value in each slot is computed using the difference between the offset @@ -374,7 +374,7 @@ locations are indicated using a **views** buffer, which may point to one of potentially several **data** buffers or may contain the characters inline. -The views buffer contains `length` view structures with the following layout: +The views buffer contains ``length`` view structures with the following layout: :: @@ -394,7 +394,7 @@ should be interpreted. In the short string case the string's bytes are inlined — stored inside the view itself, in the twelve bytes which follow the length. Any remaining bytes -after the string itself are padded with `0`. 
+after the string itself are padded with ``0``. In the long string case, a buffer index indicates which data buffer stores the data bytes and an offset indicates where in that buffer the diff --git a/docs/source/format/FlightSql.rst b/docs/source/format/FlightSql.rst index 181efce286e70..b4b85e77a2e5f 100644 --- a/docs/source/format/FlightSql.rst +++ b/docs/source/format/FlightSql.rst @@ -32,9 +32,6 @@ with any database that supports the necessary endpoints. Flight SQL clients wrap the underlying Flight client to provide methods for the new RPC methods described here. -.. warning:: Flight SQL is **experimental** and changes to the - protocol may still be made. - RPC Methods =========== @@ -196,7 +193,7 @@ in the ``app_metadata`` field of the Flight RPC ``PutResult`` returned. When used with DoPut: load the stream of Arrow record batches into the specified target table and return the number of rows ingested - via a `DoPutUpdateResult` message. + via a ``DoPutUpdateResult`` message. Flight Server Session Management -------------------------------- diff --git a/docs/source/format/Integration.rst b/docs/source/format/Integration.rst index c800255687796..436747989acf3 100644 --- a/docs/source/format/Integration.rst +++ b/docs/source/format/Integration.rst @@ -501,7 +501,7 @@ integration testing actually tests. There are two types of integration test cases: the ones populated on the fly by the data generator in the Archery utility, and *gold* files that exist -in the `arrow-testing ` +in the `arrow-testing `_ repository. Data Generator Tests diff --git a/docs/source/java/algorithm.rst b/docs/source/java/algorithm.rst index 06ed32bd48cf7..d4838967d614f 100644 --- a/docs/source/java/algorithm.rst +++ b/docs/source/java/algorithm.rst @@ -82,7 +82,7 @@ for fixed width and variable width vectors, respectively. Both algorithms run in 3. **Index sorter**: this sorter does not actually sort the vector. Instead, it returns an integer vector, which correspond to indices of vector elements in sorted order. With the index vector, one can -easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k``th +easily construct a sorted vector. In addition, some other tasks can be easily achieved, like finding the ``k`` th smallest value in the vector. Index sorting is supported by ``org.apache.arrow.algorithm.sort.IndexSorter``, which runs in ``O(nlog(n))`` time. It is applicable to vectors of any type. diff --git a/docs/source/java/flight_sql_jdbc_driver.rst b/docs/source/java/flight_sql_jdbc_driver.rst index cc8822247b007..f95c2ac755d97 100644 --- a/docs/source/java/flight_sql_jdbc_driver.rst +++ b/docs/source/java/flight_sql_jdbc_driver.rst @@ -162,7 +162,7 @@ the Flight SQL service as gRPC headers. For example, the following URI :: This will connect without authentication or encryption, to a Flight SQL service running on ``localhost`` on port 12345. Each request will -also include a `database=mydb` gRPC header. +also include a ``database=mydb`` gRPC header. Connection parameters may also be supplied using the Properties object when using the JDBC Driver Manager to connect. 
When supplying using diff --git a/docs/source/java/install.rst b/docs/source/java/install.rst index a551edc36c477..dc6a55c87fcd6 100644 --- a/docs/source/java/install.rst +++ b/docs/source/java/install.rst @@ -63,7 +63,7 @@ Modifying the command above for Flight: Otherwise, you may see errors like ``java.lang.IllegalAccessError: superclass access check failed: class org.apache.arrow.flight.ArrowMessage$ArrowBufRetainingCompositeByteBuf (in module org.apache.arrow.flight.core) cannot access class io.netty.buffer.CompositeByteBuf (in unnamed module ...) because module -org.apache.arrow.flight.core does not read unnamed module ... +org.apache.arrow.flight.core does not read unnamed module ...`` Finally, if you are using arrow-dataset, you'll also need to report that JDK internals need to be exposed. Modifying the command above for arrow-memory: diff --git a/docs/source/java/ipc.rst b/docs/source/java/ipc.rst index 01341ff2cc391..f5939179177d5 100644 --- a/docs/source/java/ipc.rst +++ b/docs/source/java/ipc.rst @@ -81,7 +81,7 @@ Here we used an in-memory stream, but this could have been a socket or some othe writer.end(); Note that, since the :class:`VectorSchemaRoot` in the writer is a container that can hold batches, batches flow through -:class:`VectorSchemaRoot` as part of a pipeline, so we need to populate data before `writeBatch`, so that later batches +:class:`VectorSchemaRoot` as part of a pipeline, so we need to populate data before ``writeBatch``, so that later batches could overwrite previous ones. Now the :class:`ByteArrayOutputStream` contains the complete stream which contains 5 record batches. diff --git a/docs/source/java/overview.rst b/docs/source/java/overview.rst index 9d9cbad8a26c1..7780ee32ec9bc 100644 --- a/docs/source/java/overview.rst +++ b/docs/source/java/overview.rst @@ -54,10 +54,10 @@ but some modules are JNI bindings to the C++ library. - (Experimental) A library for converting JDBC data to Arrow data. - Native * - flight-core - - (Experimental) An RPC mechanism for transferring ValueVectors. + - An RPC mechanism for transferring ValueVectors. - Native * - flight-sql - - (Experimental) Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight. + - Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight. - Native * - flight-integration-tests - Integration tests for Flight RPC. diff --git a/docs/source/java/quickstartguide.rst b/docs/source/java/quickstartguide.rst index a71ddc5b5e55f..1f3ec861d3f46 100644 --- a/docs/source/java/quickstartguide.rst +++ b/docs/source/java/quickstartguide.rst @@ -195,10 +195,10 @@ Example: Create a dataset of names (strings) and ages (32-bit signed integers). .. code-block:: shell VectorSchemaRoot created: - age name - 10 Dave - 20 Peter - 30 Mary + age name + 10 Dave + 20 Peter + 30 Mary Interprocess Communication (IPC) @@ -306,10 +306,10 @@ Example: Read the dataset from the previous example from an Arrow IPC file (rand Record batches in file: 1 VectorSchemaRoot read: - age name - 10 Dave - 20 Peter - 30 Mary + age name + 10 Dave + 20 Peter + 30 Mary More examples available at `Arrow Java Cookbook`_. diff --git a/docs/source/java/substrait.rst b/docs/source/java/substrait.rst index c5857dcc23f75..fa20dbd61dbfb 100644 --- a/docs/source/java/substrait.rst +++ b/docs/source/java/substrait.rst @@ -100,9 +100,9 @@ Here is an example of a Java program that queries a Parquet file using Java Subs .. 
code-block:: text // Results example: - FieldPath(0) FieldPath(1) FieldPath(2) FieldPath(3) - 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai - 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon + FieldPath(0) FieldPath(1) FieldPath(2) FieldPath(3) + 0 ALGERIA 0 haggle. carefully final deposits detect slyly agai + 1 ARGENTINA 1 al foxes promise slyly according to the regular accounts. bold requests alon Executing Projections and Filters Using Extended Expressions ============================================================ @@ -189,13 +189,13 @@ This Java program: .. code-block:: text - column-1 column-2 - 13 ROMANIA - ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account - 14 SAUDI ARABIA - ts. silent requests haggle. closely express packages sleep across the blithely - 12 VIETNAM - hely enticingly express accounts. even, final - 13 RUSSIA - requests against the platelets use never according to the quickly regular pint - 13 UNITED KINGDOM - eans boost carefully special requests. accounts are. carefull - 11 UNITED STATES - y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be + column-1 column-2 + 13 ROMANIA - ular asymptotes are about the furious multipliers. express dependencies nag above the ironically ironic account + 14 SAUDI ARABIA - ts. silent requests haggle. closely express packages sleep across the blithely + 12 VIETNAM - hely enticingly express accounts. even, final + 13 RUSSIA - requests against the platelets use never according to the quickly regular pint + 13 UNITED KINGDOM - eans boost carefully special requests. accounts are. carefull + 11 UNITED STATES - y final packages. slow foxes cajole quickly. quickly silent platelets breach ironic accounts. unusual pinto be .. _`Substrait`: https://substrait.io/ .. _`Substrait Java`: https://github.com/substrait-io/substrait-java diff --git a/docs/source/java/table.rst b/docs/source/java/table.rst index 603910f51694f..5aa95e153cea0 100644 --- a/docs/source/java/table.rst +++ b/docs/source/java/table.rst @@ -75,7 +75,7 @@ Tables are created from a ``VectorSchemaRoot`` as shown below. The memory buffer Table t = new Table(someVectorSchemaRoot); -If you now update the vectors held by the ``VectorSchemaRoot`` (using some version of `ValueVector#setSafe()`), it would reflect those changes, but the values in table *t* are unchanged. +If you now update the vectors held by the ``VectorSchemaRoot`` (using some version of ``ValueVector#setSafe()``), it would reflect those changes, but the values in table *t* are unchanged. Creating a Table from FieldVectors ********************************** @@ -243,7 +243,7 @@ It is important to recognize that rows are NOT reified as objects, but rather op Getting a row ************* -Calling `immutableRow()` on any table instance returns a new ``Row`` instance. +Calling ``immutableRow()`` on any table instance returns a new ``Row`` instance. .. code-block:: Java @@ -262,7 +262,7 @@ Since rows are iterable, you can traverse a table using a standard while loop: // do something useful here } -``Table`` implements `Iterable` so you can access rows directly from a table in an enhanced *for* loop: +``Table`` implements ``Iterable`` so you can access rows directly from a table in an enhanced *for* loop: .. code-block:: Java @@ -272,7 +272,7 @@ Since rows are iterable, you can traverse a table using a standard while loop: ... 
} -Finally, while rows are usually iterated in the order of the underlying data vectors, but they are also positionable using the `Row#setPosition()` method, so you can skip to a specific row. Row numbers are 0-based. +Finally, while rows are usually iterated in the order of the underlying data vectors, but they are also positionable using the ``Row#setPosition()`` method, so you can skip to a specific row. Row numbers are 0-based. .. code-block:: Java @@ -281,7 +281,7 @@ Finally, while rows are usually iterated in the order of the underlying data vec Any changes to position are applied to all the columns in the table. -Note that you must call `next()`, or `setPosition()` before accessing values via a row. Failure to do so results in a runtime exception. +Note that you must call ``next()``, or ``setPosition()`` before accessing values via a row. Failure to do so results in a runtime exception. Read operations using rows ************************** @@ -304,7 +304,7 @@ You can also get value using a nullable ``ValueHolder``. For example: This can be used to retrieve values without creating a new Object for each. -In addition to getting values, you can check if a value is null using `isNull()`. This is important if the vector contains any nulls, as asking for a value from a vector can cause NullPointerExceptions in some cases. +In addition to getting values, you can check if a value is null using ``isNull()``. This is important if the vector contains any nulls, as asking for a value from a vector can cause NullPointerExceptions in some cases. .. code-block:: Java @@ -352,13 +352,13 @@ Working with the C-Data interface The ability to work with native code is required for many Arrow features. This section describes how tables can be be exported for use with native code -Exporting works by converting the data to a ``VectorSchemaRoot`` instance and using the existing facilities to transfer the data. You could do it yourself, but that isn't ideal because conversion to a vector schema root breaks the immutability guarantees. Using the `exportTable()` methods in the `Data`_ class avoids this concern. +Exporting works by converting the data to a ``VectorSchemaRoot`` instance and using the existing facilities to transfer the data. You could do it yourself, but that isn't ideal because conversion to a vector schema root breaks the immutability guarantees. Using the ``exportTable()`` methods in the `Data`_ class avoids this concern. .. code-block:: Java Data.exportTable(bufferAllocator, table, dictionaryProvider, outArrowArray); -If the table contains dictionary-encoded vectors and was constructed with a ``DictionaryProvider``, the provider argument to `exportTable()` can be omitted and the table's provider attribute will be used: +If the table contains dictionary-encoded vectors and was constructed with a ``DictionaryProvider``, the provider argument to ``exportTable()`` can be omitted and the table's provider attribute will be used: .. code-block:: Java diff --git a/docs/source/python/api/arrays.rst b/docs/source/python/api/arrays.rst index e6f6c3dbbd3d1..aefed00b3d2e0 100644 --- a/docs/source/python/api/arrays.rst +++ b/docs/source/python/api/arrays.rst @@ -63,8 +63,8 @@ may expose data type-specific methods or properties. 
FixedSizeBinaryArray LargeBinaryArray LargeStringArray - BinaryViewArray, - StringViewArray, + BinaryViewArray + StringViewArray Time32Array Time64Array Date32Array diff --git a/docs/source/python/api/compute.rst b/docs/source/python/api/compute.rst index ae48578a1bd61..5423eebfbab40 100644 --- a/docs/source/python/api/compute.rst +++ b/docs/source/python/api/compute.rst @@ -173,7 +173,7 @@ variants which detect domain errors where appropriate. Comparisons ----------- -These functions expect two inputs of the same type. If one of the inputs is `null` +These functions expect two inputs of the same type. If one of the inputs is ``null`` they return ``null``. .. autosummary:: @@ -540,7 +540,6 @@ Compute Options AssumeTimezoneOptions CastOptions CountOptions - CountOptions CumulativeSumOptions DayOfWeekOptions DictionaryEncodeOptions @@ -566,7 +565,6 @@ Compute Options RoundToMultipleOptions RunEndEncodeOptions ScalarAggregateOptions - ScalarAggregateOptions SelectKOptions SetLookupOptions SliceOptions @@ -578,7 +576,6 @@ Compute Options StructFieldOptions TakeOptions TDigestOptions - TDigestOptions TrimOptions VarianceOptions WeekOptions diff --git a/docs/source/python/compute.rst b/docs/source/python/compute.rst index ce3dfabb0e689..c2b46c8f3f673 100644 --- a/docs/source/python/compute.rst +++ b/docs/source/python/compute.rst @@ -275,7 +275,7 @@ take two datasets and join them: ds1 = ds.dataset(table1) ds2 = ds.dataset(table2) - joined_ds = ds1.join(ds2, key="id") + joined_ds = ds1.join(ds2, keys="id") The resulting dataset will be an :class:`.InMemoryDataset` containing the joined data:: diff --git a/docs/source/python/data.rst b/docs/source/python/data.rst index 9156157fcd0c2..598c8c125fb83 100644 --- a/docs/source/python/data.rst +++ b/docs/source/python/data.rst @@ -76,7 +76,7 @@ We use the name **logical type** because the **physical** storage may be the same for one or more types. For example, ``int64``, ``float64``, and ``timestamp[ms]`` all occupy 64 bits per value. -These objects are `metadata`; they are used for describing the data in arrays, +These objects are ``metadata``; they are used for describing the data in arrays, schemas, and record batches. In Python, they can be used in functions where the input data (e.g. Python objects) may be coerced to more than one Arrow type. @@ -99,7 +99,7 @@ types' children. For example, we can define a list of int32 values with: t6 = pa.list_(t1) t6 -A `struct` is a collection of named fields: +A ``struct`` is a collection of named fields: .. ipython:: python @@ -561,7 +561,7 @@ schema without having to get any of the batches.:: It can also be sent between languages using the :ref:`C stream interface `. -Conversion of RecordBatch do Tensor +Conversion of RecordBatch to Tensor ----------------------------------- Each array of the ``RecordBatch`` has it's own contiguous memory that is not necessarily diff --git a/docs/source/python/extending_types.rst b/docs/source/python/extending_types.rst index 8df0ef0b1fe99..83fce84f47c08 100644 --- a/docs/source/python/extending_types.rst +++ b/docs/source/python/extending_types.rst @@ -101,7 +101,7 @@ define the ``__arrow_array__`` method to return an Arrow array:: import pyarrow return pyarrow.array(..., type=type) -The ``__arrow_array__`` method takes an optional `type` keyword which is passed +The ``__arrow_array__`` method takes an optional ``type`` keyword which is passed through from :func:`pyarrow.array`. 
The method is allowed to return either a :class:`~pyarrow.Array` or a :class:`~pyarrow.ChunkedArray`. diff --git a/docs/source/python/filesystems.rst b/docs/source/python/filesystems.rst index 22f983a60c349..23d10aaaad720 100644 --- a/docs/source/python/filesystems.rst +++ b/docs/source/python/filesystems.rst @@ -182,7 +182,7 @@ Example how you can read contents from a S3 bucket:: Note that it is important to configure :class:`S3FileSystem` with the correct -region for the bucket being used. If `region` is not set, the AWS SDK will +region for the bucket being used. If ``region`` is not set, the AWS SDK will choose a value, defaulting to 'us-east-1' if the SDK version is <1.8. Otherwise it will try to use a variety of heuristics (environment variables, configuration profile, EC2 metadata server) to resolve the region. @@ -277,7 +277,7 @@ load time, since the library may not be in your LD_LIBRARY_PATH), and relies on some environment variables. * ``HADOOP_HOME``: the root of your installed Hadoop distribution. Often has - `lib/native/libhdfs.so`. + ``lib/native/libhdfs.so``. * ``JAVA_HOME``: the location of your Java SDK installation. diff --git a/docs/source/python/flight.rst b/docs/source/python/flight.rst index f07b9511ccf68..b63d256547de0 100644 --- a/docs/source/python/flight.rst +++ b/docs/source/python/flight.rst @@ -17,6 +17,7 @@ .. currentmodule:: pyarrow.flight .. highlight:: python +.. _flight: ================ Arrow Flight RPC diff --git a/docs/source/python/install.rst b/docs/source/python/install.rst index 4b966e6d2653d..84d6253691f09 100644 --- a/docs/source/python/install.rst +++ b/docs/source/python/install.rst @@ -39,6 +39,13 @@ Install the latest version of PyArrow from conda install -c conda-forge pyarrow +.. note:: + + While the ``pyarrow`` `conda-forge `_ package is + the right choice for most users, both a minimal and maximal variant of the + package exist, either of which may be better for your use case. See + :ref:`python-conda-differences`. + Using Pip --------- @@ -83,7 +90,7 @@ While Arrow uses the OS-provided timezone database on Linux and macOS, it requir user-provided database on Windows. To download and extract the text version of the IANA timezone database follow the instructions in the C++ :ref:`download-timezone-database` or use pyarrow utility function -`pyarrow.util.download_tzdata_on_windows()` that does the same. +``pyarrow.util.download_tzdata_on_windows()`` that does the same. By default, the timezone database will be detected at ``%USERPROFILE%\Downloads\tzdata``. If the database has been downloaded in a different location, you will need to set @@ -93,3 +100,85 @@ a custom path to the database from Python: >>> import pyarrow as pa >>> pa.set_timezone_db_path("custom_path") + + +.. _python-conda-differences: + +Differences between conda-forge packages +---------------------------------------- + +On `conda-forge `_, PyArrow is published as three +separate packages, each providing varying levels of functionality. This is in +contrast to PyPi, where only a single PyArrow package is provided. + +The purpose of this split is to minimize the size of the installed package for +most users (``pyarrow``), provide a smaller, minimal package for specialized use +cases (``pyarrow-core``), while still providing a complete package for users who +require it (``pyarrow-all``). What was historically ``pyarrow`` on +`conda-forge `_ is now ``pyarrow-all``, though most +users can continue using ``pyarrow``. 
+ +The ``pyarrow-core`` package includes the following functionality: + +- :ref:`data` +- :ref:`compute` (i.e., ``pyarrow.compute``) +- :ref:`io` +- :ref:`ipc` (i.e., ``pyarrow.ipc``) +- :ref:`filesystem` (i.e., ``pyarrow.fs``. Note: It's planned to move cloud fileystems (i.e., :ref:`S3`, :ref:`GCS`, etc) into ``pyarrow`` in a future release though :ref:`filesystem-localfs` will remain in ``pyarrow-core``.) +- File formats: :ref:`Arrow/Feather`, :ref:`JSON`, :ref:`CSV`, :ref:`ORC` (but not Parquet) + +The ``pyarrow`` package adds the following: + +- Acero (i.e., ``pyarrow.acero``) +- :ref:`dataset` (i.e., ``pyarrow.dataset``) +- :ref:`Parquet` (i.e., ``pyarrow.parquet``) +- Substrait (i.e., ``pyarrow.substrait``) + +Finally, ``pyarrow-all`` adds: + +- :ref:`flight` and Flight SQL (i.e., ``pyarrow.flight``) +- Gandiva (i.e., ``pyarrow.gandiva``) + +The following table lists the functionality provided by each package and may be +useful when deciding to use one package over another or when +:ref:`python-conda-custom-selection`. + ++------------+---------------------+--------------+---------+-------------+ +| Component | Package | pyarrow-core | pyarrow | pyarrow-all | ++------------+---------------------+--------------+---------+-------------+ +| Core | pyarrow-core | ✓ | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Parquet | libparquet | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Dataset | libarrow-dataset | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Acero | libarrow-acero | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Substrait | libarrow-substrait | | ✓ | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Flight | libarrow-flight | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Flight SQL | libarrow-flight-sql | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ +| Gandiva | libarrow-gandiva | | | ✓ | ++------------+---------------------+--------------+---------+-------------+ + +.. _python-conda-custom-selection: + +Creating A Custom Selection +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +If you know which components you need and want to control what's installed, you +can create a custom selection of packages to include only the extra features you +need. For example, to install ``pyarrow-core`` and add support for reading and +writing Parquet, install ``libparquet`` alongside ``pyarrow-core``: + +.. code-block:: shell + + conda install -c conda-forge pyarrow-core libparquet + +Or if you wish to use ``pyarrow`` but need support for Flight RPC: + +.. code-block:: shell + + conda install -c conda-forge pyarrow libarrow-flight diff --git a/docs/source/python/integration/extending.rst b/docs/source/python/integration/extending.rst index b380fea7e902c..d4d099bcf43c8 100644 --- a/docs/source/python/integration/extending.rst +++ b/docs/source/python/integration/extending.rst @@ -474,7 +474,7 @@ Toolchain Compatibility (Linux) The Python wheels for Linux are built using the `PyPA manylinux images `_ which use -the CentOS `devtoolset-9`. In addition to the other notes +the CentOS ``devtoolset-9``. In addition to the other notes above, if you are compiling C++ using these shared libraries, you will need to make sure you use a compatible toolchain as well or you might see a segfault during runtime. 
diff --git a/docs/source/python/memory.rst b/docs/source/python/memory.rst index 23474b923718d..7b49d48ab20fa 100644 --- a/docs/source/python/memory.rst +++ b/docs/source/python/memory.rst @@ -46,7 +46,7 @@ parent-child relationships. There are many implementations of ``arrow::Buffer``, but they all provide a standard interface: a data pointer and length. This is similar to Python's -built-in `buffer protocol` and ``memoryview`` objects. +built-in ``buffer protocol`` and ``memoryview`` objects. A :class:`Buffer` can be created from any Python object implementing the buffer protocol by calling the :func:`py_buffer` function. Let's consider diff --git a/docs/source/python/timestamps.rst b/docs/source/python/timestamps.rst index cecbd5b595bc7..80a1b7280cbfa 100644 --- a/docs/source/python/timestamps.rst +++ b/docs/source/python/timestamps.rst @@ -24,7 +24,7 @@ Arrow/Pandas Timestamps Arrow timestamps are stored as a 64-bit integer with column metadata to associate a time unit (e.g. milliseconds, microseconds, or nanoseconds), and an -optional time zone. Pandas (`Timestamp`) uses a 64-bit integer representing +optional time zone. Pandas (``Timestamp``) uses a 64-bit integer representing nanoseconds and an optional time zone. Python/Pandas timestamp types without a associated time zone are referred to as "Time Zone Naive". Python/Pandas timestamp types with an associated time zone are diff --git a/format/FlightSql.proto b/format/FlightSql.proto index bf3fcb6c3d229..6fca141d692a7 100644 --- a/format/FlightSql.proto +++ b/format/FlightSql.proto @@ -43,7 +43,6 @@ package arrow.flight.protocol.sql; * where there is one row per requested piece of metadata information. */ message CommandGetSqlInfo { - option (experimental) = true; /* * Values are modelled after ODBC's SQLGetInfo() function. This information is intended to provide @@ -1131,7 +1130,6 @@ enum Searchable { * The returned data should be ordered by data_type and then by type_name. */ message CommandGetXdbcTypeInfo { - option (experimental) = true; /* * Specifies the data type to search for the info. @@ -1153,7 +1151,6 @@ message CommandGetXdbcTypeInfo { * The returned data should be ordered by catalog_name. */ message CommandGetCatalogs { - option (experimental) = true; } /* @@ -1171,7 +1168,6 @@ message CommandGetCatalogs { * The returned data should be ordered by catalog_name, then db_schema_name. */ message CommandGetDbSchemas { - option (experimental) = true; /* * Specifies the Catalog to search for the tables. @@ -1219,7 +1215,6 @@ message CommandGetDbSchemas { * The returned data should be ordered by catalog_name, db_schema_name, table_name, then table_type, followed by table_schema if requested. */ message CommandGetTables { - option (experimental) = true; /* * Specifies the Catalog to search for the tables. @@ -1272,7 +1267,6 @@ message CommandGetTables { * The returned data should be ordered by table_type. */ message CommandGetTableTypes { - option (experimental) = true; } /* @@ -1293,7 +1287,6 @@ message CommandGetTableTypes { * The returned data should be ordered by catalog_name, db_schema_name, table_name, key_name, then key_sequence. */ message CommandGetPrimaryKeys { - option (experimental) = true; /* * Specifies the catalog to search for the table. @@ -1348,7 +1341,6 @@ enum UpdateDeleteRules { * update_rule and delete_rule returns a byte that is equivalent to actions declared on UpdateDeleteRules enum. 
*/ message CommandGetExportedKeys { - option (experimental) = true; /* * Specifies the catalog to search for the foreign key table. @@ -1399,7 +1391,6 @@ message CommandGetExportedKeys { * - 4 = SET DEFAULT */ message CommandGetImportedKeys { - option (experimental) = true; /* * Specifies the catalog to search for the primary key table. @@ -1452,7 +1443,6 @@ message CommandGetImportedKeys { * - 4 = SET DEFAULT */ message CommandGetCrossReference { - option (experimental) = true; /** * The catalog name where the parent table is. @@ -1499,7 +1489,6 @@ message CommandGetCrossReference { * Request message for the "CreatePreparedStatement" action on a Flight SQL enabled backend. */ message ActionCreatePreparedStatementRequest { - option (experimental) = true; // The valid SQL string to create a prepared statement for. string query = 1; @@ -1512,7 +1501,6 @@ message ActionCreatePreparedStatementRequest { * An embedded message describing a Substrait plan to execute. */ message SubstraitPlan { - option (experimental) = true; // The serialized substrait.Plan to create a prepared statement for. // XXX(ARROW-16902): this is bytes instead of an embedded message @@ -1529,7 +1517,6 @@ message SubstraitPlan { * Request message for the "CreatePreparedSubstraitPlan" action on a Flight SQL enabled backend. */ message ActionCreatePreparedSubstraitPlanRequest { - option (experimental) = true; // The serialized substrait.Plan to create a prepared statement for. SubstraitPlan plan = 1; @@ -1548,7 +1535,6 @@ message ActionCreatePreparedSubstraitPlanRequest { * The result should be wrapped in a google.protobuf.Any message. */ message ActionCreatePreparedStatementResult { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1570,7 +1556,6 @@ message ActionCreatePreparedStatementResult { * Closes server resources associated with the prepared statement handle. */ message ActionClosePreparedStatementRequest { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1581,7 +1566,6 @@ message ActionClosePreparedStatementRequest { * Begins a transaction. */ message ActionBeginTransactionRequest { - option (experimental) = true; } /* @@ -1592,7 +1576,6 @@ message ActionBeginTransactionRequest { * FLIGHT_SQL_TRANSACTION_SUPPORT_SAVEPOINT. */ message ActionBeginSavepointRequest { - option (experimental) = true; // The transaction to which a savepoint belongs. bytes transaction_id = 1; @@ -1610,7 +1593,6 @@ message ActionBeginSavepointRequest { * The result should be wrapped in a google.protobuf.Any message. */ message ActionBeginTransactionResult { - option (experimental) = true; // Opaque handle for the transaction on the server. bytes transaction_id = 1; @@ -1626,7 +1608,6 @@ message ActionBeginTransactionResult { * The result should be wrapped in a google.protobuf.Any message. */ message ActionBeginSavepointResult { - option (experimental) = true; // Opaque handle for the savepoint on the server. bytes savepoint_id = 1; @@ -1641,7 +1622,6 @@ message ActionBeginSavepointResult { * invalidated, as are all associated savepoints. */ message ActionEndTransactionRequest { - option (experimental) = true; enum EndTransaction { END_TRANSACTION_UNSPECIFIED = 0; @@ -1667,7 +1647,6 @@ message ActionEndTransactionRequest { * savepoints created after the current savepoint. 
*/ message ActionEndSavepointRequest { - option (experimental) = true; enum EndSavepoint { END_SAVEPOINT_UNSPECIFIED = 0; @@ -1702,7 +1681,6 @@ message ActionEndSavepointRequest { * - GetFlightInfo: execute the query. */ message CommandStatementQuery { - option (experimental) = true; // The SQL syntax. string query = 1; @@ -1729,7 +1707,6 @@ message CommandStatementQuery { * - DoPut: execute the query. */ message CommandStatementSubstraitPlan { - option (experimental) = true; // A serialized substrait.Plan SubstraitPlan plan = 1; @@ -1742,7 +1719,6 @@ message CommandStatementSubstraitPlan { * This should be used only once and treated as an opaque value, that is, clients should not attempt to parse this. */ message TicketStatementQuery { - option (experimental) = true; // Unique identifier for the instance of the statement to execute. bytes statement_handle = 1; @@ -1770,7 +1746,6 @@ message TicketStatementQuery { * - GetFlightInfo: execute the prepared statement instance. */ message CommandPreparedStatementQuery { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1781,7 +1756,6 @@ message CommandPreparedStatementQuery { * for the RPC call DoPut to cause the server to execute the included SQL update. */ message CommandStatementUpdate { - option (experimental) = true; // The SQL syntax. string query = 1; @@ -1795,7 +1769,6 @@ message CommandStatementUpdate { * prepared statement handle as an update. */ message CommandPreparedStatementUpdate { - option (experimental) = true; // Opaque handle for the prepared statement on the server. bytes prepared_statement_handle = 1; @@ -1807,7 +1780,6 @@ message CommandPreparedStatementUpdate { * FlightData into the target destination. */ message CommandStatementIngest { - option (experimental) = true; // Options for table definition behavior message TableDefinitionOptions { @@ -1866,7 +1838,6 @@ message CommandStatementIngest { * in the request, containing results from the update. */ message DoPutUpdateResult { - option (experimental) = true; // The number of records updated. A return value of -1 represents // an unknown updated record count. @@ -1880,7 +1851,6 @@ message DoPutUpdateResult { * can continue as though the fields in this message were not provided or set to sensible default values. */ message DoPutPreparedStatementResult { - option (experimental) = true; // Represents a (potentially updated) opaque handle for the prepared statement on the server. // Because the handle could potentially be updated, any previous handles for this prepared @@ -1912,7 +1882,6 @@ message DoPutPreparedStatementResult { */ message ActionCancelQueryRequest { option deprecated = true; - option (experimental) = true; // The result of the GetFlightInfo RPC that initiated the query. // XXX(ARROW-16902): this must be a serialized FlightInfo, but is @@ -1931,7 +1900,6 @@ message ActionCancelQueryRequest { */ message ActionCancelQueryResult { option deprecated = true; - option (experimental) = true; enum CancelResult { // The cancellation status is unknown. 
Servers should avoid using diff --git a/go/go.mod b/go/go.mod index 7c14ddcf9e216..e846c61033f47 100644 --- a/go/go.mod +++ b/go/go.mod @@ -37,7 +37,7 @@ require ( golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 golang.org/x/sync v0.7.0 golang.org/x/sys v0.20.0 - golang.org/x/tools v0.20.0 + golang.org/x/tools v0.21.0 golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 gonum.org/v1/gonum v0.15.0 google.golang.org/grpc v1.63.2 @@ -47,7 +47,7 @@ require ( require ( github.com/google/uuid v1.6.0 - github.com/hamba/avro/v2 v2.21.1 + github.com/hamba/avro/v2 v2.22.0 github.com/substrait-io/substrait-go v0.4.2 github.com/tidwall/sjson v1.2.5 ) @@ -75,8 +75,8 @@ require ( github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/pretty v1.2.0 // indirect golang.org/x/mod v0.17.0 // indirect - golang.org/x/net v0.24.0 // indirect - golang.org/x/text v0.14.0 // indirect + golang.org/x/net v0.25.0 // indirect + golang.org/x/text v0.15.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240227224415-6ceb2ff114de // indirect gopkg.in/yaml.v3 v3.0.1 // indirect modernc.org/gc/v3 v3.0.0-20240107210532-573471604cb6 // indirect diff --git a/go/go.sum b/go/go.sum index 70e3a533d03f3..6bceb4e5877ca 100644 --- a/go/go.sum +++ b/go/go.sum @@ -43,8 +43,8 @@ github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26 h1:Xim43kblpZXfIBQsbu github.com/google/pprof v0.0.0-20221118152302-e6195bd50e26/go.mod h1:dDKJzRmX4S37WGHujM7tX//fmj1uioxKzKxz3lo4HJo= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hamba/avro/v2 v2.21.1 h1:400/jTdLWQ3ib58y83VXlTJKijRouYQszY1SO0cMGt4= -github.com/hamba/avro/v2 v2.21.1/go.mod h1:ouJ4PkiAEP49u0lAtQyd5Gv04MehKj+7lXwD3zpLpY0= +github.com/hamba/avro/v2 v2.22.0 h1:IaBMFv5xmjo38f0oaP9jZiJFXg+lmHPPg7d9YotMnPg= +github.com/hamba/avro/v2 v2.22.0/go.mod h1:HOeTrE3kvWnBAgsufqhAzDDV5gvS0QXs65Z6BHfGgbg= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= @@ -111,14 +111,14 @@ github.com/zeebo/assert v1.3.0 h1:g7C04CbJuIDKNPFHmsk4hwZDO5O+kntRxzaUoNXj+IQ= github.com/zeebo/assert v1.3.0/go.mod h1:Pq9JiuJQpG8JLJdtkwrJESF0Foym2/D9XMU5ciN/wJ0= github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= -golang.org/x/crypto v0.22.0 h1:g1v0xeRhjcugydODzvb3mEM9SQ0HGp9s/nh3COQ/C30= -golang.org/x/crypto v0.22.0/go.mod h1:vr6Su+7cTlO45qkww3VDJlzDn0ctJvRgYbC2NvXHt+M= +golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= +golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225 h1:LfspQV/FYTatPTr/3HzIcmiUFH7PGP+OQ6mgDYo3yuQ= golang.org/x/exp v0.0.0-20240222234643-814bf88cf225/go.mod h1:CxmFvTBINI24O/j8iY7H1xHzx2i4OsyguNBmN/uPtqc= golang.org/x/mod v0.17.0 h1:zY54UmvipHiNd+pm+m0x9KhZ9hl1/7QNMyxXbc6ICqA= golang.org/x/mod v0.17.0/go.mod h1:hTbmBsO62+eylJbnUtE2MGJUyE7QWk4xUqPFrRgJ+7c= -golang.org/x/net v0.24.0 h1:1PcaxkF854Fu3+lvBIx5SYn9wRlBzzcnHZSiaFFAb0w= -golang.org/x/net v0.24.0/go.mod h1:2Q7sJY5mzlzWjKtYUEXSlBWCdyaioyXzRB2RtU8KVE8= +golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= +golang.org/x/net v0.25.0/go.mod 
h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= golang.org/x/sync v0.7.0 h1:YsImfSBoP9QPYL0xyKJPq0gcaJdG3rInoqxTWbfQu9M= golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= @@ -126,10 +126,10 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= -golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= -golang.org/x/tools v0.20.0 h1:hz/CVckiOxybQvFw6h7b/q80NTr9IUQb4s1IIzW7KNY= -golang.org/x/tools v0.20.0/go.mod h1:WvitBU7JJf6A4jOdg4S1tviW9bhUxkgeCui/0JHctQg= +golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= +golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= +golang.org/x/tools v0.21.0 h1:qc0xYgIbsSDt9EyWz05J5wfa7LOVW0YTLOXrqdLAWIw= +golang.org/x/tools v0.21.0/go.mod h1:aiJjzUbINMkxbQROHiO6hDPo2LHcIPhhQsa9DLh0yGk= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 h1:+cNy6SZtPcJQH3LJVLOSmiC7MMxXNOb3PU/VUEz+EhU= golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.15.0 h1:2lYxjRbTYyxkJxlhC+LvJIx3SsANPdRybu1tGj9/OrQ= diff --git a/go/parquet/internal/encoding/types.go b/go/parquet/internal/encoding/types.go index 51f48c797488f..2d7a5d6b1d166 100644 --- a/go/parquet/internal/encoding/types.go +++ b/go/parquet/internal/encoding/types.go @@ -185,7 +185,7 @@ func (b *PooledBufferWriter) Reserve(nbytes int) { b.buf = bufferPool.Get().(*memory.Buffer) } - newCap := utils.Max(b.buf.Cap()+b.offset, 256) + newCap := utils.Max(b.buf.Cap(), 256) for newCap < b.pos+nbytes { newCap = bitutil.NextPowerOf2(newCap) } @@ -361,11 +361,16 @@ func (b *BufferWriter) Truncate() { func (b *BufferWriter) Reset(initial int) { if b.buffer != nil { b.buffer.Release() + } else { + b.buffer = memory.NewResizableBuffer(b.mem) } b.pos = 0 b.offset = 0 - b.Reserve(initial) + + if initial > 0 { + b.Reserve(initial) + } } // Reserve ensures that there is at least enough capacity to write nbytes diff --git a/go/parquet/pqarrow/file_writer_test.go b/go/parquet/pqarrow/file_writer_test.go index 425e4479f6d5c..fc965279a928d 100644 --- a/go/parquet/pqarrow/file_writer_test.go +++ b/go/parquet/pqarrow/file_writer_test.go @@ -18,6 +18,7 @@ package pqarrow_test import ( "bytes" + "math" "strings" "testing" @@ -87,3 +88,44 @@ func TestFileWriterNumRows(t *testing.T) { require.NoError(t, writer.Close()) assert.Equal(t, 4, writer.NumRows()) } + +func TestFileWriterBuffered(t *testing.T) { + schema := arrow.NewSchema([]arrow.Field{ + {Name: "one", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + {Name: "two", Nullable: true, Type: arrow.PrimitiveTypes.Float64}, + }, nil) + + data := `[ + {"one": 1, "two": 2}, + {"one": 1, "two": null}, + {"one": null, "two": 2}, + {"one": null, "two": null} + ]` + + alloc := memory.NewCheckedAllocator(memory.DefaultAllocator) + defer alloc.AssertSize(t, 0) + + record, _, err := array.RecordFromJSON(alloc, schema, strings.NewReader(data)) + require.NoError(t, err) + defer record.Release() + + output := &bytes.Buffer{} + writer, err := pqarrow.NewFileWriter( + schema, + output, 
+ parquet.NewWriterProperties( + parquet.WithAllocator(alloc), + // Ensure enough space so we can close the writer with rows still buffered + parquet.WithMaxRowGroupLength(math.MaxInt64), + ), + pqarrow.NewArrowWriterProperties( + pqarrow.WithAllocator(alloc), + ), + ) + require.NoError(t, err) + + require.NoError(t, writer.WriteBuffered(record)) + + require.NoError(t, writer.Close()) + assert.Equal(t, 4, writer.NumRows()) +} diff --git a/java/adapter/avro/pom.xml b/java/adapter/avro/pom.xml index 645e8c4ff2e60..0046fcac62a22 100644 --- a/java/adapter/avro/pom.xml +++ b/java/adapter/avro/pom.xml @@ -25,27 +25,36 @@ http://maven.apache.org + + org.apache.arrow arrow-memory-core + + org.apache.arrow arrow-memory-netty runtime + + org.apache.arrow arrow-vector + org.immutables - value + value-annotations + org.apache.avro avro ${dep.avro.version} + diff --git a/java/adapter/jdbc/pom.xml b/java/adapter/jdbc/pom.xml index 33360c64b13b6..17681538ac97e 100644 --- a/java/adapter/jdbc/pom.xml +++ b/java/adapter/jdbc/pom.xml @@ -26,17 +26,20 @@ + org.apache.arrow arrow-memory-core + org.apache.arrow arrow-memory-netty runtime + org.apache.arrow arrow-vector @@ -45,9 +48,10 @@ org.immutables - value + value-annotations + com.h2database h2 @@ -90,6 +94,9 @@ jdk11+ [11,] + + !m2e.version + diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java index 7d50676688e0f..7420a8c23dd48 100644 --- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java +++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/binder/ColumnBinderArrowTypeVisitor.java @@ -256,4 +256,9 @@ public ColumnBinder visit(ArrowType.Interval type) { public ColumnBinder visit(ArrowType.Duration type) { throw new UnsupportedOperationException("No column binder implemented for type " + type); } + + @Override + public ColumnBinder visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("No column binder implemented for type " + type); + } } diff --git a/java/adapter/orc/pom.xml b/java/adapter/orc/pom.xml index ec6f73a3e9e40..ca817510bf3e3 100644 --- a/java/adapter/orc/pom.xml +++ b/java/adapter/orc/pom.xml @@ -46,7 +46,7 @@ org.immutables - value + value-annotations org.apache.orc @@ -134,22 +134,5 @@ - - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - - - - org.apache.arrow:arrow-format - - - - - - diff --git a/java/algorithm/pom.xml b/java/algorithm/pom.xml index 92fa5c8553505..0854da48b718a 100644 --- a/java/algorithm/pom.xml +++ b/java/algorithm/pom.xml @@ -44,7 +44,7 @@ org.immutables - value + value-annotations diff --git a/java/bom/pom.xml b/java/bom/pom.xml index 0af50c638055e..12b9950ad80fc 100644 --- a/java/bom/pom.xml +++ b/java/bom/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 31 + 18 org.apache.arrow @@ -27,19 +27,6 @@ - - 1.8 - 1.8 - 3.11.0 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.0 - 3.2.2 - 3.6.3 - 3.5.0 @@ -151,9 +138,11 @@ ${project.version} + + @@ -167,10 +156,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 com.diffplug.spotless @@ -197,10 +188,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 diff --git a/java/c/pom.xml b/java/c/pom.xml index 43a62a8303bfe..bfb233315a839 100644 --- a/java/c/pom.xml +++ 
b/java/c/pom.xml @@ -50,7 +50,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -83,4 +83,5 @@ + diff --git a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java index bc6139cc84c54..99873dadad242 100644 --- a/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java +++ b/java/c/src/main/java/org/apache/arrow/c/BufferImportTypeVisitor.java @@ -53,6 +53,7 @@ import org.apache.arrow.vector.complex.UnionVector; import org.apache.arrow.vector.ipc.message.ArrowFieldNode; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.ArrowType.ListView; import org.apache.arrow.vector.util.DataSizeRoundingUtil; /** @@ -328,4 +329,9 @@ public List visit(ArrowType.Interval type) { public List visit(ArrowType.Duration type) { return Arrays.asList(maybeImportBitmap(type), importFixedBytes(type, 1, DurationVector.TYPE_WIDTH)); } + + @Override + public List visit(ListView type) { + throw new UnsupportedOperationException("Importing buffers for view type: " + type + " not supported"); + } } diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java index d9afd0189d807..27acf84d30157 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorLoader.java @@ -90,8 +90,12 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch .fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator variadicBufferCounts = null; + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } for (FieldVector fieldVector : result.getChildrenFromFields()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } result.loadFieldBuffers(new ArrowFieldNode(recordBatch.getLength(), 0), Collections.singletonList(null)); if (nodes.hasNext() || buffers.hasNext()) { @@ -102,10 +106,15 @@ public StructVector load(BufferAllocator allocator, ArrowRecordBatch recordBatch } private void loadBuffers(FieldVector vector, Field field, Iterator buffers, Iterator nodes, - CompressionCodec codec) { + CompressionCodec codec, Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (variadicBufferCounts != null) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -138,7 +147,7 @@ private void loadBuffers(FieldVector vector, Field field, Iterator buf for (int i = 0; i < childrenFromFields.size(); i++) 
{ Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } diff --git a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java index aa6d9b4d0f6a7..8d015157ebf38 100644 --- a/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java +++ b/java/c/src/main/java/org/apache/arrow/c/StructVectorUnloader.java @@ -21,6 +21,7 @@ import java.util.List; import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.vector.BaseVariableWidthViewVector; import org.apache.arrow.vector.FieldVector; import org.apache.arrow.vector.TypeLayout; import org.apache.arrow.vector.complex.StructVector; @@ -87,17 +88,28 @@ public StructVectorUnloader(StructVector root, boolean includeNullCount, Compres public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getChildrenFromFields()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } return new ArrowRecordBatch(root.getValueCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), - alignBuffers); + variadicBufferCounts, alignBuffers); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + variadicBufferCounts.add(variadicBufferCount); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format("wrong number of buffers for field %s in vector %s. 
found: %s", vector.getField(), vector.getClass().getSimpleName(), fieldBuffers)); @@ -106,7 +118,7 @@ private void appendNodes(FieldVector vector, List nodes, List org.immutables - value + value-annotations org.apache.commons diff --git a/java/dataset/pom.xml b/java/dataset/pom.xml index 2121119af398e..3dea16204a4db 100644 --- a/java/dataset/pom.xml +++ b/java/dataset/pom.xml @@ -46,7 +46,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -201,7 +201,7 @@ org.apache.maven.plugins maven-surefire-plugin - --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + --add-reads=org.apache.arrow.dataset=com.fasterxml.jackson.databind --add-opens=java.base/java.nio=org.apache.arrow.dataset,org.apache.arrow.memory.core,ALL-UNNAMED diff --git a/java/flight/flight-core/pom.xml b/java/flight/flight-core/pom.xml index 163b4c24031b1..b565572b383ab 100644 --- a/java/flight/flight-core/pom.xml +++ b/java/flight/flight-core/pom.xml @@ -15,12 +15,13 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-core jar Arrow Flight Core - (Experimental)An RPC mechanism for transferring ValueVectors. + An RPC mechanism for transferring ValueVectors. 1 @@ -118,13 +119,13 @@ org.immutables - value + value-annotations com.google.api.grpc proto-google-common-protos - 2.37.1 + 2.39.1 test @@ -150,6 +151,13 @@ org.apache.maven.plugins maven-shade-plugin + + 3.2.4 shade-main @@ -236,6 +244,7 @@ org.apache.maven.plugins maven-dependency-plugin + 3.3.0 analyze @@ -255,6 +264,7 @@ org.codehaus.mojo build-helper-maven-plugin + 1.9.1 add-generated-sources-to-classpath @@ -272,6 +282,7 @@ maven-assembly-plugin + 3.7.1 jar-with-dependencies @@ -288,6 +299,13 @@ + + + kr.motd.maven + os-maven-plugin + 1.7.1 + + @@ -295,14 +313,18 @@ jdk11+ [11,] + + !m2e.version + org.apache.maven.plugins maven-surefire-plugin - - --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + + --add-opens=org.apache.arrow.flight.core/org.apache.arrow.flight.perf.impl=protobuf.java --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED + false ${project.basedir}/../../../testing/data @@ -312,4 +334,5 @@ + diff --git a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java index 7091caa5e98bc..af22cd8aade22 100644 --- a/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java +++ b/java/flight/flight-core/src/main/java/org/apache/arrow/flight/ServerSessionMiddleware.java @@ -26,8 +26,6 @@ /** * Middleware for handling Flight SQL Sessions including session cookie handling. - * - * Currently experimental. 
*/ public class ServerSessionMiddleware implements FlightServerMiddleware { Factory factory; diff --git a/java/flight/flight-integration-tests/pom.xml b/java/flight/flight-integration-tests/pom.xml index cd2c28ba8959f..74016d81e91e5 100644 --- a/java/flight/flight-integration-tests/pom.xml +++ b/java/flight/flight-integration-tests/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-integration-tests @@ -62,6 +63,7 @@ maven-assembly-plugin + 3.7.1 jar-with-dependencies diff --git a/java/flight/flight-sql-jdbc-core/pom.xml b/java/flight/flight-sql-jdbc-core/pom.xml index ef3f2469b73dd..fbab69df3b305 100644 --- a/java/flight/flight-sql-jdbc-core/pom.xml +++ b/java/flight/flight-sql-jdbc-core/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-sql-jdbc-core @@ -46,17 +47,20 @@ + org.apache.arrow arrow-memory-core + org.apache.arrow arrow-memory-netty runtime + org.apache.arrow arrow-vector @@ -132,6 +136,11 @@ + + + src/main/resources + + maven-surefire-plugin @@ -145,6 +154,7 @@ org.codehaus.mojo properties-maven-plugin + 1.2.1 write-project-properties-to-file diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java index fd9127c226910..70a58ff440ed4 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/AvaticaParameterBinder.java @@ -254,6 +254,11 @@ public Boolean visit(ArrowType.Interval type) { public Boolean visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).bindParameter(vector, typedValue, index); } + + @Override + public Boolean visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("Binding is not yet supported for type " + type); + } } } diff --git a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java index 93b5faaef32c7..6ec33fafcfa46 100644 --- a/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java +++ b/java/flight/flight-sql-jdbc-core/src/main/java/org/apache/arrow/driver/jdbc/utils/ConvertUtils.java @@ -274,6 +274,11 @@ public AvaticaParameter visit(ArrowType.Interval type) { public AvaticaParameter visit(ArrowType.Duration type) { return new DurationAvaticaParameterConverter(type).createParameter(field); } + + @Override + public AvaticaParameter visit(ArrowType.ListView type) { + throw new UnsupportedOperationException("AvaticaParameter not yet supported for type " + type); + } } } diff --git a/java/flight/flight-sql-jdbc-driver/pom.xml b/java/flight/flight-sql-jdbc-driver/pom.xml index 4456270e7b347..b3afbe1defdba 100644 --- a/java/flight/flight-sql-jdbc-driver/pom.xml +++ b/java/flight/flight-sql-jdbc-driver/pom.xml @@ -15,6 +15,7 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-sql-jdbc-driver diff --git a/java/flight/flight-sql/pom.xml b/java/flight/flight-sql/pom.xml index cf466ab1720cf..e6d703c673ad5 100644 --- a/java/flight/flight-sql/pom.xml +++ b/java/flight/flight-sql/pom.xml @@ -15,12 +15,13 @@ org.apache.arrow arrow-flight 17.0.0-SNAPSHOT + ../pom.xml flight-sql jar Arrow Flight SQL - 
(Experimental)Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight + Contains utility classes to expose Flight SQL semantics for clients and servers over Arrow Flight 1 @@ -51,7 +52,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -118,6 +119,9 @@ jdk11+ [11,] + + !m2e.version + @@ -132,4 +136,5 @@ + diff --git a/java/format/pom.xml b/java/format/pom.xml index 4483047e20960..e9eded79de660 100644 --- a/java/format/pom.xml +++ b/java/format/pom.xml @@ -31,6 +31,7 @@ + @@ -41,5 +42,6 @@ + diff --git a/java/gandiva/pom.xml b/java/gandiva/pom.xml index c5703c62dfe23..26a28d55d238e 100644 --- a/java/gandiva/pom.xml +++ b/java/gandiva/pom.xml @@ -22,12 +22,13 @@ jar Arrow Gandiva Java wrappers around the native Gandiva SQL expression compiler. - + 1.8 + 1.8 + 3.25.1 true ../../../cpp/release-build - org.apache.arrow @@ -35,7 +36,7 @@ org.immutables - value + value-annotations org.apache.arrow @@ -50,6 +51,7 @@ com.google.protobuf protobuf-java + ${protobuf.version} com.google.guava @@ -60,7 +62,6 @@ slf4j-api - @@ -87,6 +88,14 @@ + + + + kr.motd.maven + os-maven-plugin + 1.7.1 + + @@ -96,6 +105,7 @@ org.apache.maven.plugins maven-source-plugin + 2.2.1 attach-sources @@ -108,6 +118,7 @@ org.apache.maven.plugins maven-javadoc-plugin + 3.6.3 attach-javadocs @@ -120,6 +131,7 @@ org.apache.maven.plugins maven-gpg-plugin + 3.2.2 sign-artifacts @@ -134,4 +146,5 @@ + diff --git a/java/maven/module-info-compiler-maven-plugin/pom.xml b/java/maven/module-info-compiler-maven-plugin/pom.xml index 5909b6b3484fc..9c1e8fe058110 100644 --- a/java/maven/module-info-compiler-maven-plugin/pom.xml +++ b/java/maven/module-info-compiler-maven-plugin/pom.xml @@ -64,19 +64,44 @@ org.apache.maven.plugin-tools maven-plugin-annotations - ${maven.plugin.tools.version} + 3.11.0 provided + + + maven-clean-plugin + 3.3.2 + + + maven-plugin-plugin + 3.12.0 + + + maven-jar-plugin + 3.3.0 + + + maven-install-plugin + 3.1.1 + + + maven-deploy-plugin + 3.1.1 + + + maven-invoker-plugin + 3.1.0 + com.gradle - gradle-enterprise-maven-extension + develocity-maven-extension - + @@ -84,7 +109,7 @@ - + @@ -93,6 +118,7 @@ org.apache.maven.plugins maven-plugin-plugin + 3.12.0 true diff --git a/java/maven/pom.xml b/java/maven/pom.xml index 558532012a1ae..f290ded2e2913 100644 --- a/java/maven/pom.xml +++ b/java/maven/pom.xml @@ -15,13 +15,6 @@ Note: Do not inherit from the Arrow parent POM as plugins can be referenced during the parent POM, introducing circular dependencies. 
--> - - org.apache - apache - 31 - - - org.apache.arrow.maven.plugins arrow-maven-plugins 17.0.0-SNAPSHOT @@ -34,38 +27,25 @@ true - - 1.8 - 1.8 - 3.12.0 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.0 - 3.2.2 - 3.6.3 - 3.5.0 - com.diffplug.spotless - spotless-maven-plugin - 2.30.0 + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.5.0 - pl.project13.maven - git-commit-id-plugin - 4.0.5 + org.apache.maven.plugins + maven-site-plugin + 3.12.1 - org.cyclonedx - cyclonedx-maven-plugin - 2.8.0 + com.diffplug.spotless + spotless-maven-plugin + 2.30.0 @@ -139,6 +119,11 @@ **/logback.xml + true + + true + true + org.apache.arrow ${username} @@ -158,17 +143,43 @@ + + org.apache.maven.plugins + maven-resources-plugin + + UTF-8 + + org.apache.maven.plugins maven-compiler-plugin + UTF-8 + 1.8 + 1.8 2048m + false true maven-enforcer-plugin + + validate_java_and_maven_version + + enforce + + verify + false + + + + [3.3.0,4) + + + + avoid_bad_dependencies @@ -194,6 +205,8 @@ pl.project13.maven git-commit-id-plugin + 4.0.5 + dd.MM.yyyy '@' HH:mm:ss z false @@ -235,6 +248,7 @@ org.apache.maven.plugins maven-checkstyle-plugin + 3.1.0 ../dev/checkstyle/checkstyle.xml ../dev/checkstyle/checkstyle.license @@ -274,6 +288,7 @@ org.cyclonedx cyclonedx-maven-plugin + 2.7.11 @@ -338,10 +353,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 diff --git a/java/memory/memory-core/pom.xml b/java/memory/memory-core/pom.xml index 8e39ae43d116f..1e29ccf8ab9db 100644 --- a/java/memory/memory-core/pom.xml +++ b/java/memory/memory-core/pom.xml @@ -33,7 +33,7 @@ org.immutables - value + value-annotations org.checkerframework @@ -61,6 +61,9 @@ jdk11+ [11,] + + !m2e.version + @@ -89,6 +92,7 @@ org.apache.maven.plugins maven-surefire-plugin + opens-tests @@ -97,9 +101,12 @@ test - - - + + -Dfoo=bar + + + **/TestArrowBuf.java + **/TestOpens.java @@ -122,6 +129,9 @@ org.apache.maven.plugins maven-compiler-plugin + 8 + 8 + UTF-8 -Xmaxerrs @@ -140,6 +150,12 @@ ${checker.framework.version} + + + org.immutables.value.internal.$processor$.$Processor + + org.checkerframework.checker.nullness.NullnessChecker + diff --git a/java/memory/memory-core/src/main/java/module-info.java b/java/memory/memory-core/src/main/java/module-info.java index 5024b7f45769c..d8c22dd993dd9 100644 --- a/java/memory/memory-core/src/main/java/module-info.java +++ b/java/memory/memory-core/src/main/java/module-info.java @@ -23,7 +23,5 @@ exports org.apache.arrow.util; requires transitive jdk.unsupported; requires jsr305; - requires org.immutables.value; requires org.slf4j; - requires org.checkerframework.checker.qual; } diff --git a/java/memory/memory-netty/pom.xml b/java/memory/memory-netty/pom.xml index bb4ccd6c26f2a..d815276b09e50 100644 --- a/java/memory/memory-netty/pom.xml +++ b/java/memory/memory-netty/pom.xml @@ -53,7 +53,7 @@ org.immutables - value + value-annotations diff --git a/java/memory/memory-unsafe/pom.xml b/java/memory/memory-unsafe/pom.xml index 502ac2cc8051a..f1aa8fde1faa1 100644 --- a/java/memory/memory-unsafe/pom.xml +++ b/java/memory/memory-unsafe/pom.xml @@ -28,7 +28,7 @@ org.immutables - value + value-annotations diff --git a/java/performance/pom.xml b/java/performance/pom.xml index e9023ece080a3..765b6a58cd8f0 100644 --- a/java/performance/pom.xml +++ b/java/performance/pom.xml @@ -22,7 +22,9 @@ JMH Performance benchmarks for other Arrow libraries. 
+ UTF-8 1.37 + 1.8 benchmarks true .* @@ -81,6 +83,42 @@ + + + + maven-clean-plugin + 3.3.2 + + + maven-deploy-plugin + 3.1.1 + + + maven-install-plugin + 3.1.1 + + + maven-jar-plugin + 3.3.0 + + + maven-javadoc-plugin + 3.6.3 + + + maven-resources-plugin + 3.3.1 + + + maven-source-plugin + 2.2.1 + + + maven-surefire-plugin + 3.2.5 + + + org.apache.maven.plugins @@ -128,6 +166,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 ${skip.perf.benchmarks} test @@ -164,4 +203,5 @@ + diff --git a/java/pom.xml b/java/pom.xml index f3639858d7818..925ec585152bc 100644 --- a/java/pom.xml +++ b/java/pom.xml @@ -15,7 +15,7 @@ org.apache apache - 31 + 18 org.apache.arrow @@ -85,7 +85,7 @@ 33.0.0-jre 4.1.108.Final 1.63.0 - 3.25.1 + 3.23.1 2.17.0 3.4.0 23.5.26 @@ -95,28 +95,10 @@ true 9+181-r4173-1 2.24.0 + 3.12.1 5.11.0 5.2.0 3.42.0 - none - -Xdoclint:none - - 1.8 - 1.8 - 3.11.0 - 3.2.5 - 0.16.1 - 3.7.1 - 3.12.1 - 3.6.1 - 3.2.0 - - 3.2.2 - 3.6.3 - 3.5.0 @@ -133,6 +115,7 @@ org.checkerframework checker-qual ${checker.framework.version} + provided com.google.flatbuffers @@ -167,8 +150,8 @@ org.immutables - value - 2.10.0 + value-annotations + 2.10.1 provided @@ -286,16 +269,40 @@ 8.3.0 test + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + 3.1.2 + + + org.apache.rat + apache-rat-plugin + 0.16.1 + + + org.apache.maven.plugins + maven-resources-plugin + 3.3.1 + org.apache.maven.plugins maven-compiler-plugin + ${maven-compiler-plugin.version} - true **/module-info.java **/module-info.java false @@ -303,13 +310,23 @@ org.immutables value - 2.10.0 + 2.10.1 + + maven-enforcer-plugin + 3.4.1 + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + maven-surefire-plugin + 3.2.5 true true @@ -324,9 +341,22 @@ 1048576 + + + org.junit.jupiter + junit-jupiter-engine + ${dep.junit.jupiter.version} + + + org.apache.maven.surefire + surefire-junit-platform + 3.2.5 + + maven-failsafe-plugin + 3.2.5 ${project.build.directory} @@ -415,22 +445,6 @@ - - - org.apache.drill.tools - drill-fmpp-maven-plugin - [1.0,) - - generate - - - - - false - true - - - @@ -438,7 +452,9 @@ org.apache.maven.plugins maven-javadoc-plugin + 3.6.3 + 8 **/module-info.java @@ -449,11 +465,21 @@ module-info-compiler-maven-plugin ${project.version} + + org.apache.maven.plugins + maven-project-info-reports-plugin + 3.5.0 + + + org.apache.maven.plugins + maven-site-plugin + 3.12.1 + com.gradle - gradle-enterprise-maven-extension + develocity-maven-extension - + @@ -478,7 +504,7 @@ - + @@ -496,36 +522,6 @@ spotless-maven-plugin 2.30.0 - - org.codehaus.mojo - build-helper-maven-plugin - 1.9.1 - - - org.codehaus.mojo - properties-maven-plugin - 1.2.1 - - - org.codehaus.mojo - exec-maven-plugin - 3.2.0 - - - pl.project13.maven - git-commit-id-plugin - 4.0.5 - - - org.cyclonedx - cyclonedx-maven-plugin - 2.8.0 - - - org.apache.drill.tools - drill-fmpp-maven-plugin - 1.21.1 - @@ -599,6 +595,11 @@ **/logback.xml + true + + true + true + org.apache.arrow ${username} @@ -618,17 +619,42 @@ + + org.apache.maven.plugins + maven-resources-plugin + + UTF-8 + + org.apache.maven.plugins maven-compiler-plugin + 1.8 + 1.8 2048m + false true maven-enforcer-plugin + + validate_java_and_maven_version + + enforce + + verify + false + + + + [3.3.0,4) + + + + avoid_bad_dependencies @@ -644,6 +670,9 @@ org.mortbay.jetty:servlet-api org.mortbay.jetty:servlet-api-2.5 log4j:log4j + + org.immutables:value + org.checkerframework:checker @@ -654,6 +683,8 @@ pl.project13.maven git-commit-id-plugin + 4.0.5 + dd.MM.yyyy '@' HH:mm:ss z false @@ -695,6 +726,7 @@ 
org.apache.maven.plugins maven-checkstyle-plugin + 3.1.0 **/module-info.java dev/checkstyle/checkstyle.xml @@ -749,7 +781,6 @@ javax.annotation:javax.annotation-api:* org.apache.hadoop:hadoop-client-api - org.checkerframework:checker-qual @@ -758,6 +789,7 @@ org.cyclonedx cyclonedx-maven-plugin + 2.7.11 @@ -788,10 +820,12 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 com.diffplug.spotless @@ -826,6 +860,7 @@ org.apache.maven.plugins maven-javadoc-plugin + 3.6.3 **/module-info.java @@ -853,15 +888,28 @@ org.apache.maven.plugins maven-project-info-reports-plugin + 3.5.0 org.apache.maven.plugins maven-site-plugin + 3.12.1 + + java-nodoclint + + [1.8,) + + + none + -Xdoclint:none + + + arrow-c-data @@ -909,6 +957,7 @@ org.apache.maven.plugins maven-compiler-plugin + true -XDcompilePolicy=simple -Xplugin:ErrorProne @@ -941,6 +990,9 @@ org.apache.maven.plugins maven-compiler-plugin + 8 + 8 + UTF-8 -XDcompilePolicy=simple -Xplugin:ErrorProne -XepExcludedPaths:.*/(target/generated-sources)/.* @@ -964,16 +1016,6 @@ - - - - - jdk11+ - - [11,] - - - org.apache.maven.plugins maven-surefire-plugin @@ -981,13 +1023,6 @@ --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - org.apache.maven.plugins - maven-failsafe-plugin - - --add-opens=java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED - - @@ -1028,6 +1063,7 @@ org.jacoco jacoco-maven-plugin + 0.8.11 @@ -1073,6 +1109,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 cdata-cmake @@ -1129,6 +1166,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 jni-cpp-cmake @@ -1235,6 +1273,7 @@ org.codehaus.mojo exec-maven-plugin + 3.2.0 jni-cpp-cmake @@ -1324,4 +1363,5 @@ + diff --git a/java/tools/pom.xml b/java/tools/pom.xml index 58b790c9f027f..5d9db75e525bd 100644 --- a/java/tools/pom.xml +++ b/java/tools/pom.xml @@ -36,7 +36,7 @@ org.immutables - value + value-annotations com.google.guava @@ -54,11 +54,6 @@ 1.3.14 test - com.fasterxml.jackson.core jackson-core @@ -90,6 +85,7 @@ maven-assembly-plugin + 3.7.1 jar-with-dependencies @@ -105,21 +101,7 @@ - - org.apache.maven.plugins - maven-dependency-plugin - - - analyze - verify - - - com.fasterxml.jackson.core:* - - - - - + diff --git a/java/vector/pom.xml b/java/vector/pom.xml index ca932ae6f26f9..c39504df2b207 100644 --- a/java/vector/pom.xml +++ b/java/vector/pom.xml @@ -32,7 +32,7 @@ org.immutables - value + value-annotations com.fasterxml.jackson.core @@ -76,7 +76,64 @@ + + + + true + + + false + + apache + apache + https://repo.maven.apache.org/maven2/ + + + + + + + codegen + + ${basedir}/src/main/codegen + + + + + + + org.eclipse.m2e + lifecycle-mapping + 1.0.0 + + + + + + org.apache.drill.tools + drill-fmpp-maven-plugin + [1.0,) + + generate + + + + + false + true + + + + + + + + + + org.apache.maven.plugins @@ -106,10 +163,33 @@ + + maven-resources-plugin + + + + copy-fmpp-resources + + copy-resources + + initialize + + ${project.build.directory}/codegen + + + src/main/codegen + false + + + + + + org.apache.drill.tools drill-fmpp-maven-plugin + 1.21.1 generate-fmpp @@ -119,8 +199,8 @@ generate-sources src/main/codegen/config.fmpp - ${project.build.directory}/generated-sources - src/main/codegen/templates + ${project.build.directory}/generated-sources/fmpp + ${project.build.directory}/codegen/templates @@ -128,6 +208,13 @@ org.apache.maven.plugins maven-shade-plugin + + 3.2.4 @@ -156,6 +243,7 @@ + @@ -188,4 +276,5 @@ + diff --git a/java/vector/src/main/codegen/data/ArrowTypes.tdd 
b/java/vector/src/main/codegen/data/ArrowTypes.tdd index 9fe40f2319bfd..72df4779793f0 100644 --- a/java/vector/src/main/codegen/data/ArrowTypes.tdd +++ b/java/vector/src/main/codegen/data/ArrowTypes.tdd @@ -129,6 +129,11 @@ name: "Duration", fields: [{name: "unit", type: short, valueType: TimeUnit}], complex: false + }, + { + name: "ListView", + fields: [], + complex: true } ] } diff --git a/java/vector/src/main/codegen/templates/StructWriters.java b/java/vector/src/main/codegen/templates/StructWriters.java index b6dd2b75c526a..b676173ac39d9 100644 --- a/java/vector/src/main/codegen/templates/StructWriters.java +++ b/java/vector/src/main/codegen/templates/StructWriters.java @@ -61,6 +61,7 @@ public class ${mode}StructWriter extends AbstractFieldWriter { this.initialCapacity = 0; for (Field child : container.getField().getChildren()) { MinorType minorType = Types.getMinorTypeForArrowType(child.getType()); + addVectorAsNullable = child.isNullable(); switch (minorType) { case STRUCT: struct(child.getName()); diff --git a/java/vector/src/main/codegen/templates/UnionListWriter.java b/java/vector/src/main/codegen/templates/UnionListWriter.java index 5c0565ee27175..eeb964c055f71 100644 --- a/java/vector/src/main/codegen/templates/UnionListWriter.java +++ b/java/vector/src/main/codegen/templates/UnionListWriter.java @@ -26,7 +26,7 @@ import java.math.BigDecimal; <@pp.dropOutputFile /> -<#list ["List", "LargeList"] as listName> +<#list ["List", "ListView", "LargeList"] as listName> <@pp.changeOutputFile name="/org/apache/arrow/vector/complex/impl/Union${listName}Writer.java" /> @@ -59,6 +59,10 @@ public class Union${listName}Writer extends AbstractFieldWriter { private static final int OFFSET_WIDTH = 4; + <#if listName = "ListView"> + private static final long SIZE_WIDTH = 4; + + public Union${listName}Writer(${listName}Vector vector) { this(vector, NullableStructWriterFactory.getNullableStructWriterFactoryInstance()); } @@ -193,6 +197,24 @@ public void endList() { setPosition(idx() + 1); listStarted = false; } + <#elseif listName == "ListView"> + @Override + public void startList() { + vector.startNewValue(idx()); + writer.setPosition(vector.getOffsetBuffer().getInt((idx()) * OFFSET_WIDTH)); + listStarted = true; + } + + @Override + public void endList() { + int sizeUptoIdx = 0; + for (int i = 0; i < idx(); i++) { + sizeUptoIdx += vector.getSizeBuffer().getInt(i * SIZE_WIDTH); + } + vector.getSizeBuffer().setInt(idx() * SIZE_WIDTH, writer.idx() - sizeUptoIdx); + setPosition(idx() + 1); + listStarted = false; + } <#else> @Override public void startList() { diff --git a/java/vector/src/main/codegen/templates/UnionReader.java b/java/vector/src/main/codegen/templates/UnionReader.java index 956bc91e9185c..243bd832255c2 100644 --- a/java/vector/src/main/codegen/templates/UnionReader.java +++ b/java/vector/src/main/codegen/templates/UnionReader.java @@ -39,7 +39,7 @@ @SuppressWarnings("unused") public class UnionReader extends AbstractFieldReader { - private static final int NUM_SUPPORTED_TYPES = 48; + private static final int NUM_SUPPORTED_TYPES = 49; private BaseReader[] readers = new BaseReader[NUM_SUPPORTED_TYPES]; public UnionVector data; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java index 2f80775a48f58..b3e86fab05462 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/BaseVariableWidthViewVector.java @@ -46,7 +46,7 @@ */ public abstract class BaseVariableWidthViewVector extends BaseValueVector implements VariableWidthFieldVector { // A single element of a view comprises 16 bytes - protected static final int ELEMENT_SIZE = 16; + public static final int ELEMENT_SIZE = 16; public static final int INITIAL_VIEW_VALUE_ALLOCATION = 4096; private static final int INITIAL_BYTE_COUNT = INITIAL_VIEW_VALUE_ALLOCATION * ELEMENT_SIZE; private static final int MAX_BUFFER_SIZE = (int) Math.min(MAX_ALLOCATION_SIZE, Integer.MAX_VALUE); @@ -70,14 +70,14 @@ public abstract class BaseVariableWidthViewVector extends BaseValueVector implem * * */ // 12 byte unsigned int to track inline views - protected static final int INLINE_SIZE = 12; + public static final int INLINE_SIZE = 12; // The first 4 bytes of view are allocated for length - protected static final int LENGTH_WIDTH = 4; + public static final int LENGTH_WIDTH = 4; // The second 4 bytes of view are allocated for prefix width - protected static final int PREFIX_WIDTH = 4; + public static final int PREFIX_WIDTH = 4; // The third 4 bytes of view are allocated for buffer index - protected static final int BUF_INDEX_WIDTH = 4; - protected static final byte[] EMPTY_BYTE_ARRAY = new byte[]{}; + public static final int BUF_INDEX_WIDTH = 4; + public static final byte[] EMPTY_BYTE_ARRAY = new byte[]{}; protected ArrowBuf validityBuffer; // The view buffer is used to store the variable width view elements protected ArrowBuf viewBuffer; @@ -158,6 +158,15 @@ public ArrowBuf getDataBuffer() { return viewBuffer; } + /** + * Get the buffers that store the data for views in the vector. + * + * @return list of ArrowBuf + */ + public List getDataBuffers() { + return dataBuffers; + } + /** * BaseVariableWidthViewVector doesn't support offset buffer. * @@ -359,8 +368,21 @@ public List getChildrenFromFields() { */ @Override public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { - // TODO: https://github.com/apache/arrow/issues/40931 - throw new UnsupportedOperationException("loadFieldBuffers is not supported for BaseVariableWidthViewVector"); + ArrowBuf bitBuf = ownBuffers.get(0); + ArrowBuf viewBuf = ownBuffers.get(1); + List dataBufs = ownBuffers.subList(2, ownBuffers.size()); + + this.clear(); + + this.viewBuffer = viewBuf.getReferenceManager().retain(viewBuf, allocator); + this.validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuf, allocator); + + for (ArrowBuf dataBuf : dataBufs) { + this.dataBuffers.add(dataBuf.getReferenceManager().retain(dataBuf, allocator)); + } + + lastSet = fieldNode.getLength() - 1; + valueCount = fieldNode.getLength(); } /** @@ -694,13 +716,6 @@ public Field getField() { * impact the reference counts for this buffer, so it only should be used for in-context * access. Also note that this buffer changes regularly, thus * external classes shouldn't hold a reference to it (unless they change it). - *

- * Note: This method only returns validityBuffer and valueBuffer. - * But it doesn't return the data buffers. - *

- * TODO: Implement a strategy to retrieve the data buffers. - * data buffer retrieval. - * * @param clear Whether to clear vector before returning, the buffers will still be refcounted * but the returned array will be the only reference to them * @return The underlying {@link ArrowBuf buffers} that is used by this @@ -713,9 +728,15 @@ public ArrowBuf[] getBuffers(boolean clear) { if (getBufferSize() == 0) { buffers = new ArrowBuf[0]; } else { - buffers = new ArrowBuf[2]; + final int dataBufferSize = dataBuffers.size(); + // validity and view buffers + final int fixedBufferSize = 2; + buffers = new ArrowBuf[fixedBufferSize + dataBufferSize]; buffers[0] = validityBuffer; buffers[1] = viewBuffer; + for (int i = fixedBufferSize; i < fixedBufferSize + dataBufferSize; i++) { + buffers[i] = dataBuffers.get(i - fixedBufferSize); + } } if (clear) { for (final ArrowBuf buffer : buffers) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java index 9725693348a48..4eeb92a0c9199 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/BufferLayout.java @@ -28,12 +28,18 @@ public class BufferLayout { /** * Enumeration of the different logical types a buffer can have. + * Data buffer is common to most of the layouts. + * Offset buffer is used for variable width types. + * Validity buffer is used for nullable types. + * Type buffer is used for Union types. + * Size buffer is used for ListView and LargeListView types. */ public enum BufferType { DATA("DATA"), OFFSET("OFFSET"), VALIDITY("VALIDITY"), - TYPE("TYPE_ID"); + TYPE("TYPE_ID"), + SIZE("SIZE"); private final String name; @@ -57,6 +63,7 @@ public String getName() { private static final BufferLayout VALUES_32 = new BufferLayout(BufferType.DATA, 32); private static final BufferLayout VALUES_16 = new BufferLayout(BufferType.DATA, 16); private static final BufferLayout VALUES_8 = new BufferLayout(BufferType.DATA, 8); + private static final BufferLayout SIZE_BUFFER = new BufferLayout(BufferType.SIZE, 32); public static BufferLayout typeBuffer() { return TYPE_BUFFER; @@ -70,6 +77,10 @@ public static BufferLayout largeOffsetBuffer() { return LARGE_OFFSET_BUFFER; } + public static BufferLayout sizeBuffer() { + return SIZE_BUFFER; + } + /** * Returns a databuffer for the given bitwidth. Only supports powers of two between 8 and 128 * inclusive. 
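(Aside, not part of the patch.) The hunks above make the Utf8View/BinaryView layout constants public and start exposing the variadic data buffers, and the RangeEqualsVisitor change further down reads views with exactly this layout. As a minimal sketch of how a 16-byte view element could be decoded under those constants, assuming a view ArrowBuf plus its list of data buffers (ViewDecodeSketch and readView are illustrative names, not APIs added by this change):

import java.util.List;

import org.apache.arrow.memory.ArrowBuf;

// Illustrative helper only: decodes the value at `index` from a view buffer
// plus its variadic data buffers, using the layout constants this diff makes
// public on BaseVariableWidthViewVector.
final class ViewDecodeSketch {
  // Mirrors the patch: 16-byte views, 12-byte inline limit, and 4-byte
  // length / prefix / buffer-index fields.
  static final int ELEMENT_SIZE = 16;
  static final int INLINE_SIZE = 12;
  static final int LENGTH_WIDTH = 4;
  static final int PREFIX_WIDTH = 4;
  static final int BUF_INDEX_WIDTH = 4;

  static byte[] readView(ArrowBuf viewBuffer, List<ArrowBuf> dataBuffers, int index) {
    final long viewStart = (long) index * ELEMENT_SIZE;
    final int length = viewBuffer.getInt(viewStart);
    final byte[] result = new byte[length];
    if (length <= INLINE_SIZE) {
      // Short values are stored inline, right after the 4-byte length.
      viewBuffer.getBytes(viewStart + LENGTH_WIDTH, result, 0, length);
    } else {
      // Long values keep only a 4-byte prefix inline; the payload lives in one
      // of the variadic data buffers, addressed by buffer index + offset.
      final int bufIndex = viewBuffer.getInt(viewStart + LENGTH_WIDTH + PREFIX_WIDTH);
      final int offset =
          viewBuffer.getInt(viewStart + LENGTH_WIDTH + PREFIX_WIDTH + BUF_INDEX_WIDTH);
      dataBuffers.get(bufIndex).getBytes(offset, result, 0, length);
    }
    return result;
  }
}

Values of at most INLINE_SIZE (12) bytes fit entirely inside the view; longer values keep a 4-byte prefix inline and point into one of the variadic data buffers, which is why getBuffers() and loadFieldBuffers() in the hunk above now have to carry those data buffers along.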
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java index 18032528c86d8..0d01d77632bde 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/TypeLayout.java @@ -28,6 +28,7 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.ArrowType.ArrowTypeVisitor; import org.apache.arrow.vector.types.pojo.ArrowType.Binary; +import org.apache.arrow.vector.types.pojo.ArrowType.BinaryView; import org.apache.arrow.vector.types.pojo.ArrowType.Bool; import org.apache.arrow.vector.types.pojo.ArrowType.Date; import org.apache.arrow.vector.types.pojo.ArrowType.Decimal; @@ -101,7 +102,7 @@ public TypeLayout visit(Timestamp type) { } @Override - public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public TypeLayout visit(ArrowType.List type) { List vectors = asList( BufferLayout.validityVector(), BufferLayout.offsetBuffer() @@ -109,6 +110,16 @@ public TypeLayout visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) return new TypeLayout(vectors); } + @Override + public TypeLayout visit(ArrowType.ListView type) { + List vectors = asList( + BufferLayout.validityVector(), + BufferLayout.offsetBuffer(), + BufferLayout.sizeBuffer() + ); + return new TypeLayout(vectors); + } + @Override public TypeLayout visit(ArrowType.LargeList type) { List vectors = asList( @@ -176,8 +187,7 @@ public TypeLayout visit(Binary type) { @Override public TypeLayout visit(ArrowType.BinaryView type) { - // TODO: https://github.com/apache/arrow/issues/40934 - throw new UnsupportedOperationException("BinaryView not supported"); + return newVariableWidthViewTypeLayout(); } @Override @@ -187,8 +197,7 @@ public TypeLayout visit(Utf8 type) { @Override public TypeLayout visit(Utf8View type) { - // TODO: https://github.com/apache/arrow/issues/40934 - throw new UnsupportedOperationException("Utf8View not supported"); + return newVariableWidthViewTypeLayout(); } @Override @@ -206,7 +215,12 @@ private TypeLayout newVariableWidthTypeLayout() { BufferLayout.byteVector()); } + private TypeLayout newVariableWidthViewTypeLayout() { + return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.byteVector()); + } + private TypeLayout newLargeVariableWidthTypeLayout() { + // NOTE: only considers the non variadic buffers return newPrimitiveTypeLayout(BufferLayout.validityVector(), BufferLayout.largeOffsetBuffer(), BufferLayout.byteVector()); } @@ -312,11 +326,17 @@ public Integer visit(Timestamp type) { } @Override - public Integer visit(org.apache.arrow.vector.types.pojo.ArrowType.List type) { + public Integer visit(ArrowType.List type) { // validity buffer + offset buffer return 2; } + @Override + public Integer visit(ArrowType.ListView type) { + // validity buffer + offset buffer + size buffer + return 3; + } + @Override public Integer visit(ArrowType.LargeList type) { // validity buffer + offset buffer @@ -361,9 +381,9 @@ public Integer visit(Binary type) { } @Override - public Integer visit(ArrowType.BinaryView type) { - // TODO: https://github.com/apache/arrow/issues/40935 - return VARIABLE_WIDTH_BUFFER_COUNT; + public Integer visit(BinaryView type) { + // NOTE: only consider the validity and view buffers + return 2; } @Override @@ -373,8 +393,8 @@ public Integer visit(Utf8 type) { @Override public Integer visit(Utf8View type) { - // TODO: https://github.com/apache/arrow/issues/40935 
- return VARIABLE_WIDTH_BUFFER_COUNT; + // NOTE: only consider the validity and view buffers + return 2; } @Override diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java index 510cef24c7e16..9590e70f46770 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorLoader.java @@ -80,8 +80,13 @@ public void load(ArrowRecordBatch recordBatch) { CompressionUtil.CodecType.fromCompressionType(recordBatch.getBodyCompression().getCodec()); decompressionNeeded = codecType != CompressionUtil.CodecType.NO_COMPRESSION; CompressionCodec codec = decompressionNeeded ? factory.createCodec(codecType) : NoCompressionCodec.INSTANCE; + Iterator variadicBufferCounts = null; + if (recordBatch.getVariadicBufferCounts() != null && !recordBatch.getVariadicBufferCounts().isEmpty()) { + variadicBufferCounts = recordBatch.getVariadicBufferCounts().iterator(); + } + for (FieldVector fieldVector : root.getFieldVectors()) { - loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec); + loadBuffers(fieldVector, fieldVector.getField(), buffers, nodes, codec, variadicBufferCounts); } root.setRowCount(recordBatch.getLength()); if (nodes.hasNext() || buffers.hasNext()) { @@ -95,10 +100,16 @@ private void loadBuffers( Field field, Iterator buffers, Iterator nodes, - CompressionCodec codec) { + CompressionCodec codec, + Iterator variadicBufferCounts) { checkArgument(nodes.hasNext(), "no more field nodes for field %s and vector %s", field, vector); ArrowFieldNode fieldNode = nodes.next(); - int bufferLayoutCount = TypeLayout.getTypeBufferCount(field.getType()); + // variadicBufferLayoutCount will be 0 for vectors of type except BaseVariableWidthViewVector + long variadicBufferLayoutCount = 0; + if (variadicBufferCounts != null) { + variadicBufferLayoutCount = variadicBufferCounts.next(); + } + int bufferLayoutCount = (int) (variadicBufferLayoutCount + TypeLayout.getTypeBufferCount(field.getType())); List ownBuffers = new ArrayList<>(bufferLayoutCount); for (int j = 0; j < bufferLayoutCount; j++) { ArrowBuf nextBuf = buffers.next(); @@ -130,7 +141,7 @@ private void loadBuffers( for (int i = 0; i < childrenFromFields.size(); i++) { Field child = children.get(i); FieldVector fieldVector = childrenFromFields.get(i); - loadBuffers(fieldVector, child, buffers, nodes, codec); + loadBuffers(fieldVector, child, buffers, nodes, codec, variadicBufferCounts); } } } diff --git a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java index 1d44e37ac71af..8528099b6d619 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/VectorUnloader.java @@ -80,19 +80,30 @@ public VectorUnloader( public ArrowRecordBatch getRecordBatch() { List nodes = new ArrayList<>(); List buffers = new ArrayList<>(); + List variadicBufferCounts = new ArrayList<>(); for (FieldVector vector : root.getFieldVectors()) { - appendNodes(vector, nodes, buffers); + appendNodes(vector, nodes, buffers, variadicBufferCounts); } // Do NOT retain buffers in ArrowRecordBatch constructor since we have already retained them. 
return new ArrowRecordBatch( - root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), alignBuffers, - /*retainBuffers*/ false); + root.getRowCount(), nodes, buffers, CompressionUtil.createBodyCompression(codec), + variadicBufferCounts, alignBuffers, /*retainBuffers*/ false); } - private void appendNodes(FieldVector vector, List nodes, List buffers) { + private long getVariadicBufferCount(FieldVector vector) { + if (vector instanceof BaseVariableWidthViewVector) { + return ((BaseVariableWidthViewVector) vector).getDataBuffers().size(); + } + return 0L; + } + + private void appendNodes(FieldVector vector, List nodes, List buffers, + List variadicBufferCounts) { nodes.add(new ArrowFieldNode(vector.getValueCount(), includeNullCount ? vector.getNullCount() : -1)); List fieldBuffers = vector.getFieldBuffers(); - int expectedBufferCount = TypeLayout.getTypeBufferCount(vector.getField().getType()); + long variadicBufferCount = getVariadicBufferCount(vector); + int expectedBufferCount = (int) (TypeLayout.getTypeBufferCount(vector.getField().getType()) + variadicBufferCount); + variadicBufferCounts.add(variadicBufferCount); if (fieldBuffers.size() != expectedBufferCount) { throw new IllegalArgumentException(String.format( "wrong number of buffers for field %s in vector %s. found: %s", @@ -107,7 +118,7 @@ private void appendNodes(FieldVector vector, List nodes, List leftDataBuffers = leftVector.getDataBuffers(); + List rightDataBuffers = rightVector.getDataBuffers(); + + for (int i = 0; i < range.getLength(); i++) { + int leftIndex = range.getLeftStart() + i; + int rightIndex = range.getRightStart() + i; + + boolean isNull = leftVector.isNull(leftIndex); + if (isNull != rightVector.isNull(rightIndex)) { + return false; + } + + if (isNull) { + continue; + } + + int startLeftByteOffset = leftIndex * elementSize; + + int startRightByteOffset = rightIndex * elementSize; + + int leftDataBufferValueLength = leftVector.getValueLength(leftIndex); + int rightDataBufferValueLength = rightVector.getValueLength(rightIndex); + + if (leftDataBufferValueLength != rightDataBufferValueLength) { + return false; + } + + if (leftDataBufferValueLength > BaseVariableWidthViewVector.INLINE_SIZE) { + // if the value is stored in the dataBuffers + int leftDataBufferIndex = leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth); + int rightDataBufferIndex = rightViewBuffer.getInt(startRightByteOffset + lengthWidth + prefixWidth); + + final int leftDataOffset = + leftViewBuffer.getInt(startLeftByteOffset + lengthWidth + prefixWidth + bufIndexWidth); + final int rightDataOffset = + rightViewBuffer.getInt(startRightByteOffset + lengthWidth + prefixWidth + bufIndexWidth); + + ArrowBuf leftDataBuffer = leftDataBuffers.get(leftDataBufferIndex); + ArrowBuf rightDataBuffer = rightDataBuffers.get(rightDataBufferIndex); + + // check equality in the considered string stored in the dataBuffers + int retDataBuf = ByteFunctionHelpers.equal( + leftDataBuffer, leftDataOffset, leftDataOffset + leftDataBufferValueLength, + rightDataBuffer, rightDataOffset, rightDataOffset + rightDataBufferValueLength); + + if (retDataBuf == 0) { + return false; + } + } else { + // if the value is stored in the view + final int leftDataOffset = startLeftByteOffset + lengthWidth; + final int rightDataOffset = startRightByteOffset + lengthWidth; + + // check equality in the considered string stored in the view + int retDataBuf = ByteFunctionHelpers.equal( + leftViewBuffer, leftDataOffset, leftDataOffset + 
leftDataBufferValueLength, + rightViewBuffer, rightDataOffset, rightDataOffset + rightDataBufferValueLength); + + if (retDataBuf == 0) { + return false; + } + } + + } + return true; + } + protected boolean compareListVectors(Range range) { ListVector leftVector = (ListVector) left; ListVector rightVector = (ListVector) right; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java new file mode 100644 index 0000000000000..73a25738854f3 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/BaseRepeatedValueViewVector.java @@ -0,0 +1,405 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector.complex; + +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; + +import java.util.Collections; +import java.util.Iterator; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.util.Preconditions; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BaseFixedWidthVector; +import org.apache.arrow.vector.BaseValueVector; +import org.apache.arrow.vector.BaseVariableWidthVector; +import org.apache.arrow.vector.DensityAwareVector; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.NullVector; +import org.apache.arrow.vector.UInt4Vector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.ZeroVector; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.SchemaChangeRuntimeException; + +public abstract class BaseRepeatedValueViewVector extends BaseValueVector + implements RepeatedValueVector, BaseListVector { + + public static final FieldVector DEFAULT_DATA_VECTOR = ZeroVector.INSTANCE; + public static final String DATA_VECTOR_NAME = "$data$"; + + public static final byte OFFSET_WIDTH = 4; + public static final byte SIZE_WIDTH = 4; + protected ArrowBuf offsetBuffer; + protected ArrowBuf sizeBuffer; + protected FieldVector vector; + protected final CallBack repeatedCallBack; + protected int valueCount; + protected long offsetAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH; + protected long sizeAllocationSizeInBytes = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH; + private final String name; + + protected String defaultDataVectorName = DATA_VECTOR_NAME; + + protected BaseRepeatedValueViewVector(String name, BufferAllocator allocator, CallBack callBack) 
{ + this(name, allocator, DEFAULT_DATA_VECTOR, callBack); + } + + protected BaseRepeatedValueViewVector( + String name, BufferAllocator allocator, FieldVector vector, CallBack callBack) { + super(allocator); + this.name = name; + this.offsetBuffer = allocator.getEmpty(); + this.sizeBuffer = allocator.getEmpty(); + this.vector = Preconditions.checkNotNull(vector, "data vector cannot be null"); + this.repeatedCallBack = callBack; + this.valueCount = 0; + } + + @Override + public String getName() { + return name; + } + + @Override + public boolean allocateNewSafe() { + boolean dataAlloc = false; + try { + allocateBuffers(); + dataAlloc = vector.allocateNewSafe(); + } catch (Exception e) { + clear(); + return false; + } finally { + if (!dataAlloc) { + clear(); + } + } + return dataAlloc; + } + + private void allocateBuffers() { + offsetBuffer = allocateBuffers(offsetAllocationSizeInBytes); + sizeBuffer = allocateBuffers(sizeAllocationSizeInBytes); + } + + private ArrowBuf allocateBuffers(final long size) { + final int curSize = (int) size; + ArrowBuf buffer = allocator.buffer(curSize); + buffer.readerIndex(0); + buffer.setZero(0, buffer.capacity()); + return buffer; + } + + @Override + public void reAlloc() { + reallocateBuffers(); + vector.reAlloc(); + } + + protected void reallocateBuffers() { + reallocOffsetBuffer(); + reallocSizeBuffer(); + } + + private void reallocOffsetBuffer() { + final long currentBufferCapacity = offsetBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (offsetAllocationSizeInBytes > 0) { + newAllocationSize = offsetAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * OFFSET_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) OFFSET_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= offsetBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, offsetBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + offsetBuffer.getReferenceManager().release(1); + offsetBuffer = newBuf; + offsetAllocationSizeInBytes = newAllocationSize; + } + + private void reallocSizeBuffer() { + final long currentBufferCapacity = sizeBuffer.capacity(); + long newAllocationSize = currentBufferCapacity * 2; + if (newAllocationSize == 0) { + if (sizeAllocationSizeInBytes > 0) { + newAllocationSize = sizeAllocationSizeInBytes; + } else { + newAllocationSize = INITIAL_VALUE_ALLOCATION * SIZE_WIDTH * 2; + } + } + + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + newAllocationSize = Math.min(newAllocationSize, (long) SIZE_WIDTH * Integer.MAX_VALUE); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE || newAllocationSize <= sizeBuffer.capacity()) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, sizeBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + sizeBuffer.getReferenceManager().release(1); + sizeBuffer = newBuf; + sizeAllocationSizeInBytes = newAllocationSize; + } + + @Override + public FieldVector getDataVector() { + 
return vector; + } + + @Override + public void setInitialCapacity(int numRecords) { + offsetAllocationSizeInBytes = (numRecords) * OFFSET_WIDTH; + sizeAllocationSizeInBytes = (numRecords) * SIZE_WIDTH; + if (vector instanceof BaseFixedWidthVector || vector instanceof BaseVariableWidthVector) { + vector.setInitialCapacity(numRecords * RepeatedValueVector.DEFAULT_REPEAT_PER_RECORD); + } else { + vector.setInitialCapacity(numRecords); + } + } + + @Override + public void setInitialCapacity(int numRecords, double density) { + if ((numRecords * density) >= Integer.MAX_VALUE) { + throw new OversizedAllocationException("Requested amount of memory is more than max allowed"); + } + + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + + int innerValueCapacity = Math.max((int) (numRecords * density), 1); + + if (vector instanceof DensityAwareVector) { + ((DensityAwareVector) vector).setInitialCapacity(innerValueCapacity, density); + } else { + vector.setInitialCapacity(innerValueCapacity); + } + } + + /** + * Specialized version of setInitialTotalCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param totalNumberOfElements the total number of elements to allow + * for in this vector across all records. 
+ */ + public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + offsetAllocationSizeInBytes = numRecords * OFFSET_WIDTH; + sizeAllocationSizeInBytes = numRecords * SIZE_WIDTH; + vector.setInitialCapacity(totalNumberOfElements); + } + + @Override + public int getValueCapacity() { + throw new UnsupportedOperationException( + "Get value capacity is not supported in RepeatedValueVector"); + } + + protected int getOffsetBufferValueCapacity() { + return capAtMaxInt(offsetBuffer.capacity() / OFFSET_WIDTH); + } + + protected int getSizeBufferValueCapacity() { + return capAtMaxInt(sizeBuffer.capacity() / SIZE_WIDTH); + } + + @Override + public int getBufferSize() { + if (valueCount == 0) { + return 0; + } + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + vector.getBufferSize(); + } + + @Override + public int getBufferSizeFor(int valueCount) { + if (valueCount == 0) { + return 0; + } + + int innerVectorValueCount = 0; + + for (int i = 0; i < valueCount; i++) { + innerVectorValueCount += sizeBuffer.getInt(i * SIZE_WIDTH); + } + + return (valueCount * OFFSET_WIDTH) + (valueCount * SIZE_WIDTH) + + vector.getBufferSizeFor(innerVectorValueCount); + } + + @Override + public Iterator iterator() { + return Collections.singleton(getDataVector()).iterator(); + } + + @Override + public void clear() { + offsetBuffer = releaseBuffer(offsetBuffer); + sizeBuffer = releaseBuffer(sizeBuffer); + vector.clear(); + valueCount = 0; + super.clear(); + } + + @Override + public void reset() { + offsetBuffer.setZero(0, offsetBuffer.capacity()); + sizeBuffer.setZero(0, sizeBuffer.capacity()); + vector.reset(); + valueCount = 0; + } + + @Override + public ArrowBuf[] getBuffers(boolean clear) { + return new ArrowBuf[0]; + } + + @Override + public int getValueCount() { + return valueCount; + } + + @Override + public void setValueCount(int valueCount) { + this.valueCount = valueCount; + while (valueCount > getOffsetBufferValueCapacity()) { + reallocateBuffers(); + } + final int childValueCount = valueCount == 0 ? 0 : getLengthOfChildVector(); + vector.setValueCount(childValueCount); + } + + protected int getLengthOfChildVector() { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < valueCount; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); + } + + return maxOffsetSizeSum - minOffset; + } + + protected int getLengthOfChildVectorByIndex(int index) { + int maxOffsetSizeSum = offsetBuffer.getInt(0) + sizeBuffer.getInt(0); + int minOffset = offsetBuffer.getInt(0); + for (int i = 0; i < index; i++) { + int currentOffset = offsetBuffer.getInt(i * OFFSET_WIDTH); + int currentSize = sizeBuffer.getInt(i * SIZE_WIDTH); + int currentSum = currentOffset + currentSize; + + maxOffsetSizeSum = Math.max(maxOffsetSizeSum, currentSum); + minOffset = Math.min(minOffset, currentOffset); + } + + return maxOffsetSizeSum - minOffset; + } + + /** + * Initialize the data vector (and execute callback) if it hasn't already been done, + * returns the data vector. 
+ */ + public AddOrGetResult addOrGetVector(FieldType fieldType) { + boolean created = false; + if (vector instanceof NullVector) { + vector = fieldType.createNewSingleVector(defaultDataVectorName, allocator, repeatedCallBack); + // returned vector must have the same field + created = true; + if (repeatedCallBack != null && + // not a schema change if changing from ZeroVector to ZeroVector + (fieldType.getType().getTypeID() != ArrowType.ArrowTypeID.Null)) { + repeatedCallBack.doWork(); + } + } + + if (vector.getField().getType().getTypeID() != fieldType.getType().getTypeID()) { + final String msg = String.format("Inner vector type mismatch. Requested type: [%s], actual type: [%s]", + fieldType.getType().getTypeID(), vector.getField().getType().getTypeID()); + throw new SchemaChangeRuntimeException(msg); + } + + return new AddOrGetResult<>((T) vector, created); + } + + protected void replaceDataVector(FieldVector v) { + vector.clear(); + vector = v; + } + + public abstract boolean isEmpty(int index); + + /** + * Start a new value at the given index. + * @param index the index to start the new value at + * @return the offset in the data vector where the new value starts + */ + public int startNewValue(int index) { + while (index >= getOffsetBufferValueCapacity()) { + reallocOffsetBuffer(); + } + while (index >= getSizeBufferValueCapacity()) { + reallocSizeBuffer(); + } + + if (index > 0) { + final int prevOffset = getLengthOfChildVectorByIndex(index); + offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset); + } + + setValueCount(index + 1); + return offsetBuffer.getInt(index * OFFSET_WIDTH); + } + + @Override + @Deprecated + public UInt4Vector getOffsetVector() { + throw new UnsupportedOperationException("There is no inner offset vector"); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java new file mode 100644 index 0000000000000..b19691e7aaab7 --- /dev/null +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/ListViewVector.java @@ -0,0 +1,872 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.arrow.vector.complex; + +import static java.util.Collections.singletonList; +import static org.apache.arrow.memory.util.LargeMemoryUtil.capAtMaxInt; +import static org.apache.arrow.memory.util.LargeMemoryUtil.checkedCastToInt; +import static org.apache.arrow.util.Preconditions.checkArgument; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.OutOfMemoryException; +import org.apache.arrow.memory.util.ArrowBufPointer; +import org.apache.arrow.memory.util.ByteFunctionHelpers; +import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.memory.util.hash.ArrowBufHasher; +import org.apache.arrow.vector.AddOrGetResult; +import org.apache.arrow.vector.BitVectorHelper; +import org.apache.arrow.vector.BufferBacked; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.ValueVector; +import org.apache.arrow.vector.compare.VectorVisitor; +import org.apache.arrow.vector.complex.impl.UnionListReader; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; +import org.apache.arrow.vector.complex.reader.FieldReader; +import org.apache.arrow.vector.ipc.message.ArrowFieldNode; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.arrow.vector.util.CallBack; +import org.apache.arrow.vector.util.JsonStringArrayList; +import org.apache.arrow.vector.util.OversizedAllocationException; +import org.apache.arrow.vector.util.TransferPair; + +/** + * A list view vector contains lists of a specific type of elements. + * Its structure contains four elements. + *

+ * <ol>
+ *   <li>A validity buffer.</li>
+ *   <li>An offset buffer, that denotes lists starts.</li>
+ *   <li>A size buffer, that denotes lists ends.</li>
+ *   <li>A child data vector that contains the elements of lists.</li>
+ * </ol>
+ * The latter three are managed by its superclass. + */ + +/* +* TODO: consider merging the functionality in `BaseRepeatedValueVector` into this class. +*/ +public class ListViewVector extends BaseRepeatedValueViewVector implements PromotableVector { + + protected ArrowBuf validityBuffer; + protected UnionListReader reader; + private CallBack callBack; + protected Field field; + protected int validityAllocationSizeInBytes; + + public static ListViewVector empty(String name, BufferAllocator allocator) { + return new ListViewVector(name, allocator, FieldType.nullable(ArrowType.ListView.INSTANCE), null); + } + + /** + * Constructs a new instance. + * + * @param name The name of the instance. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param fieldType The type of this list. + * @param callBack A schema change callback. + */ + public ListViewVector(String name, BufferAllocator allocator, FieldType fieldType, CallBack callBack) { + this(new Field(name, fieldType, null), allocator, callBack); + } + + /** + * Constructs a new instance. + * + * @param field The field materialized by this vector. + * @param allocator The allocator to use for allocating/reallocating buffers. + * @param callBack A schema change callback. + */ + public ListViewVector(Field field, BufferAllocator allocator, CallBack callBack) { + super(field.getName(), allocator, callBack); + this.validityBuffer = allocator.getEmpty(); + this.field = field; + this.callBack = callBack; + this.validityAllocationSizeInBytes = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION); + } + + @Override + public void initializeChildrenFromFields(List children) { + checkArgument(children.size() == 1, + "ListViews have one child Field. Found: %s", children.isEmpty() ? "none" : children); + + Field field = children.get(0); + AddOrGetResult addOrGetVector = addOrGetVector(field.getFieldType()); + checkArgument(addOrGetVector.isCreated(), "Child vector already existed: %s", addOrGetVector.getVector()); + + addOrGetVector.getVector().initializeChildrenFromFields(field.getChildren()); + this.field = new Field(this.field.getName(), this.field.getFieldType(), children); + } + + @Override + public void setInitialCapacity(int numRecords) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords); + } + + /** + * Specialized version of setInitialCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param density density of ListViewVector. + * Density is the average size of a list per position in the ListViewVector. + * For example, a + * density value of 10 implies each position in the list + * vector has a list of 10 values. + * A density value of 0.1 implies out of 10 positions in + * the list vector, 1 position has a list of size 1, and + * the remaining positions are null (no lists) or empty lists. 
+ * This helps in tightly controlling the memory we provision + * for inner data vector. + */ + @Override + public void setInitialCapacity(int numRecords, double density) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialCapacity(numRecords, density); + } + + /** + * Specialized version of setInitialTotalCapacity() for ListViewVector. + * This is used by some callers when they want to explicitly control and be + * conservative about memory allocated for inner data vector. + * This is very useful when we are working with memory constraints for a query + * and have a fixed amount of memory reserved for the record batch. + * In such cases, we are likely to face OOM or related problems when + * we reserve memory for a record batch with value count x and + * do setInitialCapacity(x) such that each vector allocates only + * what is necessary and not the default amount, but the multiplier + * forces the memory requirement to go beyond what was needed. + * + * @param numRecords value count + * @param totalNumberOfElements the total number of elements to allow + * for in this vector across all records. + */ + @Override + public void setInitialTotalCapacity(int numRecords, int totalNumberOfElements) { + validityAllocationSizeInBytes = getValidityBufferSizeFromCount(numRecords); + super.setInitialTotalCapacity(numRecords, totalNumberOfElements); + } + + @Override + public List getChildrenFromFields() { + return singletonList(getDataVector()); + } + + /** + * Load the buffers associated with this Field. + * @param fieldNode the fieldNode + * @param ownBuffers the buffers for this Field (own buffers only, children not included) + */ + @Override + public void loadFieldBuffers(ArrowFieldNode fieldNode, List ownBuffers) { + if (ownBuffers.size() != 3) { + throw new IllegalArgumentException("Illegal buffer count, expected " + + 3 + ", got: " + ownBuffers.size()); + } + + ArrowBuf bitBuffer = ownBuffers.get(0); + ArrowBuf offBuffer = ownBuffers.get(1); + ArrowBuf szBuffer = ownBuffers.get(2); + + validityBuffer.getReferenceManager().release(); + validityBuffer = BitVectorHelper.loadValidityBuffer(fieldNode, bitBuffer, allocator); + offsetBuffer.getReferenceManager().release(); + offsetBuffer = offBuffer.getReferenceManager().retain(offBuffer, allocator); + sizeBuffer.getReferenceManager().release(); + sizeBuffer = szBuffer.getReferenceManager().retain(szBuffer, allocator); + + validityAllocationSizeInBytes = checkedCastToInt(validityBuffer.capacity()); + offsetAllocationSizeInBytes = offsetBuffer.capacity(); + sizeAllocationSizeInBytes = sizeBuffer.capacity(); + + valueCount = fieldNode.getLength(); + } + + /** + * Set the reader and writer indexes for the inner buffers. + */ + private void setReaderAndWriterIndex() { + validityBuffer.readerIndex(0); + offsetBuffer.readerIndex(0); + sizeBuffer.readerIndex(0); + if (valueCount == 0) { + validityBuffer.writerIndex(0); + offsetBuffer.writerIndex(0); + sizeBuffer.writerIndex(0); + } else { + validityBuffer.writerIndex(getValidityBufferSizeFromCount(valueCount)); + offsetBuffer.writerIndex(valueCount * OFFSET_WIDTH); + sizeBuffer.writerIndex(valueCount * SIZE_WIDTH); + } + } + + @Override + public List getFieldBuffers() { + List result = new ArrayList<>(2); + setReaderAndWriterIndex(); + result.add(validityBuffer); + result.add(offsetBuffer); + result.add(sizeBuffer); + + return result; + } + + /** + * Export the buffers of the fields for C Data Interface. 
+ * This method traverses the buffers and export buffer and buffer's memory address into a list of + * buffers and a pointer to the list of buffers. + */ + @Override + public void exportCDataBuffers(List buffers, ArrowBuf buffersPtr, long nullValue) { + throw new UnsupportedOperationException("exportCDataBuffers Not implemented yet"); + } + + @Override + public void allocateNew() throws OutOfMemoryException { + if (!allocateNewSafe()) { + throw new OutOfMemoryException("Failure while allocating memory"); + } + } + + @Override + public boolean allocateNewSafe() { + boolean success = false; + try { + /* release the current buffers, hence this is a new allocation + * Note that, the `clear` method call below is releasing validityBuffer + * calling the superclass clear method which is releasing the associated buffers + * (sizeBuffer and offsetBuffer). + */ + clear(); + /* allocate validity buffer */ + allocateValidityBuffer(validityAllocationSizeInBytes); + /* allocate offset, data and sizes buffer */ + success = super.allocateNewSafe(); + } finally { + if (!success) { + clear(); + } + } + return success; + } + + protected void allocateValidityBuffer(final long size) { + final int curSize = (int) size; + validityBuffer = allocator.buffer(curSize); + validityBuffer.readerIndex(0); + validityAllocationSizeInBytes = curSize; + validityBuffer.setZero(0, validityBuffer.capacity()); + } + + @Override + public void reAlloc() { + /* reallocate the validity buffer */ + reallocValidityBuffer(); + /* reallocate the offset, size, and data */ + super.reAlloc(); + } + + protected void reallocValidityAndSizeAndOffsetBuffers() { + reallocateBuffers(); + reallocValidityBuffer(); + } + + private void reallocValidityBuffer() { + final int currentBufferCapacity = checkedCastToInt(validityBuffer.capacity()); + long newAllocationSize = getNewAllocationSize(currentBufferCapacity); + + final ArrowBuf newBuf = allocator.buffer(newAllocationSize); + newBuf.setBytes(0, validityBuffer, 0, currentBufferCapacity); + newBuf.setZero(currentBufferCapacity, newBuf.capacity() - currentBufferCapacity); + validityBuffer.getReferenceManager().release(1); + validityBuffer = newBuf; + validityAllocationSizeInBytes = (int) newAllocationSize; + } + + private long getNewAllocationSize(int currentBufferCapacity) { + long newAllocationSize = currentBufferCapacity * 2L; + if (newAllocationSize == 0) { + if (validityAllocationSizeInBytes > 0) { + newAllocationSize = validityAllocationSizeInBytes; + } else { + newAllocationSize = getValidityBufferSizeFromCount(INITIAL_VALUE_ALLOCATION) * 2L; + } + } + newAllocationSize = CommonUtil.nextPowerOfTwo(newAllocationSize); + assert newAllocationSize >= 1; + + if (newAllocationSize > MAX_ALLOCATION_SIZE) { + throw new OversizedAllocationException("Unable to expand the buffer"); + } + return newAllocationSize; + } + + @Override + public void copyFromSafe(int inIndex, int outIndex, ValueVector from) { + // TODO: https://github.com/apache/arrow/issues/41270 + throw new UnsupportedOperationException( + "ListViewVector does not support copyFromSafe operation yet."); + } + + @Override + public void copyFrom(int inIndex, int outIndex, ValueVector from) { + // TODO: https://github.com/apache/arrow/issues/41270 + throw new UnsupportedOperationException( + "ListViewVector does not support copyFrom operation yet."); + } + + @Override + public FieldVector getDataVector() { + return vector; + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator) { + return 
getTransferPair(ref, allocator, null); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator) { + return getTransferPair(field, allocator, null); + } + + @Override + public TransferPair getTransferPair(String ref, BufferAllocator allocator, CallBack callBack) { + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support getTransferPair(String, BufferAllocator, CallBack) yet"); + } + + @Override + public TransferPair getTransferPair(Field field, BufferAllocator allocator, CallBack callBack) { + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support getTransferPair(Field, BufferAllocator, CallBack) yet"); + } + + @Override + public TransferPair makeTransferPair(ValueVector target) { + // TODO: https://github.com/apache/arrow/issues/41269 + throw new UnsupportedOperationException( + "ListVector does not support makeTransferPair(ValueVector) yet"); + } + + @Override + public long getValidityBufferAddress() { + return validityBuffer.memoryAddress(); + } + + @Override + public long getDataBufferAddress() { + throw new UnsupportedOperationException(); + } + + @Override + public long getOffsetBufferAddress() { + return offsetBuffer.memoryAddress(); + } + + @Override + public ArrowBuf getValidityBuffer() { + return validityBuffer; + } + + @Override + public ArrowBuf getDataBuffer() { + throw new UnsupportedOperationException(); + } + + @Override + public ArrowBuf getOffsetBuffer() { + return offsetBuffer; + } + + public ArrowBuf getSizeBuffer() { + return sizeBuffer; + } + + public long getSizeBufferAddress() { + return sizeBuffer.memoryAddress(); + } + + /** + * Get the hash code for the element at the given index. + * @param index position of the element + * @return hash code for the element at the given index + */ + @Override + public int hashCode(int index) { + return hashCode(index, null); + } + + /** + * Get the hash code for the element at the given index. + * @param index position of the element + * @param hasher hasher to use + * @return hash code for the element at the given index + */ + @Override + public int hashCode(int index, ArrowBufHasher hasher) { + if (isSet(index) == 0) { + return ArrowBufPointer.NULL_HASH_CODE; + } + int hash = 0; + final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int end = sizeBuffer.getInt(index * OFFSET_WIDTH); + for (int i = start; i < end; i++) { + hash = ByteFunctionHelpers.combineHash(hash, vector.hashCode(i, hasher)); + } + return hash; + } + + @Override + public OUT accept(VectorVisitor visitor, IN value) { + throw new UnsupportedOperationException(); + } + + @Override + protected FieldReader getReaderImpl() { + // TODO: https://github.com/apache/arrow/issues/41569 + throw new UnsupportedOperationException( + "ListViewVector does not support getReaderImpl operation yet."); + } + + @Override + public UnionListReader getReader() { + // TODO: https://github.com/apache/arrow/issues/41569 + throw new UnsupportedOperationException( + "ListViewVector does not support getReader operation yet."); + } + + /** + * Get the size (number of bytes) of underlying buffers used by this + * vector. + * @return size of underlying buffers. 
+ */ + @Override + public int getBufferSize() { + if (valueCount == 0) { + return 0; + } + final int offsetBufferSize = valueCount * OFFSET_WIDTH; + final int sizeBufferSize = valueCount * SIZE_WIDTH; + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + return offsetBufferSize + sizeBufferSize + validityBufferSize + vector.getBufferSize(); + } + + /** + * Get the size (number of bytes) of underlying buffers used by this. + * @param valueCount the number of values to assume this vector contains + * @return size of underlying buffers. + */ + @Override + public int getBufferSizeFor(int valueCount) { + if (valueCount == 0) { + return 0; + } + final int validityBufferSize = getValidityBufferSizeFromCount(valueCount); + + return super.getBufferSizeFor(valueCount) + validityBufferSize; + } + + /** + * Get the field associated with the list view vector. + * @return the field + */ + @Override + public Field getField() { + if (field.getChildren().contains(getDataVector().getField())) { + return field; + } + field = new Field(field.getName(), field.getFieldType(), Collections.singletonList(getDataVector().getField())); + return field; + } + + /** + * Get the minor type for the vector. + * @return the minor type + */ + @Override + public MinorType getMinorType() { + return MinorType.LISTVIEW; + } + + /** + * Clear the vector data. + */ + @Override + public void clear() { + // calling superclass clear method which is releasing the sizeBufer and offsetBuffer + super.clear(); + validityBuffer = releaseBuffer(validityBuffer); + } + + /** + * Release the buffers associated with this vector. + */ + @Override + public void reset() { + super.reset(); + validityBuffer.setZero(0, validityBuffer.capacity()); + } + + /** + * Return the underlying buffers associated with this vector. Note that this doesn't + * impact the reference counts for this buffer, so it only should be used for in-context + * access. Also note that this buffer changes regularly, thus + * external classes shouldn't hold a reference to it (unless they change it). + * + * @param clear Whether to clear vector before returning, the buffers will still be refcounted + * but the returned array will be the only reference to them + * @return The underlying {@link ArrowBuf buffers} that is used by this + * vector instance. + */ + @Override + public ArrowBuf[] getBuffers(boolean clear) { + setReaderAndWriterIndex(); + final ArrowBuf[] buffers; + if (getBufferSize() == 0) { + buffers = new ArrowBuf[0]; + } else { + List list = new ArrayList<>(); + // the order must be validity, offset and size buffers + list.add(validityBuffer); + list.add(offsetBuffer); + list.add(sizeBuffer); + list.addAll(Arrays.asList(vector.getBuffers(false))); + buffers = list.toArray(new ArrowBuf[list.size()]); + } + if (clear) { + for (ArrowBuf buffer : buffers) { + buffer.getReferenceManager().retain(); + } + clear(); + } + return buffers; + } + + /** + * Get the element in the list view vector at a particular index. + * @param index position of the element + * @return Object at given position + */ + @Override + public List getObject(int index) { + if (isSet(index) == 0) { + return null; + } + final List vals = new JsonStringArrayList<>(); + final int start = offsetBuffer.getInt(index * OFFSET_WIDTH); + final int end = start + sizeBuffer.getInt((index) * SIZE_WIDTH); + final ValueVector vv = getDataVector(); + for (int i = start; i < end; i++) { + vals.add(vv.getObject(i)); + } + + return vals; + } + + /** + * Check if an element at given index is null. 
+   *
+   * @param index position of an element
+   * @return true if an element at given index is null, false otherwise
+   */
+  @Override
+  public boolean isNull(int index) {
+    return (isSet(index) == 0);
+  }
+
+  /**
+   * Check if an element at given index is an empty list.
+   * @param index position of an element
+   * @return true if an element at given index is an empty list or NULL, false otherwise
+   */
+  @Override
+  public boolean isEmpty(int index) {
+    if (isNull(index)) {
+      return true;
+    } else {
+      return sizeBuffer.getInt(index * SIZE_WIDTH) == 0;
+    }
+  }
+
+  /**
+   * Check whether the element at the given index is set (i.e. not null).
+   *
+   * @param index position of the element
+   * @return 1 if element at given index is not null, 0 otherwise
+   */
+  public int isSet(int index) {
+    final int byteIndex = index >> 3;
+    final byte b = validityBuffer.getByte(byteIndex);
+    final int bitIndex = index & 7;
+    return (b >> bitIndex) & 0x01;
+  }
+
+  /**
+   * Get the number of elements that are null in the vector.
+   *
+   * @return the number of null elements.
+   */
+  @Override
+  public int getNullCount() {
+    return BitVectorHelper.getNullCount(validityBuffer, valueCount);
+  }
+
+  /**
+   * Get the value capacity by considering the validity and offset capacities.
+   * Note that the size buffer capacity is not considered here since it has
+   * the same capacity as the offset buffer.
+   *
+   * @return the value capacity
+   */
+  @Override
+  public int getValueCapacity() {
+    return getValidityAndOffsetValueCapacity();
+  }
+
+  private int getValidityAndSizeValueCapacity() {
+    final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0);
+    final int sizeValueCapacity = Math.max(getSizeBufferValueCapacity(), 0);
+    return Math.min(offsetValueCapacity, sizeValueCapacity);
+  }
+
+  private int getValidityAndOffsetValueCapacity() {
+    final int offsetValueCapacity = Math.max(getOffsetBufferValueCapacity(), 0);
+    return Math.min(offsetValueCapacity, getValidityBufferValueCapacity());
+  }
+
+  private int getValidityBufferValueCapacity() {
+    return capAtMaxInt(validityBuffer.capacity() * 8);
+  }
+
+  /**
+   * Set the element at the given index to null.
+   * @param index position of the element to set to null
+   */
+  @Override
+  public void setNull(int index) {
+    while (index >= getValidityAndSizeValueCapacity()) {
+      reallocValidityAndSizeAndOffsetBuffers();
+    }
+
+    offsetBuffer.setInt(index * OFFSET_WIDTH, 0);
+    sizeBuffer.setInt(index * SIZE_WIDTH, 0);
+    BitVectorHelper.unsetBit(validityBuffer, index);
+  }
+
+  /**
+   * Start a new value in the ListView vector.
+   *
+   * @param index index of the value to start
+   * @return offset of the new value
+   */
+  @Override
+  public int startNewValue(int index) {
+    while (index >= getValidityAndSizeValueCapacity()) {
+      reallocValidityAndSizeAndOffsetBuffers();
+    }
+
+    if (index > 0) {
+      final int prevOffset = getLengthOfChildVectorByIndex(index);
+      offsetBuffer.setInt(index * OFFSET_WIDTH, prevOffset);
+    }
+
+    BitVectorHelper.setBit(validityBuffer, index);
+    return offsetBuffer.getInt(index * OFFSET_WIDTH);
+  }
+
+  /**
+   * Validate the invariants of the offset and size buffers:
+   * 0 <= offsets[i] <= length of the child array
+   * 0 <= offsets[i] + size[i] <= length of the child array
+   * @param offset the offset at a given index
+   * @param size the size at a given index
+   */
+  private void validateInvariants(int offset, int size) {
+    if (offset < 0) {
+      throw new IllegalArgumentException("Offset cannot be negative");
+    }
+
+    if (size < 0) {
+      throw new IllegalArgumentException("Size cannot be negative");
+    }
+
+    // 0 <= offsets[i] <= length of the child array
+    if (offset > this.vector.getValueCount()) {
+      throw new IllegalArgumentException("Offset is out of bounds.");
+    }
+
+    // 0 <= offsets[i] + size[i] <= length of the child array
+    if (offset + size > this.vector.getValueCount()) {
+      throw new IllegalArgumentException("Offset + size is out of bounds: it must not exceed the length of the child array.");
+    }
+  }
+
+  /**
+   * Set the offset at the given index.
+   * Make sure to use this function after updating the `field` vector and using `setValidity`.
+   * @param index index of the value to set
+   * @param value value to set
+   */
+  public void setOffset(int index, int value) {
+    validateInvariants(value, sizeBuffer.getInt(index * SIZE_WIDTH));
+
+    offsetBuffer.setInt(index * OFFSET_WIDTH, value);
+  }
+
+  /**
+   * Set the size at the given index.
+   * Make sure to use this function after using `setOffset`.
+   * @param index index of the value to set
+   * @param value value to set
+   */
+  public void setSize(int index, int value) {
+    validateInvariants(offsetBuffer.getInt(index * OFFSET_WIDTH), value);
+
+    sizeBuffer.setInt(index * SIZE_WIDTH, value);
+  }
+
+  /**
+   * Set the validity at the given index.
+   * @param index index of the value to set
+   * @param value value to set (0 for unset and 1 for set)
+   */
+  public void setValidity(int index, int value) {
+    if (value == 0) {
+      BitVectorHelper.unsetBit(validityBuffer, index);
+    } else {
+      BitVectorHelper.setBit(validityBuffer, index);
+    }
+  }
+
+  @Override
+  public void setValueCount(int valueCount) {
+    this.valueCount = valueCount;
+    if (valueCount > 0) {
+      while (valueCount > getValidityAndSizeValueCapacity()) {
+        /* check if the validity, size, and offset buffers need to be re-allocated */
+        reallocValidityAndSizeAndOffsetBuffers();
+      }
+    }
+    /* valueCount for the data vector is the current end offset */
+    final int childValueCount = (valueCount == 0) ? 0 : getLengthOfChildVector();
+    /* set the value count of data vector and this will take care of
+     * checking whether data buffer needs to be reallocated.
+     */
+    vector.setValueCount(childValueCount);
+  }
+
+  @Override
+  public int getElementStartIndex(int index) {
+    return offsetBuffer.getInt(index * OFFSET_WIDTH);
+  }
+
+  @Override
+  public int getElementEndIndex(int index) {
+    // the end index of a list view is its offset plus its size
+    return offsetBuffer.getInt(index * OFFSET_WIDTH) + sizeBuffer.getInt(index * SIZE_WIDTH);
+  }
+
+  @Override
+  public <T extends ValueVector> AddOrGetResult<T> addOrGetVector(FieldType fieldType) {
+    AddOrGetResult<T> result = super.addOrGetVector(fieldType);
+    invalidateReader();
+    return result;
+  }
+
+  @Override
+  public UnionVector promoteToUnion() {
+    UnionVector vector = new UnionVector("$data$", allocator, /* field type */ null, callBack);
+    replaceDataVector(vector);
+    invalidateReader();
+    if (callBack != null) {
+      callBack.doWork();
+    }
+    return vector;
+  }
+
+  private void invalidateReader() {
+    reader = null;
+  }
+
+  @Deprecated
+  @Override
+  public List<BufferBacked> getFieldInnerVectors() {
+    throw new UnsupportedOperationException("There are no inner vectors. 
Use getFieldBuffers"); + } + + public UnionListViewWriter getWriter() { + return new UnionListViewWriter(this); + } + + @Override + public int getValueCount() { + return valueCount; + } + + /** + * Get the density of this ListVector. + * @return density + */ + public double getDensity() { + if (valueCount == 0) { + return 0.0D; + } + final double totalListSize = getLengthOfChildVector(); + return totalListSize / valueCount; + } + + /** + * Validating ListViewVector creation based on the specification guideline. + */ + @Override + public void validate() { + for (int i = 0; i < valueCount; i++) { + final int offset = offsetBuffer.getInt(i * OFFSET_WIDTH); + final int size = sizeBuffer.getInt(i * SIZE_WIDTH); + validateInvariants(offset, size); + } + } + + /** + * End the current value. + * + * @param index index of the value to end + * @param size number of elements in the list that was written + */ + public void endValue(int index, int size) { + sizeBuffer.setInt(index * SIZE_WIDTH, size); + } +} diff --git a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java index 7f724829ef1eb..c59b997286d2d 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/complex/impl/PromotableWriter.java @@ -29,6 +29,7 @@ import org.apache.arrow.vector.complex.FixedSizeListVector; import org.apache.arrow.vector.complex.LargeListVector; import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; import org.apache.arrow.vector.complex.MapVector; import org.apache.arrow.vector.complex.StructVector; import org.apache.arrow.vector.complex.UnionVector; @@ -54,6 +55,7 @@ public class PromotableWriter extends AbstractPromotableFieldWriter { private final AbstractStructVector parentContainer; private final ListVector listVector; + private final ListViewVector listViewVector; private final FixedSizeListVector fixedListVector; private final LargeListVector largeListVector; private final NullableStructWriterFactory nullableStructWriterFactory; @@ -94,6 +96,7 @@ public PromotableWriter( NullableStructWriterFactory nullableStructWriterFactory) { this.parentContainer = parentContainer; this.listVector = null; + this.listViewVector = null; this.fixedListVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; @@ -142,6 +145,27 @@ public PromotableWriter( ListVector listVector, NullableStructWriterFactory nullableStructWriterFactory) { this.listVector = listVector; + this.listViewVector = null; + this.parentContainer = null; + this.fixedListVector = null; + this.largeListVector = null; + this.nullableStructWriterFactory = nullableStructWriterFactory; + init(v); + } + + /** + * Constructs a new instance. + * + * @param v The vector to initialize the writer with. + * @param listViewVector The vector that serves as a parent of v. + * @param nullableStructWriterFactory The factory to create the delegate writer. 
+ */ + public PromotableWriter( + ValueVector v, + ListViewVector listViewVector, + NullableStructWriterFactory nullableStructWriterFactory) { + this.listViewVector = listViewVector; + this.listVector = null; this.parentContainer = null; this.fixedListVector = null; this.largeListVector = null; @@ -163,6 +187,7 @@ public PromotableWriter( this.fixedListVector = fixedListVector; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.largeListVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); @@ -183,6 +208,7 @@ public PromotableWriter( this.fixedListVector = null; this.parentContainer = null; this.listVector = null; + this.listViewVector = null; this.nullableStructWriterFactory = nullableStructWriterFactory; init(v); } @@ -280,6 +306,8 @@ protected FieldWriter getWriter(MinorType type, ArrowType arrowType) { v = listVector.addOrGetVector(fieldType).getVector(); } else if (fixedListVector != null) { v = fixedListVector.addOrGetVector(fieldType).getVector(); + } else if (listViewVector != null) { + v = listViewVector.addOrGetVector(fieldType).getVector(); } else { v = largeListVector.addOrGetVector(fieldType).getVector(); } @@ -322,6 +350,8 @@ private FieldWriter promoteToUnion() { unionVector = fixedListVector.promoteToUnion(); } else if (largeListVector != null) { unionVector = largeListVector.promoteToUnion(); + } else if (listViewVector != null) { + unionVector = listViewVector.promoteToUnion(); } unionVector.addVector((FieldVector) tp.getTo()); writer = new UnionWriter(unionVector, nullableStructWriterFactory); diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java index e927acd4816ad..5c9efc445e0c4 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileReader.java @@ -715,6 +715,7 @@ private ArrowBuf readIntoBuffer(BufferAllocator allocator, BufferType bufferType private void readFromJsonIntoVector(Field field, FieldVector vector) throws JsonParseException, IOException { ArrowType type = field.getType(); + // TODO: https://github.com/apache/arrow/issues/41733 TypeLayout typeLayout = TypeLayout.getTypeLayout(type); List vectorTypes = typeLayout.getBufferTypes(); ArrowBuf[] vectorBuffers = new ArrowBuf[vectorTypes.size()]; diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java index f5e267e81256c..670881b238ecb 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/JsonFileWriter.java @@ -208,6 +208,7 @@ private void writeBatch(VectorSchemaRoot recordBatch) throws IOException { } private void writeFromVectorIntoJson(Field field, FieldVector vector) throws IOException { + // TODO: https://github.com/apache/arrow/issues/41733 List vectorTypes = TypeLayout.getTypeLayout(field.getType()).getBufferTypes(); List vectorBuffers = vector.getFieldBuffers(); if (vectorTypes.size() != vectorBuffers.size()) { diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java index f81d049a9257f..b910cfc6ecc25 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java +++ 
b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/ArrowRecordBatch.java @@ -56,17 +56,19 @@ public class ArrowRecordBatch implements ArrowMessage { private final List buffersLayout; + private final List variadicBufferCounts; + private boolean closed = false; public ArrowRecordBatch( int length, List nodes, List buffers) { - this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, true); + this(length, nodes, buffers, NoCompressionCodec.DEFAULT_BODY_COMPRESSION, null, true); } public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression) { - this(length, nodes, buffers, bodyCompression, true); + this(length, nodes, buffers, bodyCompression, null, true); } /** @@ -81,7 +83,7 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers) { - this(length, nodes, buffers, bodyCompression, alignBuffers, /*retainBuffers*/ true); + this(length, nodes, buffers, bodyCompression, null, alignBuffers, /*retainBuffers*/ true); } /** @@ -98,12 +100,48 @@ public ArrowRecordBatch( public ArrowRecordBatch( int length, List nodes, List buffers, ArrowBodyCompression bodyCompression, boolean alignBuffers, boolean retainBuffers) { + this(length, nodes, buffers, bodyCompression, null, alignBuffers, retainBuffers); + } + + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers) { + this(length, nodes, buffers, bodyCompression, variadicBufferCounts, alignBuffers, /*retainBuffers*/ true); + } + + /** + * Construct a record batch from nodes. + * + * @param length how many rows in this batch + * @param nodes field level info + * @param buffers will be retained until this recordBatch is closed + * @param bodyCompression compression info. + * @param variadicBufferCounts the number of buffers in each variadic section. + * @param alignBuffers Whether to align buffers to an 8 byte boundary. + * @param retainBuffers Whether to retain() each source buffer in the constructor. If false, the caller is + * responsible for retaining the buffers beforehand. + */ + public ArrowRecordBatch( + int length, List nodes, List buffers, + ArrowBodyCompression bodyCompression, List variadicBufferCounts, boolean alignBuffers, + boolean retainBuffers) { super(); this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; List arrowBuffers = new ArrayList<>(buffers.size()); long offset = 0; for (ArrowBuf arrowBuf : buffers) { @@ -129,12 +167,14 @@ public ArrowRecordBatch( // to distinguish this from the public constructor. 
private ArrowRecordBatch( boolean dummy, int length, List nodes, - List buffers, ArrowBodyCompression bodyCompression) { + List buffers, ArrowBodyCompression bodyCompression, + List variadicBufferCounts) { this.length = length; this.nodes = nodes; this.buffers = buffers; Preconditions.checkArgument(bodyCompression != null, "body compression cannot be null"); this.bodyCompression = bodyCompression; + this.variadicBufferCounts = variadicBufferCounts; this.closed = false; List arrowBuffers = new ArrayList<>(); long offset = 0; @@ -179,6 +219,14 @@ public List getBuffers() { return buffers; } + /** + * Get the record batch variadic buffer counts. + * @return the variadic buffer counts + */ + public List getVariadicBufferCounts() { + return variadicBufferCounts; + } + /** * Create a new ArrowRecordBatch which has the same information as this batch but whose buffers * are owned by that Allocator. @@ -195,7 +243,7 @@ public ArrowRecordBatch cloneWithTransfer(final BufferAllocator allocator) { .writerIndex(buf.writerIndex())) .collect(Collectors.toList()); close(); - return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression); + return new ArrowRecordBatch(false, length, nodes, newBufs, bodyCompression, variadicBufferCounts); } /** @@ -217,6 +265,24 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { compressOffset = bodyCompression.writeTo(builder); } + + // Start the variadicBufferCounts vector. + int variadicBufferCountsOffset = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufferCountsOffset = variadicBufferCounts.size(); + int elementSizeInBytes = 8; // Size of long in bytes + builder.startVector(elementSizeInBytes, variadicBufferCountsOffset, elementSizeInBytes); + + // Add each long to the builder. Note that elements should be added in reverse order. + for (int i = variadicBufferCounts.size() - 1; i >= 0; i--) { + long value = variadicBufferCounts.get(i); + builder.addLong(value); + } + + // End the vector. This returns an offset that you can use to refer to the vector. 
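+      // (Note: variadicBufferCountsOffset held the element count passed to startVector() above;
+      //  endVector() below reuses the same variable to store the offset of the finished vector.)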
+ variadicBufferCountsOffset = builder.endVector(); + } + RecordBatch.startRecordBatch(builder); RecordBatch.addLength(builder, length); RecordBatch.addNodes(builder, nodesOffset); @@ -224,6 +290,12 @@ public int writeTo(FlatBufferBuilder builder) { if (bodyCompression.getCodec() != NoCompressionCodec.COMPRESSION_TYPE) { RecordBatch.addCompression(builder, compressOffset); } + + // Add the variadicBufferCounts to the RecordBatch + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + RecordBatch.addVariadicBufferCounts(builder, variadicBufferCountsOffset); + } + return RecordBatch.endRecordBatch(builder); } @@ -247,8 +319,13 @@ public void close() { @Override public String toString() { + int variadicBufCount = 0; + if (variadicBufferCounts != null && !variadicBufferCounts.isEmpty()) { + variadicBufCount = variadicBufferCounts.size(); + } return "ArrowRecordBatch [length=" + length + ", nodes=" + nodes + ", #buffers=" + buffers.size() + - ", buffersLayout=" + buffersLayout + ", closed=" + closed + "]"; + ", #variadicBufferCounts=" + variadicBufCount + ", buffersLayout=" + buffersLayout + + ", closed=" + closed + "]"; } /** diff --git a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java index 9deb42c498cbb..099103cd178f8 100644 --- a/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java +++ b/java/vector/src/main/java/org/apache/arrow/vector/ipc/message/MessageSerializer.java @@ -701,7 +701,8 @@ public static MessageMetadataResult readMessage(ReadChannel in) throws IOExcepti throw new IOException( "Unexpected end of stream trying to read message."); } - messageBuffer.rewind(); + // see https://github.com/apache/arrow/issues/41717 for reason why we cast to java.nio.Buffer + ByteBuffer rewindBuffer = (ByteBuffer) ((java.nio.Buffer) messageBuffer).rewind(); // Load the message. 
 Message message = Message.getRootAsMessage(messageBuffer);
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
index 89d8441d42aa9..e10a65e3b2c53 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/types/Types.java
@@ -71,6 +71,7 @@
 import org.apache.arrow.vector.complex.FixedSizeListVector;
 import org.apache.arrow.vector.complex.LargeListVector;
 import org.apache.arrow.vector.complex.ListVector;
+import org.apache.arrow.vector.complex.ListViewVector;
 import org.apache.arrow.vector.complex.MapVector;
 import org.apache.arrow.vector.complex.StructVector;
 import org.apache.arrow.vector.complex.UnionVector;
@@ -136,6 +137,7 @@
 import org.apache.arrow.vector.types.pojo.ArrowType.LargeBinary;
 import org.apache.arrow.vector.types.pojo.ArrowType.LargeUtf8;
 import org.apache.arrow.vector.types.pojo.ArrowType.List;
+import org.apache.arrow.vector.types.pojo.ArrowType.ListView;
 import org.apache.arrow.vector.types.pojo.ArrowType.Map;
 import org.apache.arrow.vector.types.pojo.ArrowType.Null;
 import org.apache.arrow.vector.types.pojo.ArrowType.Struct;
@@ -692,6 +694,20 @@ public FieldWriter getNewFieldWriter(ValueVector vector) {
       return new UnionListWriter((ListVector) vector);
     }
   },
+  LISTVIEW(ListView.INSTANCE) {
+    @Override
+    public FieldVector getNewVector(
+        Field field,
+        BufferAllocator allocator,
+        CallBack schemaChangeCallback) {
+      return new ListViewVector(field.getName(), allocator, field.getFieldType(), schemaChangeCallback);
+    }
+
+    @Override
+    public FieldWriter getNewFieldWriter(ValueVector vector) {
+      return new UnionListViewWriter((ListViewVector) vector);
+    }
+  },
   LARGELIST(ArrowType.LargeList.INSTANCE) {
     @Override
     public FieldVector getNewVector(Field field, BufferAllocator allocator, CallBack schemaChangeCallback) {
@@ -1064,6 +1080,11 @@ public MinorType visit(Duration type) {
       return MinorType.DURATION;
     }

+    @Override
+    public MinorType visit(ListView type) {
+      return MinorType.LISTVIEW;
+    }
+
     @Override
     public MinorType visit(ExtensionType type) {
       return MinorType.EXTENSIONTYPE;
diff --git a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
index 0a67db0455b41..af5a67049f722 100644
--- a/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
+++ b/java/vector/src/main/java/org/apache/arrow/vector/validate/ValidateVectorBufferVisitor.java
@@ -51,6 +51,7 @@ private void validateVectorCommon(ValueVector vector) {

     if (vector instanceof FieldVector) {
       FieldVector fieldVector = (FieldVector) vector;
+      // TODO: https://github.com/apache/arrow/issues/41734
       int typeBufferCount = TypeLayout.getTypeBufferCount(arrowType);
       validateOrThrow(fieldVector.getFieldBuffers().size() == typeBufferCount,
           "Expected %s buffers in vector of type %s, got %s.",
diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
new file mode 100644
index 0000000000000..e64ed77b1eb9f
--- /dev/null
+++ b/java/vector/src/test/java/org/apache/arrow/vector/TestListViewVector.java
@@ -0,0 +1,1651 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.arrow.vector; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.complex.BaseRepeatedValueVector; +import org.apache.arrow.vector.complex.BaseRepeatedValueViewVector; +import org.apache.arrow.vector.complex.ListVector; +import org.apache.arrow.vector.complex.ListViewVector; +import org.apache.arrow.vector.complex.impl.UnionListViewWriter; +import org.apache.arrow.vector.complex.impl.UnionListWriter; +import org.apache.arrow.vector.holders.DurationHolder; +import org.apache.arrow.vector.holders.TimeStampMilliTZHolder; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.Types.MinorType; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +public class TestListViewVector { + + private BufferAllocator allocator; + + @BeforeEach + public void init() { + allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); + } + + @AfterEach + public void terminate() throws Exception { + allocator.close(); + } + + @Test + public void testBasicListViewVector() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + /* allocate memory */ + listViewWriter.allocate(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + /* the second list at index 1 is null (we are not setting any)*/ + + /* write the third list at index 2 */ + listViewWriter.setPosition(2); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(0); + listViewWriter.bigInt().writeBigInt(-127); + listViewWriter.bigInt().writeBigInt(127); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.endList(); + + /* write the fourth list at index 3 (empty list) */ + listViewWriter.setPosition(3); + listViewWriter.startList(); + listViewWriter.endList(); + + /* write the fifth list at index 4 */ + listViewWriter.setPosition(4); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(1); + 
listViewWriter.bigInt().writeBigInt(2); + listViewWriter.bigInt().writeBigInt(3); + listViewWriter.bigInt().writeBigInt(4); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + // check value count + assertEquals(5, listViewVector.getValueCount()); + + /* get vector at index 0 -- the value is a BigIntVector*/ + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + final FieldVector dataVec = listViewVector.getDataVector(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check data vector + assertEquals(12, ((BigIntVector) dataVec).get(0)); + assertEquals(-7, ((BigIntVector) dataVec).get(1)); + assertEquals(25, ((BigIntVector) dataVec).get(2)); + assertEquals(0, ((BigIntVector) dataVec).get(3)); + assertEquals(-127, ((BigIntVector) dataVec).get(4)); + assertEquals(127, ((BigIntVector) dataVec).get(5)); + assertEquals(50, ((BigIntVector) dataVec).get(6)); + assertEquals(1, ((BigIntVector) dataVec).get(7)); + assertEquals(2, ((BigIntVector) dataVec).get(8)); + assertEquals(3, ((BigIntVector) dataVec).get(9)); + assertEquals(4, ((BigIntVector) dataVec).get(10)); + + listViewVector.validate(); + } + } + + @Test + public void testImplicitNullVectors() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + /* allocate memory */ + listViewWriter.allocate(); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + /* write the first list at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(12); + listViewWriter.bigInt().writeBigInt(-7); + listViewWriter.bigInt().writeBigInt(25); + listViewWriter.endList(); + + int offSet0 = offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH); + int size0 = sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH); + + // after the first list is written, + // the initial offset must be 0, + // the size must be 3 (as there are 3 elements in the array), + // the lastSet must be 0 since, the first list is written at index 0. + + assertEquals(0, offSet0); + assertEquals(3, size0); + + listViewWriter.setPosition(5); + listViewWriter.startList(); + + // writing the 6th list at index 5, + // and the list items from index 1 through 4 are not populated. + // but since there is a gap between the 0th and 5th list, in terms + // of buffer allocation, the offset and size buffers must be updated + // to reflect the implicit null vectors. 
+
+      for (int i = 1; i < 5; i++) {
+        int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH);
+        int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH);
+        // The lists at indices 1 through 4 were never written, so they are implicit nulls:
+        // their offsets and sizes remain 0.
+        assertEquals(0, offSet);
+        assertEquals(0, size);
+      }
+
+      listViewWriter.bigInt().writeBigInt(12);
+      listViewWriter.bigInt().writeBigInt(25);
+      listViewWriter.endList();
+
+      int offSet5 = offSetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH);
+      int size5 = sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH);
+
+      assertEquals(3, offSet5);
+      assertEquals(2, size5);
+
+      listViewWriter.setPosition(10);
+      listViewWriter.startList();
+
+      // Write the 11th list at index 10; the lists at indices 6 through 9 are not populated.
+      // Since there is a gap between the 5th and 11th lists, the offset and size buffers
+      // for those positions simply stay zeroed, representing implicit nulls.
+      for (int i = 6; i < 10; i++) {
+        int offSet = offSetBuffer.getInt(i * BaseRepeatedValueViewVector.OFFSET_WIDTH);
+        int size = sizeBuffer.getInt(i * BaseRepeatedValueViewVector.SIZE_WIDTH);
+        // Likewise, the unwritten lists at indices 6 through 9 keep offset 0 and size 0.
+        assertEquals(0, offSet);
+        assertEquals(0, size);
+      }
+
+      listViewWriter.bigInt().writeBigInt(12);
+      listViewWriter.endList();
+
+      int offSet11 = offSetBuffer.getInt(10 * BaseRepeatedValueViewVector.OFFSET_WIDTH);
+      int size11 = sizeBuffer.getInt(10 * BaseRepeatedValueViewVector.SIZE_WIDTH);
+
+      assertEquals(5, offSet11);
+      assertEquals(1, size11);
+
+      listViewVector.setValueCount(11);
+
+      listViewVector.validate();
+    }
+  }
+
+  @Test
+  public void testNestedListViewVector() {
+    try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) {
+      UnionListViewWriter listViewWriter = listViewVector.getWriter();
+
+      /* allocate memory */
+      listViewWriter.allocate();
+
+      /* the dataVector that backs this listViewVector is itself a
+       * list vector for this test. 
+ */ + + /* write one or more inner lists at index 0 */ + listViewWriter.setPosition(0); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(50); + listViewWriter.list().bigInt().writeBigInt(100); + listViewWriter.list().bigInt().writeBigInt(200); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(75); + listViewWriter.list().bigInt().writeBigInt(125); + listViewWriter.list().bigInt().writeBigInt(150); + listViewWriter.list().bigInt().writeBigInt(175); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + /* write one or more inner lists at index 1 */ + listViewWriter.setPosition(1); + listViewWriter.startList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(10); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(15); + listViewWriter.list().bigInt().writeBigInt(20); + listViewWriter.list().endList(); + + listViewWriter.list().startList(); + listViewWriter.list().bigInt().writeBigInt(25); + listViewWriter.list().bigInt().writeBigInt(30); + listViewWriter.list().bigInt().writeBigInt(35); + listViewWriter.list().endList(); + + listViewWriter.endList(); + + listViewVector.setValueCount(2); + + // [[[50,100,200],[75,125,150,175]], [[10],[15,20],[25,30,35]]] + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + listViewVector.validate(); + } + } + + @Test + public void testNestedListVector() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + + MinorType listType = MinorType.LISTVIEW; + MinorType scalarType = MinorType.BIGINT; + + listViewVector.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList1 = (ListViewVector) listViewVector.getDataVector(); + innerList1.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList2 = (ListViewVector) innerList1.getDataVector(); + innerList2.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList3 = (ListViewVector) innerList2.getDataVector(); + innerList3.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList4 = (ListViewVector) innerList3.getDataVector(); + innerList4.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList5 = (ListViewVector) innerList4.getDataVector(); + innerList5.addOrGetVector(FieldType.nullable(listType.getType())); + + ListViewVector innerList6 = (ListViewVector) innerList5.getDataVector(); + innerList6.addOrGetVector(FieldType.nullable(scalarType.getType())); + + listViewVector.setInitialCapacity(128); + + listViewVector.validate(); + } + } + + private void setValuesInBuffer(int[] bufValues, ArrowBuf buffer, long bufWidth) { + for (int i = 0; i < bufValues.length; i++) { + buffer.setInt(i * bufWidth, bufValues[i]); + } + } + + /* + * Setting up the buffers directly needs to be validated with the base method used in + * the ListVector class where we use the approach of startList(), + * write to the child vector and endList(). + *

+ * To support this, we have to consider the following scenarios; + *

+ * 1. Only using directly buffer-based inserts. + * 2. Default list insertion followed by buffer-based inserts. + * 3. Buffer-based inserts followed by default list insertion. + */ + + /* Setting up buffers directly would require the following steps to be taken + * 0. Allocate buffers in listViewVector by calling `allocateNew` method. + * 1. Initialize the child vector using `initializeChildrenFromFields` method. + * 2. Set values in the child vector. + * 3. Set validity, offset and size buffers using `setValidity`, + * `setOffset` and `setSize` methods. + * 4. Set value count using `setValueCount` method. + */ + @Test + public void testBasicListViewSet() { + + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + // Set value count using `setValueCount` method. 
+ listViewVector.setValueCount(4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + listViewVector.validate(); + } + } + + @Test + public void testBasicListViewSetNested() { + // Expected listview + // [[[50,100,200],[75,125,150,175]],[[10],[15,20],[25,30,35]]] + + // Setting child vector + // [[50,100,200],[75,125,150,175],[10],[15,20],[25,30,35]] + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.List(), + null, null); + FieldType childFieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field childField = new Field("child-vector", childFieldType, null); + List children = new ArrayList<>(); + children.add(childField); + Field field = new Field("child-vector", fieldType, children); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. 
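+      // (The child created by initializeChildrenFromFields above is a ListVector,
+      //  since the field type used for it is ArrowType.List.)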
+ FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + ListVector childVector = (ListVector) fieldVector; + UnionListWriter listWriter = childVector.getWriter(); + listWriter.allocate(); + + listWriter.setPosition(0); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(50); + listWriter.bigInt().writeBigInt(100); + listWriter.bigInt().writeBigInt(200); + + listWriter.endList(); + + listWriter.setPosition(1); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(75); + listWriter.bigInt().writeBigInt(125); + listWriter.bigInt().writeBigInt(150); + listWriter.bigInt().writeBigInt(175); + + listWriter.endList(); + + listWriter.setPosition(2); + listWriter.startList(); + + listWriter.bigInt().writeBigInt(10); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(3); + + listWriter.bigInt().writeBigInt(15); + listWriter.bigInt().writeBigInt(20); + + listWriter.endList(); + + listWriter.startList(); + listWriter.setPosition(4); + + listWriter.bigInt().writeBigInt(25); + listWriter.bigInt().writeBigInt(30); + listWriter.bigInt().writeBigInt(35); + + listWriter.endList(); + + childVector.setValueCount(5); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 1); + + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 2); + + listViewVector.setSize(0, 2); + listViewVector.setSize(1, 3); + + // Set value count using `setValueCount` method. + listViewVector.setValueCount(2); + + assertEquals(2, listViewVector.getValueCount()); + + /* get listViewVector value at index 0 -- the value itself is a listViewVector */ + Object result = listViewVector.getObject(0); + ArrayList> resultSet = (ArrayList>) result; + ArrayList list; + + assertEquals(2, resultSet.size()); /* 2 inner lists at index 0 */ + assertEquals(3, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(4, resultSet.get(1).size()); /* size of the second inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(50), list.get(0)); + assertEquals(Long.valueOf(100), list.get(1)); + assertEquals(Long.valueOf(200), list.get(2)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(75), list.get(0)); + assertEquals(Long.valueOf(125), list.get(1)); + assertEquals(Long.valueOf(150), list.get(2)); + assertEquals(Long.valueOf(175), list.get(3)); + + /* get listViewVector value at index 1 -- the value itself is a listViewVector */ + result = listViewVector.getObject(1); + resultSet = (ArrayList>) result; + + assertEquals(3, resultSet.size()); /* 3 inner lists at index 1 */ + assertEquals(1, resultSet.get(0).size()); /* size of the first inner list */ + assertEquals(2, resultSet.get(1).size()); /* size of the second inner list */ + assertEquals(3, resultSet.get(2).size()); /* size of the third inner list */ + + list = resultSet.get(0); + assertEquals(Long.valueOf(10), list.get(0)); + + list = resultSet.get(1); + assertEquals(Long.valueOf(15), list.get(0)); + assertEquals(Long.valueOf(20), list.get(1)); + + list = resultSet.get(2); + assertEquals(Long.valueOf(25), list.get(0)); + assertEquals(Long.valueOf(30), list.get(1)); + assertEquals(Long.valueOf(35), list.get(2)); + + /* check underlying bitVector */ + assertFalse(listViewVector.isNull(0)); + assertFalse(listViewVector.isNull(1)); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + 
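+      // With offsets [0, 2] and sizes [2, 3] over the five child lists written above,
+      // the two list views are [[50,100,200],[75,125,150,175]] and [[10],[15,20],[25,30,35]],
+      // which is exactly what the offset and size buffer checks below verify.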
+ // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + listViewVector.validate(); + } + } + + @Test + public void testBasicListViewSetWithListViewWriter() { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + FieldType fieldType = new FieldType(true, new ArrowType.Int(64, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + BigIntVector childVector = (BigIntVector) fieldVector; + childVector.allocateNew(7); + + childVector.set(0, 12); + childVector.set(1, -7); + childVector.set(2, 25); + childVector.set(3, 0); + childVector.set(4, -127); + childVector.set(5, 127); + childVector.set(6, 50); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + + listViewVector.setOffset(0, 0); + listViewVector.setOffset(1, 3); + listViewVector.setOffset(2, 3); + listViewVector.setOffset(3, 7); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + + // Set value count using `setValueCount` method. 
+ listViewVector.setValueCount(4); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + + listViewWriter.setPosition(4); + listViewWriter.startList(); + + listViewWriter.bigInt().writeBigInt(121); + listViewWriter.bigInt().writeBigInt(-71); + listViewWriter.bigInt().writeBigInt(251); + listViewWriter.endList(); + + listViewVector.setValueCount(5); + + // check offset buffer + assertEquals(0, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check values + assertEquals(12, ((BigIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-7, ((BigIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(25, ((BigIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(0, ((BigIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(-127, ((BigIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(127, ((BigIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(50, ((BigIntVector) listViewVector.getDataVector()).get(6)); + assertEquals(121, ((BigIntVector) listViewVector.getDataVector()).get(7)); + assertEquals(-71, ((BigIntVector) listViewVector.getDataVector()).get(8)); + assertEquals(251, ((BigIntVector) listViewVector.getDataVector()).get(9)); + + listViewVector.validate(); + } + } + + @Test + public void testGetBufferAddress() throws 
Exception { + try (ListViewVector listViewVector = ListViewVector.empty("vector", allocator)) { + + UnionListViewWriter listViewWriter = listViewVector.getWriter(); + boolean error = false; + + listViewWriter.allocate(); + + listViewWriter.setPosition(0); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(50); + listViewWriter.bigInt().writeBigInt(100); + listViewWriter.bigInt().writeBigInt(200); + listViewWriter.endList(); + + listViewWriter.setPosition(1); + listViewWriter.startList(); + listViewWriter.bigInt().writeBigInt(250); + listViewWriter.bigInt().writeBigInt(300); + listViewWriter.endList(); + + listViewVector.setValueCount(2); + + /* check listVector contents */ + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(100), resultSet.get(1)); + assertEquals(Long.valueOf(200), resultSet.get(2)); + + result = listViewVector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(250), resultSet.get(0)); + assertEquals(Long.valueOf(300), resultSet.get(1)); + + List buffers = listViewVector.getFieldBuffers(); + + long bitAddress = listViewVector.getValidityBufferAddress(); + long offsetAddress = listViewVector.getOffsetBufferAddress(); + long sizeAddress = listViewVector.getSizeBufferAddress(); + + try { + listViewVector.getDataBufferAddress(); + } catch (UnsupportedOperationException ue) { + error = true; + } finally { + assertTrue(error); + } + + assertEquals(3, buffers.size()); + assertEquals(bitAddress, buffers.get(0).memoryAddress()); + assertEquals(offsetAddress, buffers.get(1).memoryAddress()); + assertEquals(sizeAddress, buffers.get(2).memoryAddress()); + + /* (3+2)/2 */ + assertEquals(2.5, listViewVector.getDensity(), 0); + listViewVector.validate(); + } + } + + @Test + public void testConsistentChildName() throws Exception { + try (ListViewVector listViewVector = ListViewVector.empty("sourceVector", allocator)) { + String emptyListStr = listViewVector.getField().toString(); + assertTrue(emptyListStr.contains(ListVector.DATA_VECTOR_NAME)); + + listViewVector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + String emptyVectorStr = listViewVector.getField().toString(); + assertTrue(emptyVectorStr.contains(ListVector.DATA_VECTOR_NAME)); + } + } + + @Test + public void testSetInitialCapacity() { + try (final ListViewVector vector = ListViewVector.empty("", allocator)) { + vector.addOrGetVector(FieldType.nullable(MinorType.INT.getType())); + + vector.setInitialCapacity(512); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512); + + vector.setInitialCapacity(512, 4); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 512 * 4); + + vector.setInitialCapacity(512, 0.1); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 51); + + vector.setInitialCapacity(512, 0.01); + vector.allocateNew(); + assertEquals(512, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 5); + + vector.setInitialCapacity(5, 0.1); + vector.allocateNew(); + assertEquals(8, vector.getValueCapacity()); + assertTrue(vector.getDataVector().getValueCapacity() >= 1); + + vector.validate(); + } + } + + @Test + 
public void testClearAndReuse() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + BigIntVector bigIntVector = + (BigIntVector) vector.addOrGetVector(FieldType.nullable(MinorType.BIGINT.getType())).getVector(); + vector.setInitialCapacity(10); + vector.allocateNew(); + + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + + // Clear and release the buffers to trigger a realloc when adding next value + vector.clear(); + + // The list vector should reuse a buffer when reallocating the offset buffer + vector.startNewValue(0); + bigIntVector.setSafe(0, 7); + vector.endValue(0, 1); + vector.startNewValue(1); + bigIntVector.setSafe(1, 8); + vector.endValue(1, 1); + vector.setValueCount(2); + + result = vector.getObject(0); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(7), resultSet.get(0)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(Long.valueOf(8), resultSet.get(0)); + + vector.validate(); + } + } + + @Test + public void testWriterGetField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Int(32, true)), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void testWriterUsingHolderGetTimestampMilliTZField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + TimeStampMilliTZHolder holder = new TimeStampMilliTZHolder(); + holder.timezone = "SomeFakeTimeZone"; + writer.startList(); + holder.value = 12341234L; + writer.timeStampMilliTZ().write(holder); + holder.value = 55555L; + writer.timeStampMilliTZ().write(holder); + + // Writing with a different timezone should throw + holder.timezone = "AsdfTimeZone"; + holder.value = 77777; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.timeStampMilliTZ().write(holder)); + assertEquals( + "holder.timezone: AsdfTimeZone not equal to vector timezone: SomeFakeTimeZone", + ex.getMessage()); + + writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, "SomeFakeTimeZone")), null); + Field expectedField = new Field(vector.getName(), FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void 
testWriterGetDurationField() { + // adopted from ListVector test cases + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + org.apache.arrow.vector.complex.writer.FieldWriter writer = vector.getWriter(); + writer.allocate(); + + DurationHolder durationHolder = new DurationHolder(); + durationHolder.unit = TimeUnit.MILLISECOND; + + writer.startList(); + durationHolder.value = 812374L; + writer.duration().write(durationHolder); + durationHolder.value = 143451L; + writer.duration().write(durationHolder); + + // Writing with a different unit should throw + durationHolder.unit = TimeUnit.SECOND; + durationHolder.value = 8888888; + IllegalArgumentException ex = assertThrows(IllegalArgumentException.class, + () -> writer.duration().write(durationHolder)); + assertEquals( + "holder.unit: SECOND not equal to vector unit: MILLISECOND", ex.getMessage()); + + writer.endList(); + vector.setValueCount(1); + + Field expectedDataField = new Field(BaseRepeatedValueVector.DATA_VECTOR_NAME, + FieldType.nullable(new ArrowType.Duration(TimeUnit.MILLISECOND)), null); + Field expectedField = new Field(vector.getName(), + FieldType.nullable(ArrowType.ListView.INSTANCE), + Arrays.asList(expectedDataField)); + + assertEquals(expectedField, writer.getField()); + + vector.validate(); + } + } + + @Test + public void testClose() throws Exception { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writer.startList(); + writer.integer().writeInt(1); + writer.integer().writeInt(2); + writer.endList(); + vector.setValueCount(2); + + assertTrue(vector.getBufferSize() > 0); + assertTrue(vector.getDataVector().getBufferSize() > 0); + + writer.close(); + assertEquals(0, vector.getBufferSize()); + assertEquals(0, vector.getDataVector().getBufferSize()); + + vector.validate(); + } + } + + @Test + public void testGetBufferSizeFor() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + //set some values + writeIntValues(writer, new int[] {1, 2}); + writeIntValues(writer, new int[] {3, 4}); + writeIntValues(writer, new int[] {5, 6}); + writeIntValues(writer, new int[] {7, 8, 9, 10}); + writeIntValues(writer, new int[] {11, 12, 13, 14}); + writer.setValueCount(5); + + IntVector dataVector = (IntVector) vector.getDataVector(); + int[] indices = new int[] {0, 2, 4, 6, 10, 14}; + + for (int valueCount = 1; valueCount <= 5; valueCount++) { + int validityBufferSize = BitVectorHelper.getValidityBufferSize(valueCount); + int offsetBufferSize = valueCount * BaseRepeatedValueViewVector.OFFSET_WIDTH; + int sizeBufferSize = valueCount * BaseRepeatedValueViewVector.SIZE_WIDTH; + + int expectedSize = validityBufferSize + offsetBufferSize + sizeBufferSize + + dataVector.getBufferSizeFor(indices[valueCount]); + assertEquals(expectedSize, vector.getBufferSizeFor(valueCount)); + } + vector.validate(); + } + } + + @Test + public void testIsEmpty() { + try (final ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + // set values [1,2], null, [], [5,6] + writeIntValues(writer, new int[] {1, 2}); + writer.setPosition(2); + writeIntValues(writer, new int[] {}); + writeIntValues(writer, new int[] {5, 6}); + writer.setValueCount(4); + + assertFalse(vector.isEmpty(0)); + 
assertTrue(vector.isNull(1)); + assertTrue(vector.isEmpty(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isEmpty(2)); + assertFalse(vector.isEmpty(3)); + + vector.validate(); + } + } + + @Test + public void testTotalCapacity() { + // adopted from ListVector test cases + final FieldType type = FieldType.nullable(MinorType.INT.getType()); + try (final ListViewVector vector = new ListViewVector("listview", allocator, type, null)) { + // Force the child vector to be allocated based on the type + // (this is a bad API: we have to track and repeat the type twice) + vector.addOrGetVector(type); + + // Specify the allocation size but do not allocate + vector.setInitialTotalCapacity(10, 100); + + // Finally, actually do the allocation + vector.allocateNewSafe(); + + // Note: allocator rounds up and can be greater than the requested allocation. + assertTrue(vector.getValueCapacity() >= 10); + assertTrue(vector.getDataVector().getValueCapacity() >= 100); + + vector.validate(); + } + } + + @Test + public void testSetNull1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.endList(); + + vector.setNull(1); + + writer.setPosition(2); + writer.startList(); + writer.bigInt().writeBigInt(30); + writer.bigInt().writeBigInt(40); + writer.endList(); + + vector.setNull(3); + vector.setNull(4); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(50); + writer.bigInt().writeBigInt(60); + writer.endList(); + + vector.setValueCount(6); + + assertFalse(vector.isNull(0)); + assertTrue(vector.isNull(1)); + assertFalse(vector.isNull(2)); + assertTrue(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(1, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(4, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(0); + ArrayList resultSet 
= (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + + result = vector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(30), resultSet.get(0)); + assertEquals(Long.valueOf(40), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(50), resultSet.get(0)); + assertEquals(Long.valueOf(60), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testSetNull2() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + // validate setting nulls first and then writing values + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet 
= (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + vector.validate(); + } + } + + @Test + public void testSetNull3() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + // validate setting values first and then writing nulls + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(3); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + writer.setPosition(5); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.bigInt().writeBigInt(80); + writer.endList(); + + vector.setNull(0); + vector.setNull(2); + vector.setNull(4); + + vector.setValueCount(6); + + assertTrue(vector.isNull(0)); + assertFalse(vector.isNull(1)); + assertTrue(vector.isNull(2)); + assertFalse(vector.isNull(3)); + assertTrue(vector.isNull(4)); + assertFalse(vector.isNull(5)); + + // validate buffers + + final ArrowBuf validityBuffer = vector.getValidityBuffer(); + final ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + assertEquals(0, BitVectorHelper.get(validityBuffer, 0)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 1)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 2)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 3)); + assertEquals(0, BitVectorHelper.get(validityBuffer, 4)); + assertEquals(1, BitVectorHelper.get(validityBuffer, 5)); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offsetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(5, offsetBuffer.getInt(5 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(3, sizeBuffer.getInt(5 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // validate values + + Object result = vector.getObject(1); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(10), resultSet.get(0)); + assertEquals(Long.valueOf(20), resultSet.get(1)); + assertEquals(Long.valueOf(30), resultSet.get(2)); + + result = vector.getObject(3); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(40), resultSet.get(0)); + assertEquals(Long.valueOf(50), 
resultSet.get(1)); + + result = vector.getObject(5); + resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + assertEquals(Long.valueOf(80), resultSet.get(2)); + + vector.validate(); + } + } + + @Test + public void testOverWrite1() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + vector.setValueCount(2); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + assertEquals(Long.valueOf(90), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testOverwriteWithNull() { + try (ListViewVector vector = ListViewVector.empty("listview", allocator)) { + UnionListViewWriter writer = vector.getWriter(); + writer.allocate(); + + ArrowBuf offsetBuffer = vector.getOffsetBuffer(); + ArrowBuf sizeBuffer = vector.getSizeBuffer(); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(10); + writer.bigInt().writeBigInt(20); + writer.bigInt().writeBigInt(30); + writer.endList(); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(40); + writer.bigInt().writeBigInt(50); + writer.endList(); + + vector.setValueCount(2); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offsetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(0); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setNull(1); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + assertTrue(vector.isNull(0)); + assertTrue(vector.isNull(1)); + + writer.setPosition(0); + writer.startList(); + writer.bigInt().writeBigInt(60); + writer.bigInt().writeBigInt(70); + writer.endList(); + + assertEquals(0, offsetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + writer.setPosition(1); + writer.startList(); + writer.bigInt().writeBigInt(80); + writer.bigInt().writeBigInt(90); + writer.endList(); + + assertEquals(2, offsetBuffer.getInt(1 * 
BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(2, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + vector.setValueCount(2); + + assertFalse(vector.isNull(0)); + assertFalse(vector.isNull(1)); + + Object result = vector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(60), resultSet.get(0)); + assertEquals(Long.valueOf(70), resultSet.get(1)); + + result = vector.getObject(1); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Long.valueOf(80), resultSet.get(0)); + assertEquals(Long.valueOf(90), resultSet.get(1)); + + vector.validate(); + } + } + + @Test + public void testOutOfOrderOffset1() { + // [[12, -7, 25], null, [0, -127, 127, 50], [], [50, 12]] + try (ListViewVector listViewVector = ListViewVector.empty("listview", allocator)) { + // Allocate buffers in listViewVector by calling `allocateNew` method. + listViewVector.allocateNew(); + + // Initialize the child vector using `initializeChildrenFromFields` method. + + FieldType fieldType = new FieldType(true, new ArrowType.Int(16, true), + null, null); + Field field = new Field("child-vector", fieldType, null); + listViewVector.initializeChildrenFromFields(Collections.singletonList(field)); + + // Set values in the child vector. + FieldVector fieldVector = listViewVector.getDataVector(); + fieldVector.clear(); + + SmallIntVector childVector = (SmallIntVector) fieldVector; + + childVector.allocateNew(7); + + childVector.set(0, 0); + childVector.set(1, -127); + childVector.set(2, 127); + childVector.set(3, 50); + childVector.set(4, 12); + childVector.set(5, -7); + childVector.set(6, 25); + + childVector.setValueCount(7); + + // Set validity, offset and size buffers using `setValidity`, + // `setOffset` and `setSize` methods. + listViewVector.setValidity(0, 1); + listViewVector.setValidity(1, 0); + listViewVector.setValidity(2, 1); + listViewVector.setValidity(3, 1); + listViewVector.setValidity(4, 1); + + listViewVector.setOffset(0, 4); + listViewVector.setOffset(1, 7); + listViewVector.setOffset(2, 0); + listViewVector.setOffset(3, 0); + listViewVector.setOffset(4, 3); + + listViewVector.setSize(0, 3); + listViewVector.setSize(1, 0); + listViewVector.setSize(2, 4); + listViewVector.setSize(3, 0); + listViewVector.setSize(4, 2); + + // Set value count using `setValueCount` method. 
+ listViewVector.setValueCount(5); + + final ArrowBuf offSetBuffer = listViewVector.getOffsetBuffer(); + final ArrowBuf sizeBuffer = listViewVector.getSizeBuffer(); + + // check offset buffer + assertEquals(4, offSetBuffer.getInt(0 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(7, offSetBuffer.getInt(1 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(2 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(0, offSetBuffer.getInt(3 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + assertEquals(3, offSetBuffer.getInt(4 * BaseRepeatedValueViewVector.OFFSET_WIDTH)); + + // check size buffer + assertEquals(3, sizeBuffer.getInt(0 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(1 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(4, sizeBuffer.getInt(2 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(0, sizeBuffer.getInt(3 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + assertEquals(2, sizeBuffer.getInt(4 * BaseRepeatedValueViewVector.SIZE_WIDTH)); + + // check child vector + assertEquals(0, ((SmallIntVector) listViewVector.getDataVector()).get(0)); + assertEquals(-127, ((SmallIntVector) listViewVector.getDataVector()).get(1)); + assertEquals(127, ((SmallIntVector) listViewVector.getDataVector()).get(2)); + assertEquals(50, ((SmallIntVector) listViewVector.getDataVector()).get(3)); + assertEquals(12, ((SmallIntVector) listViewVector.getDataVector()).get(4)); + assertEquals(-7, ((SmallIntVector) listViewVector.getDataVector()).get(5)); + assertEquals(25, ((SmallIntVector) listViewVector.getDataVector()).get(6)); + + // check values + Object result = listViewVector.getObject(0); + ArrayList resultSet = (ArrayList) result; + assertEquals(3, resultSet.size()); + assertEquals(Short.valueOf("12"), resultSet.get(0)); + assertEquals(Short.valueOf("-7"), resultSet.get(1)); + assertEquals(Short.valueOf("25"), resultSet.get(2)); + + assertTrue(listViewVector.isNull(1)); + + result = listViewVector.getObject(2); + resultSet = (ArrayList) result; + assertEquals(4, resultSet.size()); + assertEquals(Short.valueOf("0"), resultSet.get(0)); + assertEquals(Short.valueOf("-127"), resultSet.get(1)); + assertEquals(Short.valueOf("127"), resultSet.get(2)); + assertEquals(Short.valueOf("50"), resultSet.get(3)); + + assertTrue(listViewVector.isEmpty(3)); + + result = listViewVector.getObject(4); + resultSet = (ArrayList) result; + assertEquals(2, resultSet.size()); + assertEquals(Short.valueOf("50"), resultSet.get(0)); + assertEquals(Short.valueOf("12"), resultSet.get(1)); + + listViewVector.validate(); + } + } + + private void writeIntValues(UnionListViewWriter writer, int[] values) { + writer.startList(); + for (int v: values) { + writer.integer().writeInt(v); + } + writer.endList(); + } + +} diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java index 43f4c3b536fdc..3ffbcc29c9e59 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestMapVector.java @@ -17,13 +17,15 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertSame; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static 
org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertSame; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.util.ArrayList; +import java.util.Collections; +import java.util.List; import java.util.Map; import org.apache.arrow.memory.ArrowBuf; @@ -37,23 +39,24 @@ import org.apache.arrow.vector.complex.writer.BaseWriter.MapWriter; import org.apache.arrow.vector.types.Types.MinorType; import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; import org.apache.arrow.vector.util.JsonStringArrayList; import org.apache.arrow.vector.util.TransferPair; -import org.junit.After; -import org.junit.Before; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestMapVector { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new DirtyRootAllocator(Long.MAX_VALUE, (byte) 100); } - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -90,7 +93,7 @@ public void testBasicOperation() { mapReader.setPosition(i); for (int j = 0; j < i + 1; j++) { mapReader.next(); - assertEquals("record: " + i, j, mapReader.key().readLong().longValue()); + assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); assertEquals(j, mapReader.value().readInteger().intValue()); } } @@ -136,7 +139,7 @@ public void testBasicOperationNulls() { } else { for (int j = 0; j < i + 1; j++) { mapReader.next(); - assertEquals("record: " + i, j, mapReader.key().readLong().longValue()); + assertEquals(j, mapReader.key().readLong().longValue(), "record: " + i); if (i == 5) { assertFalse(mapReader.value().isSet()); } else { @@ -194,11 +197,11 @@ public void testCopyFrom() throws Exception { // assert the output vector is correct FieldReader reader = outVector.getReader(); - assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); reader.setPosition(1); - assertFalse("should be null", reader.isSet()); + assertFalse(reader.isSet(), "should be null"); reader.setPosition(2); - assertTrue("shouldn't be null", reader.isSet()); + assertTrue(reader.isSet(), "shouldn't be null"); /* index 0 */ @@ -460,15 +463,15 @@ public void testSplitAndTransfer() throws Exception { dataLength2 = toOffsetBuffer.getInt((i + 1) * MapVector.OFFSET_WIDTH) - toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); - assertEquals("Different data lengths at index: " + i + " and start: " + start, - dataLength1, dataLength2); + assertEquals(dataLength1, dataLength2, + "Different data lengths at index: " + i + " and start: " + start); offset1 = offsetBuffer.getInt((start + i) * MapVector.OFFSET_WIDTH); offset2 = toOffsetBuffer.getInt(i * MapVector.OFFSET_WIDTH); for (int j = 0; j < dataLength1; j++) { - assertEquals("Different data at indexes: " + offset1 + " and " + offset2, - dataVector.getObject(offset1), dataVector1.getObject(offset2)); + assertEquals(dataVector.getObject(offset1), dataVector1.getObject(offset2), + "Different data at indexes: " + offset1 + " and " + offset2); offset1++; offset2++; @@ -1178,4 +1181,21 @@ public void testGetTransferPairWithFieldAndCallBack() { toVector.clear(); } } + + @Test + public void testMakeTransferPairPreserveNullability() { + Field intField = new Field("int", 
FieldType.notNullable(MinorType.INT.getType()), null); + List fields = Collections.singletonList(intField); + Field structField = new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); + Field structField2 = new Field("struct", FieldType.notNullable(ArrowType.Struct.INSTANCE), fields); + FieldVector vec = structField.createVector(allocator); + + TransferPair tp = vec.getTransferPair(structField2, allocator); + tp.transfer(); + + FieldVector res = (FieldVector) tp.getTo(); + + assertEquals(intField, vec.getField().getChildren().get(0)); + assertEquals(intField, res.getField().getChildren().get(0)); + } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java index 97930f433d301..5a58133f2e2bd 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestTypeLayout.java @@ -17,82 +17,158 @@ package org.apache.arrow.vector; -import static org.junit.Assert.assertEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import java.util.Random; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.vector.types.DateUnit; import org.apache.arrow.vector.types.FloatingPointPrecision; import org.apache.arrow.vector.types.IntervalUnit; import org.apache.arrow.vector.types.TimeUnit; import org.apache.arrow.vector.types.UnionMode; import org.apache.arrow.vector.types.pojo.ArrowType; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; public class TestTypeLayout { + private BufferAllocator allocator; + + @BeforeEach + public void prepare() { + allocator = new RootAllocator(Integer.MAX_VALUE); + } + + @AfterEach + public void shutdown() { + allocator.close(); + } + + @Test public void testTypeBufferCount() { ArrowType type = new ArrowType.Int(8, true); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Sparse, new int[2]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Union(UnionMode.Dense, new int[1]); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Struct(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.List(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + 
assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeList(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Map(false); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 128); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Decimal(10, 10, 256); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.FixedSizeBinary(5); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Bool(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Binary(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Utf8(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Null(); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Date(DateUnit.DAY); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Time(TimeUnit.MILLISECOND, 32); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + 
TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Interval(IntervalUnit.DAY_TIME); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); type = new ArrowType.Duration(TimeUnit.MILLISECOND); - assertEquals(TypeLayout.getTypeBufferCount(type), TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } + + private String generateRandomString(int length) { + Random random = new Random(); + StringBuilder sb = new StringBuilder(length); + for (int i = 0; i < length; i++) { + sb.append(random.nextInt(10)); // 0-9 + } + return sb.toString(); + } + + @Test + public void testTypeBufferCountInVectorsWithVariadicBuffers() { + // empty vector + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } + // vector with long strings + try (ViewVarCharVector viewVarCharVector = new ViewVarCharVector("myvector", allocator)) { + viewVarCharVector.allocateNew(32, 6); + + viewVarCharVector.setSafe(0, generateRandomString(8).getBytes()); + viewVarCharVector.setSafe(1, generateRandomString(12).getBytes()); + viewVarCharVector.setSafe(2, generateRandomString(14).getBytes()); + viewVarCharVector.setSafe(3, generateRandomString(18).getBytes()); + viewVarCharVector.setSafe(4, generateRandomString(22).getBytes()); + viewVarCharVector.setSafe(5, generateRandomString(24).getBytes()); + + viewVarCharVector.setValueCount(6); + + ArrowType type = viewVarCharVector.getMinorType().getType(); + assertEquals(TypeLayout.getTypeBufferCount(type), + TypeLayout.getTypeLayout(type).getBufferLayouts().size()); + } } } diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java index efb5afac91b13..2d37b0b4eb9ad 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVarCharViewVector.java @@ -31,6 +31,7 @@ import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Objects; @@ -41,8 +42,11 @@ import org.apache.arrow.memory.RootAllocator; import org.apache.arrow.memory.util.ArrowBufPointer; import org.apache.arrow.memory.util.CommonUtil; +import org.apache.arrow.vector.ipc.message.ArrowRecordBatch; import org.apache.arrow.vector.testing.ValueVectorDataPopulator; import org.apache.arrow.vector.types.Types; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; import org.apache.arrow.vector.util.ReusableByteArray; import org.apache.arrow.vector.util.Text; import org.junit.jupiter.api.AfterEach; @@ -1451,6 +1455,68 @@ public void testSafeOverwriteLongFromALongerLongString() { } } + @Test + public void testVectorLoadUnload() { + + try (final ViewVarCharVector vector1 = new ViewVarCharVector("myvector", allocator)) { + + setVector(vector1, STR1, STR2, STR3, STR4, STR5, STR6); + + assertEquals(5, vector1.getLastSet()); + vector1.setValueCount(15); + assertEquals(14, 
vector1.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector1.get(0)); + assertArrayEquals(STR2, vector1.get(1)); + assertArrayEquals(STR3, vector1.get(2)); + assertArrayEquals(STR4, vector1.get(3)); + assertArrayEquals(STR5, vector1.get(4)); + assertArrayEquals(STR6, vector1.get(5)); + + Field field = vector1.getField(); + String fieldName = field.getName(); + + List fields = new ArrayList<>(); + List fieldVectors = new ArrayList<>(); + + fields.add(field); + fieldVectors.add(vector1); + + Schema schema = new Schema(fields); + + VectorSchemaRoot schemaRoot1 = new VectorSchemaRoot(schema, fieldVectors, vector1.getValueCount()); + VectorUnloader vectorUnloader = new VectorUnloader(schemaRoot1); + + try ( + ArrowRecordBatch recordBatch = vectorUnloader.getRecordBatch(); + BufferAllocator finalVectorsAllocator = allocator.newChildAllocator("new vector", 0, Long.MAX_VALUE); + VectorSchemaRoot schemaRoot2 = VectorSchemaRoot.create(schema, finalVectorsAllocator); + ) { + + VectorLoader vectorLoader = new VectorLoader(schemaRoot2); + vectorLoader.load(recordBatch); + + ViewVarCharVector vector2 = (ViewVarCharVector) schemaRoot2.getVector(fieldName); + /* + * lastSet would have internally been set by VectorLoader.load() when it invokes + * loadFieldBuffers. + */ + assertEquals(14, vector2.getLastSet()); + vector2.setValueCount(25); + assertEquals(24, vector2.getLastSet()); + + /* Check the vector output */ + assertArrayEquals(STR1, vector2.get(0)); + assertArrayEquals(STR2, vector2.get(1)); + assertArrayEquals(STR3, vector2.get(2)); + assertArrayEquals(STR4, vector2.get(3)); + assertArrayEquals(STR5, vector2.get(4)); + assertArrayEquals(STR6, vector2.get(5)); + } + } + } + private String generateRandomString(int length) { Random random = new Random(); StringBuilder sb = new StringBuilder(length); diff --git a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java index 71009a3337510..19700e02161c7 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/TestVectorReset.java @@ -93,6 +93,18 @@ public void testVariableTypeReset() { } } + @Test + public void testVariableViewTypeReset() { + try (final ViewVarCharVector vector = new ViewVarCharVector("ViewVarChar", allocator)) { + vector.allocateNewSafe(); + vector.set(0, "a".getBytes(StandardCharsets.UTF_8)); + vector.setLastSet(0); + vector.setValueCount(1); + resetVectorAndVerify(vector, vector.getBuffers(false)); + assertEquals(-1, vector.getLastSet()); + } + } + @Test public void testLargeVariableTypeReset() { try (final LargeVarCharVector vector = new LargeVarCharVector("LargeVarChar", allocator)) { diff --git a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java index ab8c6c634891e..c3e7ef8bf8b08 100644 --- a/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java +++ b/java/vector/src/test/java/org/apache/arrow/vector/compare/TestRangeEqualsVisitor.java @@ -18,8 +18,8 @@ package org.apache.arrow.vector.compare; import static org.apache.arrow.vector.testing.ValueVectorDataPopulator.setVector; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertTrue; 
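The testVectorLoadUnload case above drives the standard VectorUnloader -> ArrowRecordBatch -> VectorLoader round trip. As a minimal sketch of that pattern, assuming only the stock arrow-vector classes the test itself uses (the class name, vector name, and values below are illustrative, not part of the patch):

    import java.util.ArrayList;
    import java.util.Collections;
    import java.util.List;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.FieldVector;
    import org.apache.arrow.vector.IntVector;
    import org.apache.arrow.vector.VectorLoader;
    import org.apache.arrow.vector.VectorSchemaRoot;
    import org.apache.arrow.vector.VectorUnloader;
    import org.apache.arrow.vector.ipc.message.ArrowRecordBatch;
    import org.apache.arrow.vector.types.pojo.Schema;

    public class LoadUnloadSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
             IntVector source = new IntVector("ints", allocator)) {
          source.allocateNew(3);
          source.set(0, 1);
          source.set(1, 2);
          source.set(2, 3);
          source.setValueCount(3);

          Schema schema = new Schema(Collections.singletonList(source.getField()));
          List<FieldVector> fieldVectors = new ArrayList<>();
          fieldVectors.add(source);

          try (VectorSchemaRoot sourceRoot =
                   new VectorSchemaRoot(schema, fieldVectors, source.getValueCount());
               // Unload captures the source root's buffers as an ArrowRecordBatch ...
               ArrowRecordBatch batch = new VectorUnloader(sourceRoot).getRecordBatch();
               BufferAllocator childAllocator =
                   allocator.newChildAllocator("copy", 0, Long.MAX_VALUE);
               VectorSchemaRoot targetRoot = VectorSchemaRoot.create(schema, childAllocator)) {
            // ... and load re-populates an empty root built from the same schema.
            new VectorLoader(targetRoot).load(batch);
            IntVector copied = (IntVector) targetRoot.getVector("ints");
            System.out.println(copied.get(2)); // prints 3
          }
        }
      }
    }

The test above follows the same shape with a ViewVarCharVector; the only extra wrinkle it checks is the lastSet bookkeeping that VectorLoader.load() restores.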
import java.nio.charset.Charset; import java.util.Arrays; @@ -33,6 +33,7 @@ import org.apache.arrow.vector.IntVector; import org.apache.arrow.vector.LargeVarCharVector; import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.ViewVarCharVector; import org.apache.arrow.vector.ZeroVector; import org.apache.arrow.vector.compare.util.ValueEpsilonEqualizers; import org.apache.arrow.vector.complex.DenseUnionVector; @@ -53,16 +54,16 @@ import org.apache.arrow.vector.types.pojo.ArrowType; import org.apache.arrow.vector.types.pojo.Field; import org.apache.arrow.vector.types.pojo.FieldType; -import org.junit.After; -import org.junit.Before; -import org.junit.Ignore; -import org.junit.Test; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Disabled; +import org.junit.jupiter.api.Test; public class TestRangeEqualsVisitor { private BufferAllocator allocator; - @Before + @BeforeEach public void init() { allocator = new RootAllocator(Long.MAX_VALUE); } @@ -71,8 +72,11 @@ public void init() { private static final byte[] STR1 = "AAAAA1".getBytes(utf8Charset); private static final byte[] STR2 = "BBBBBBBBB2".getBytes(utf8Charset); private static final byte[] STR3 = "CCCC3".getBytes(utf8Charset); + private static final byte[] STR4 = "12345678901234A".getBytes(utf8Charset); + private static final byte[] STR5 = "A2345678901234ABC".getBytes(utf8Charset); + private static final byte[] STR6 = "AB45678901234ABCD".getBytes(utf8Charset); - @After + @AfterEach public void terminate() throws Exception { allocator.close(); } @@ -132,6 +136,55 @@ public void testBaseVariableVectorRangeEquals() { } } + @Test + public void testBaseVariableViewVectorRangeEquals() { + try (final ViewVarCharVector vector1 = new ViewVarCharVector("varchar", allocator); + final ViewVarCharVector vector2 = new ViewVarCharVector("varchar", allocator)) { + + setVector(vector1, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); + setVector(vector2, STR1, STR2, STR4, STR3, STR2, STR5, STR1, STR6, STR1, STR2, STR4); + + RangeEqualsVisitor visitor = new RangeEqualsVisitor(vector1, vector2); + // inclusion of long string in the middle + assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); + assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); + // inclusion of long string at the start + assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); + assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); + // inclusion of long string at the end + assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); + // unequal range + assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); + assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); + + // checking the same ranges when nulls are set + + vector1.setNull(1); + vector2.setNull(1); + + vector1.setNull(3); + vector2.setNull(3); + + vector1.setNull(5); + vector2.setNull(5); + + vector1.setNull(9); + vector2.setNull(9); + + // inclusion of long string in the middle + assertTrue(visitor.rangeEquals(new Range(1, 1, 3))); + assertFalse(visitor.rangeEquals(new Range(0, 1, 4))); + // inclusion of long string at the start + assertTrue(visitor.rangeEquals(new Range(2, 2, 4))); + assertFalse(visitor.rangeEquals(new Range(2, 5, 4))); + // inclusion of long string at the end + assertTrue(visitor.rangeEquals(new Range(4, 4, 4))); + // unequal range + assertTrue(visitor.rangeEquals(new Range(8, 0, 3))); + assertFalse(visitor.rangeEquals(new Range(4, 5, 3))); + } + } + @Test public void testListVectorWithDifferentChild() { try (final 
ListVector vector1 = ListVector.empty("list", allocator); @@ -476,7 +529,7 @@ public void testDenseUnionVectorEquals() { } } - @Ignore + @Disabled @Test public void testEqualsWithOutTypeCheck() { try (final IntVector intVector = new IntVector("int", allocator); diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc index b8f85b08632a3..1eb6de74fec65 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.cc +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.cc @@ -15,6 +15,7 @@ // specific language governing permissions and limitations // under the License. +#include "arrow/c/bridge.h" #include "arrow/util/utf8.h" #include "arrow/matlab/array/proxy/array.h" @@ -40,6 +41,7 @@ Array::Array(std::shared_ptr array) : array{std::move(array)} { REGISTER_METHOD(Array, getType); REGISTER_METHOD(Array, isEqual); REGISTER_METHOD(Array, slice); + REGISTER_METHOD(Array, exportToC); } std::shared_ptr Array::unwrap() { return array; } @@ -178,4 +180,20 @@ void Array::slice(libmexclass::proxy::method::Context& context) { output[0]["TypeID"] = factory.createScalar(type_id); context.outputs[0] = output; } + +void Array::exportToC(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + mda::StructArray opts = context.inputs[0]; + const mda::TypedArray array_address_mda = opts[0]["ArrowArrayAddress"]; + const mda::TypedArray schema_address_mda = opts[0]["ArrowSchemaAddress"]; + + auto arrow_array = reinterpret_cast(uint64_t(array_address_mda[0])); + auto arrow_schema = + reinterpret_cast(uint64_t(schema_address_mda[0])); + + MATLAB_ERROR_IF_NOT_OK_WITH_CONTEXT( + arrow::ExportArray(*array, arrow_array, arrow_schema), context, + error::C_EXPORT_FAILED); +} + } // namespace arrow::matlab::array::proxy diff --git a/matlab/src/cpp/arrow/matlab/array/proxy/array.h b/matlab/src/cpp/arrow/matlab/array/proxy/array.h index 61ba06a503bc4..c249693ac2797 100644 --- a/matlab/src/cpp/arrow/matlab/array/proxy/array.h +++ b/matlab/src/cpp/arrow/matlab/array/proxy/array.h @@ -45,6 +45,8 @@ class Array : public libmexclass::proxy::Proxy { void slice(libmexclass::proxy::method::Context& context); + void exportToC(libmexclass::proxy::method::Context& context); + std::shared_ptr array; }; diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array.cc b/matlab/src/cpp/arrow/matlab/c/proxy/array.cc new file mode 100644 index 0000000000000..a5f3418f1bcfa --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/array.cc @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
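The new testBaseVariableViewVectorRangeEquals case shown just before the MATLAB changes drives RangeEqualsVisitor with explicit Range(leftStart, rightStart, length) triples. A minimal sketch of that comparison API on plain VarCharVectors, restricted to calls that appear in the test (vector names and values below are illustrative):

    import java.nio.charset.StandardCharsets;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.VarCharVector;
    import org.apache.arrow.vector.compare.Range;
    import org.apache.arrow.vector.compare.RangeEqualsVisitor;

    public class RangeEqualsSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
             VarCharVector left = new VarCharVector("varchar", allocator);
             VarCharVector right = new VarCharVector("varchar", allocator)) {
          left.allocateNew();
          right.allocateNew();
          String[] values = {"a", "bb", "ccc"};
          for (int i = 0; i < values.length; i++) {
            byte[] bytes = values[i].getBytes(StandardCharsets.UTF_8);
            left.setSafe(i, bytes);
            right.setSafe(i + 1, bytes); // right holds the same values shifted one slot
          }
          left.setValueCount(3);
          right.setValueCount(4);

          RangeEqualsVisitor visitor = new RangeEqualsVisitor(left, right);
          // left[0..2] lines up with right[1..3] ...
          System.out.println(visitor.rangeEquals(new Range(0, 1, 3))); // true
          // ... but not with right[0..2], whose first slot was never set (null).
          System.out.println(visitor.rangeEquals(new Range(0, 0, 3))); // false
        }
      }
    }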
+ +#include +#include "arrow/c/abi.h" + +#include "arrow/matlab/c/proxy/array.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +Array::Array() : arrowArray{} { REGISTER_METHOD(Array, getAddress); } + +Array::~Array() { + if (arrowArray.release != NULL) { + arrowArray.release(&arrowArray); + arrowArray.release = NULL; + } +} + +libmexclass::proxy::MakeResult Array::make( + const libmexclass::proxy::FunctionArguments& constructor_arguments) { + return std::make_shared(); +} + +void Array::getAddress(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + + mda::ArrayFactory factory; + auto address = reinterpret_cast(&arrowArray); + context.outputs[0] = factory.createScalar(address); +} + +} // namespace arrow::matlab::c::proxy \ No newline at end of file diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array.h b/matlab/src/cpp/arrow/matlab/c/proxy/array.h new file mode 100644 index 0000000000000..bb35807fcd015 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/array.h @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "arrow/c/abi.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class Array : public libmexclass::proxy::Proxy { + public: + Array(); + + ~Array(); + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void getAddress(libmexclass::proxy::method::Context& context); + + struct ArrowArray arrowArray; +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.cc b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.cc new file mode 100644 index 0000000000000..b6f68332d1757 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.cc @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/array.h" +#include "arrow/c/bridge.h" + +#include "arrow/matlab/array/proxy/wrap.h" +#include "arrow/matlab/c/proxy/array_importer.h" +#include "arrow/matlab/error/error.h" + +#include "libmexclass/proxy/ProxyManager.h" + +namespace arrow::matlab::c::proxy { + +ArrayImporter::ArrayImporter() { REGISTER_METHOD(ArrayImporter, import); } + +libmexclass::proxy::MakeResult ArrayImporter::make( + const libmexclass::proxy::FunctionArguments& constructor_arguments) { + return std::make_shared(); +} + +void ArrayImporter::import(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + using namespace libmexclass::proxy; + + mda::StructArray args = context.inputs[0]; + const mda::TypedArray arrow_array_address_mda = args[0]["ArrowArrayAddress"]; + const mda::TypedArray arrow_schema_address_mda = + args[0]["ArrowSchemaAddress"]; + + const auto arrow_array_address = uint64_t(arrow_array_address_mda[0]); + const auto arrow_schema_address = uint64_t(arrow_schema_address_mda[0]); + + auto arrow_array = reinterpret_cast(arrow_array_address); + auto arrow_schema = reinterpret_cast(arrow_schema_address); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto array, + arrow::ImportArray(arrow_array, arrow_schema), + context, error::C_IMPORT_FAILED); + + MATLAB_ASSIGN_OR_ERROR_WITH_CONTEXT(auto array_proxy, + arrow::matlab::array::proxy::wrap(array), context, + error::UNKNOWN_PROXY_FOR_ARRAY_TYPE); + + mda::ArrayFactory factory; + const auto array_proxy_id = ProxyManager::manageProxy(array_proxy); + const auto array_proxy_id_mda = factory.createScalar(array_proxy_id); + const auto array_type_id_mda = + factory.createScalar(static_cast(array->type_id())); + + context.outputs[0] = array_proxy_id_mda; + context.outputs[1] = array_type_id_mda; +} + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h new file mode 100644 index 0000000000000..6459393058737 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/array_importer.h @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
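ArrayImporter::import above is the consumer half of the C Data Interface handshake whose producer half is Array::exportToC. For cross-reference, here is a hedged sketch of the same handshake done entirely in Java; it assumes the separate arrow-c-data module (org.apache.arrow.c.ArrowArray, ArrowSchema, Data), which is not part of this patch. Either end of this exchange could equally be the MATLAB side, since all that crosses the boundary is the pair of struct addresses:

    import org.apache.arrow.c.ArrowArray;
    import org.apache.arrow.c.ArrowSchema;
    import org.apache.arrow.c.Data;
    import org.apache.arrow.memory.BufferAllocator;
    import org.apache.arrow.memory.RootAllocator;
    import org.apache.arrow.vector.BigIntVector;
    import org.apache.arrow.vector.FieldVector;

    public class CDataRoundTripSketch {
      public static void main(String[] args) {
        try (BufferAllocator allocator = new RootAllocator();
             BigIntVector source = new BigIntVector("bigints", allocator);
             // The producer allocates the two C structs; their addresses play the
             // role of the uint64 values the MATLAB export(...) method receives.
             ArrowArray cArray = ArrowArray.allocateNew(allocator);
             ArrowSchema cSchema = ArrowSchema.allocateNew(allocator)) {
          source.allocateNew(2);
          source.set(0, 7L);
          source.set(1, 8L);
          source.setValueCount(2);

          // Producer side: fill the structs without copying the array data.
          Data.exportVector(allocator, source, /*dictionaryProvider=*/ null, cArray, cSchema);
          long arrayAddress = cArray.memoryAddress();
          long schemaAddress = cSchema.memoryAddress();

          // Consumer side: rebuild wrappers from the raw addresses and import,
          // which takes ownership by invoking the structs' release callbacks.
          try (ArrowArray received = ArrowArray.wrap(arrayAddress);
               ArrowSchema receivedSchema = ArrowSchema.wrap(schemaAddress);
               FieldVector imported = Data.importVector(allocator, received, receivedSchema, null)) {
            System.out.println(((BigIntVector) imported).get(1)); // prints 8
          }
        }
      }
    }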
+ +#pragma once + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class ArrayImporter : public libmexclass::proxy::Proxy { + public: + ArrayImporter(); + + ~ArrayImporter() = default; + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void import(libmexclass::proxy::method::Context& context); +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc b/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc new file mode 100644 index 0000000000000..7f239f5628720 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/schema.cc @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include +#include "arrow/c/abi.h" + +#include "arrow/matlab/c/proxy/schema.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +Schema::Schema() : arrowSchema{} { REGISTER_METHOD(Schema, getAddress); } + +Schema::~Schema() { + if (arrowSchema.release != NULL) { + arrowSchema.release(&arrowSchema); + arrowSchema.release = NULL; + } +} + +libmexclass::proxy::MakeResult Schema::make( + const libmexclass::proxy::FunctionArguments& constructor_arguments) { + return std::make_shared(); +} + +void Schema::getAddress(libmexclass::proxy::method::Context& context) { + namespace mda = ::matlab::data; + + mda::ArrayFactory factory; + auto address = reinterpret_cast(&arrowSchema); + context.outputs[0] = factory.createScalar(address); +} + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/c/proxy/schema.h b/matlab/src/cpp/arrow/matlab/c/proxy/schema.h new file mode 100644 index 0000000000000..8f781ea9c7341 --- /dev/null +++ b/matlab/src/cpp/arrow/matlab/c/proxy/schema.h @@ -0,0 +1,39 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "arrow/c/abi.h" + +#include "libmexclass/proxy/Proxy.h" + +namespace arrow::matlab::c::proxy { + +class Schema : public libmexclass::proxy::Proxy { + public: + Schema(); + + ~Schema(); + + static libmexclass::proxy::MakeResult make( + const libmexclass::proxy::FunctionArguments& constructor_arguments); + + protected: + void getAddress(libmexclass::proxy::method::Context& context); + + struct ArrowSchema arrowSchema; +}; + +} // namespace arrow::matlab::c::proxy diff --git a/matlab/src/cpp/arrow/matlab/error/error.h b/matlab/src/cpp/arrow/matlab/error/error.h index db8b715141ee8..58c43d8843e4b 100644 --- a/matlab/src/cpp/arrow/matlab/error/error.h +++ b/matlab/src/cpp/arrow/matlab/error/error.h @@ -240,5 +240,7 @@ static const char* ARRAY_SLICE_NON_POSITIVE_OFFSET = static const char* ARRAY_SLICE_NEGATIVE_LENGTH = "arrow:array:slice:NegativeLength"; static const char* ARRAY_SLICE_FAILED_TO_CREATE_ARRAY_PROXY = "arrow:array:slice:FailedToCreateArrayProxy"; +static const char* C_EXPORT_FAILED = "arrow:c:export:ExportFailed"; +static const char* C_IMPORT_FAILED = "arrow:c:import:ImportFailed"; } // namespace arrow::matlab::error diff --git a/matlab/src/cpp/arrow/matlab/proxy/factory.cc b/matlab/src/cpp/arrow/matlab/proxy/factory.cc index 23492f75deacc..9b95fcf128090 100644 --- a/matlab/src/cpp/arrow/matlab/proxy/factory.cc +++ b/matlab/src/cpp/arrow/matlab/proxy/factory.cc @@ -25,6 +25,9 @@ #include "arrow/matlab/array/proxy/time64_array.h" #include "arrow/matlab/array/proxy/timestamp_array.h" #include "arrow/matlab/buffer/proxy/buffer.h" +#include "arrow/matlab/c/proxy/array.h" +#include "arrow/matlab/c/proxy/array_importer.h" +#include "arrow/matlab/c/proxy/schema.h" #include "arrow/matlab/error/error.h" #include "arrow/matlab/io/csv/proxy/table_reader.h" #include "arrow/matlab/io/csv/proxy/table_writer.h" @@ -99,6 +102,9 @@ libmexclass::proxy::MakeResult Factory::make_proxy( REGISTER_PROXY(arrow.io.feather.proxy.Reader , arrow::matlab::io::feather::proxy::Reader); REGISTER_PROXY(arrow.io.csv.proxy.TableWriter , arrow::matlab::io::csv::proxy::TableWriter); REGISTER_PROXY(arrow.io.csv.proxy.TableReader , arrow::matlab::io::csv::proxy::TableReader); + REGISTER_PROXY(arrow.c.proxy.Array , arrow::matlab::c::proxy::Array); + REGISTER_PROXY(arrow.c.proxy.ArrayImporter , arrow::matlab::c::proxy::ArrayImporter); + REGISTER_PROXY(arrow.c.proxy.Schema , arrow::matlab::c::proxy::Schema); // clang-format on return libmexclass::error::Error{error::UNKNOWN_PROXY_ERROR_ID, diff --git a/matlab/src/matlab/+arrow/+array/Array.m b/matlab/src/matlab/+arrow/+array/Array.m index 4402055932b60..01bacdf5755dc 100644 --- a/matlab/src/matlab/+arrow/+array/Array.m +++ b/matlab/src/matlab/+arrow/+array/Array.m @@ -97,6 +97,19 @@ function displayScalarObject(obj) % Invoke isEqual proxy object method tf = obj.Proxy.isEqual(proxyIDs); end + + function export(obj, cArrowArrayAddress, cArrowSchemaAddress) + arguments + obj(1, 1) arrow.array.Array + cArrowArrayAddress(1, 1) uint64 + cArrowSchemaAddress(1, 1) uint64 + end + args = struct(... + ArrowArrayAddress=cArrowArrayAddress,... + ArrowSchemaAddress=cArrowSchemaAddress... 
+ ); + obj.Proxy.exportToC(args); + end end methods (Hidden) @@ -108,4 +121,15 @@ function displayScalarObject(obj) array = traits.ArrayConstructor(proxy); end end + + methods (Static) + function array = import(cArray, cSchema) + arguments + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + importer = arrow.c.internal.ArrayImporter(); + array = importer.import(cArray, cSchema); + end + end end diff --git a/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m b/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m new file mode 100644 index 0000000000000..3f2f7445b3d6d --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/+internal/ArrayImporter.m @@ -0,0 +1,50 @@ +%ARRAYIMPORTER Imports Arrow Array using the C Data Interface Format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef ArrayImporter < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + methods + + function obj = ArrayImporter() + proxyName = "arrow.c.proxy.ArrayImporter"; + proxy = arrow.internal.proxy.create(proxyName, struct()); + obj.Proxy = proxy; + end + + function array = import(obj, cArray, cSchema) + arguments + obj(1, 1) arrow.c.internal.ArrayImporter + cArray(1, 1) arrow.c.Array + cSchema(1, 1) arrow.c.Schema + end + args = struct(... + ArrowArrayAddress=cArray.Address,... + ArrowSchemaAddress=cSchema.Address... + ); + [proxyID, typeID] = obj.Proxy.import(args); + traits = arrow.type.traits.traits(arrow.type.ID(typeID)); + proxy = libmexclass.proxy.Proxy(Name=traits.ArrayProxyClassName, ID=proxyID); + array = traits.ArrayConstructor(proxy); + end + + end + +end + diff --git a/matlab/src/matlab/+arrow/+c/Array.m b/matlab/src/matlab/+arrow/+c/Array.m new file mode 100644 index 0000000000000..574fca9afebd8 --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/Array.m @@ -0,0 +1,37 @@ +%ARRAY Wrapper for an Arrow C Data Interface format ArrowArray C struct pointer. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. 
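The import path also needs a well-defined failure mode: if the supplied structs were never populated by an export, arrow::ImportArray fails and the MATLAB layer reports it under the new arrow:c:import:ImportFailed identifier (C_IMPORT_FAILED above), as exercised by tRoundTrip.m further below. For comparison, a hedged sketch of the analogous guard in pyarrow, where importing from a zero-initialized (hence apparently released) ArrowArray raises instead of crashing:

import pyarrow as pa
from pyarrow.cffi import ffi

# A freshly allocated struct is zero-initialized, so release == NULL and it
# is indistinguishable from an ArrowArray that has already been released.
c_array = ffi.new("struct ArrowArray*")
array_address = int(ffi.cast("uintptr_t", c_array))

try:
    pa.Array._import_from_c(array_address, pa.float64())
except ValueError as exc:  # pa.ArrowInvalid derives from ValueError
    print(exc)             # something like "Cannot import released ArrowArray"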
+classdef Array < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + properties(Dependent, GetAccess=public, SetAccess=private) + Address(1, 1) uint64 + end + + methods + function obj = Array() + proxyName = "arrow.c.proxy.Array"; + obj.Proxy = arrow.internal.proxy.create(proxyName); + end + + function address = get.Address(obj) + address = obj.Proxy.getAddress(); + end + end +end \ No newline at end of file diff --git a/matlab/src/matlab/+arrow/+c/Schema.m b/matlab/src/matlab/+arrow/+c/Schema.m new file mode 100644 index 0000000000000..29eba59016044 --- /dev/null +++ b/matlab/src/matlab/+arrow/+c/Schema.m @@ -0,0 +1,37 @@ +%SCHEMA Wrapper for an Arrow C Data Interface format ArrowSchema C struct pointer. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef Schema < matlab.mixin.Scalar + + properties (Hidden, SetAccess=private, GetAccess=public) + Proxy + end + + properties(Dependent, GetAccess=public, SetAccess=private) + Address(1, 1) uint64 + end + + methods + function obj = Schema() + proxyName = "arrow.c.proxy.Schema"; + obj.Proxy = arrow.internal.proxy.create(proxyName); + end + + function address = get.Address(obj) + address = obj.Proxy.getAddress(); + end + end +end \ No newline at end of file diff --git a/matlab/test/arrow/c/tArray.m b/matlab/test/arrow/c/tArray.m new file mode 100644 index 0000000000000..f8caf48065114 --- /dev/null +++ b/matlab/test/arrow/c/tArray.m @@ -0,0 +1,48 @@ +%TARRAY Defines unit tests for arrow.c.Array. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef tArray < matlab.unittest.TestCase + + methods (Test) + function TestClassStructure(testCase) + array = arrow.c.Array(); + + % Verify array is an instance of arrow.c.Array. + testCase.verifyInstanceOf(array, "arrow.c.Array"); + + % Verify array has one public property named Address. 
+ props = properties(array); + testCase.verifyEqual(props, {'Address'}); + end + + function TestAddressProperty(testCase) + array = arrow.c.Array(); + + % It's impossible to know what the value of Address will be. + % Just verify Address is a scalar uint64. + address = array.Address; + testCase.verifyInstanceOf(address, "uint64"); + testCase.verifyTrue(isscalar(address)); + end + + function TestAddressNoSetter(testCase) + % Verify the Address property is read-only. + array = arrow.c.Array(); + fcn = @() setfield(array, "Address", uint64(10)); + testCase.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + end +end \ No newline at end of file diff --git a/matlab/test/arrow/c/tRoundTrip.m b/matlab/test/arrow/c/tRoundTrip.m new file mode 100644 index 0000000000000..a72dbe2679a2d --- /dev/null +++ b/matlab/test/arrow/c/tRoundTrip.m @@ -0,0 +1,182 @@ +%TROUNDTRIP Tests for roundtripping using the C Data Interface format. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef tRoundTrip < matlab.unittest.TestCase + + methods (Test) + + function EmptyArray(testCase) + expected = arrow.array(double.empty(0, 1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ArrayWithNulls(testCase) + % Scalar null + expected = arrow.array(double(NaN)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector with nulls + expected = arrow.array([1, NaN, 3, NaN, 5]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector all nulls + expected = arrow.array([NaN, NaN, NaN]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function Float64Array(testCase) + % Scalar + expected = arrow.array(double(1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array([1, 2, 3]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function StringArray(testCase) + % 
Scalar + expected = arrow.array("A"); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array(["A", "B", "C"]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function TimestampArray(testCase) + % Scalar + expected = arrow.array(datetime(2024, 1, 1)); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + + % Vector + expected = arrow.array([... + datetime(2024, 1, 1),... + datetime(2024, 1, 2),... + datetime(2024, 1, 3)... + ]); + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + + expected.export(cArray.Address, cSchema.Address); + actual = arrow.array.Array.import(cArray, cSchema); + + testCase.verifyEqual(actual, expected); + end + + function ExportErrorWrongInputTypes(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export("cArray.Address", "cSchema.Address"); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ExportTooFewInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ExportTooManyInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() A.export("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorWrongInputTypes(testCase) + cArray = "arrow.c.Array"; + cSchema = "arrow.c.Schema"; + fcn = @() arrow.array.Array.import(cArray, cSchema); + testCase.verifyError(fcn, "MATLAB:validation:UnableToConvert"); + end + + function ImportTooFewInputs(testCase) + fcn = @() arrow.array.Array.import(); + testCase.verifyError(fcn, "MATLAB:minrhs"); + end + + function ImportTooManyInputs(testCase) + A = arrow.array([1, 2, 3]); + fcn = @() arrow.array.Array.import("A", "B", "C"); + testCase.verifyError(fcn, "MATLAB:TooManyInputs"); + end + + function ImportErrorImportFailed(testCase) + cArray = arrow.c.Array(); + cSchema = arrow.c.Schema(); + % An arrow:c:import:ImportFailed error should be thrown + % if the supplied arrow.c.Array and arrow.c.Schema were + % never populated previously from an exported Array. + fcn = @() arrow.array.Array.import(cArray, cSchema); + testCase.verifyError(fcn, "arrow:c:import:ImportFailed"); + end + + end + +end diff --git a/matlab/test/arrow/c/tSchema.m b/matlab/test/arrow/c/tSchema.m new file mode 100644 index 0000000000000..16dcf1965b463 --- /dev/null +++ b/matlab/test/arrow/c/tSchema.m @@ -0,0 +1,48 @@ +%TSCHEMA Defines unit tests for arrow.c.Schema. + +% Licensed to the Apache Software Foundation (ASF) under one or more +% contributor license agreements. See the NOTICE file distributed with +% this work for additional information regarding copyright ownership. +% The ASF licenses this file to you under the Apache License, Version +% 2.0 (the "License"); you may not use this file except in compliance +% with the License. 
You may obtain a copy of the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, +% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or +% implied. See the License for the specific language governing +% permissions and limitations under the License. +classdef tSchema < matlab.unittest.TestCase + + methods (Test) + function TestClassStructure(testCase) + schema = arrow.c.Schema(); + + % Verify schema is an instance of arrow.c.Schema. + testCase.verifyInstanceOf(schema, "arrow.c.Schema"); + + % Verify schema has one public property named Address. + props = properties(schema); + testCase.verifyEqual(props, {'Address'}); + end + + function TestAddressProperty(testCase) + schema = arrow.c.Schema(); + + % It's impossible to know what the value of Address will be. + % Just verify Address is a scalar uint64. + address = schema.Address; + testCase.verifyInstanceOf(address, "uint64"); + testCase.verifyTrue(isscalar(address)); + end + + function TestAddressNoSetter(testCase) + % Verify the Address property is read-only. + schema = arrow.c.Schema(); + fcn = @() setfield(schema, "Address", uint64(10)); + testCase.verifyError(fcn, "MATLAB:class:SetProhibited"); + end + end +end \ No newline at end of file diff --git a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake index e1641842ca8b9..92e9f59145acc 100644 --- a/matlab/tools/cmake/BuildMatlabArrowInterface.cmake +++ b/matlab/tools/cmake/BuildMatlabArrowInterface.cmake @@ -75,7 +75,10 @@ set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_SOURCES "${CMAKE_SOURCE_DIR}/src/cpp/a "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/csv/proxy/table_writer.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/io/csv/proxy/table_reader.cc" "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/index/validate.cc" - "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc") + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/buffer/proxy/buffer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/array_importer.cc" + "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/c/proxy/schema.cc") set(MATLAB_ARROW_LIBMEXCLASS_CLIENT_PROXY_FACTORY_INCLUDE_DIR "${CMAKE_SOURCE_DIR}/src/cpp/arrow/matlab/proxy") diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 212862357ace2..a8bbed117163d 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -108,25 +108,6 @@ if(UNIX) endif() endif() -# Top level cmake dir -if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_CURRENT_SOURCE_DIR}") - option(PYARROW_BUILD_ACERO "Build the PyArrow Acero integration" OFF) - option(PYARROW_BUILD_CUDA "Build the PyArrow CUDA support" OFF) - option(PYARROW_BUILD_DATASET "Build the PyArrow Dataset integration" OFF) - option(PYARROW_BUILD_FLIGHT "Build the PyArrow Flight integration" OFF) - option(PYARROW_BUILD_GANDIVA "Build the PyArrow Gandiva integration" OFF) - option(PYARROW_BUILD_ORC "Build the PyArrow ORC integration" OFF) - option(PYARROW_BUILD_PARQUET "Build the PyArrow Parquet integration" OFF) - option(PYARROW_BUILD_PARQUET_ENCRYPTION - "Build the PyArrow Parquet encryption integration" OFF) - option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) - option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF) - option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF) - 
set(PYARROW_CXXFLAGS - "" - CACHE STRING "Compiler flags to append when compiling Arrow") -endif() - find_program(CCACHE_FOUND ccache) if(CCACHE_FOUND AND NOT CMAKE_C_COMPILER_LAUNCHER @@ -265,11 +246,70 @@ message(STATUS "NumPy include dir: ${NUMPY_INCLUDE_DIRS}") include(UseCython) -# PyArrow C++ +# Arrow C++ and set default PyArrow build options include(GNUInstallDirs) - find_package(Arrow REQUIRED) +macro(define_option name description arrow_option) + set("PYARROW_${name}" + "AUTO" + CACHE STRING ${description}) + + if("${PYARROW_${name}}" STREQUAL "AUTO") + # by default, first check if env variable exists, otherwise use Arrow C++ config + set(env_variable "PYARROW_WITH_${name}") + if(DEFINED ENV{${env_variable}}) + if($ENV{${env_variable}}) + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + else() + if(${arrow_option}) + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + endif() + else() + if("${PYARROW_${name}}") + set("PYARROW_BUILD_${name}" ON) + else() + set("PYARROW_BUILD_${name}" OFF) + endif() + endif() +endmacro() + +define_option(ACERO "Build the PyArrow Acero integration" ARROW_ACERO) +define_option(CUDA "Build the PyArrow CUDA support" ARROW_CUDA) +define_option(DATASET "Build the PyArrow Dataset integration" ARROW_DATASET) +define_option(FLIGHT "Build the PyArrow Flight integration" ARROW_FLIGHT) +define_option(GANDIVA "Build the PyArrow Gandiva integration" ARROW_GANDIVA) +define_option(ORC "Build the PyArrow ORC integration" ARROW_ORC) +define_option(PARQUET "Build the PyArrow Parquet integration" ARROW_PARQUET) +define_option(PARQUET_ENCRYPTION "Build the PyArrow Parquet encryption integration" + PARQUET_REQUIRE_ENCRYPTION) +define_option(SUBSTRAIT "Build the PyArrow Substrait integration" ARROW_SUBSTRAIT) +define_option(AZURE "Build the PyArrow Azure integration" ARROW_AZURE) +define_option(GCS "Build the PyArrow GCS integration" ARROW_GCS) +define_option(S3 "Build the PyArrow S3 integration" ARROW_S3) +define_option(HDFS "Build the PyArrow HDFS integration" ARROW_HDFS) +option(PYARROW_BUNDLE_ARROW_CPP "Bundle the Arrow C++ libraries" OFF) +option(PYARROW_BUNDLE_CYTHON_CPP "Bundle the C++ files generated by Cython" OFF) +option(PYARROW_GENERATE_COVERAGE "Build with Cython code coverage enabled" OFF) +set(PYARROW_CXXFLAGS + "" + CACHE STRING "Compiler flags to append when compiling PyArrow C++") + +# enforce module dependencies +if(PYARROW_BUILD_SUBSTRAIT) + set(PYARROW_BUILD_DATASET ON) +endif() +if(PYARROW_BUILD_DATASET) + set(PYARROW_BUILD_ACERO ON) +endif() + +# PyArrow C++ set(PYARROW_CPP_ROOT_DIR pyarrow/src) set(PYARROW_CPP_SOURCE_DIR ${PYARROW_CPP_ROOT_DIR}/arrow/python) set(PYARROW_CPP_SRCS @@ -305,6 +345,7 @@ set(PYARROW_CPP_LINK_LIBS "") # Check all the options from Arrow and PyArrow C++ to be in line if(PYARROW_BUILD_DATASET) + message(STATUS "Building PyArrow with Dataset") if(NOT ARROW_DATASET) message(FATAL_ERROR "You must build Arrow C++ with ARROW_DATASET=ON") endif() @@ -317,6 +358,7 @@ if(PYARROW_BUILD_DATASET) endif() if(PYARROW_BUILD_ACERO) + message(STATUS "Building PyArrow with Acero") if(NOT ARROW_ACERO) message(FATAL_ERROR "You must build Arrow C++ with ARROW_ACERO=ON") endif() @@ -328,16 +370,17 @@ if(PYARROW_BUILD_ACERO) endif() endif() -if(PYARROW_BUILD_PARQUET OR PYARROW_BUILD_PARQUET_ENCRYPTION) +if(PYARROW_BUILD_PARQUET) + message(STATUS "Building PyArrow with Parquet") if(NOT ARROW_PARQUET) message(FATAL_ERROR "You must build Arrow C++ with ARROW_PARQUET=ON") 
endif() find_package(Parquet REQUIRED) -endif() - -if(PYARROW_BUILD_HDFS) - if(NOT ARROW_HDFS) - message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") +else() + if(PYARROW_BUILD_PARQUET_ENCRYPTION) + message(WARNING "Building PyArrow with Parquet Encryption is requested, but Parquet itself is not enabled. Ignoring the Parquet Encryption setting." + ) + set(PYARROW_BUILD_PARQUET_ENCRYPTION OFF) endif() endif() @@ -400,6 +443,7 @@ endif() set(PYARROW_CPP_FLIGHT_SRCS ${PYARROW_CPP_SOURCE_DIR}/flight.cc) if(PYARROW_BUILD_FLIGHT) + message(STATUS "Building PyArrow with Flight") if(NOT ARROW_FLIGHT) message(FATAL_ERROR "You must build Arrow C++ with ARROW_FLIGHT=ON") endif() @@ -555,23 +599,39 @@ set_source_files_properties(pyarrow/lib.pyx PROPERTIES CYTHON_API TRUE) set(LINK_LIBS arrow_python) if(PYARROW_BUILD_AZURE) + message(STATUS "Building PyArrow with Azure") + if(NOT ARROW_AZURE) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_AZURE=ON") + endif() list(APPEND CYTHON_EXTENSIONS _azurefs) endif() if(PYARROW_BUILD_GCS) + message(STATUS "Building PyArrow with GCS") + if(NOT ARROW_GCS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_GCS=ON") + endif() list(APPEND CYTHON_EXTENSIONS _gcsfs) endif() if(PYARROW_BUILD_S3) + message(STATUS "Building PyArrow with S3") + if(NOT ARROW_S3) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_S3=ON") + endif() list(APPEND CYTHON_EXTENSIONS _s3fs) endif() if(PYARROW_BUILD_HDFS) + message(STATUS "Building PyArrow with HDFS") + if(NOT ARROW_HDFS) + message(FATAL_ERROR "You must build Arrow C++ with ARROW_HDFS=ON") + endif() list(APPEND CYTHON_EXTENSIONS _hdfs) endif() if(PYARROW_BUILD_CUDA) - # Arrow CUDA + message(STATUS "Building PyArrow with CUDA") if(NOT ARROW_CUDA) message(FATAL_ERROR "You must build Arrow C++ with ARROW_CUDA=ON") endif() @@ -646,8 +706,9 @@ if(PYARROW_BUILD_PARQUET) endif() endif() +# ORC if(PYARROW_BUILD_ORC) - # ORC + message(STATUS "Building PyArrow with ORC") if(NOT ARROW_ORC) message(FATAL_ERROR "You must build Arrow C++ with ARROW_ORC=ON") endif() @@ -679,6 +740,7 @@ endif() # Substrait if(PYARROW_BUILD_SUBSTRAIT) + message(STATUS "Building PyArrow with Substrait") if(NOT ARROW_SUBSTRAIT) message(FATAL_ERROR "You must build Arrow C++ with ARROW_SUBSTRAIT=ON") endif() @@ -696,6 +758,7 @@ endif() # Gandiva if(PYARROW_BUILD_GANDIVA) + message(STATUS "Building PyArrow with Gandiva") if(NOT ARROW_GANDIVA) message(FATAL_ERROR "You must build Arrow C++ with ARROW_GANDIVA=ON") endif() diff --git a/python/pyarrow/_parquet.pyx b/python/pyarrow/_parquet.pyx index 7bc68a288aa78..f7724b9b1fdc7 100644 --- a/python/pyarrow/_parquet.pyx +++ b/python/pyarrow/_parquet.pyx @@ -705,6 +705,22 @@ cdef class SortingColumn: """Whether null values appear before valid values (bool).""" return self.nulls_first + def to_dict(self): + """ + Get dictionary representation of the SortingColumn. + + Returns + ------- + dict + Dictionary with a key for each attribute of this class. 
+ """ + d = dict( + column_index=self.column_index, + descending=self.descending, + nulls_first=self.nulls_first + ) + return d + cdef class RowGroupMetaData(_Weakrefable): """Metadata for a single row group.""" diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi index 946c82b258241..406830ad4dd69 100644 --- a/python/pyarrow/array.pxi +++ b/python/pyarrow/array.pxi @@ -3920,12 +3920,11 @@ cdef class StructArray(Array): result : StructArray """ if by is not None: - tosort = self._flattened_field(by) + tosort, sort_keys = self._flattened_field(by), [("", order)] else: - tosort = self + tosort, sort_keys = self, [(field.name, order) for field in self.type] indices = _pc().sort_indices( - tosort, - options=_pc().SortOptions(sort_keys=[("", order)], **kwargs) + tosort, options=_pc().SortOptions(sort_keys=sort_keys, **kwargs) ) return self.take(indices) diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index f461513e8b3cf..8bfc31edc747d 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -2801,6 +2801,8 @@ cdef extern from "arrow/extension_type.h" namespace "arrow": cdef cppclass CExtensionType" arrow::ExtensionType"(CDataType): c_string extension_name() shared_ptr[CDataType] storage_type() + int byte_width() + int bit_width() @staticmethod shared_ptr[CArray] WrapArray(shared_ptr[CDataType] ext_type, diff --git a/python/pyarrow/parquet/core.py b/python/pyarrow/parquet/core.py index f54a203c8794c..81798b1544474 100644 --- a/python/pyarrow/parquet/core.py +++ b/python/pyarrow/parquet/core.py @@ -797,8 +797,9 @@ def _sanitize_table(table, new_schema, flavor): Specify if the byte_stream_split encoding should be used in general or only for some columns. If both dictionary and byte_stream_stream are enabled, then dictionary is preferred. - The byte_stream_split encoding is valid only for floating-point data types - and should be combined with a compression codec. + The byte_stream_split encoding is valid for integer, floating-point + and fixed-size binary data types (including decimals); it should be + combined with a compression codec so as to achieve size reduction. column_encoding : string or dict, default None Specify the encoding scheme on a per column basis. 
Can only be used when ``use_dictionary`` is set to False, and diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc index 79da47567bf24..a2a325fde8dbd 100644 --- a/python/pyarrow/src/arrow/python/python_to_arrow.cc +++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc @@ -874,6 +874,10 @@ class PyListConverter : public ListConverter { if (PyArray_NDIM(ndarray) != 1) { return Status::Invalid("Can only convert 1-dimensional array values"); } + if (PyArray_ISBYTESWAPPED(ndarray)) { + // TODO + return Status::NotImplemented("Byte-swapped arrays not supported"); + } const int64_t size = PyArray_SIZE(ndarray); RETURN_NOT_OK(AppendTo(this->list_type_, size)); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); diff --git a/python/pyarrow/tests/conftest.py b/python/pyarrow/tests/conftest.py index 57bc3c8fc6616..343b602995db6 100644 --- a/python/pyarrow/tests/conftest.py +++ b/python/pyarrow/tests/conftest.py @@ -192,7 +192,7 @@ def wrapper(*args, **kwargs): @pytest.fixture(scope='session') def s3_server(s3_connection, tmpdir_factory): - @retry(attempts=5, delay=0.1, backoff=2) + @retry(attempts=5, delay=1, backoff=2) def minio_server_health_check(address): resp = urllib.request.urlopen(f"http://{address}/minio/health/cluster") assert resp.getcode() == 200 diff --git a/python/pyarrow/tests/parquet/test_metadata.py b/python/pyarrow/tests/parquet/test_metadata.py index bf186bd923c4f..1eb0598b5c58f 100644 --- a/python/pyarrow/tests/parquet/test_metadata.py +++ b/python/pyarrow/tests/parquet/test_metadata.py @@ -303,14 +303,18 @@ def test_parquet_write_disable_statistics(tempdir): def test_parquet_sorting_column(): sorting_col = pq.SortingColumn(10) - assert sorting_col.column_index == 10 - assert sorting_col.descending is False - assert sorting_col.nulls_first is False + assert sorting_col.to_dict() == { + 'column_index': 10, + 'descending': False, + 'nulls_first': False + } sorting_col = pq.SortingColumn(0, descending=True, nulls_first=True) - assert sorting_col.column_index == 0 - assert sorting_col.descending is True - assert sorting_col.nulls_first is True + assert sorting_col.to_dict() == { + 'column_index': 0, + 'descending': True, + 'nulls_first': True + } schema = pa.schema([('a', pa.int64()), ('b', pa.int64())]) sorting_cols = ( @@ -381,9 +385,13 @@ def test_parquet_file_sorting_columns(): # Can retrieve sorting columns from metadata metadata = pq.read_metadata(reader) - assert metadata.num_row_groups == 1 assert sorting_columns == metadata.row_group(0).sorting_columns + metadata_dict = metadata.to_dict() + assert metadata_dict.get('num_columns') == 2 + assert metadata_dict.get('num_rows') == 3 + assert metadata_dict.get('num_row_groups') == 1 + def test_field_id_metadata(): # ARROW-7080 diff --git a/python/pyarrow/tests/test_array.py b/python/pyarrow/tests/test_array.py index dbe29c5730758..b89e0ace157af 100644 --- a/python/pyarrow/tests/test_array.py +++ b/python/pyarrow/tests/test_array.py @@ -3536,6 +3536,14 @@ def test_struct_array_sort(): {"a": 5, "b": "foo"}, ] + sorted_arr = arr.sort() + assert sorted_arr.to_pylist() == [ + {"a": 5, "b": "foo"}, + {"a": 7, "b": "bar"}, + {"a": 7, "b": "car"}, + {"a": 35, "b": "foobar"}, + ] + arr_with_nulls = pa.StructArray.from_arrays([ pa.array([5, 7, 7, 35], type=pa.int64()), pa.array(["foo", "car", "bar", "foobar"]) @@ -3920,3 +3928,27 @@ def test_list_view_slice(list_view_type): j = sliced_array.offsets[1].as_py() assert sliced_array[0].as_py() == 
sliced_array.values[i:j].to_pylist() == [4] + + +@pytest.mark.parametrize('numpy_native_dtype', ['u2', 'i4', 'f8']) +def test_swapped_byte_order_fails(numpy_native_dtype): + # ARROW-39129 + + numpy_swapped_dtype = np.dtype(numpy_native_dtype).newbyteorder() + np_arr = np.arange(10, dtype=numpy_swapped_dtype) + + # Primitive type array, type is inferred from the numpy array + with pytest.raises(pa.ArrowNotImplementedError): + pa.array(np_arr) + + # Primitive type array, type is explicitly provided + with pytest.raises(pa.ArrowNotImplementedError): + pa.array(np_arr, type=pa.float64()) + + # List type array + with pytest.raises(pa.ArrowNotImplementedError): + pa.array([np_arr]) + + # Struct type array + with pytest.raises(pa.ArrowNotImplementedError): + pa.StructArray.from_arrays([np_arr], names=['a']) diff --git a/python/pyarrow/tests/test_cffi.py b/python/pyarrow/tests/test_cffi.py index 5bf41c3c14b6e..45a3db9b66fc5 100644 --- a/python/pyarrow/tests/test_cffi.py +++ b/python/pyarrow/tests/test_cffi.py @@ -45,7 +45,7 @@ ValueError, match="Cannot import released ArrowArray") assert_stream_released = pytest.raises( - ValueError, match="Cannot import released ArrowArrayStream") + ValueError, match="Cannot import released Arrow Stream") def PyCapsule_IsValid(capsule, name): diff --git a/python/pyarrow/tests/test_csv.py b/python/pyarrow/tests/test_csv.py index bc1dd8a09a768..9ddb5197e9120 100644 --- a/python/pyarrow/tests/test_csv.py +++ b/python/pyarrow/tests/test_csv.py @@ -1470,7 +1470,7 @@ def signal_from_thread(): pytest.fail("Failed to get an interruption during CSV reading") # Interruption should have arrived timely - assert last_duration <= 1.0 + assert last_duration <= 2.0 e = exc_info.__context__ assert isinstance(e, pa.ArrowCancelled) assert e.signum == signum diff --git a/python/pyarrow/tests/test_extension_type.py b/python/pyarrow/tests/test_extension_type.py index fe38bf651baae..9863d96058947 100644 --- a/python/pyarrow/tests/test_extension_type.py +++ b/python/pyarrow/tests/test_extension_type.py @@ -251,14 +251,14 @@ def test_ext_type_repr(): assert repr(ty) == "IntegerType(DataType(int64))" -def test_ext_type__lifetime(): +def test_ext_type_lifetime(): ty = UuidType() wr = weakref.ref(ty) del ty assert wr() is None -def test_ext_type__storage_type(): +def test_ext_type_storage_type(): ty = UuidType() assert ty.storage_type == pa.binary(16) assert ty.__class__ is UuidType @@ -267,6 +267,32 @@ def test_ext_type__storage_type(): assert ty.__class__ is ParamExtType +def test_ext_type_byte_width(): + # Test for fixed-size binary types + ty = UuidType() + assert ty.byte_width == 16 + ty = ParamExtType(5) + assert ty.byte_width == 5 + + # Test for non fixed-size binary types + ty = LabelType() + with pytest.raises(ValueError, match="Non-fixed width type"): + _ = ty.byte_width + + +def test_ext_type_bit_width(): + # Test for fixed-size binary types + ty = UuidType() + assert ty.bit_width == 128 + ty = ParamExtType(5) + assert ty.bit_width == 40 + + # Test for non fixed-size binary types + ty = LabelType() + with pytest.raises(ValueError, match="Non-fixed width type"): + _ = ty.bit_width + + def test_ext_type_as_py(): ty = UuidType() expected = uuid4() diff --git a/python/pyarrow/tests/test_types.py b/python/pyarrow/tests/test_types.py index 4f66a6f41672d..f7b6040f510af 100644 --- a/python/pyarrow/tests/test_types.py +++ b/python/pyarrow/tests/test_types.py @@ -1331,10 +1331,13 @@ def __init__(self, schema): def __arrow_c_schema__(self): return self.schema.__arrow_c_schema__() - schema = 
pa.schema([pa.field("field_name", pa.int32())]) + schema = pa.schema([pa.field("field_name", pa.int32())], metadata={"a": "b"}) + assert schema.metadata == {b"a": b"b"} wrapped_schema = Wrapper(schema) assert pa.schema(wrapped_schema) == schema + assert pa.schema(wrapped_schema).metadata == {b"a": b"b"} + assert pa.schema(wrapped_schema, metadata={"a": "c"}).metadata == {b"a": b"c"} def test_field_import_c_schema_interface(): diff --git a/python/pyarrow/types.pxi b/python/pyarrow/types.pxi index 018099ae7e659..5113df36557f4 100644 --- a/python/pyarrow/types.pxi +++ b/python/pyarrow/types.pxi @@ -1519,6 +1519,24 @@ cdef class BaseExtensionType(DataType): """ return pyarrow_wrap_data_type(self.ext_type.storage_type()) + @property + def byte_width(self): + """ + The byte width of the extension type. + """ + if self.ext_type.byte_width() == -1: + raise ValueError("Non-fixed width type") + return self.ext_type.byte_width() + + @property + def bit_width(self): + """ + The bit width of the extension type. + """ + if self.ext_type.bit_width() == -1: + raise ValueError("Non-fixed width type") + return self.ext_type.bit_width() + def wrap_array(self, storage): """ Wrap the given storage array as an extension array. @@ -5332,7 +5350,10 @@ def schema(fields, metadata=None): if isinstance(fields, Mapping): fields = fields.items() elif hasattr(fields, "__arrow_c_schema__"): - return Schema._import_from_c_capsule(fields.__arrow_c_schema__()) + result = Schema._import_from_c_capsule(fields.__arrow_c_schema__()) + if metadata is not None: + result = result.with_metadata(metadata) + return result for item in fields: if isinstance(item, tuple): diff --git a/python/setup.py b/python/setup.py index 6f3dddb29d248..ed2b7961e5fbb 100755 --- a/python/setup.py +++ b/python/setup.py @@ -152,32 +152,20 @@ def initialize_options(self): if not hasattr(sys, 'gettotalrefcount'): self.build_type = 'release' - self.with_azure = strtobool( - os.environ.get('PYARROW_WITH_AZURE', '0')) - self.with_gcs = strtobool( - os.environ.get('PYARROW_WITH_GCS', '0')) - self.with_s3 = strtobool( - os.environ.get('PYARROW_WITH_S3', '0')) - self.with_hdfs = strtobool( - os.environ.get('PYARROW_WITH_HDFS', '0')) - self.with_cuda = strtobool( - os.environ.get('PYARROW_WITH_CUDA', '0')) - self.with_substrait = strtobool( - os.environ.get('PYARROW_WITH_SUBSTRAIT', '0')) - self.with_flight = strtobool( - os.environ.get('PYARROW_WITH_FLIGHT', '0')) - self.with_acero = strtobool( - os.environ.get('PYARROW_WITH_ACERO', '0')) - self.with_dataset = strtobool( - os.environ.get('PYARROW_WITH_DATASET', '0')) - self.with_parquet = strtobool( - os.environ.get('PYARROW_WITH_PARQUET', '0')) - self.with_parquet_encryption = strtobool( - os.environ.get('PYARROW_WITH_PARQUET_ENCRYPTION', '0')) - self.with_orc = strtobool( - os.environ.get('PYARROW_WITH_ORC', '0')) - self.with_gandiva = strtobool( - os.environ.get('PYARROW_WITH_GANDIVA', '0')) + self.with_azure = None + self.with_gcs = None + self.with_s3 = None + self.with_hdfs = None + self.with_cuda = None + self.with_substrait = None + self.with_flight = None + self.with_acero = None + self.with_dataset = None + self.with_parquet = None + self.with_parquet_encryption = None + self.with_orc = None + self.with_gandiva = None + self.generate_coverage = strtobool( os.environ.get('PYARROW_GENERATE_COVERAGE', '0')) self.bundle_arrow_cpp = strtobool( @@ -185,15 +173,6 @@ def initialize_options(self): self.bundle_cython_cpp = strtobool( os.environ.get('PYARROW_BUNDLE_CYTHON_CPP', '0')) - 
self.with_parquet_encryption = (self.with_parquet_encryption and - self.with_parquet) - - # enforce module dependencies - if self.with_substrait: - self.with_dataset = True - if self.with_dataset: - self.with_acero = True - CYTHON_MODULE_NAMES = [ 'lib', '_fs', @@ -270,23 +249,30 @@ def append_cmake_bool(value, varname): cmake_options.append('-D{0}={1}'.format( varname, 'on' if value else 'off')) + def append_cmake_component(flag, varname): + # only pass this to cmake is the user pass the --with-component + # flag to setup.py build_ext + if flag is not None: + append_cmake_bool(flag, varname) + if self.cmake_generator: cmake_options += ['-G', self.cmake_generator] - append_cmake_bool(self.with_cuda, 'PYARROW_BUILD_CUDA') - append_cmake_bool(self.with_substrait, 'PYARROW_BUILD_SUBSTRAIT') - append_cmake_bool(self.with_flight, 'PYARROW_BUILD_FLIGHT') - append_cmake_bool(self.with_gandiva, 'PYARROW_BUILD_GANDIVA') - append_cmake_bool(self.with_acero, 'PYARROW_BUILD_ACERO') - append_cmake_bool(self.with_dataset, 'PYARROW_BUILD_DATASET') - append_cmake_bool(self.with_orc, 'PYARROW_BUILD_ORC') - append_cmake_bool(self.with_parquet, 'PYARROW_BUILD_PARQUET') - append_cmake_bool(self.with_parquet_encryption, - 'PYARROW_BUILD_PARQUET_ENCRYPTION') - append_cmake_bool(self.with_azure, 'PYARROW_BUILD_AZURE') - append_cmake_bool(self.with_gcs, 'PYARROW_BUILD_GCS') - append_cmake_bool(self.with_s3, 'PYARROW_BUILD_S3') - append_cmake_bool(self.with_hdfs, 'PYARROW_BUILD_HDFS') + append_cmake_component(self.with_cuda, 'PYARROW_CUDA') + append_cmake_component(self.with_substrait, 'PYARROW_SUBSTRAIT') + append_cmake_component(self.with_flight, 'PYARROW_FLIGHT') + append_cmake_component(self.with_gandiva, 'PYARROW_GANDIVA') + append_cmake_component(self.with_acero, 'PYARROW_ACERO') + append_cmake_component(self.with_dataset, 'PYARROW_DATASET') + append_cmake_component(self.with_orc, 'PYARROW_ORC') + append_cmake_component(self.with_parquet, 'PYARROW_PARQUET') + append_cmake_component(self.with_parquet_encryption, + 'PYARROW_PARQUET_ENCRYPTION') + append_cmake_component(self.with_azure, 'PYARROW_AZURE') + append_cmake_component(self.with_gcs, 'PYARROW_GCS') + append_cmake_component(self.with_s3, 'PYARROW_S3') + append_cmake_component(self.with_hdfs, 'PYARROW_HDFS') + append_cmake_bool(self.bundle_arrow_cpp, 'PYARROW_BUNDLE_ARROW_CPP') append_cmake_bool(self.bundle_cython_cpp, @@ -329,54 +315,8 @@ def append_cmake_bool(value, varname): self._found_names = [] for name in self.CYTHON_MODULE_NAMES: built_path = pjoin(install_prefix, name + ext_suffix) - if not os.path.exists(built_path): - print(f'Did not find {built_path}') - if self._failure_permitted(name): - print(f'Cython module {name} failure permitted') - continue - raise RuntimeError('PyArrow C-extension failed to build:', - os.path.abspath(built_path)) - - self._found_names.append(name) - - def _failure_permitted(self, name): - if name == '_parquet' and not self.with_parquet: - return True - if name == '_parquet_encryption' and not self.with_parquet_encryption: - return True - if name == '_orc' and not self.with_orc: - return True - if name == '_flight' and not self.with_flight: - return True - if name == '_substrait' and not self.with_substrait: - return True - if name == '_azurefs' and not self.with_azure: - return True - if name == '_gcsfs' and not self.with_gcs: - return True - if name == '_s3fs' and not self.with_s3: - return True - if name == '_hdfs' and not self.with_hdfs: - return True - if name == '_dataset' and not self.with_dataset: - return 
True - if name == '_acero' and not self.with_acero: - return True - if name == '_exec_plan' and not self.with_acero: - return True - if name == '_dataset_orc' and not ( - self.with_orc and self.with_dataset - ): - return True - if name == '_dataset_parquet' and not ( - self.with_parquet and self.with_dataset - ): - return True - if name == '_cuda' and not self.with_cuda: - return True - if name == 'gandiva' and not self.with_gandiva: - return True - return False + if os.path.exists(built_path): + self._found_names.append(name) def _get_build_dir(self): # Get the package directory from build_py diff --git a/r/DESCRIPTION b/r/DESCRIPTION index 38cbaa94a3c25..bb4470e29037d 100644 --- a/r/DESCRIPTION +++ b/r/DESCRIPTION @@ -1,6 +1,6 @@ Package: arrow Title: Integration to 'Apache' 'Arrow' -Version: 16.0.0.9000 +Version: 16.1.0.9000 Authors@R: c( person("Neal", "Richardson", email = "neal.p.richardson@gmail.com", role = c("aut")), person("Ian", "Cook", email = "ianmcook@gmail.com", role = c("aut")), diff --git a/r/NEWS.md b/r/NEWS.md index 05f934dac68f3..47c4ac1571dad 100644 --- a/r/NEWS.md +++ b/r/NEWS.md @@ -17,7 +17,9 @@ under the License. --> -# arrow 16.0.0.9000 +# arrow 16.1.0.9000 + +# arrow 16.1.0 * R functions that users write that use functions that Arrow supports in dataset queries now can be used in queries too. Previously, only functions that used arithmetic operators worked. For example, `time_hours <- function(mins) mins / 60` worked, but `time_hours_rounded <- function(mins) round(mins / 60)` did not; now both work. These are automatic translations rather than true user-defined functions (UDFs); for UDFs, see `register_scalar_function()`. (#41223) * `summarize()` supports more complex expressions, and correctly handles cases where column names are reused in expressions. diff --git a/r/inst/build_arrow_static.sh b/r/inst/build_arrow_static.sh index d28cbcb08fbec..825a230e78e5e 100755 --- a/r/inst/build_arrow_static.sh +++ b/r/inst/build_arrow_static.sh @@ -99,6 +99,7 @@ ${CMAKE} -DARROW_BOOST_USE_SHARED=OFF \ -DCMAKE_EXPORT_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_FIND_PACKAGE_NO_PACKAGE_REGISTRY=ON \ -DCMAKE_UNITY_BUILD=${CMAKE_UNITY_BUILD:-OFF} \ + -DOPENSSL_ROOT_DIR=${OPENSSL_ROOT_DIR} \ -Dre2_SOURCE=${re2_SOURCE:-BUNDLED} \ -Dxsimd_SOURCE=${xsimd_SOURCE:-} \ -Dzstd_SOURCE=${zstd_SOURCE:-} \ diff --git a/r/pkgdown/assets/versions.json b/r/pkgdown/assets/versions.json index 75d179f240515..43f0b3fac62a1 100644 --- a/r/pkgdown/assets/versions.json +++ b/r/pkgdown/assets/versions.json @@ -1,10 +1,10 @@ [ { - "name": "16.0.0.9000 (dev)", + "name": "16.1.0.9000 (dev)", "version": "dev/" }, { - "name": "16.0.0 (release)", + "name": "16.1.0 (release)", "version": "" }, {