diff --git a/.github/workflows/bindings_python_ci.yml b/.github/workflows/bindings_python_ci.yml new file mode 100644 index 000000000..82f13e752 --- /dev/null +++ b/.github/workflows/bindings_python_ci.yml @@ -0,0 +1,83 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +name: Bindings Python CI + +on: + push: + branches: + - main + pull_request: + branches: + - main + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }} + cancel-in-progress: true + +jobs: + check-rust: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Check format + run: cargo fmt --all -- --check + - name: Check clippy + run: cargo clippy --all-targets --all-features -- -D warnings + + check-python: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Install tools + run: | + pip install ruff + - name: Check format + working-directory: "bindings/python" + run: | + ruff format . --diff + - name: Check style + working-directory: "bindings/python" + run: | + ruff check . + + test: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: + - ubuntu-latest + - macos-latest + - windows-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: 3.8 + - uses: PyO3/maturin-action@v1 + with: + working-directory: "bindings/python" + command: build + args: --out dist --sdist + - name: Run tests + working-directory: "bindings/python" + shell: bash + run: | + set -e + pip install dist/pyiceberg_core-*.whl --force-reinstall + pip install pytest + pytest -v diff --git a/.github/workflows/ci_typos.yml b/.github/workflows/ci_typos.yml index 0030cc8d1..2b7235737 100644 --- a/.github/workflows/ci_typos.yml +++ b/.github/workflows/ci_typos.yml @@ -42,4 +42,4 @@ jobs: steps: - uses: actions/checkout@v4 - name: Check typos - uses: crate-ci/typos@v1.23.2 + uses: crate-ci/typos@v1.23.6 diff --git a/.gitignore b/.gitignore index 25b803228..05c11eda6 100644 --- a/.gitignore +++ b/.gitignore @@ -15,9 +15,12 @@ # specific language governing permissions and limitations # under the License. -/target -/Cargo.lock +target +Cargo.lock .idea .vscode **/.DS_Store dist/* +**/venv +*.so +*.pyc diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8fae0bb15..019bd03fb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -110,14 +110,16 @@ $ cargo version cargo 1.69.0 (6e9a83356 2023-04-12) ``` -#### Install docker +#### Install Docker or Podman -Currently, iceberg-rust uses docker to set up environment for integration tests. +Currently, iceberg-rust uses Docker to set up environment for integration tests. Podman is also supported. -You can learn how to install docker from [here](https://docs.docker.com/get-docker/). 
+You can learn how to install Docker from [here](https://docs.docker.com/get-docker/). For macos users, you can install [OrbStack](https://orbstack.dev/) as a docker alternative. +For podman users, refer to [Using Podman instead of Docker](docs/contributing/podman.md) + ## Build * To compile the project: `make build` diff --git a/Cargo.toml b/Cargo.toml index c4f8482cc..2ec7ef1ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -18,11 +18,14 @@ [workspace] resolver = "2" members = [ - "crates/catalog/*", - "crates/examples", - "crates/iceberg", - "crates/integrations/*", - "crates/test_utils", + "crates/catalog/*", + "crates/examples", + "crates/iceberg", + "crates/integrations/*", + "crates/test_utils", +] +exclude = [ + "bindings/python" ] [workspace.package] @@ -36,7 +39,7 @@ rust-version = "1.77.1" [workspace.dependencies] anyhow = "1.0.72" -apache-avro = "0.16" +apache-avro = "0.17" array-init = "2" arrow-arith = { version = "52" } arrow-array = { version = "52" } @@ -67,12 +70,13 @@ log = "^0.4" mockito = "^1" murmur3 = "0.5.2" once_cell = "1" -opendal = "0.47" +opendal = "0.48" ordered-float = "4.0.0" parquet = "52" pilota = "0.11.2" pretty_assertions = "1.4.0" port_scanner = "0.1.5" +regex = "1.10.5" reqwest = { version = "^0.12", default-features = false, features = ["json"] } rust_decimal = "1.31.0" serde = { version = "^1.0", features = ["rc"] } @@ -85,7 +89,8 @@ tempfile = "3.8" tokio = { version = "1", default-features = false } typed-builder = "^0.19" url = "2" -uuid = "1.6.1" +urlencoding = "2" +uuid = { version = "1.6.1", features = ["v7"] } volo-thrift = "0.10" hive_metastore = "0.1.0" tera = "1" diff --git a/README.md b/README.md index 4f8265b79..9736732db 100644 --- a/README.md +++ b/README.md @@ -29,33 +29,39 @@ Working on [v0.3.0 Release Milestone](https://github.com/apache/iceberg-rust/mil The Apache Iceberg Rust project is composed of the following components: -| Name | Release | Docs | -|------------------------|------------------------------------------------------------|------------------------------------------------------| -| [iceberg] | [![iceberg image]][iceberg link] | [![docs release]][iceberg release docs] | -| [iceberg-datafusion] | - | - | -| [iceberg-catalog-glue] | - | - | -| [iceberg-catalog-hms] | [![iceberg-catalog-hms image]][iceberg-catalog-hms link] | [![docs release]][iceberg-catalog-hms release docs] | -| [iceberg-catalog-rest] | [![iceberg-catalog-rest image]][iceberg-catalog-rest link] | [![docs release]][iceberg-catalog-rest release docs] | +| Name | Release | Docs | +|------------------------|------------------------------------------------------------|---------------------------------------------------------------------------------------------------| +| [iceberg] | [![iceberg image]][iceberg link] | [![docs release]][iceberg release docs] [![docs dev]][iceberg dev docs] | +| [iceberg-datafusion] | - | [![docs dev]][iceberg-datafusion dev docs] | +| [iceberg-catalog-glue] | - | [![docs dev]][iceberg-catalog-glue dev docs] | +| [iceberg-catalog-hms] | [![iceberg-catalog-hms image]][iceberg-catalog-hms link] | [![docs release]][iceberg-catalog-hms release docs] [![docs dev]][iceberg-catalog-hms dev docs] | +| [iceberg-catalog-rest] | [![iceberg-catalog-rest image]][iceberg-catalog-rest link] | [![docs release]][iceberg-catalog-rest release docs] [![docs dev]][iceberg-catalog-rest dev docs] | [docs release]: https://img.shields.io/badge/docs-release-blue +[docs dev]: https://img.shields.io/badge/docs-dev-blue [iceberg]: crates/iceberg/README.md 
[iceberg image]: https://img.shields.io/crates/v/iceberg.svg [iceberg link]: https://crates.io/crates/iceberg [iceberg release docs]: https://docs.rs/iceberg +[iceberg dev docs]: https://rust.iceberg.apache.org/api/iceberg/ [iceberg-datafusion]: crates/integrations/datafusion/README.md +[iceberg-datafusion dev docs]: https://rust.iceberg.apache.org/api/iceberg_datafusion/ [iceberg-catalog-glue]: crates/catalog/glue/README.md +[iceberg-catalog-glue dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_glue/ [iceberg-catalog-hms]: crates/catalog/hms/README.md [iceberg-catalog-hms image]: https://img.shields.io/crates/v/iceberg-catalog-hms.svg [iceberg-catalog-hms link]: https://crates.io/crates/iceberg-catalog-hms [iceberg-catalog-hms release docs]: https://docs.rs/iceberg-catalog-hms +[iceberg-catalog-hms dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_hms/ [iceberg-catalog-rest]: crates/catalog/rest/README.md [iceberg-catalog-rest image]: https://img.shields.io/crates/v/iceberg-catalog-rest.svg [iceberg-catalog-rest link]: https://crates.io/crates/iceberg-catalog-rest [iceberg-catalog-rest release docs]: https://docs.rs/iceberg-catalog-rest +[iceberg-catalog-rest dev docs]: https://rust.iceberg.apache.org/api/iceberg_catalog_rest/ ## Supported Rust Version diff --git a/bindings/python/Cargo.toml b/bindings/python/Cargo.toml new file mode 100644 index 000000000..c2c1007b7 --- /dev/null +++ b/bindings/python/Cargo.toml @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "pyiceberg_core_rust" +version = "0.0.1" +edition = "2021" +homepage = "https://rust.iceberg.apache.org" +rust-version = "1.77.1" +# This crate is used to build python bindings, we don't want to publish it +publish = false + +license = "Apache-2.0" +keywords = ["iceberg"] + +[lib] +crate-type = ["cdylib"] + +[dependencies] +iceberg = { path = "../../crates/iceberg" } +pyo3 = { version = "0.22", features = ["extension-module"] } diff --git a/bindings/python/README.md b/bindings/python/README.md new file mode 100644 index 000000000..566a7bcb8 --- /dev/null +++ b/bindings/python/README.md @@ -0,0 +1,44 @@ + + +# Pyiceberg Core + +This project is used to build an iceberg-rust powered core for pyiceberg. + +## Setup + +```shell +python -m venv venv +source ./venv/bin/activate + +pip install maturin +``` + +## Build + +```shell +maturin develop +``` + +## Test + +```shell +maturin develop -E test +pytest -v +``` \ No newline at end of file diff --git a/bindings/python/pyproject.toml b/bindings/python/pyproject.toml new file mode 100644 index 000000000..4a489adde --- /dev/null +++ b/bindings/python/pyproject.toml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[build-system] +requires = ["maturin>=1.0,<2.0"] +build-backend = "maturin" + +[project] +name = "pyiceberg_core" +version = "0.0.1" +classifiers = [ + "Development Status :: 4 - Beta", + "Intended Audience :: Developers", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +[project.optional-dependencies] +test = ["pytest"] + +[tool.maturin] +features = ["pyo3/extension-module"] +python-source = "python" +module-name = "pyiceberg_core.pyiceberg_core_rust" + +[tool.ruff.lint] +ignore = ["F403", "F405"] diff --git a/bindings/python/python/pyiceberg_core/__init__.py b/bindings/python/python/pyiceberg_core/__init__.py new file mode 100644 index 000000000..067bb6f07 --- /dev/null +++ b/bindings/python/python/pyiceberg_core/__init__.py @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from .pyiceberg_core_rust import * + +__doc__ = pyiceberg_core_rust.__doc__ +__all__ = pyiceberg_core_rust.__all__ diff --git a/bindings/python/src/lib.rs b/bindings/python/src/lib.rs new file mode 100644 index 000000000..f0d5d1935 --- /dev/null +++ b/bindings/python/src/lib.rs @@ -0,0 +1,31 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +use iceberg::io::FileIOBuilder; +use pyo3::prelude::*; + +#[pyfunction] +fn hello_world() -> PyResult { + let _ = FileIOBuilder::new_fs_io().build().unwrap(); + Ok("Hello, world!".to_string()) +} + +#[pymodule] +fn pyiceberg_core_rust(m: &Bound<'_, PyModule>) -> PyResult<()> { + m.add_function(wrap_pyfunction!(hello_world, m)?)?; + Ok(()) +} diff --git a/bindings/python/tests/test_basic.py b/bindings/python/tests/test_basic.py new file mode 100644 index 000000000..817793ba8 --- /dev/null +++ b/bindings/python/tests/test_basic.py @@ -0,0 +1,22 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +from pyiceberg_core import hello_world + + +def test_hello_world(): + hello_world() diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs index 3edd8cdaf..d9c5b4e0b 100644 --- a/crates/catalog/glue/tests/glue_catalog_test.rs +++ b/crates/catalog/glue/tests/glue_catalog_test.rs @@ -18,6 +18,7 @@ //! Integration tests for glue catalog. use std::collections::HashMap; +use std::net::SocketAddr; use std::sync::RwLock; use ctor::{ctor, dtor}; @@ -64,14 +65,11 @@ async fn get_catalog() -> GlueCatalog { docker_compose.get_container_ip("minio"), ) }; - let read_port = format!("{}:{}", glue_catalog_ip, GLUE_CATALOG_PORT); - loop { - if !scan_port_addr(&read_port) { - log::info!("Waiting for 1s glue catalog to ready..."); - sleep(std::time::Duration::from_millis(1000)).await; - } else { - break; - } + let glue_socket_addr = SocketAddr::new(glue_catalog_ip, GLUE_CATALOG_PORT); + let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT); + while !scan_port_addr(glue_socket_addr) { + log::info!("Waiting for 1s glue catalog to ready..."); + sleep(std::time::Duration::from_millis(1000)).await; } let props = HashMap::from([ @@ -83,7 +81,7 @@ async fn get_catalog() -> GlueCatalog { (AWS_REGION_NAME.to_string(), "us-east-1".to_string()), ( S3_ENDPOINT.to_string(), - format!("http://{}:{}", minio_ip, MINIO_PORT), + format!("http://{}", minio_socket_addr), ), (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), @@ -91,7 +89,7 @@ async fn get_catalog() -> GlueCatalog { ]); let config = GlueCatalogConfig::builder() - .uri(format!("http://{}:{}", glue_catalog_ip, GLUE_CATALOG_PORT)) + .uri(format!("http://{}", glue_socket_addr)) .warehouse("s3a://warehouse/hive".to_string()) .props(props.clone()) .build(); diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs index e4974171f..5b8004439 100644 --- a/crates/catalog/hms/tests/hms_catalog_test.rs +++ b/crates/catalog/hms/tests/hms_catalog_test.rs @@ -18,6 +18,7 @@ //! Integration tests for hms catalog. 
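Both the glue catalog test above and the hms and rest catalog tests changed below replace the hand-rolled `host:port` string with a `std::net::SocketAddr` before polling the service. A minimal standalone sketch of that readiness loop, assuming `port_scanner::scan_port_addr` is generic over `ToSocketAddrs` (both the old `String` form and the new `SocketAddr` satisfy it):

```rust
use std::net::SocketAddr;
use std::time::Duration;

use port_scanner::scan_port_addr; // assumed to accept any `impl ToSocketAddrs`
use tokio::time::sleep;

// Illustrative helper; the tests in this diff inline the same loop rather
// than calling a shared function.
async fn wait_until_ready(addr: SocketAddr) {
    while !scan_port_addr(addr) {
        log::info!("Waiting 1s for {} to be ready...", addr);
        sleep(Duration::from_millis(1000)).await;
    }
}
```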
use std::collections::HashMap; +use std::net::SocketAddr; use std::sync::RwLock; use ctor::{ctor, dtor}; @@ -63,22 +64,18 @@ async fn get_catalog() -> HmsCatalog { docker_compose.get_container_ip("minio"), ) }; - - let read_port = format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT); - loop { - if !scan_port_addr(&read_port) { - log::info!("scan read_port {} check", read_port); - log::info!("Waiting for 1s hms catalog to ready..."); - sleep(std::time::Duration::from_millis(1000)).await; - } else { - break; - } + let hms_socket_addr = SocketAddr::new(hms_catalog_ip, HMS_CATALOG_PORT); + let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT); + while !scan_port_addr(hms_socket_addr) { + log::info!("scan hms_socket_addr {} check", hms_socket_addr); + log::info!("Waiting for 1s hms catalog to ready..."); + sleep(std::time::Duration::from_millis(1000)).await; } let props = HashMap::from([ ( S3_ENDPOINT.to_string(), - format!("http://{}:{}", minio_ip, MINIO_PORT), + format!("http://{}", minio_socket_addr), ), (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), @@ -86,7 +83,7 @@ async fn get_catalog() -> HmsCatalog { ]); let config = HmsCatalogConfig::builder() - .address(format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT)) + .address(hms_socket_addr.to_string()) .thrift_transport(HmsThriftTransport::Buffered) .warehouse("s3a://warehouse/hive".to_string()) .props(props) diff --git a/crates/catalog/memory/Cargo.toml b/crates/catalog/memory/Cargo.toml index c62974a15..011479efc 100644 --- a/crates/catalog/memory/Cargo.toml +++ b/crates/catalog/memory/Cargo.toml @@ -37,5 +37,6 @@ serde_json = { workspace = true } uuid = { workspace = true, features = ["v4"] } [dev-dependencies] +regex = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } diff --git a/crates/catalog/memory/src/catalog.rs b/crates/catalog/memory/src/catalog.rs index 69f476189..44086f8d3 100644 --- a/crates/catalog/memory/src/catalog.rs +++ b/crates/catalog/memory/src/catalog.rs @@ -33,19 +33,24 @@ use uuid::Uuid; use crate::namespace_state::NamespaceState; +/// namespace `location` property +const LOCATION: &str = "location"; + /// Memory catalog implementation. #[derive(Debug)] pub struct MemoryCatalog { root_namespace_state: Mutex, file_io: FileIO, + warehouse_location: Option, } impl MemoryCatalog { /// Creates an memory catalog. - pub fn new(file_io: FileIO) -> Self { + pub fn new(file_io: FileIO, warehouse_location: Option) -> Self { Self { root_namespace_state: Mutex::new(NamespaceState::default()), file_io, + warehouse_location, } } } @@ -165,11 +170,20 @@ impl Catalog for MemoryCatalog { let (table_creation, location) = match table_creation.location.clone() { Some(location) => (table_creation, location), None => { - let location = format!( - "{}/{}", - table_ident.namespace().join("/"), - table_ident.name() - ); + let namespace_properties = root_namespace_state.get_properties(namespace_ident)?; + let location_prefix = match namespace_properties.get(LOCATION) { + Some(namespace_location) => Ok(namespace_location.clone()), + None => match self.warehouse_location.clone() { + Some(warehouse_location) => Ok(format!("{}/{}", warehouse_location, namespace_ident.join("/"))), + None => Err(Error::new(ErrorKind::Unexpected, + format!( + "Cannot create table {:?}. 
No default path is set, please specify a location when creating a table.", + &table_ident + ))) + }, + }?; + + let location = format!("{}/{}", location_prefix, table_ident.name()); let new_table_creation = TableCreation { location: Some(location.clone()), @@ -273,13 +287,20 @@ mod tests { use iceberg::io::FileIOBuilder; use iceberg::spec::{NestedField, PartitionSpec, PrimitiveType, Schema, SortOrder, Type}; + use regex::Regex; use tempfile::TempDir; use super::*; + fn temp_path() -> String { + let temp_dir = TempDir::new().unwrap(); + temp_dir.path().to_str().unwrap().to_string() + } + fn new_memory_catalog() -> impl Catalog { let file_io = FileIOBuilder::new_fs_io().build().unwrap(); - MemoryCatalog::new(file_io) + let warehouse_location = temp_path(); + MemoryCatalog::new(file_io, Some(warehouse_location)) } async fn create_namespace(catalog: &C, namespace_ident: &NamespaceIdent) { @@ -312,16 +333,12 @@ mod tests { } async fn create_table(catalog: &C, table_ident: &TableIdent) { - let tmp_dir = TempDir::new().unwrap(); - let location = tmp_dir.path().to_str().unwrap().to_string(); - let _ = catalog .create_table( &table_ident.namespace, TableCreation::builder() .name(table_ident.name().into()) .schema(simple_table_schema()) - .location(location) .build(), ) .await @@ -373,6 +390,14 @@ mod tests { assert!(!table.readonly()); } + const UUID_REGEX_STR: &str = "[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"; + + fn assert_table_metadata_location_matches(table: &Table, regex_str: &str) { + let actual = table.metadata_location().unwrap().to_string(); + let regex = Regex::new(regex_str).unwrap(); + assert!(regex.is_match(&actual)) + } + #[tokio::test] async fn test_list_namespaces_returns_empty_vector() { let catalog = new_memory_catalog(); @@ -989,12 +1014,220 @@ mod tests { .metadata_location() .unwrap() .to_string() - .starts_with(&location)); + .starts_with(&location)) + } - assert_table_eq( - &catalog.load_table(&expected_table_ident).await.unwrap(), - &expected_table_ident, - &simple_table_schema(), + #[tokio::test] + async fn test_create_table_falls_back_to_namespace_location_if_table_location_is_missing() { + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + let warehouse_location = temp_path(); + let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone())); + + let namespace_ident = NamespaceIdent::new("a".into()); + let mut namespace_properties = HashMap::new(); + let namespace_location = temp_path(); + namespace_properties.insert(LOCATION.to_string(), namespace_location.to_string()); + catalog + .create_namespace(&namespace_ident, namespace_properties) + .await + .unwrap(); + + let table_name = "tbl1"; + let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); + let expected_table_metadata_location_regex = format!( + "^{}/tbl1/metadata/0-{}.metadata.json$", + namespace_location, UUID_REGEX_STR, + ); + + let table = catalog + .create_table( + &namespace_ident, + TableCreation::builder() + .name(table_name.into()) + .schema(simple_table_schema()) + // no location specified for table + .build(), + ) + .await + .unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex); + + let table = catalog.load_table(&expected_table_ident).await.unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, 
&expected_table_metadata_location_regex); + } + + #[tokio::test] + async fn test_create_table_in_nested_namespace_falls_back_to_nested_namespace_location_if_table_location_is_missing( + ) { + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + let warehouse_location = temp_path(); + let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone())); + + let namespace_ident = NamespaceIdent::new("a".into()); + let mut namespace_properties = HashMap::new(); + let namespace_location = temp_path(); + namespace_properties.insert(LOCATION.to_string(), namespace_location.to_string()); + catalog + .create_namespace(&namespace_ident, namespace_properties) + .await + .unwrap(); + + let nested_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); + let mut nested_namespace_properties = HashMap::new(); + let nested_namespace_location = temp_path(); + nested_namespace_properties + .insert(LOCATION.to_string(), nested_namespace_location.to_string()); + catalog + .create_namespace(&nested_namespace_ident, nested_namespace_properties) + .await + .unwrap(); + + let table_name = "tbl1"; + let expected_table_ident = + TableIdent::new(nested_namespace_ident.clone(), table_name.into()); + let expected_table_metadata_location_regex = format!( + "^{}/tbl1/metadata/0-{}.metadata.json$", + nested_namespace_location, UUID_REGEX_STR, + ); + + let table = catalog + .create_table( + &nested_namespace_ident, + TableCreation::builder() + .name(table_name.into()) + .schema(simple_table_schema()) + // no location specified for table + .build(), + ) + .await + .unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex); + + let table = catalog.load_table(&expected_table_ident).await.unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex); + } + + #[tokio::test] + async fn test_create_table_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing( + ) { + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + let warehouse_location = temp_path(); + let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone())); + + let namespace_ident = NamespaceIdent::new("a".into()); + // note: no location specified in namespace_properties + let namespace_properties = HashMap::new(); + catalog + .create_namespace(&namespace_ident, namespace_properties) + .await + .unwrap(); + + let table_name = "tbl1"; + let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); + let expected_table_metadata_location_regex = format!( + "^{}/a/tbl1/metadata/0-{}.metadata.json$", + warehouse_location, UUID_REGEX_STR + ); + + let table = catalog + .create_table( + &namespace_ident, + TableCreation::builder() + .name(table_name.into()) + .schema(simple_table_schema()) + // no location specified for table + .build(), + ) + .await + .unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex); + + let table = catalog.load_table(&expected_table_ident).await.unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex); + } + + #[tokio::test] + async fn 
test_create_table_in_nested_namespace_falls_back_to_warehouse_location_if_both_table_location_and_namespace_location_are_missing( + ) { + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + let warehouse_location = temp_path(); + let catalog = MemoryCatalog::new(file_io, Some(warehouse_location.clone())); + + let namespace_ident = NamespaceIdent::new("a".into()); + catalog + // note: no location specified in namespace_properties + .create_namespace(&namespace_ident, HashMap::new()) + .await + .unwrap(); + + let nested_namespace_ident = NamespaceIdent::from_strs(vec!["a", "b"]).unwrap(); + catalog + // note: no location specified in namespace_properties + .create_namespace(&nested_namespace_ident, HashMap::new()) + .await + .unwrap(); + + let table_name = "tbl1"; + let expected_table_ident = + TableIdent::new(nested_namespace_ident.clone(), table_name.into()); + let expected_table_metadata_location_regex = format!( + "^{}/a/b/tbl1/metadata/0-{}.metadata.json$", + warehouse_location, UUID_REGEX_STR + ); + + let table = catalog + .create_table( + &nested_namespace_ident, + TableCreation::builder() + .name(table_name.into()) + .schema(simple_table_schema()) + // no location specified for table + .build(), + ) + .await + .unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex); + + let table = catalog.load_table(&expected_table_ident).await.unwrap(); + assert_table_eq(&table, &expected_table_ident, &simple_table_schema()); + assert_table_metadata_location_matches(&table, &expected_table_metadata_location_regex); + } + + #[tokio::test] + async fn test_create_table_throws_error_if_table_location_and_namespace_location_and_warehouse_location_are_missing( + ) { + let file_io = FileIOBuilder::new_fs_io().build().unwrap(); + let catalog = MemoryCatalog::new(file_io, None); + + let namespace_ident = NamespaceIdent::new("a".into()); + create_namespace(&catalog, &namespace_ident).await; + + let table_name = "tbl1"; + let expected_table_ident = TableIdent::new(namespace_ident.clone(), table_name.into()); + + assert_eq!( + catalog + .create_table( + &namespace_ident, + TableCreation::builder() + .name(table_name.into()) + .schema(simple_table_schema()) + .build(), + ) + .await + .unwrap_err() + .to_string(), + format!( + "Unexpected => Cannot create table {:?}. 
No default path is set, please specify a location when creating a table.", + &expected_table_ident + ) ) } diff --git a/crates/catalog/rest/src/catalog.rs b/crates/catalog/rest/src/catalog.rs index 6a1ed5b6e..d74c8de06 100644 --- a/crates/catalog/rest/src/catalog.rs +++ b/crates/catalog/rest/src/catalog.rs @@ -1321,7 +1321,7 @@ mod tests { ); assert_eq!( Utc.timestamp_millis_opt(1646787054459).unwrap(), - table.metadata().last_updated_ms() + table.metadata().last_updated_timestamp().unwrap() ); assert_eq!( vec![&Arc::new( @@ -1511,7 +1511,11 @@ mod tests { ); assert_eq!( 1657810967051, - table.metadata().last_updated_ms().timestamp_millis() + table + .metadata() + .last_updated_timestamp() + .unwrap() + .timestamp_millis() ); assert_eq!( vec![&Arc::new( @@ -1682,7 +1686,11 @@ mod tests { ); assert_eq!( 1657810967051, - table.metadata().last_updated_ms().timestamp_millis() + table + .metadata() + .last_updated_timestamp() + .unwrap() + .timestamp_millis() ); assert_eq!( vec![&Arc::new( diff --git a/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml b/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml index b49b6c6c1..34ba3c874 100644 --- a/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml +++ b/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml @@ -15,6 +15,9 @@ # specific language governing permissions and limitations # under the License. +networks: + rest_bridge: + services: rest: image: tabulario/iceberg-rest:0.10.0 @@ -29,8 +32,10 @@ services: - CATALOG_S3_ENDPOINT=http://minio:9000 depends_on: - minio - links: - - minio:icebergdata.minio + networks: + rest_bridge: + aliases: + - icebergdata.minio expose: - 8181 @@ -40,10 +45,13 @@ services: - MINIO_ROOT_USER=admin - MINIO_ROOT_PASSWORD=password - MINIO_DOMAIN=minio + hostname: icebergdata.minio + networks: + rest_bridge: expose: - 9001 - 9000 - command: [ "server", "/data", "--console-address", ":9001" ] + command: ["server", "/data", "--console-address", ":9001"] mc: depends_on: @@ -55,3 +63,5 @@ services: - AWS_REGION=us-east-1 entrypoint: > /bin/sh -c " until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc rm -r --force minio/icebergdata; /usr/bin/mc mb minio/icebergdata; /usr/bin/mc policy set public minio/icebergdata; tail -f /dev/null " + networks: + rest_bridge: diff --git a/crates/catalog/rest/tests/rest_catalog_test.rs b/crates/catalog/rest/tests/rest_catalog_test.rs index 3c9ec6937..e98890a86 100644 --- a/crates/catalog/rest/tests/rest_catalog_test.rs +++ b/crates/catalog/rest/tests/rest_catalog_test.rs @@ -18,6 +18,7 @@ //! Integration tests for rest catalog. 
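Stepping back to the `MemoryCatalog::create_table` change earlier in this diff: when the `TableCreation` carries no explicit location, the catalog now resolves one from the namespace's `location` property, then from the configured warehouse root plus the namespace path, and finally fails if neither is available. A standalone sketch of that resolution order (the free function and plain `String` error are illustrative, not the crate's API):

```rust
use std::collections::HashMap;

// Mirrors the fallback implemented in MemoryCatalog::create_table;
// hypothetical helper for illustration only.
fn resolve_table_location(
    table_location: Option<String>,
    namespace_properties: &HashMap<String, String>,
    warehouse_location: Option<&str>,
    namespace_path: &str, // e.g. "a/b" for the nested namespace ("a", "b")
    table_name: &str,
) -> Result<String, String> {
    if let Some(location) = table_location {
        return Ok(location); // an explicit table location always wins
    }
    let prefix = match namespace_properties.get("location") {
        Some(namespace_location) => namespace_location.clone(),
        None => match warehouse_location {
            Some(warehouse) => format!("{}/{}", warehouse, namespace_path),
            None => {
                return Err(format!(
                    "Cannot create table {}. No default path is set, \
                     please specify a location when creating a table.",
                    table_name
                ))
            }
        },
    };
    Ok(format!("{}/{}", prefix, table_name))
}
```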
use std::collections::HashMap; +use std::net::SocketAddr; use std::sync::RwLock; use ctor::{ctor, dtor}; @@ -59,18 +60,14 @@ async fn get_catalog() -> RestCatalog { docker_compose.get_container_ip("rest") }; - let read_port = format!("{}:{}", rest_catalog_ip, REST_CATALOG_PORT); - loop { - if !scan_port_addr(&read_port) { - log::info!("Waiting for 1s rest catalog to ready..."); - sleep(std::time::Duration::from_millis(1000)).await; - } else { - break; - } + let rest_socket_addr = SocketAddr::new(rest_catalog_ip, REST_CATALOG_PORT); + while !scan_port_addr(rest_socket_addr) { + log::info!("Waiting for 1s rest catalog to ready..."); + sleep(std::time::Duration::from_millis(1000)).await; } let config = RestCatalogConfig::builder() - .uri(format!("http://{}:{}", rest_catalog_ip, REST_CATALOG_PORT)) + .uri(format!("http://{}", rest_socket_addr)) .build(); RestCatalog::new(config) } diff --git a/crates/catalog/sql/Cargo.toml b/crates/catalog/sql/Cargo.toml new file mode 100644 index 000000000..5d145351c --- /dev/null +++ b/crates/catalog/sql/Cargo.toml @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +[package] +name = "iceberg-catalog-sql" +version = { workspace = true } +edition = { workspace = true } +homepage = { workspace = true } +rust-version = { workspace = true } + +categories = ["database"] +description = "Apache Iceberg Rust Sql Catalog" +repository = { workspace = true } +license = { workspace = true } +keywords = ["iceberg", "sql", "catalog"] + +[dependencies] +async-trait = { workspace = true } +iceberg = { workspace = true } +sqlx = { version = "0.7.4", features = ["any"], default-features = false } +typed-builder = { workspace = true } + +[dev-dependencies] +iceberg_test_utils = { path = "../../test_utils", features = ["tests"] } +itertools = { workspace = true } +regex = "1.10.5" +sqlx = { version = "0.7.4", features = ["tls-rustls", "runtime-tokio", "any", "sqlite", "migrate"], default-features = false } +tempfile = { workspace = true } +tokio = { workspace = true } diff --git a/crates/catalog/sql/src/catalog.rs b/crates/catalog/sql/src/catalog.rs new file mode 100644 index 000000000..078fff690 --- /dev/null +++ b/crates/catalog/sql/src/catalog.rs @@ -0,0 +1,279 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use std::borrow::Cow; +use std::collections::HashMap; +use std::time::Duration; + +use async_trait::async_trait; +use iceberg::io::FileIO; +use iceberg::table::Table; +use iceberg::{Catalog, Namespace, NamespaceIdent, Result, TableCommit, TableCreation, TableIdent}; +use sqlx::any::{install_default_drivers, AnyPoolOptions, AnyRow}; +use sqlx::AnyPool; +use typed_builder::TypedBuilder; + +use crate::error::from_sqlx_error; + +static CATALOG_TABLE_NAME: &str = "iceberg_tables"; +static CATALOG_FIELD_CATALOG_NAME: &str = "catalog_name"; +static CATALOG_FIELD_TABLE_NAME: &str = "table_name"; +static CATALOG_FIELD_TABLE_NAMESPACE: &str = "table_namespace"; +static CATALOG_FIELD_METADATA_LOCATION_PROP: &str = "metadata_location"; +static CATALOG_FIELD_PREVIOUS_METADATA_LOCATION_PROP: &str = "previous_metadata_location"; +static CATALOG_FIELD_RECORD_TYPE: &str = "iceberg_type"; + +static NAMESPACE_TABLE_NAME: &str = "iceberg_namespace_properties"; +static NAMESPACE_FIELD_NAME: &str = "namespace"; +static NAMESPACE_FIELD_PROPERTY_KEY: &str = "property_key"; +static NAMESPACE_FIELD_PROPERTY_VALUE: &str = "property_value"; + +static MAX_CONNECTIONS: u32 = 10; // Default the SQL pool to 10 connections if not provided +static IDLE_TIMEOUT: u64 = 10; // Default the maximum idle timeout per connection to 10s before it is closed +static TEST_BEFORE_ACQUIRE: bool = true; // Default the health-check of each connection to enabled prior to returning + +/// Sql catalog config +#[derive(Debug, TypedBuilder)] +pub struct SqlCatalogConfig { + uri: String, + name: String, + warehouse_location: String, + file_io: FileIO, + sql_bind_style: SqlBindStyle, + #[builder(default)] + props: HashMap, +} + +#[derive(Debug)] +/// Sql catalog implementation. +pub struct SqlCatalog { + _name: String, + connection: AnyPool, + _warehouse_location: String, + _fileio: FileIO, + sql_bind_style: SqlBindStyle, +} + +#[derive(Debug, PartialEq)] +/// Set the SQL parameter bind style to either $1..$N (Postgres style) or ? 
(SQLite/MySQL/MariaDB) +pub enum SqlBindStyle { + /// DollarNumeric uses parameters of the form `$1..$N``, which is the Postgres style + DollarNumeric, + /// QMark uses parameters of the form `?` which is the style for other dialects (SQLite/MySQL/MariaDB) + QMark, +} + +impl SqlCatalog { + /// Create new sql catalog instance + pub async fn new(config: SqlCatalogConfig) -> Result { + install_default_drivers(); + let max_connections: u32 = config + .props + .get("pool.max-connections") + .map(|v| v.parse().unwrap()) + .unwrap_or(MAX_CONNECTIONS); + let idle_timeout: u64 = config + .props + .get("pool.idle-timeout") + .map(|v| v.parse().unwrap()) + .unwrap_or(IDLE_TIMEOUT); + let test_before_acquire: bool = config + .props + .get("pool.test-before-acquire") + .map(|v| v.parse().unwrap()) + .unwrap_or(TEST_BEFORE_ACQUIRE); + + let pool = AnyPoolOptions::new() + .max_connections(max_connections) + .idle_timeout(Duration::from_secs(idle_timeout)) + .test_before_acquire(test_before_acquire) + .connect(&config.uri) + .await + .map_err(from_sqlx_error)?; + + sqlx::query(&format!( + "CREATE TABLE IF NOT EXISTS {CATALOG_TABLE_NAME} ( + {CATALOG_FIELD_CATALOG_NAME} VARCHAR(255) NOT NULL, + {CATALOG_FIELD_TABLE_NAMESPACE} VARCHAR(255) NOT NULL, + {CATALOG_FIELD_TABLE_NAME} VARCHAR(255) NOT NULL, + {CATALOG_FIELD_METADATA_LOCATION_PROP} VARCHAR(1000), + {CATALOG_FIELD_PREVIOUS_METADATA_LOCATION_PROP} VARCHAR(1000), + {CATALOG_FIELD_RECORD_TYPE} VARCHAR(5), + PRIMARY KEY ({CATALOG_FIELD_CATALOG_NAME}, {CATALOG_FIELD_TABLE_NAMESPACE}, {CATALOG_FIELD_TABLE_NAME}))" + )) + .execute(&pool) + .await + .map_err(from_sqlx_error)?; + + sqlx::query(&format!( + "CREATE TABLE IF NOT EXISTS {NAMESPACE_TABLE_NAME} ( + {CATALOG_FIELD_CATALOG_NAME} VARCHAR(255) NOT NULL, + {NAMESPACE_FIELD_NAME} VARCHAR(255) NOT NULL, + {NAMESPACE_FIELD_PROPERTY_KEY} VARCHAR(255), + {NAMESPACE_FIELD_PROPERTY_VALUE} VARCHAR(1000), + PRIMARY KEY ({CATALOG_FIELD_CATALOG_NAME}, {NAMESPACE_FIELD_NAME}, {NAMESPACE_FIELD_PROPERTY_KEY}))" + )) + .execute(&pool) + .await + .map_err(from_sqlx_error)?; + + Ok(SqlCatalog { + _name: config.name.to_owned(), + connection: pool, + _warehouse_location: config.warehouse_location, + _fileio: config.file_io, + sql_bind_style: config.sql_bind_style, + }) + } + + /// SQLX Any does not implement PostgresSQL bindings, so we have to do this. 
+ pub async fn execute_statement( + &self, + query: &String, + args: Vec>, + ) -> Result> { + let query_with_placeholders: Cow = + if self.sql_bind_style == SqlBindStyle::DollarNumeric { + let mut query = query.clone(); + for i in 0..args.len() { + query = query.replacen("?", &format!("${}", i + 1), 1); + } + Cow::Owned(query) + } else { + Cow::Borrowed(query) + }; + + let mut sqlx_query = sqlx::query(&query_with_placeholders); + for arg in args { + sqlx_query = sqlx_query.bind(arg); + } + + sqlx_query + .fetch_all(&self.connection) + .await + .map_err(from_sqlx_error) + } +} + +#[async_trait] +impl Catalog for SqlCatalog { + async fn list_namespaces( + &self, + _parent: Option<&NamespaceIdent>, + ) -> Result> { + todo!() + } + + async fn create_namespace( + &self, + _namespace: &NamespaceIdent, + _properties: HashMap, + ) -> Result { + todo!() + } + + async fn get_namespace(&self, _namespace: &NamespaceIdent) -> Result { + todo!() + } + + async fn namespace_exists(&self, _namespace: &NamespaceIdent) -> Result { + todo!() + } + + async fn update_namespace( + &self, + _namespace: &NamespaceIdent, + _properties: HashMap, + ) -> Result<()> { + todo!() + } + + async fn drop_namespace(&self, _namespace: &NamespaceIdent) -> Result<()> { + todo!() + } + + async fn list_tables(&self, _namespace: &NamespaceIdent) -> Result> { + todo!() + } + + async fn table_exists(&self, _identifier: &TableIdent) -> Result { + todo!() + } + + async fn drop_table(&self, _identifier: &TableIdent) -> Result<()> { + todo!() + } + + async fn load_table(&self, _identifier: &TableIdent) -> Result { + todo!() + } + + async fn create_table( + &self, + _namespace: &NamespaceIdent, + _creation: TableCreation, + ) -> Result
{ + todo!() } + + async fn rename_table(&self, _src: &TableIdent, _dest: &TableIdent) -> Result<()> { + todo!() } + + async fn update_table(&self, _commit: TableCommit) -> Result<Table>
{ + todo!() + } +} + +#[cfg(test)] +mod tests { + use iceberg::io::FileIOBuilder; + use iceberg::Catalog; + use sqlx::migrate::MigrateDatabase; + use tempfile::TempDir; + + use crate::{SqlBindStyle, SqlCatalog, SqlCatalogConfig}; + + fn temp_path() -> String { + let temp_dir = TempDir::new().unwrap(); + temp_dir.path().to_str().unwrap().to_string() + } + + async fn new_sql_catalog(warehouse_location: String) -> impl Catalog { + let sql_lite_uri = format!("sqlite:{}", temp_path()); + sqlx::Sqlite::create_database(&sql_lite_uri).await.unwrap(); + + let config = SqlCatalogConfig::builder() + .uri(sql_lite_uri.to_string()) + .name("iceberg".to_string()) + .warehouse_location(warehouse_location) + .file_io(FileIOBuilder::new_fs_io().build().unwrap()) + .sql_bind_style(SqlBindStyle::QMark) + .build(); + + SqlCatalog::new(config).await.unwrap() + } + + #[tokio::test] + async fn test_initialized() { + let warehouse_loc = temp_path(); + new_sql_catalog(warehouse_loc.clone()).await; + // catalog instantiation should not fail even if tables exist + new_sql_catalog(warehouse_loc.clone()).await; + new_sql_catalog(warehouse_loc.clone()).await; + } +} diff --git a/crates/catalog/sql/src/error.rs b/crates/catalog/sql/src/error.rs new file mode 100644 index 000000000..90bba1f05 --- /dev/null +++ b/crates/catalog/sql/src/error.rs @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +use iceberg::{Error, ErrorKind}; + +/// Format an sqlx error into iceberg error. +pub fn from_sqlx_error(error: sqlx::Error) -> Error { + Error::new( + ErrorKind::Unexpected, + "operation failed for hitting sqlx error".to_string(), + ) + .with_source(error) +} diff --git a/crates/catalog/sql/src/lib.rs b/crates/catalog/sql/src/lib.rs new file mode 100644 index 000000000..6861dab3f --- /dev/null +++ b/crates/catalog/sql/src/lib.rs @@ -0,0 +1,24 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Iceberg sql catalog implementation. 
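The `execute_statement` helper above exists because sqlx's `Any` driver does not handle Postgres-style bindings itself: the catalog writes its queries with `?` placeholders and, when `SqlBindStyle::DollarNumeric` is selected, rewrites them to `$1..$N` before binding the arguments. A small self-contained sketch of that rewrite (the function name is illustrative; the real code does this inline inside `execute_statement`):

```rust
// Hypothetical standalone version of the placeholder rewrite performed by
// SqlCatalog::execute_statement; not part of the crate's public API.
fn rewrite_placeholders(query: &str, n_args: usize, dollar_numeric: bool) -> String {
    if !dollar_numeric {
        return query.to_owned(); // QMark style: leave `?` placeholders as-is
    }
    let mut rewritten = query.to_owned();
    for i in 0..n_args {
        // Replace the first remaining `?` with `$1`, `$2`, ... in order.
        rewritten = rewritten.replacen('?', &format!("${}", i + 1), 1);
    }
    rewritten
}

fn main() {
    let sql = "SELECT * FROM iceberg_tables WHERE catalog_name = ? AND table_name = ?";
    assert_eq!(
        rewrite_placeholders(sql, 2, true),
        "SELECT * FROM iceberg_tables WHERE catalog_name = $1 AND table_name = $2"
    );
    assert_eq!(rewrite_placeholders(sql, 2, false), sql);
}
```

The connection pool itself can be tuned through the `props` map shown above (`pool.max-connections`, `pool.idle-timeout`, `pool.test-before-acquire`), defaulting to 10 connections, a 10-second idle timeout, and health checks enabled.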
+ +#![deny(missing_docs)] + +mod catalog; +mod error; +pub use catalog::*; diff --git a/crates/iceberg/src/arrow/reader.rs b/crates/iceberg/src/arrow/reader.rs index fe8e357c5..58440bfdf 100644 --- a/crates/iceberg/src/arrow/reader.rs +++ b/crates/iceberg/src/arrow/reader.rs @@ -93,43 +93,51 @@ impl ArrowReader { let file_io = self.file_io.clone(); Ok(try_stream! { - while let Some(Ok(task)) = tasks.next().await { - // Collect Parquet column indices from field ids - let mut collector = CollectFieldIdVisitor { - field_ids: HashSet::default(), - }; - if let Some(predicates) = task.predicate() { - visit(&mut collector, predicates)?; - } - - let parquet_file = file_io - .new_input(task.data_file_path())?; - let (parquet_metadata, parquet_reader) = try_join!(parquet_file.metadata(), parquet_file.reader())?; - let arrow_file_reader = ArrowFileReader::new(parquet_metadata, parquet_reader); - - let mut batch_stream_builder = ParquetRecordBatchStreamBuilder::new(arrow_file_reader) - .await?; - - let parquet_schema = batch_stream_builder.parquet_schema(); - let arrow_schema = batch_stream_builder.schema(); - let projection_mask = self.get_arrow_projection_mask(task.project_field_ids(),task.schema(),parquet_schema, arrow_schema)?; - batch_stream_builder = batch_stream_builder.with_projection(projection_mask); - - let parquet_schema = batch_stream_builder.parquet_schema(); - let row_filter = self.get_row_filter(task.predicate(),parquet_schema, &collector)?; - - if let Some(row_filter) = row_filter { - batch_stream_builder = batch_stream_builder.with_row_filter(row_filter); - } - - if let Some(batch_size) = self.batch_size { - batch_stream_builder = batch_stream_builder.with_batch_size(batch_size); - } - - let mut batch_stream = batch_stream_builder.build()?; - - while let Some(batch) = batch_stream.next().await { - yield batch?; + while let Some(task_result) = tasks.next().await { + match task_result { + Ok(task) => { + // Collect Parquet column indices from field ids + let mut collector = CollectFieldIdVisitor { + field_ids: HashSet::default(), + }; + if let Some(predicates) = task.predicate() { + visit(&mut collector, predicates)?; + } + + let parquet_file = file_io + .new_input(task.data_file_path())?; + + let (parquet_metadata, parquet_reader) = try_join!(parquet_file.metadata(), parquet_file.reader())?; + let arrow_file_reader = ArrowFileReader::new(parquet_metadata, parquet_reader); + + let mut batch_stream_builder = ParquetRecordBatchStreamBuilder::new(arrow_file_reader) + .await?; + + let parquet_schema = batch_stream_builder.parquet_schema(); + let arrow_schema = batch_stream_builder.schema(); + let projection_mask = self.get_arrow_projection_mask(task.project_field_ids(),task.schema(),parquet_schema, arrow_schema)?; + batch_stream_builder = batch_stream_builder.with_projection(projection_mask); + + let parquet_schema = batch_stream_builder.parquet_schema(); + let row_filter = self.get_row_filter(task.predicate(),parquet_schema, &collector)?; + + if let Some(row_filter) = row_filter { + batch_stream_builder = batch_stream_builder.with_row_filter(row_filter); + } + + if let Some(batch_size) = self.batch_size { + batch_stream_builder = batch_stream_builder.with_batch_size(batch_size); + } + + let mut batch_stream = batch_stream_builder.build()?; + + while let Some(batch) = batch_stream.next().await { + yield batch?; + } + } + Err(e) => { + Err(e)? 
+ } } } } diff --git a/crates/iceberg/src/arrow/schema.rs b/crates/iceberg/src/arrow/schema.rs index c92706953..a69605e90 100644 --- a/crates/iceberg/src/arrow/schema.rs +++ b/crates/iceberg/src/arrow/schema.rs @@ -649,178 +649,104 @@ mod tests { use super::*; use crate::spec::Schema; + /// Create a simple field with metadata. + fn simple_field(name: &str, ty: DataType, nullable: bool, value: &str) -> Field { + Field::new(name, ty, nullable).with_metadata(HashMap::from([( + PARQUET_FIELD_ID_META_KEY.to_string(), + value.to_string(), + )])) + } + fn arrow_schema_for_arrow_schema_to_schema_test() -> ArrowSchema { let fields = Fields::from(vec![ - Field::new("key", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "17".to_string(), - )])), - Field::new("value", DataType::Utf8, true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "18".to_string(), - )])), + simple_field("key", DataType::Int32, false, "17"), + simple_field("value", DataType::Utf8, true, "18"), ]); let r#struct = DataType::Struct(fields); let map = DataType::Map( - Arc::new( - Field::new(DEFAULT_MAP_FIELD_NAME, r#struct, false).with_metadata(HashMap::from([ - (PARQUET_FIELD_ID_META_KEY.to_string(), "19".to_string()), - ])), - ), + Arc::new(simple_field(DEFAULT_MAP_FIELD_NAME, r#struct, false, "17")), false, ); let fields = Fields::from(vec![ - Field::new("aa", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "18".to_string(), - )])), - Field::new("bb", DataType::Utf8, true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "19".to_string(), - )])), - Field::new( + simple_field("aa", DataType::Int32, false, "18"), + simple_field("bb", DataType::Utf8, true, "19"), + simple_field( "cc", DataType::Timestamp(TimeUnit::Microsecond, None), false, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "20".to_string(), - )])), + "20", + ), ]); let r#struct = DataType::Struct(fields); ArrowSchema::new(vec![ - Field::new("a", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "2".to_string(), - )])), - Field::new("b", DataType::Int64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - Field::new("c", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "3".to_string(), - )])), - Field::new("n", DataType::LargeUtf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "21".to_string(), - )])), - Field::new("d", DataType::Timestamp(TimeUnit::Microsecond, None), true).with_metadata( - HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "4".to_string())]), - ), - Field::new("e", DataType::Boolean, true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "6".to_string(), - )])), - Field::new("f", DataType::Float32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "5".to_string(), - )])), - Field::new("g", DataType::Float64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "7".to_string(), - )])), - Field::new("p", DataType::Decimal128(10, 2), false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "27".to_string(), - )])), - Field::new("h", DataType::Date32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "8".to_string(), - )])), - Field::new("i", 
DataType::Time64(TimeUnit::Microsecond), false).with_metadata( - HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "9".to_string())]), + simple_field("a", DataType::Int32, false, "2"), + simple_field("b", DataType::Int64, false, "1"), + simple_field("c", DataType::Utf8, false, "3"), + simple_field("n", DataType::Utf8, false, "21"), + simple_field( + "d", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + "4", ), - Field::new( + simple_field("e", DataType::Boolean, true, "6"), + simple_field("f", DataType::Float32, false, "5"), + simple_field("g", DataType::Float64, false, "7"), + simple_field("p", DataType::Decimal128(10, 2), false, "27"), + simple_field("h", DataType::Date32, false, "8"), + simple_field("i", DataType::Time64(TimeUnit::Microsecond), false, "9"), + simple_field( "j", DataType::Timestamp(TimeUnit::Microsecond, Some("UTC".into())), false, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "10".to_string(), - )])), - Field::new( + "10", + ), + simple_field( "k", DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), false, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "12".to_string(), - )])), - Field::new("l", DataType::Binary, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "13".to_string(), - )])), - Field::new("o", DataType::LargeBinary, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "22".to_string(), - )])), - Field::new("m", DataType::FixedSizeBinary(10), false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "11".to_string(), - )])), - Field::new( + "12", + ), + simple_field("l", DataType::Binary, false, "13"), + simple_field("o", DataType::LargeBinary, false, "22"), + simple_field("m", DataType::FixedSizeBinary(10), false, "11"), + simple_field( "list", - DataType::List(Arc::new( - Field::new("element", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "15".to_string(), - )])), - )), + DataType::List(Arc::new(simple_field( + "element", + DataType::Int32, + false, + "15", + ))), true, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "14".to_string(), - )])), - Field::new( + "14", + ), + simple_field( "large_list", - DataType::LargeList(Arc::new( - Field::new("element", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "23".to_string(), - )])), - )), + DataType::LargeList(Arc::new(simple_field( + "element", + DataType::Utf8, + false, + "23", + ))), true, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "24".to_string(), - )])), - Field::new( + "24", + ), + simple_field( "fixed_list", DataType::FixedSizeList( - Arc::new( - Field::new("element", DataType::Binary, false).with_metadata( - HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "26".to_string(), - )]), - ), - ), + Arc::new(simple_field("element", DataType::Binary, false, "26")), 10, ), true, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "25".to_string(), - )])), - Field::new("map", map, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "16".to_string(), - )])), - Field::new("struct", r#struct, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "17".to_string(), - )])), + "25", + ), + simple_field("map", map, false, "16"), + simple_field("struct", r#struct, false, "17"), ]) 
} @@ -1017,14 +943,8 @@ mod tests { fn arrow_schema_for_schema_to_arrow_schema_test() -> ArrowSchema { let fields = Fields::from(vec![ - Field::new("key", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "17".to_string(), - )])), - Field::new("value", DataType::Utf8, true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "18".to_string(), - )])), + simple_field("key", DataType::Int32, false, "17"), + simple_field("value", DataType::Utf8, true, "18"), ]); let r#struct = DataType::Struct(fields); @@ -1034,152 +954,86 @@ mod tests { ); let fields = Fields::from(vec![ - Field::new("aa", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "18".to_string(), - )])), - Field::new("bb", DataType::Utf8, true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "19".to_string(), - )])), - Field::new( + simple_field("aa", DataType::Int32, false, "18"), + simple_field("bb", DataType::Utf8, true, "19"), + simple_field( "cc", DataType::Timestamp(TimeUnit::Microsecond, None), false, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "20".to_string(), - )])), + "20", + ), ]); let r#struct = DataType::Struct(fields); ArrowSchema::new(vec![ - Field::new("a", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "2".to_string(), - )])), - Field::new("b", DataType::Int64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "1".to_string(), - )])), - Field::new("c", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "3".to_string(), - )])), - Field::new("n", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "21".to_string(), - )])), - Field::new("d", DataType::Timestamp(TimeUnit::Microsecond, None), true).with_metadata( - HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "4".to_string())]), - ), - Field::new("e", DataType::Boolean, true).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "6".to_string(), - )])), - Field::new("f", DataType::Float32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "5".to_string(), - )])), - Field::new("g", DataType::Float64, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "7".to_string(), - )])), - Field::new("p", DataType::Decimal128(10, 2), false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "27".to_string(), - )])), - Field::new("h", DataType::Date32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "8".to_string(), - )])), - Field::new("i", DataType::Time64(TimeUnit::Microsecond), false).with_metadata( - HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "9".to_string())]), + simple_field("a", DataType::Int32, false, "2"), + simple_field("b", DataType::Int64, false, "1"), + simple_field("c", DataType::Utf8, false, "3"), + simple_field("n", DataType::Utf8, false, "21"), + simple_field( + "d", + DataType::Timestamp(TimeUnit::Microsecond, None), + true, + "4", ), - Field::new( + simple_field("e", DataType::Boolean, true, "6"), + simple_field("f", DataType::Float32, false, "5"), + simple_field("g", DataType::Float64, false, "7"), + simple_field("p", DataType::Decimal128(10, 2), false, "27"), + simple_field("h", DataType::Date32, false, "8"), + simple_field("i", 
DataType::Time64(TimeUnit::Microsecond), false, "9"), + simple_field( "j", DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), false, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "10".to_string(), - )])), - Field::new( + "10", + ), + simple_field( "k", DataType::Timestamp(TimeUnit::Microsecond, Some("+00:00".into())), false, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "12".to_string(), - )])), - Field::new("l", DataType::LargeBinary, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "13".to_string(), - )])), - Field::new("o", DataType::LargeBinary, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "22".to_string(), - )])), - Field::new("m", DataType::FixedSizeBinary(10), false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "11".to_string(), - )])), - Field::new( + "12", + ), + simple_field("l", DataType::LargeBinary, false, "13"), + simple_field("o", DataType::LargeBinary, false, "22"), + simple_field("m", DataType::FixedSizeBinary(10), false, "11"), + simple_field( "list", - DataType::List(Arc::new( - Field::new("element", DataType::Int32, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "15".to_string(), - )])), - )), + DataType::List(Arc::new(simple_field( + "element", + DataType::Int32, + false, + "15", + ))), true, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "14".to_string(), - )])), - Field::new( + "14", + ), + simple_field( "large_list", - DataType::List(Arc::new( - Field::new("element", DataType::Utf8, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "23".to_string(), - )])), - )), + DataType::List(Arc::new(simple_field( + "element", + DataType::Utf8, + false, + "23", + ))), true, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "24".to_string(), - )])), - Field::new( + "24", + ), + simple_field( "fixed_list", - DataType::List(Arc::new( - Field::new("element", DataType::LargeBinary, false).with_metadata( - HashMap::from([(PARQUET_FIELD_ID_META_KEY.to_string(), "26".to_string())]), - ), - )), + DataType::List(Arc::new(simple_field( + "element", + DataType::LargeBinary, + false, + "26", + ))), true, - ) - .with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "25".to_string(), - )])), - Field::new("map", map, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "16".to_string(), - )])), - Field::new("struct", r#struct, false).with_metadata(HashMap::from([( - PARQUET_FIELD_ID_META_KEY.to_string(), - "17".to_string(), - )])), - Field::new("uuid", DataType::FixedSizeBinary(16), false).with_metadata(HashMap::from( - [(PARQUET_FIELD_ID_META_KEY.to_string(), "26".to_string())], - )), + "25", + ), + simple_field("map", map, false, "16"), + simple_field("struct", r#struct, false, "17"), + simple_field("uuid", DataType::FixedSizeBinary(16), false, "26"), ]) } diff --git a/crates/iceberg/src/avro/schema.rs b/crates/iceberg/src/avro/schema.rs index 653f52aec..7f8142745 100644 --- a/crates/iceberg/src/avro/schema.rs +++ b/crates/iceberg/src/avro/schema.rs @@ -19,22 +19,26 @@ use std::collections::BTreeMap; use apache_avro::schema::{ - DecimalSchema, FixedSchema, Name, RecordField as AvroRecordField, RecordFieldOrder, - RecordSchema, UnionSchema, + ArraySchema, DecimalSchema, FixedSchema, MapSchema, Name, RecordField as AvroRecordField, + 
RecordFieldOrder, RecordSchema, UnionSchema, }; use apache_avro::Schema as AvroSchema; use itertools::{Either, Itertools}; use serde_json::{Number, Value}; use crate::spec::{ - visit_schema, ListType, MapType, NestedFieldRef, PrimitiveType, Schema, SchemaVisitor, - StructType, + visit_schema, ListType, MapType, NestedField, NestedFieldRef, PrimitiveType, Schema, + SchemaVisitor, StructType, Type, }; -use crate::{Error, ErrorKind, Result}; +use crate::{ensure_data_valid, Error, ErrorKind, Result}; +const ELEMENT_ID: &str = "element-id"; const FILED_ID_PROP: &str = "field-id"; +const KEY_ID: &str = "key-id"; +const VALUE_ID: &str = "value-id"; const UUID_BYTES: usize = 16; const UUID_LOGICAL_TYPE: &str = "uuid"; +const MAP_LOGICAL_TYPE: &str = "map"; // # TODO: https://github.com/apache/iceberg-rust/issues/86 // This const may better to maintain in avro-rs. const LOGICAL_TYPE: &str = "logicalType"; @@ -124,8 +128,13 @@ impl SchemaVisitor for SchemaToAvroSchema { field_schema = avro_optional(field_schema)?; } - // TODO: We need to add element id prop here, but rust's avro schema doesn't support property except record schema. - Ok(Either::Left(AvroSchema::Array(Box::new(field_schema)))) + Ok(Either::Left(AvroSchema::Array(ArraySchema { + items: Box::new(field_schema), + attributes: BTreeMap::from([( + ELEMENT_ID.to_string(), + Value::Number(Number::from(list.element_field.id)), + )]), + }))) } fn map( @@ -141,7 +150,19 @@ impl SchemaVisitor for SchemaToAvroSchema { } if matches!(key_field_schema, AvroSchema::String) { - Ok(Either::Left(AvroSchema::Map(Box::new(value_field_schema)))) + Ok(Either::Left(AvroSchema::Map(MapSchema { + types: Box::new(value_field_schema), + attributes: BTreeMap::from([ + ( + KEY_ID.to_string(), + Value::Number(Number::from(map.key_field.id)), + ), + ( + VALUE_ID.to_string(), + Value::Number(Number::from(map.value_field.id)), + ), + ]), + }))) } else { // Avro map requires that key must be string type. Here we convert it to array if key is // not string type. @@ -187,7 +208,13 @@ impl SchemaVisitor for SchemaToAvroSchema { fields, )?; - Ok(Either::Left(AvroSchema::Array(item_avro_schema.into()))) + Ok(Either::Left(AvroSchema::Array(ArraySchema { + items: Box::new(item_avro_schema), + attributes: BTreeMap::from([( + LOGICAL_TYPE.to_string(), + Value::String(MAP_LOGICAL_TYPE.to_string()), + )]), + }))) } } @@ -255,6 +282,7 @@ pub(crate) fn avro_fixed_schema(len: usize, logical_type: Option<&str>) -> Resul doc: None, size: len, attributes, + default: None, })) } @@ -274,6 +302,7 @@ pub(crate) fn avro_decimal_schema(precision: usize, scale: usize) -> Result Result { ])?)) } -#[cfg(test)] -mod tests { - use std::fs::read_to_string; - - use apache_avro::schema::{Namespace, UnionSchema}; - use apache_avro::Schema as AvroSchema; - - use super::*; - use crate::ensure_data_valid; - use crate::spec::{ListType, MapType, NestedField, PrimitiveType, Schema, StructType, Type}; - - fn is_avro_optional(avro_schema: &AvroSchema) -> bool { - match avro_schema { - AvroSchema::Union(union) => union.is_nullable(), - _ => false, - } +fn is_avro_optional(avro_schema: &AvroSchema) -> bool { + match avro_schema { + AvroSchema::Union(union) => union.is_nullable(), + _ => false, } +} - /// Post order avro schema visitor. - pub(crate) trait AvroSchemaVisitor { - type T; +/// Post order avro schema visitor. 
+pub(crate) trait AvroSchemaVisitor { + type T; - fn record(&mut self, record: &RecordSchema, fields: Vec) -> Result; + fn record(&mut self, record: &RecordSchema, fields: Vec) -> Result; - fn union(&mut self, union: &UnionSchema, options: Vec) -> Result; + fn union(&mut self, union: &UnionSchema, options: Vec) -> Result; - fn array(&mut self, array: &AvroSchema, item: Self::T) -> Result; - fn map(&mut self, map: &AvroSchema, value: Self::T) -> Result; + fn array(&mut self, array: &ArraySchema, item: Self::T) -> Result; + fn map(&mut self, map: &MapSchema, value: Self::T) -> Result; + // There are two representation for iceberg map in avro: array of key-value records, or map when keys are strings (optional), + // ref: https://iceberg.apache.org/spec/#avro + fn map_array(&mut self, array: &RecordSchema, key: Self::T, value: Self::T) -> Result; - fn primitive(&mut self, schema: &AvroSchema) -> Result; - } + fn primitive(&mut self, schema: &AvroSchema) -> Result; +} - struct AvroSchemaToSchema { - next_id: i32, +/// Visit avro schema in post order visitor. +pub(crate) fn visit(schema: &AvroSchema, visitor: &mut V) -> Result { + match schema { + AvroSchema::Record(record) => { + let field_results = record + .fields + .iter() + .map(|f| visit(&f.schema, visitor)) + .collect::>>()?; + + visitor.record(record, field_results) + } + AvroSchema::Union(union) => { + let option_results = union + .variants() + .iter() + .map(|f| visit(f, visitor)) + .collect::>>()?; + + visitor.union(union, option_results) + } + AvroSchema::Array(item) => { + if let Some(logical_type) = item + .attributes + .get(LOGICAL_TYPE) + .and_then(|v| Value::as_str(v)) + { + if logical_type == MAP_LOGICAL_TYPE { + if let AvroSchema::Record(record_schema) = &*item.items { + let key = visit(&record_schema.fields[0].schema, visitor)?; + let value = visit(&record_schema.fields[1].schema, visitor)?; + return visitor.map_array(record_schema, key, value); + } else { + return Err(Error::new( + ErrorKind::DataInvalid, + "Can't convert avro map schema, item is not a record.", + )); + } + } else { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!( + "Logical type {logical_type} is not support in iceberg array type.", + ), + )); + } + } + let item_result = visit(&item.items, visitor)?; + visitor.array(item, item_result) + } + AvroSchema::Map(inner) => { + let item_result = visit(&inner.types, visitor)?; + visitor.map(inner, item_result) + } + schema => visitor.primitive(schema), } +} - impl AvroSchemaToSchema { - fn next_field_id(&mut self) -> i32 { - self.next_id += 1; - self.next_id - } +struct AvroSchemaToSchema; + +impl AvroSchemaToSchema { + /// A convenient way to get element id(i32) from attributes. + #[inline] + fn get_element_id_from_attributes( + attributes: &BTreeMap, + name: &str, + ) -> Result { + attributes + .get(name) + .ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Can't convert avro array schema, missing element id.", + ) + })? + .as_i64() + .ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Can't convert avro array schema, element id is not a valid i64 number.", + ) + })? 
+ .try_into() + .map_err(|_| { + Error::new( + ErrorKind::DataInvalid, + "Can't convert avro array schema, element id is not a valid i32.", + ) + }) } +} - impl AvroSchemaVisitor for AvroSchemaToSchema { - // Only `AvroSchema::Null` will return `None` - type T = Option; - - fn record( - &mut self, - record: &RecordSchema, - field_types: Vec>, - ) -> Result> { - let mut fields = Vec::with_capacity(field_types.len()); - for (avro_field, typ) in record.fields.iter().zip_eq(field_types) { - let field_id = avro_field - .custom_attributes - .get(FILED_ID_PROP) - .and_then(Value::as_i64) - .ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Can't convert field, missing field id: {avro_field:?}"), - ) - })?; +impl AvroSchemaVisitor for AvroSchemaToSchema { + // Only `AvroSchema::Null` will return `None` + type T = Option; - let optional = is_avro_optional(&avro_field.schema); + fn record( + &mut self, + record: &RecordSchema, + field_types: Vec>, + ) -> Result> { + let mut fields = Vec::with_capacity(field_types.len()); + for (avro_field, typ) in record.fields.iter().zip_eq(field_types) { + let field_id = + Self::get_element_id_from_attributes(&avro_field.custom_attributes, FILED_ID_PROP)?; - let mut field = if optional { - NestedField::optional(field_id as i32, &avro_field.name, typ.unwrap()) - } else { - NestedField::required(field_id as i32, &avro_field.name, typ.unwrap()) - }; + let optional = is_avro_optional(&avro_field.schema); - if let Some(doc) = &avro_field.doc { - field = field.with_doc(doc); - } + let mut field = NestedField::new(field_id, &avro_field.name, typ.unwrap(), !optional); - fields.push(field.into()); + if let Some(doc) = &avro_field.doc { + field = field.with_doc(doc); } - Ok(Some(Type::Struct(StructType::new(fields)))) + fields.push(field.into()); } - fn union( - &mut self, - union: &UnionSchema, - mut options: Vec>, - ) -> Result> { + Ok(Some(Type::Struct(StructType::new(fields)))) + } + + fn union( + &mut self, + union: &UnionSchema, + mut options: Vec>, + ) -> Result> { + ensure_data_valid!( + options.len() <= 2 && !options.is_empty(), + "Can't convert avro union type {:?} to iceberg.", + union + ); + + if options.len() > 1 { ensure_data_valid!( - options.len() <= 2 && !options.is_empty(), + options[0].is_none(), "Can't convert avro union type {:?} to iceberg.", union ); - - if options.len() > 1 { - ensure_data_valid!( - options[0].is_none(), - "Can't convert avro union type {:?} to iceberg.", - union - ); - } - - if options.len() == 1 { - Ok(Some(options.remove(0).unwrap())) - } else { - Ok(Some(options.remove(1).unwrap())) - } } - fn array(&mut self, array: &AvroSchema, item: Option) -> Result { - if let AvroSchema::Array(item_schema) = array { - let element_field = NestedField::list_element( - self.next_field_id(), - item.unwrap(), - !is_avro_optional(item_schema), - ) - .into(); - Ok(Some(Type::List(ListType { element_field }))) - } else { - Err(Error::new( - ErrorKind::Unexpected, - "Expected avro array schema, but {array}", - )) - } + if options.len() == 1 { + Ok(Some(options.remove(0).unwrap())) + } else { + Ok(Some(options.remove(1).unwrap())) } + } - fn map(&mut self, map: &AvroSchema, value: Option) -> Result> { - if let AvroSchema::Map(value_schema) = map { - // Due to avro rust implementation's limitation, we can't store attributes in map schema, - // we will fix it later when it has been resolved. 
- let key_field = NestedField::map_key_element( - self.next_field_id(), - Type::Primitive(PrimitiveType::String), - ); - let value_field = NestedField::map_value_element( - self.next_field_id(), - value.unwrap(), - !is_avro_optional(value_schema), - ); - Ok(Some(Type::Map(MapType { - key_field: key_field.into(), - value_field: value_field.into(), - }))) - } else { - Err(Error::new( - ErrorKind::Unexpected, - "Expected avro map schema, but {map}", - )) - } - } + fn array(&mut self, array: &ArraySchema, item: Option) -> Result { + let element_field_id = Self::get_element_id_from_attributes(&array.attributes, ELEMENT_ID)?; + let element_field = NestedField::list_element( + element_field_id, + item.unwrap(), + !is_avro_optional(&array.items), + ) + .into(); + Ok(Some(Type::List(ListType { element_field }))) + } - fn primitive(&mut self, schema: &AvroSchema) -> Result> { - let typ = match schema { - AvroSchema::Decimal(decimal) => { - Type::decimal(decimal.precision as u32, decimal.scale as u32)? - } - AvroSchema::Date => Type::Primitive(PrimitiveType::Date), - AvroSchema::TimeMicros => Type::Primitive(PrimitiveType::Time), - AvroSchema::TimestampMicros => Type::Primitive(PrimitiveType::Timestamp), - AvroSchema::Boolean => Type::Primitive(PrimitiveType::Boolean), - AvroSchema::Int => Type::Primitive(PrimitiveType::Int), - AvroSchema::Long => Type::Primitive(PrimitiveType::Long), - AvroSchema::Float => Type::Primitive(PrimitiveType::Float), - AvroSchema::Double => Type::Primitive(PrimitiveType::Double), - AvroSchema::String | AvroSchema::Enum(_) => Type::Primitive(PrimitiveType::String), - AvroSchema::Fixed(fixed) => { - if let Some(logical_type) = fixed.attributes.get(LOGICAL_TYPE) { - let logical_type = logical_type.as_str().ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - "logicalType in attributes of avro schema is not a string type", - ) - })?; - match logical_type { - UUID_LOGICAL_TYPE => Type::Primitive(PrimitiveType::Uuid), - ty => { - return Err(Error::new( - ErrorKind::FeatureUnsupported, - format!( + fn map(&mut self, map: &MapSchema, value: Option) -> Result> { + let key_field_id = Self::get_element_id_from_attributes(&map.attributes, KEY_ID)?; + let key_field = + NestedField::map_key_element(key_field_id, Type::Primitive(PrimitiveType::String)); + let value_field_id = Self::get_element_id_from_attributes(&map.attributes, VALUE_ID)?; + let value_field = NestedField::map_value_element( + value_field_id, + value.unwrap(), + !is_avro_optional(&map.types), + ); + Ok(Some(Type::Map(MapType { + key_field: key_field.into(), + value_field: value_field.into(), + }))) + } + + fn primitive(&mut self, schema: &AvroSchema) -> Result> { + let typ = match schema { + AvroSchema::Decimal(decimal) => { + Type::decimal(decimal.precision as u32, decimal.scale as u32)? 
+ } + AvroSchema::Date => Type::Primitive(PrimitiveType::Date), + AvroSchema::TimeMicros => Type::Primitive(PrimitiveType::Time), + AvroSchema::TimestampMicros => Type::Primitive(PrimitiveType::Timestamp), + AvroSchema::Boolean => Type::Primitive(PrimitiveType::Boolean), + AvroSchema::Int => Type::Primitive(PrimitiveType::Int), + AvroSchema::Long => Type::Primitive(PrimitiveType::Long), + AvroSchema::Float => Type::Primitive(PrimitiveType::Float), + AvroSchema::Double => Type::Primitive(PrimitiveType::Double), + AvroSchema::String | AvroSchema::Enum(_) => Type::Primitive(PrimitiveType::String), + AvroSchema::Fixed(fixed) => { + if let Some(logical_type) = fixed.attributes.get(LOGICAL_TYPE) { + let logical_type = logical_type.as_str().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "logicalType in attributes of avro schema is not a string type", + ) + })?; + match logical_type { + UUID_LOGICAL_TYPE => Type::Primitive(PrimitiveType::Uuid), + ty => { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + format!( "Logical type {ty} is not support in iceberg primitive type.", ), - )) - } + )) } - } else { - Type::Primitive(PrimitiveType::Fixed(fixed.size as u64)) } + } else { + Type::Primitive(PrimitiveType::Fixed(fixed.size as u64)) } - AvroSchema::Bytes => Type::Primitive(PrimitiveType::Binary), - AvroSchema::Null => return Ok(None), - _ => { - return Err(Error::new( - ErrorKind::Unexpected, - "Unable to convert avro {schema} to iceberg primitive type.", - )) - } - }; - - Ok(Some(typ)) - } - } - - /// Visit avro schema in post order visitor. - pub(crate) fn visit( - schema: &AvroSchema, - visitor: &mut V, - ) -> Result { - match schema { - AvroSchema::Record(record) => { - let field_results = record - .fields - .iter() - .map(|f| visit(&f.schema, visitor)) - .collect::>>()?; - - visitor.record(record, field_results) - } - AvroSchema::Union(union) => { - let option_results = union - .variants() - .iter() - .map(|f| visit(f, visitor)) - .collect::>>()?; - - visitor.union(union, option_results) } - AvroSchema::Array(item) => { - let item_result = visit(item, visitor)?; - visitor.array(schema, item_result) - } - AvroSchema::Map(inner) => { - let item_result = visit(inner, visitor)?; - visitor.map(schema, item_result) - } - schema => visitor.primitive(schema), - } - } - /// Converts avro schema to iceberg schema. 
- pub(crate) fn avro_schema_to_schema(avro_schema: &AvroSchema) -> Result { - if let AvroSchema::Record(_) = avro_schema { - let mut converter = AvroSchemaToSchema { next_id: 0 }; - let typ = - visit(avro_schema, &mut converter)?.expect("Iceberg schema should not be none."); - if let Type::Struct(s) = typ { - Schema::builder() - .with_fields(s.fields().iter().cloned()) - .build() - } else { - Err(Error::new( + AvroSchema::Bytes => Type::Primitive(PrimitiveType::Binary), + AvroSchema::Null => return Ok(None), + _ => { + return Err(Error::new( ErrorKind::Unexpected, - format!("Expected to convert avro record schema to struct type, but {typ}"), + "Unable to convert avro {schema} to iceberg primitive type.", )) } + }; + + Ok(Some(typ)) + } + + fn map_array( + &mut self, + array: &RecordSchema, + key: Option, + value: Option, + ) -> Result { + let key = key.ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Can't convert avro map schema, missing key schema.", + ) + })?; + let value = value.ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + "Can't convert avro map schema, missing value schema.", + ) + })?; + let key_id = Self::get_element_id_from_attributes( + &array.fields[0].custom_attributes, + FILED_ID_PROP, + )?; + let value_id = Self::get_element_id_from_attributes( + &array.fields[1].custom_attributes, + FILED_ID_PROP, + )?; + let key_field = NestedField::map_key_element(key_id, key); + let value_field = NestedField::map_value_element( + value_id, + value, + !is_avro_optional(&array.fields[1].schema), + ); + Ok(Some(Type::Map(MapType { + key_field: key_field.into(), + value_field: value_field.into(), + }))) + } +} + +// # TODO +// Fix this when we have used `avro_schema_to_schema` inner. +#[allow(unused)] +/// Converts avro schema to iceberg schema. +pub(crate) fn avro_schema_to_schema(avro_schema: &AvroSchema) -> Result { + if let AvroSchema::Record(_) = avro_schema { + let mut converter = AvroSchemaToSchema; + let typ = visit(avro_schema, &mut converter)?.expect("Iceberg schema should not be none."); + if let Type::Struct(s) = typ { + Schema::builder() + .with_fields(s.fields().iter().cloned()) + .build() } else { Err(Error::new( - ErrorKind::DataInvalid, - "Can't convert non record avro schema to iceberg schema: {avro_schema}", + ErrorKind::Unexpected, + format!("Expected to convert avro record schema to struct type, but {typ}"), )) } + } else { + Err(Error::new( + ErrorKind::DataInvalid, + "Can't convert non record avro schema to iceberg schema: {avro_schema}", + )) } +} + +#[cfg(test)] +mod tests { + use std::fs::read_to_string; + use std::sync::Arc; + + use apache_avro::schema::{Namespace, UnionSchema}; + use apache_avro::Schema as AvroSchema; + + use super::*; + use crate::avro::schema::AvroSchemaToSchema; + use crate::spec::{ListType, MapType, NestedField, PrimitiveType, Schema, StructType, Type}; fn read_test_data_file_to_avro_schema(filename: &str) -> AvroSchema { let input = read_to_string(format!( @@ -557,22 +649,27 @@ mod tests { AvroSchema::parse_str(input.as_str()).unwrap() } - fn check_schema_conversion( - avro_schema: AvroSchema, - expected_iceberg_schema: Schema, - check_avro_to_iceberg: bool, - ) { - if check_avro_to_iceberg { - let converted_iceberg_schema = avro_schema_to_schema(&avro_schema).unwrap(); - assert_eq!(expected_iceberg_schema, converted_iceberg_schema); - } + /// Help function to check schema conversion between avro and iceberg: + /// 1. avro to iceberg + /// 2. iceberg to avro + /// 3. 
iceberg to avro to iceberg back + fn check_schema_conversion(avro_schema: AvroSchema, iceberg_schema: Schema) { + // 1. avro to iceberg + let converted_iceberg_schema = avro_schema_to_schema(&avro_schema).unwrap(); + assert_eq!(iceberg_schema, converted_iceberg_schema); + // 2. iceberg to avro let converted_avro_schema = schema_to_avro_schema( avro_schema.name().unwrap().fullname(Namespace::None), - &expected_iceberg_schema, + &iceberg_schema, ) .unwrap(); assert_eq!(avro_schema, converted_avro_schema); + + // 3.iceberg to avro to iceberg back + let converted_avro_converted_iceberg_schema = + avro_schema_to_schema(&converted_avro_schema).unwrap(); + assert_eq!(iceberg_schema, converted_avro_converted_iceberg_schema); } #[test] @@ -651,7 +748,6 @@ mod tests { check_schema_conversion( read_test_data_file_to_avro_schema("avro_schema_manifest_file_v1.json"), iceberg_schema, - false, ); } @@ -700,7 +796,7 @@ mod tests { .unwrap() }; - check_schema_conversion(avro_schema, iceberg_schema, false); + check_schema_conversion(avro_schema, iceberg_schema); } #[test] @@ -749,7 +845,7 @@ mod tests { .unwrap() }; - check_schema_conversion(avro_schema, iceberg_schema, false); + check_schema_conversion(avro_schema, iceberg_schema); } #[test] @@ -826,7 +922,144 @@ mod tests { .unwrap() }; - check_schema_conversion(avro_schema, iceberg_schema, false); + check_schema_conversion(avro_schema, iceberg_schema); + } + + #[test] + fn test_schema_with_array_map() { + let avro_schema = { + AvroSchema::parse_str( + r#" +{ + "type": "record", + "name": "avro_schema", + "fields": [ + { + "name": "optional", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "k102_v103", + "fields": [ + { + "name": "key", + "type": "boolean", + "field-id": 102 + }, + { + "name": "value", + "type": ["null", "boolean"], + "field-id": 103 + } + ] + }, + "default": [], + "element-id": 101, + "logicalType": "map" + }, + "field-id": 100 + },{ + "name": "required", + "type": { + "type": "array", + "items": { + "type": "record", + "name": "k105_v106", + "fields": [ + { + "name": "key", + "type": "boolean", + "field-id": 105 + }, + { + "name": "value", + "type": "boolean", + "field-id": 106 + } + ] + }, + "default": [], + "logicalType": "map" + }, + "field-id": 104 + }, { + "name": "string_map", + "type": { + "type": "map", + "values": ["null", "long"], + "key-id": 108, + "value-id": 109 + }, + "field-id": 107 + } + ] +} +"#, + ) + .unwrap() + }; + + let iceberg_schema = { + Schema::builder() + .with_fields(vec![ + Arc::new(NestedField::required( + 100, + "optional", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 102, + PrimitiveType::Boolean.into(), + ) + .into(), + value_field: NestedField::map_value_element( + 103, + PrimitiveType::Boolean.into(), + false, + ) + .into(), + }), + )), + Arc::new(NestedField::required( + 104, + "required", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 105, + PrimitiveType::Boolean.into(), + ) + .into(), + value_field: NestedField::map_value_element( + 106, + PrimitiveType::Boolean.into(), + true, + ) + .into(), + }), + )), + Arc::new(NestedField::required( + 107, + "string_map", + Type::Map(MapType { + key_field: NestedField::map_key_element( + 108, + PrimitiveType::String.into(), + ) + .into(), + value_field: NestedField::map_value_element( + 109, + PrimitiveType::Long.into(), + false, + ) + .into(), + }), + )), + ]) + .build() + .unwrap() + }; + + check_schema_conversion(avro_schema, iceberg_schema); } #[test] @@ -838,7 +1071,7 @@ mod tests { ]) 
.unwrap(); - let mut converter = AvroSchemaToSchema { next_id: 0 }; + let mut converter = AvroSchemaToSchema; let options = avro_schema .variants() @@ -850,7 +1083,7 @@ mod tests { #[test] fn test_string_type() { - let mut converter = AvroSchemaToSchema { next_id: 0 }; + let mut converter = AvroSchemaToSchema; let avro_schema = AvroSchema::String; assert_eq!( @@ -875,10 +1108,14 @@ mod tests { .unwrap() }; - let mut converter = AvroSchemaToSchema { next_id: 0 }; + let AvroSchema::Map(avro_schema) = avro_schema else { + unreachable!() + }; + + let mut converter = AvroSchemaToSchema; let iceberg_type = Type::Map(MapType { - key_field: NestedField::map_key_element(1, PrimitiveType::String.into()).into(), - value_field: NestedField::map_value_element(2, PrimitiveType::Long.into(), false) + key_field: NestedField::map_key_element(101, PrimitiveType::String.into()).into(), + value_field: NestedField::map_value_element(102, PrimitiveType::Long.into(), false) .into(), }); @@ -902,7 +1139,7 @@ mod tests { .unwrap() }; - let mut converter = AvroSchemaToSchema { next_id: 0 }; + let mut converter = AvroSchemaToSchema; let iceberg_type = Type::from(PrimitiveType::Fixed(22)); @@ -914,7 +1151,7 @@ mod tests { #[test] fn test_unknown_primitive() { - let mut converter = AvroSchemaToSchema { next_id: 0 }; + let mut converter = AvroSchemaToSchema; assert!(converter.primitive(&AvroSchema::Duration).is_err()); } @@ -953,7 +1190,7 @@ mod tests { .unwrap() }; - let mut converter = AvroSchemaToSchema { next_id: 0 }; + let mut converter = AvroSchemaToSchema; assert_eq!( Type::decimal(25, 19).unwrap(), @@ -963,7 +1200,7 @@ mod tests { #[test] fn test_date_type() { - let mut converter = AvroSchemaToSchema { next_id: 0 }; + let mut converter = AvroSchemaToSchema; assert_eq!( Type::from(PrimitiveType::Date), diff --git a/crates/iceberg/src/catalog/mod.rs b/crates/iceberg/src/catalog/mod.rs index 473832e88..aa2311b4a 100644 --- a/crates/iceberg/src/catalog/mod.rs +++ b/crates/iceberg/src/catalog/mod.rs @@ -29,7 +29,7 @@ use uuid::Uuid; use crate::spec::{ FormatVersion, Schema, Snapshot, SnapshotReference, SortOrder, TableMetadataBuilder, - UnboundPartitionSpec, + UnboundPartitionSpec, ViewRepresentations, }; use crate::table::Table; use crate::{Error, ErrorKind, Result}; @@ -439,6 +439,31 @@ impl TableUpdate { } } +/// ViewCreation represents the creation of a view in the catalog. +#[derive(Debug, TypedBuilder)] +pub struct ViewCreation { + /// The name of the view. + pub name: String, + /// The view's base location; used to create metadata file locations + pub location: String, + /// Representations for the view. + pub representations: ViewRepresentations, + /// The schema of the view. + pub schema: Schema, + /// The properties of the view. 
+ #[builder(default)] + pub properties: HashMap, + /// The default namespace to use when a reference in the SELECT is a single identifier + pub default_namespace: NamespaceIdent, + /// Default catalog to use when a reference in the SELECT does not contain a catalog + #[builder(default)] + pub default_catalog: Option, + /// A string to string map of summary metadata about the version + /// Typical keys are "engine-name" and "engine-version" + #[builder(default)] + pub summary: HashMap, +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/crates/iceberg/src/error.rs b/crates/iceberg/src/error.rs index 6270b4347..2b69b4706 100644 --- a/crates/iceberg/src/error.rs +++ b/crates/iceberg/src/error.rs @@ -19,6 +19,8 @@ use std::backtrace::{Backtrace, BacktraceStatus}; use std::fmt; use std::fmt::{Debug, Display, Formatter}; +use chrono::{DateTime, TimeZone as _, Utc}; + /// Result that is a wrapper of `Result` pub type Result = std::result::Result; @@ -329,8 +331,36 @@ define_from_err!( "Failed to read a Parquet file" ); +define_from_err!( + futures::channel::mpsc::SendError, + ErrorKind::Unexpected, + "Failed to send a message to a channel" +); + define_from_err!(std::io::Error, ErrorKind::Unexpected, "IO Operation failed"); +/// Converts a timestamp in milliseconds to `DateTime`, handling errors. +/// +/// # Arguments +/// +/// * `timestamp_ms` - The timestamp in milliseconds to convert. +/// +/// # Returns +/// +/// This function returns a `Result, Error>` which is `Ok` with the `DateTime` if the conversion is successful, +/// or an `Err` with an appropriate error if the timestamp is ambiguous or invalid. +pub(crate) fn timestamp_ms_to_utc(timestamp_ms: i64) -> Result> { + match Utc.timestamp_millis_opt(timestamp_ms) { + chrono::LocalResult::Single(t) => Ok(t), + chrono::LocalResult::Ambiguous(_, _) => Err(Error::new( + ErrorKind::Unexpected, + "Ambiguous timestamp with two possible results", + )), + chrono::LocalResult::None => Err(Error::new(ErrorKind::DataInvalid, "Invalid timestamp")), + } + .map_err(|e| e.with_context("timestamp value", timestamp_ms.to_string())) +} + /// Helper macro to check arguments. /// /// diff --git a/crates/iceberg/src/io/storage.rs b/crates/iceberg/src/io/storage.rs index 7383b8f1b..870e61ec6 100644 --- a/crates/iceberg/src/io/storage.rs +++ b/crates/iceberg/src/io/storage.rs @@ -15,30 +15,28 @@ // specific language governing permissions and limitations // under the License. +use std::sync::Arc; + +#[cfg(feature = "storage-s3")] +use opendal::services::S3Config; use opendal::{Operator, Scheme}; use super::FileIOBuilder; -#[cfg(feature = "storage-fs")] -use super::FsConfig; -#[cfg(feature = "storage-memory")] -use super::MemoryConfig; -#[cfg(feature = "storage-s3")] -use super::S3Config; use crate::{Error, ErrorKind}; /// The storage carries all supported storage services in iceberg #[derive(Debug)] pub(crate) enum Storage { #[cfg(feature = "storage-memory")] - Memory { config: MemoryConfig }, + Memory, #[cfg(feature = "storage-fs")] - LocalFs { config: FsConfig }, + LocalFs, #[cfg(feature = "storage-s3")] S3 { /// s3 storage could have `s3://` and `s3a://`. /// Storing the scheme string here to return the correct path. 
scheme_str: String, - config: S3Config, + config: Arc, }, } @@ -50,17 +48,13 @@ impl Storage { match scheme { #[cfg(feature = "storage-memory")] - Scheme::Memory => Ok(Self::Memory { - config: MemoryConfig::new(props), - }), + Scheme::Memory => Ok(Self::Memory), #[cfg(feature = "storage-fs")] - Scheme::Fs => Ok(Self::LocalFs { - config: FsConfig::new(props), - }), + Scheme::Fs => Ok(Self::LocalFs), #[cfg(feature = "storage-s3")] Scheme::S3 => Ok(Self::S3 { scheme_str, - config: S3Config::new(props), + config: super::s3_config_parse(props)?.into(), }), _ => Err(Error::new( ErrorKind::FeatureUnsupported, @@ -88,8 +82,8 @@ impl Storage { let path = path.as_ref(); match self { #[cfg(feature = "storage-memory")] - Storage::Memory { config } => { - let op = config.build(path)?; + Storage::Memory => { + let op = super::memory_config_build()?; if let Some(stripped) = path.strip_prefix("memory:/") { Ok((op, stripped)) @@ -98,8 +92,8 @@ impl Storage { } } #[cfg(feature = "storage-fs")] - Storage::LocalFs { config } => { - let op = config.build(path)?; + Storage::LocalFs => { + let op = super::fs_config_build()?; if let Some(stripped) = path.strip_prefix("file:/") { Ok((op, stripped)) @@ -109,7 +103,7 @@ impl Storage { } #[cfg(feature = "storage-s3")] Storage::S3 { scheme_str, config } => { - let op = config.build(path)?; + let op = super::s3_config_build(config, path)?; let op_info = op.info(); // Check prefix of s3 path. @@ -141,14 +135,3 @@ impl Storage { } } } - -/// redact_secret will redact the secret part of the string. -#[inline] -pub(crate) fn redact_secret(s: &str) -> String { - let len = s.len(); - if len <= 6 { - return "***".to_string(); - } - - format!("{}***{}", &s[0..3], &s[len - 3..len]) -} diff --git a/crates/iceberg/src/io/storage_fs.rs b/crates/iceberg/src/io/storage_fs.rs index 0dc5b9dea..ff38d7613 100644 --- a/crates/iceberg/src/io/storage_fs.rs +++ b/crates/iceberg/src/io/storage_fs.rs @@ -15,37 +15,15 @@ // specific language governing permissions and limitations // under the License. -use std::collections::HashMap; -use std::fmt::{Debug, Formatter}; - -use opendal::{Operator, Scheme}; +use opendal::services::FsConfig; +use opendal::Operator; use crate::Result; -/// # TODO -/// -/// opendal has a plan to introduce native config support. -/// We manually parse the config here and those code will be finally removed. -#[derive(Default, Clone)] -pub(crate) struct FsConfig {} - -impl Debug for FsConfig { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("FsConfig").finish() - } -} - -impl FsConfig { - /// Decode from iceberg props. - pub fn new(_: HashMap) -> Self { - Self::default() - } +/// Build new opendal operator from give path. +pub(crate) fn fs_config_build() -> Result { + let mut cfg = FsConfig::default(); + cfg.root = Some("/".to_string()); - /// Build new opendal operator from give path. - /// - /// fs always build from `/` - pub fn build(&self, _: &str) -> Result { - let m = HashMap::from_iter([("root".to_string(), "/".to_string())]); - Ok(Operator::via_map(Scheme::Fs, m)?) - } + Ok(Operator::from_config(cfg)?.finish()) } diff --git a/crates/iceberg/src/io/storage_memory.rs b/crates/iceberg/src/io/storage_memory.rs index ed0cfadfe..ffc082d83 100644 --- a/crates/iceberg/src/io/storage_memory.rs +++ b/crates/iceberg/src/io/storage_memory.rs @@ -15,31 +15,11 @@ // specific language governing permissions and limitations // under the License. 
-use std::collections::HashMap; -use std::fmt::{Debug, Formatter}; - -use opendal::{Operator, Scheme}; +use opendal::services::MemoryConfig; +use opendal::Operator; use crate::Result; -#[derive(Default, Clone)] -pub(crate) struct MemoryConfig {} - -impl Debug for MemoryConfig { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("MemoryConfig").finish() - } -} - -impl MemoryConfig { - /// Decode from iceberg props. - pub fn new(_: HashMap) -> Self { - Self::default() - } - - /// Build new opendal operator from given path. - pub fn build(&self, _: &str) -> Result { - let m = HashMap::new(); - Ok(Operator::via_map(Scheme::Memory, m)?) - } +pub(crate) fn memory_config_build() -> Result { + Ok(Operator::from_config(MemoryConfig::default())?.finish()) } diff --git a/crates/iceberg/src/io/storage_s3.rs b/crates/iceberg/src/io/storage_s3.rs index df843188f..6e92b902a 100644 --- a/crates/iceberg/src/io/storage_s3.rs +++ b/crates/iceberg/src/io/storage_s3.rs @@ -16,12 +16,11 @@ // under the License. use std::collections::HashMap; -use std::fmt::{Debug, Formatter}; -use opendal::{Operator, Scheme}; +use opendal::services::S3Config; +use opendal::Operator; use url::Url; -use crate::io::storage::redact_secret; use crate::{Error, ErrorKind, Result}; /// Following are arguments for [s3 file io](https://py.iceberg.apache.org/configuration/#s3). @@ -33,70 +32,84 @@ pub const S3_ACCESS_KEY_ID: &str = "s3.access-key-id"; pub const S3_SECRET_ACCESS_KEY: &str = "s3.secret-access-key"; /// S3 region. pub const S3_REGION: &str = "s3.region"; +/// S3 Path Style Access. +pub const S3_PATH_STYLE_ACCESS: &str = "s3.path-style-access"; +/// S3 Server Side Encryption Type. +pub const S3_SSE_TYPE: &str = "s3.sse.type"; +/// S3 Server Side Encryption Key. +/// If S3 encryption type is kms, input is a KMS Key ID. +/// In case this property is not set, default key "aws/s3" is used. +/// If encryption type is custom, input is a custom base-64 AES256 symmetric key. +pub const S3_SSE_KEY: &str = "s3.sse.key"; +/// S3 Server Side Encryption MD5. +pub const S3_SSE_MD5: &str = "s3.sse.md5"; -/// # TODO -/// -/// opendal has a plan to introduce native config support. -/// We manually parse the config here and those code will be finally removed. -#[derive(Default, Clone)] -pub(crate) struct S3Config { - pub endpoint: String, - pub access_key_id: String, - pub secret_access_key: String, - pub region: String, -} +/// Parse iceberg props to s3 config. +pub(crate) fn s3_config_parse(mut m: HashMap) -> Result { + let mut cfg = S3Config::default(); + if let Some(endpoint) = m.remove(S3_ENDPOINT) { + cfg.endpoint = Some(endpoint); + }; + if let Some(access_key_id) = m.remove(S3_ACCESS_KEY_ID) { + cfg.access_key_id = Some(access_key_id); + }; + if let Some(secret_access_key) = m.remove(S3_SECRET_ACCESS_KEY) { + cfg.secret_access_key = Some(secret_access_key); + }; + if let Some(region) = m.remove(S3_REGION) { + cfg.region = Some(region); + }; + if let Some(path_style_access) = m.remove(S3_PATH_STYLE_ACCESS) { + if ["true", "True", "1"].contains(&path_style_access.as_str()) { + cfg.enable_virtual_host_style = true; + } + }; + let s3_sse_key = m.remove(S3_SSE_KEY); + if let Some(sse_type) = m.remove(S3_SSE_TYPE) { + match sse_type.to_lowercase().as_str() { + // No Server Side Encryption + "none" => {} + // S3 SSE-S3 encryption (S3 managed keys). 
https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingServerSideEncryption.html + "s3" => { + cfg.server_side_encryption = Some("AES256".to_string()); + } + // S3 SSE KMS, either using default or custom KMS key. https://docs.aws.amazon.com/AmazonS3/latest/dev/UsingKMSEncryption.html + "kms" => { + cfg.server_side_encryption = Some("aws:kms".to_string()); + cfg.server_side_encryption_aws_kms_key_id = s3_sse_key; + } + // S3 SSE-C, using customer managed keys. https://docs.aws.amazon.com/AmazonS3/latest/dev/ServerSideEncryptionCustomerKeys.html + "custom" => { + cfg.server_side_encryption_customer_algorithm = Some("AES256".to_string()); + cfg.server_side_encryption_customer_key = s3_sse_key; + cfg.server_side_encryption_customer_key_md5 = m.remove(S3_SSE_MD5); + } + _ => { + return Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Invalid {}: {}. Expected one of (custom, kms, s3, none)", + S3_SSE_TYPE, sse_type + ), + )); + } + } + }; -impl Debug for S3Config { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("S3Config") - .field("endpoint", &self.endpoint) - .field("region", &self.region) - .field("access_key_id", &redact_secret(&self.access_key_id)) - .field("secret_access_key", &redact_secret(&self.secret_access_key)) - .finish() - } + Ok(cfg) } -impl S3Config { - /// Decode from iceberg props. - pub fn new(m: HashMap) -> Self { - let mut cfg = Self::default(); - if let Some(endpoint) = m.get(S3_ENDPOINT) { - cfg.endpoint = endpoint.clone(); - }; - if let Some(access_key_id) = m.get(S3_ACCESS_KEY_ID) { - cfg.access_key_id = access_key_id.clone(); - }; - if let Some(secret_access_key) = m.get(S3_SECRET_ACCESS_KEY) { - cfg.secret_access_key = secret_access_key.clone(); - }; - if let Some(region) = m.get(S3_REGION) { - cfg.region = region.clone(); - }; - - cfg - } - - /// Build new opendal operator from give path. - pub fn build(&self, path: &str) -> Result { - let url = Url::parse(path)?; - let bucket = url.host_str().ok_or_else(|| { - Error::new( - ErrorKind::DataInvalid, - format!("Invalid s3 url: {}, missing bucket", path), - ) - })?; - - let mut m = HashMap::with_capacity(5); - m.insert("bucket".to_string(), bucket.to_string()); - m.insert("endpoint".to_string(), self.endpoint.clone()); - m.insert("access_key_id".to_string(), self.access_key_id.clone()); - m.insert( - "secret_access_key".to_string(), - self.secret_access_key.clone(), - ); - m.insert("region".to_string(), self.region.clone()); +/// Build new opendal operator from give path. +pub(crate) fn s3_config_build(cfg: &S3Config, path: &str) -> Result { + let url = Url::parse(path)?; + let bucket = url.host_str().ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!("Invalid s3 url: {}, missing bucket", path), + ) + })?; - Ok(Operator::via_map(Scheme::S3, m)?) - } + let mut cfg = cfg.clone(); + cfg.bucket = bucket.to_string(); + Ok(Operator::from_config(cfg)?.finish()) } diff --git a/crates/iceberg/src/lib.rs b/crates/iceberg/src/lib.rs index 35d59323c..9682fa187 100644 --- a/crates/iceberg/src/lib.rs +++ b/crates/iceberg/src/lib.rs @@ -29,7 +29,7 @@ mod catalog; pub use catalog::{ Catalog, Namespace, NamespaceIdent, TableCommit, TableCreation, TableIdent, TableRequirement, - TableUpdate, + TableUpdate, ViewCreation, }; pub mod table; diff --git a/crates/iceberg/src/scan.rs b/crates/iceberg/src/scan.rs index 1f5602917..91b30aeea 100644 --- a/crates/iceberg/src/scan.rs +++ b/crates/iceberg/src/scan.rs @@ -17,14 +17,13 @@ //! Table scan api. 
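(Aside, before the scan.rs changes that follow: the S3 properties handled by s3_config_parse above -- s3.endpoint, s3.access-key-id, s3.secret-access-key, s3.region, s3.path-style-access and the s3.sse.* keys -- are normally supplied by users as FileIO properties. A hedged usage sketch is shown below; the FileIOBuilder method names are assumed from the crate's public io API and may differ in detail, and the endpoint, credentials and SSE values are placeholders.)

use iceberg::io::{FileIO, FileIOBuilder};
use iceberg::Result;

// Illustrative only: property values here are placeholders, not working credentials.
fn build_s3_file_io() -> Result<FileIO> {
    FileIOBuilder::new("s3")
        .with_prop("s3.endpoint", "http://127.0.0.1:9000")
        .with_prop("s3.access-key-id", "admin")
        .with_prop("s3.secret-access-key", "password")
        .with_prop("s3.region", "us-east-1")
        .with_prop("s3.sse.type", "none")
        .build()
}

(Each recognized key is removed from the property map by s3_config_parse and mapped onto the corresponding opendal S3Config field; keys it does not recognize are left untouched by that function.)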
-use std::collections::hash_map::Entry; use std::collections::HashMap; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; use arrow_array::RecordBatch; -use async_stream::try_stream; +use futures::channel::mpsc::{channel, Sender}; use futures::stream::BoxStream; -use futures::StreamExt; +use futures::{SinkExt, StreamExt, TryFutureExt, TryStreamExt}; use serde::{Deserialize, Serialize}; use crate::arrow::ArrowReaderBuilder; @@ -34,9 +33,10 @@ use crate::expr::visitors::inclusive_projection::InclusiveProjection; use crate::expr::visitors::manifest_evaluator::ManifestEvaluator; use crate::expr::{Bind, BoundPredicate, Predicate}; use crate::io::FileIO; +use crate::runtime::spawn; use crate::spec::{ - DataContentType, ManifestContentType, ManifestFile, Schema, SchemaRef, SnapshotRef, - TableMetadataRef, + DataContentType, ManifestContentType, ManifestEntryRef, ManifestFile, ManifestList, Schema, + SchemaRef, SnapshotRef, TableMetadataRef, }; use crate::table::Table; use crate::{Error, ErrorKind, Result}; @@ -55,10 +55,16 @@ pub struct TableScanBuilder<'a> { batch_size: Option, case_sensitive: bool, filter: Option, + concurrency_limit_manifest_files: usize, + concurrency_limit_manifest_entries: usize, } impl<'a> TableScanBuilder<'a> { pub(crate) fn new(table: &'a Table) -> Self { + let num_cpus = std::thread::available_parallelism() + .expect("failed to get number of CPUs") + .get(); + Self { table, column_names: vec![], @@ -66,6 +72,8 @@ impl<'a> TableScanBuilder<'a> { batch_size: None, case_sensitive: true, filter: None, + concurrency_limit_manifest_files: num_cpus, + concurrency_limit_manifest_entries: num_cpus, } } @@ -111,6 +119,26 @@ impl<'a> TableScanBuilder<'a> { self } + /// Sets the concurrency limit for both manifest files and manifest + /// entries for this scan + pub fn with_concurrency_limit(mut self, limit: usize) -> Self { + self.concurrency_limit_manifest_files = limit; + self.concurrency_limit_manifest_entries = limit; + self + } + + /// Sets the manifest file concurrency limit for this scan + pub fn with_manifest_file_concurrency_limit(mut self, limit: usize) -> Self { + self.concurrency_limit_manifest_files = limit; + self + } + + /// Sets the manifest entry concurrency limit for this scan + pub fn with_manifest_entry_concurrency_limit(mut self, limit: usize) -> Self { + self.concurrency_limit_manifest_entries = limit; + self + } + /// Build the table scan. pub fn build(self) -> Result { let snapshot = match self.snapshot_id { @@ -155,12 +183,6 @@ impl<'a> TableScanBuilder<'a> { } } - let bound_predicates = if let Some(ref predicates) = self.filter { - Some(predicates.bind(schema.clone(), true)?) - } else { - None - }; - let mut field_ids = vec![]; for column_name in &self.column_names { let field_id = schema.field_id_by_name(column_name).ok_or_else(|| { @@ -199,17 +221,33 @@ impl<'a> TableScanBuilder<'a> { field_ids.push(field_id); } - Ok(TableScan { + let snapshot_bound_predicate = if let Some(ref predicates) = self.filter { + Some(predicates.bind(schema.clone(), true)?) 
+ } else { + None + }; + + let plan_context = PlanContext { snapshot, - file_io: self.table.file_io().clone(), table_metadata: self.table.metadata_ref(), - column_names: self.column_names, - field_ids, - bound_predicates, - schema, - batch_size: self.batch_size, + snapshot_schema: schema, case_sensitive: self.case_sensitive, - filter: self.filter.map(Arc::new), + predicate: self.filter.map(Arc::new), + snapshot_bound_predicate: snapshot_bound_predicate.map(Arc::new), + file_io: self.table.file_io().clone(), + field_ids: Arc::new(field_ids), + partition_filter_cache: Arc::new(PartitionFilterCache::new()), + manifest_evaluator_cache: Arc::new(ManifestEvaluatorCache::new()), + expression_evaluator_cache: Arc::new(ExpressionEvaluatorCache::new()), + }; + + Ok(TableScan { + batch_size: self.batch_size, + column_names: self.column_names, + concurrency_limit_manifest_files: self.concurrency_limit_manifest_files, + file_io: self.table.file_io().clone(), + plan_context, + concurrency_limit_manifest_entries: self.concurrency_limit_manifest_entries, }) } } @@ -217,116 +255,85 @@ impl<'a> TableScanBuilder<'a> { /// Table scan. #[derive(Debug)] pub struct TableScan { - snapshot: SnapshotRef, - table_metadata: TableMetadataRef, + plan_context: PlanContext, + batch_size: Option, file_io: FileIO, column_names: Vec, - field_ids: Vec, - bound_predicates: Option, - schema: SchemaRef, - batch_size: Option, + /// The maximum number of manifest files that will be + /// retrieved from [`FileIO`] concurrently + concurrency_limit_manifest_files: usize, + + /// The maximum number of [`ManifestEntry`]s that will + /// be processed in parallel + concurrency_limit_manifest_entries: usize, +} + +/// PlanContext wraps a [`SnapshotRef`] alongside all the other +/// objects that are required to perform a scan file plan. +#[derive(Debug)] +struct PlanContext { + snapshot: SnapshotRef, + + table_metadata: TableMetadataRef, + snapshot_schema: SchemaRef, case_sensitive: bool, - filter: Option>, + predicate: Option>, + snapshot_bound_predicate: Option>, + file_io: FileIO, + field_ids: Arc>, + + partition_filter_cache: Arc, + manifest_evaluator_cache: Arc, + expression_evaluator_cache: Arc, } impl TableScan { /// Returns a stream of [`FileScanTask`]s. pub async fn plan_files(&self) -> Result { - let context = FileScanStreamContext::new( - self.schema.clone(), - self.snapshot.clone(), - self.table_metadata.clone(), - self.file_io.clone(), - self.filter.clone(), - self.case_sensitive, - )?; - - let mut partition_filter_cache = PartitionFilterCache::new(); - let mut manifest_evaluator_cache = ManifestEvaluatorCache::new(); - let mut expression_evaluator_cache = ExpressionEvaluatorCache::new(); - - let field_ids = self.field_ids.clone(); - let bound_predicates = self.bound_predicates.clone(); - - Ok(try_stream! { - let manifest_list = context - .snapshot - .load_manifest_list(&context.file_io, &context.table_metadata) - .await?; - - for entry in manifest_list.entries() { - if !Self::content_type_is_data(entry) { - continue; - } - - let partition_spec_id = entry.partition_spec_id; - - let partition_filter = partition_filter_cache.get( - partition_spec_id, - &context, - )?; - - if let Some(partition_filter) = partition_filter { - let manifest_evaluator = manifest_evaluator_cache.get( - partition_spec_id, - partition_filter, - ); - - if !manifest_evaluator.eval(entry)? 
{ - continue; - } - } + let concurrency_limit_manifest_files = self.concurrency_limit_manifest_files; + let concurrency_limit_manifest_entries = self.concurrency_limit_manifest_entries; + + // used to stream ManifestEntryContexts between stages of the file plan operation + let (manifest_entry_ctx_tx, manifest_entry_ctx_rx) = + channel(concurrency_limit_manifest_files); + // used to stream the results back to the caller + let (file_scan_task_tx, file_scan_task_rx) = channel(concurrency_limit_manifest_entries); + + let manifest_list = self.plan_context.get_manifest_list().await?; + + // get the [`ManifestFile`]s from the [`ManifestList`], filtering out any + // whose content type is not Data or whose partitions cannot match this + // scan's filter + let manifest_file_contexts = self + .plan_context + .build_manifest_file_contexts(manifest_list, manifest_entry_ctx_tx)?; + + // Concurrently load all [`Manifest`]s and stream their [`ManifestEntry`]s + spawn(async move { + futures::stream::iter(manifest_file_contexts) + .try_for_each_concurrent(concurrency_limit_manifest_files, |ctx| async move { + ctx.fetch_manifest_and_stream_manifest_entries().await + }) + .await + }); + + // Process the [`ManifestEntry`] stream in parallel + spawn(async move { + manifest_entry_ctx_rx + .map(|me_ctx| Ok((me_ctx, file_scan_task_tx.clone()))) + .try_for_each_concurrent( + concurrency_limit_manifest_entries, + |(manifest_entry_context, tx)| async move { + crate::runtime::spawn(async move { + Self::process_manifest_entry(manifest_entry_context, tx).await + }) + .await + }, + ) + .await + }); - let manifest = entry.load_manifest(&context.file_io).await?; - let mut manifest_entries_stream = - futures::stream::iter(manifest.entries().iter().filter(|e| e.is_alive())); - - while let Some(manifest_entry) = manifest_entries_stream.next().await { - let data_file = manifest_entry.data_file(); - - if let Some(partition_filter) = partition_filter { - let expression_evaluator = expression_evaluator_cache.get(partition_spec_id, partition_filter); - - if !expression_evaluator.eval(data_file)? { - continue; - } - } - - - if let Some(bound_predicate) = context.bound_filter() { - // reject any manifest entries whose data file's metrics don't match the filter. - if !InclusiveMetricsEvaluator::eval( - bound_predicate, - manifest_entry.data_file(), - false - )? { - continue; - } - } - - match manifest_entry.content_type() { - DataContentType::EqualityDeletes | DataContentType::PositionDeletes => { - yield Err(Error::new( - ErrorKind::FeatureUnsupported, - "Delete files are not supported yet.", - ))?; - } - DataContentType::Data => { - let scan_task: Result = Ok(FileScanTask { - data_file_path: manifest_entry.data_file().file_path().to_string(), - start: 0, - length: manifest_entry.file_size_in_bytes(), - project_field_ids: field_ids.clone(), - predicate: bound_predicates.clone(), - schema: context.schema.clone(), - }); - yield scan_task?; - } - } - } - } - } - .boxed()) + return Ok(file_scan_task_rx.boxed()); } /// Returns an [`ArrowRecordBatchStream`]. @@ -340,157 +347,468 @@ impl TableScan { arrow_reader_builder.build().read(self.plan_files().await?) } - /// Checks whether the [`ManifestContentType`] is `Data` or not. - fn content_type_is_data(entry: &ManifestFile) -> bool { - if let ManifestContentType::Data = entry.content { - return true; - } - false - } - /// Returns a reference to the column names of the table scan. 
pub fn column_names(&self) -> &[String] { &self.column_names } + /// Returns a reference to the snapshot of the table scan. + pub fn snapshot(&self) -> &SnapshotRef { + &self.plan_context.snapshot + } + + async fn process_manifest_entry( + manifest_entry_context: ManifestEntryContext, + mut file_scan_task_tx: Sender>, + ) -> Result<()> { + // skip processing this manifest entry if it has been marked as deleted + if !manifest_entry_context.manifest_entry.is_alive() { + return Ok(()); + } + + // abort the plan if we encounter a manifest entry whose data file's + // content type is currently unsupported + if manifest_entry_context.manifest_entry.content_type() != DataContentType::Data { + return Err(Error::new( + ErrorKind::FeatureUnsupported, + "Only Data files currently supported", + )); + } + + if let Some(ref bound_predicates) = manifest_entry_context.bound_predicates { + let BoundPredicates { + ref snapshot_bound_predicate, + ref partition_bound_predicate, + } = bound_predicates.as_ref(); + + let expression_evaluator_cache = + manifest_entry_context.expression_evaluator_cache.as_ref(); + + let expression_evaluator = expression_evaluator_cache.get( + manifest_entry_context.partition_spec_id, + partition_bound_predicate, + )?; + + // skip any data file whose partition data indicates that it can't contain + // any data that matches this scan's filter + if !expression_evaluator.eval(manifest_entry_context.manifest_entry.data_file())? { + return Ok(()); + } + + // skip any data file whose metrics don't match this scan's filter + if !InclusiveMetricsEvaluator::eval( + snapshot_bound_predicate, + manifest_entry_context.manifest_entry.data_file(), + false, + )? { + return Ok(()); + } + } + + // congratulations! the manifest entry has made its way through the + // entire plan without getting filtered out. Create a corresponding + // FileScanTask and push it to the result stream + file_scan_task_tx + .send(Ok(manifest_entry_context.into_file_scan_task())) + .await?; + + Ok(()) + } } -/// Holds the context necessary for file scanning operations -/// in a streaming environment. -#[derive(Debug)] -struct FileScanStreamContext { - schema: SchemaRef, - snapshot: SnapshotRef, - table_metadata: TableMetadataRef, +struct BoundPredicates { + partition_bound_predicate: BoundPredicate, + snapshot_bound_predicate: BoundPredicate, +} + +/// Wraps a [`ManifestFile`] alongside the objects that are needed +/// to process it in a thread-safe manner +struct ManifestFileContext { + manifest_file: ManifestFile, + + sender: Sender, + + field_ids: Arc>, file_io: FileIO, - bound_filter: Option, - case_sensitive: bool, + bound_predicates: Option>, + snapshot_schema: SchemaRef, + expression_evaluator_cache: Arc, } -impl FileScanStreamContext { - /// Creates a new [`FileScanStreamContext`]. 
- fn new( - schema: SchemaRef, - snapshot: SnapshotRef, - table_metadata: TableMetadataRef, - file_io: FileIO, - filter: Option>, - case_sensitive: bool, - ) -> Result { - let bound_filter = match filter { - Some(ref filter) => Some(filter.bind(schema.clone(), case_sensitive)?), - None => None, - }; +/// Wraps a [`ManifestEntryRef`] alongside the objects that are needed +/// to process it in a thread-safe manner +struct ManifestEntryContext { + manifest_entry: ManifestEntryRef, - Ok(Self { - schema, - snapshot, - table_metadata, + expression_evaluator_cache: Arc, + field_ids: Arc>, + bound_predicates: Option>, + partition_spec_id: i32, + snapshot_schema: SchemaRef, +} + +impl ManifestFileContext { + /// Consumes this [`ManifestFileContext`], fetching its Manifest from FileIO and then + /// streaming its constituent [`ManifestEntries`] to the channel provided in the context + async fn fetch_manifest_and_stream_manifest_entries(self) -> Result<()> { + let ManifestFileContext { file_io, - bound_filter, - case_sensitive, - }) + manifest_file, + bound_predicates, + snapshot_schema, + field_ids, + expression_evaluator_cache, + mut sender, + .. + } = self; + + let file_io_cloned = file_io.clone(); + let manifest = manifest_file.load_manifest(&file_io_cloned).await?; + + let (entries, _) = manifest.consume(); + + for manifest_entry in entries.into_iter() { + let manifest_entry_context = ManifestEntryContext { + manifest_entry, + expression_evaluator_cache: expression_evaluator_cache.clone(), + field_ids: field_ids.clone(), + partition_spec_id: manifest_file.partition_spec_id, + bound_predicates: bound_predicates.clone(), + snapshot_schema: snapshot_schema.clone(), + }; + + sender + .send(manifest_entry_context) + .map_err(|_| Error::new(ErrorKind::Unexpected, "mpsc channel SendError")) + .await?; + } + + Ok(()) } +} - /// Returns a reference to the [`BoundPredicate`] filter. - fn bound_filter(&self) -> Option<&BoundPredicate> { - self.bound_filter.as_ref() +impl ManifestEntryContext { + /// consume this `ManifestEntryContext`, returning a `FileScanTask` + /// created from it + fn into_file_scan_task(self) -> FileScanTask { + FileScanTask { + data_file_path: self.manifest_entry.file_path().to_string(), + start: 0, + length: self.manifest_entry.file_size_in_bytes(), + project_field_ids: self.field_ids.to_vec(), + predicate: self + .bound_predicates + .map(|x| x.as_ref().snapshot_bound_predicate.clone()), + schema: self.snapshot_schema, + } + } +} + +impl PlanContext { + async fn get_manifest_list(&self) -> Result { + self.snapshot + .load_manifest_list(&self.file_io, &self.table_metadata) + .await + } + + fn get_partition_filter(&self, manifest_file: &ManifestFile) -> Result> { + let partition_spec_id = manifest_file.partition_spec_id; + + let partition_filter = self.partition_filter_cache.get( + partition_spec_id, + &self.table_metadata, + &self.snapshot_schema, + self.case_sensitive, + self.predicate + .as_ref() + .ok_or(Error::new( + ErrorKind::Unexpected, + "Expected a predicate but none present", + ))? + .as_ref() + .bind(self.snapshot_schema.clone(), self.case_sensitive)?, + )?; + + Ok(partition_filter) + } + + fn build_manifest_file_contexts( + &self, + manifest_list: ManifestList, + sender: Sender, + ) -> Result>>> { + let filtered_entries = manifest_list + .consume_entries() + .into_iter() + .filter(|manifest_file| manifest_file.content == ManifestContentType::Data); + + // TODO: Ideally we could ditch this intermediate Vec as we return an iterator. 
+ let mut filtered_mfcs = vec![]; + if self.predicate.is_some() { + for manifest_file in filtered_entries { + let partition_bound_predicate = self.get_partition_filter(&manifest_file)?; + + // evaluate the ManifestFile against the partition filter. Skip + // if it cannot contain any matching rows + if self + .manifest_evaluator_cache + .get( + manifest_file.partition_spec_id, + partition_bound_predicate.clone(), + ) + .eval(&manifest_file)? + { + let mfc = self.create_manifest_file_context( + manifest_file, + Some(partition_bound_predicate), + sender.clone(), + ); + filtered_mfcs.push(Ok(mfc)); + } + } + } else { + for manifest_file in filtered_entries { + let mfc = self.create_manifest_file_context(manifest_file, None, sender.clone()); + filtered_mfcs.push(Ok(mfc)); + } + } + + Ok(Box::new(filtered_mfcs.into_iter())) + } + + fn create_manifest_file_context( + &self, + manifest_file: ManifestFile, + partition_filter: Option>, + sender: Sender, + ) -> ManifestFileContext { + let bound_predicates = + if let (Some(ref partition_bound_predicate), Some(snapshot_bound_predicate)) = + (partition_filter, &self.snapshot_bound_predicate) + { + Some(Arc::new(BoundPredicates { + partition_bound_predicate: partition_bound_predicate.as_ref().clone(), + snapshot_bound_predicate: snapshot_bound_predicate.as_ref().clone(), + })) + } else { + None + }; + + ManifestFileContext { + manifest_file, + bound_predicates, + sender, + file_io: self.file_io.clone(), + snapshot_schema: self.snapshot_schema.clone(), + field_ids: self.field_ids.clone(), + expression_evaluator_cache: self.expression_evaluator_cache.clone(), + } } } /// Manages the caching of [`BoundPredicate`] objects /// for [`PartitionSpec`]s based on partition spec id. #[derive(Debug)] -struct PartitionFilterCache(HashMap); +struct PartitionFilterCache(RwLock>>); impl PartitionFilterCache { /// Creates a new [`PartitionFilterCache`] /// with an empty internal HashMap. fn new() -> Self { - Self(HashMap::new()) + Self(RwLock::new(HashMap::new())) } /// Retrieves a [`BoundPredicate`] from the cache /// or computes it if not present. fn get( - &mut self, + &self, spec_id: i32, - context: &FileScanStreamContext, - ) -> Result> { - match context.bound_filter() { - None => Ok(None), - Some(filter) => match self.0.entry(spec_id) { - Entry::Occupied(e) => Ok(Some(e.into_mut())), - Entry::Vacant(e) => { - let partition_spec = context - .table_metadata - .partition_spec_by_id(spec_id) - .ok_or(Error::new( - ErrorKind::Unexpected, - format!("Could not find partition spec for id {}", spec_id), - ))?; - - let partition_type = partition_spec.partition_type(context.schema.as_ref())?; - let partition_fields = partition_type.fields().to_owned(); - let partition_schema = Arc::new( - Schema::builder() - .with_schema_id(partition_spec.spec_id()) - .with_fields(partition_fields) - .build()?, - ); + table_metadata: &TableMetadataRef, + schema: &SchemaRef, + case_sensitive: bool, + filter: BoundPredicate, + ) -> Result> { + // we need a block here to ensure that the `read()` gets dropped before we hit the `write()` + // below, otherwise we hit deadlock + { + let read = self.0.read().map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "PartitionFilterCache RwLock was poisoned", + ) + })?; - let mut inclusive_projection = InclusiveProjection::new(partition_spec.clone()); + if read.contains_key(&spec_id) { + return Ok(read.get(&spec_id).unwrap().clone()); + } + } - let partition_filter = inclusive_projection - .project(filter)? 
- .rewrite_not() - .bind(partition_schema.clone(), context.case_sensitive)?; + let partition_spec = table_metadata + .partition_spec_by_id(spec_id) + .ok_or(Error::new( + ErrorKind::Unexpected, + format!("Could not find partition spec for id {}", spec_id), + ))?; - Ok(Some(e.insert(partition_filter))) - } - }, - } + let partition_type = partition_spec.partition_type(schema.as_ref())?; + let partition_fields = partition_type.fields().to_owned(); + let partition_schema = Arc::new( + Schema::builder() + .with_schema_id(partition_spec.spec_id) + .with_fields(partition_fields) + .build()?, + ); + + let mut inclusive_projection = InclusiveProjection::new(partition_spec.clone()); + + let partition_filter = inclusive_projection + .project(&filter)? + .rewrite_not() + .bind(partition_schema.clone(), case_sensitive)?; + + self.0 + .write() + .map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "PartitionFilterCache RwLock was poisoned", + ) + })? + .insert(spec_id, Arc::new(partition_filter)); + + let read = self.0.read().map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "PartitionFilterCache RwLock was poisoned", + ) + })?; + + Ok(read.get(&spec_id).unwrap().clone()) } } /// Manages the caching of [`ManifestEvaluator`] objects /// for [`PartitionSpec`]s based on partition spec id. #[derive(Debug)] -struct ManifestEvaluatorCache(HashMap); +struct ManifestEvaluatorCache(RwLock>>); impl ManifestEvaluatorCache { /// Creates a new [`ManifestEvaluatorCache`] /// with an empty internal HashMap. fn new() -> Self { - Self(HashMap::new()) + Self(RwLock::new(HashMap::new())) } /// Retrieves a [`ManifestEvaluator`] from the cache /// or computes it if not present. - fn get(&mut self, spec_id: i32, partition_filter: &BoundPredicate) -> &mut ManifestEvaluator { + fn get(&self, spec_id: i32, partition_filter: Arc) -> Arc { + // we need a block here to ensure that the `read()` gets dropped before we hit the `write()` + // below, otherwise we hit deadlock + { + let read = self + .0 + .read() + .map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "ManifestEvaluatorCache RwLock was poisoned", + ) + }) + .unwrap(); + + if read.contains_key(&spec_id) { + return read.get(&spec_id).unwrap().clone(); + } + } + self.0 - .entry(spec_id) - .or_insert(ManifestEvaluator::new(partition_filter.clone())) + .write() + .map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "ManifestEvaluatorCache RwLock was poisoned", + ) + }) + .unwrap() + .insert( + spec_id, + Arc::new(ManifestEvaluator::new(partition_filter.as_ref().clone())), + ); + + let read = self + .0 + .read() + .map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "ManifestEvaluatorCache RwLock was poisoned", + ) + }) + .unwrap(); + + read.get(&spec_id).unwrap().clone() } } /// Manages the caching of [`ExpressionEvaluator`] objects /// for [`PartitionSpec`]s based on partition spec id. #[derive(Debug)] -struct ExpressionEvaluatorCache(HashMap); +struct ExpressionEvaluatorCache(RwLock>>); impl ExpressionEvaluatorCache { /// Creates a new [`ExpressionEvaluatorCache`] /// with an empty internal HashMap. fn new() -> Self { - Self(HashMap::new()) + Self(RwLock::new(HashMap::new())) } /// Retrieves a [`ExpressionEvaluator`] from the cache /// or computes it if not present. 
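// ---------------------------------------------------------------------------
// Editor's note: generic sketch, not part of this patch. All three caches in
// this file follow the same read-then-write pattern over a
// `std::sync::RwLock<HashMap<..>>`; the scoped block is what guarantees the
// read guard is dropped before `write()` is taken, avoiding the self-deadlock
// mentioned in the in-code comments. The `Cache` type and `make` closure
// below are illustrative stand-ins.
//
// use std::collections::HashMap;
// use std::sync::{Arc, RwLock};
//
// struct Cache(RwLock<HashMap<i32, Arc<String>>>);
//
// impl Cache {
//     fn get_or_create(&self, key: i32, make: impl FnOnce() -> String) -> Arc<String> {
//         {
//             // fast path: the read guard is dropped at the end of this block
//             let read = self.0.read().unwrap();
//             if let Some(cached) = read.get(&key) {
//                 return cached.clone();
//             }
//         }
//         // slow path: populate under the write lock, then hand back the Arc
//         let mut write = self.0.write().unwrap();
//         write.entry(key).or_insert_with(|| Arc::new(make())).clone()
//     }
// }
// ---------------------------------------------------------------------------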
- fn get(&mut self, spec_id: i32, partition_filter: &BoundPredicate) -> &mut ExpressionEvaluator { + fn get( + &self, + spec_id: i32, + partition_filter: &BoundPredicate, + ) -> Result> { + // we need a block here to ensure that the `read()` gets dropped before we hit the `write()` + // below, otherwise we hit deadlock + { + let read = self.0.read().map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "PartitionFilterCache RwLock was poisoned", + ) + })?; + + if read.contains_key(&spec_id) { + return Ok(read.get(&spec_id).unwrap().clone()); + } + } + self.0 - .entry(spec_id) - .or_insert(ExpressionEvaluator::new(partition_filter.clone())) + .write() + .map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "ManifestEvaluatorCache RwLock was poisoned", + ) + }) + .unwrap() + .insert( + spec_id, + Arc::new(ExpressionEvaluator::new(partition_filter.clone())), + ); + + let read = self + .0 + .read() + .map_err(|_| { + Error::new( + ErrorKind::Unexpected, + "ManifestEvaluatorCache RwLock was poisoned", + ) + }) + .unwrap(); + + Ok(read.get(&spec_id).unwrap().clone()) } } @@ -817,7 +1135,7 @@ mod tests { let table_scan = table.scan().build().unwrap(); assert_eq!( table.metadata().current_snapshot().unwrap().snapshot_id(), - table_scan.snapshot.snapshot_id() + table_scan.snapshot().snapshot_id() ); } @@ -838,7 +1156,7 @@ mod tests { .snapshot_id(3051729675574597004) .build() .unwrap(); - assert_eq!(table_scan.snapshot.snapshot_id(), 3051729675574597004); + assert_eq!(table_scan.snapshot().snapshot_id(), 3051729675574597004); } #[tokio::test] diff --git a/crates/iceberg/src/spec/datatypes.rs b/crates/iceberg/src/spec/datatypes.rs index cb51db6fc..d8883878e 100644 --- a/crates/iceberg/src/spec/datatypes.rs +++ b/crates/iceberg/src/spec/datatypes.rs @@ -33,6 +33,7 @@ use super::values::Literal; use crate::ensure_data_valid; use crate::error::Result; use crate::spec::datatypes::_decimal::{MAX_PRECISION, REQUIRED_LENGTH}; +use crate::spec::PrimitiveLiteral; /// Field name for list type. pub(crate) const LIST_FILED_NAME: &str = "element"; @@ -234,6 +235,29 @@ pub enum PrimitiveType { Binary, } +impl PrimitiveType { + /// Check whether literal is compatible with the type. + pub fn compatible(&self, literal: &PrimitiveLiteral) -> bool { + matches!( + (self, literal), + (PrimitiveType::Boolean, PrimitiveLiteral::Boolean(_)) + | (PrimitiveType::Int, PrimitiveLiteral::Int(_)) + | (PrimitiveType::Long, PrimitiveLiteral::Long(_)) + | (PrimitiveType::Float, PrimitiveLiteral::Float(_)) + | (PrimitiveType::Double, PrimitiveLiteral::Double(_)) + | (PrimitiveType::Decimal { .. }, PrimitiveLiteral::Decimal(_)) + | (PrimitiveType::Date, PrimitiveLiteral::Date(_)) + | (PrimitiveType::Time, PrimitiveLiteral::Time(_)) + | (PrimitiveType::Timestamp, PrimitiveLiteral::Timestamp(_)) + | (PrimitiveType::Timestamptz, PrimitiveLiteral::Timestamptz(_)) + | (PrimitiveType::String, PrimitiveLiteral::String(_)) + | (PrimitiveType::Uuid, PrimitiveLiteral::Uuid(_)) + | (PrimitiveType::Fixed(_), PrimitiveLiteral::Fixed(_)) + | (PrimitiveType::Binary, PrimitiveLiteral::Binary(_)) + ) + } +} + impl Serialize for Type { fn serialize(&self, serializer: S) -> std::result::Result where S: Serializer { @@ -557,6 +581,19 @@ impl From for SerdeNestedField { pub type NestedFieldRef = Arc; impl NestedField { + /// Construct a new field. 
+ pub fn new(id: i32, name: impl ToString, field_type: Type, required: bool) -> Self { + Self { + id, + name: name.to_string(), + required, + field_type: Box::new(field_type), + doc: None, + initial_default: None, + write_default: None, + } + } + /// Construct a required field. pub fn required(id: i32, name: impl ToString, field_type: Type) -> Self { Self { @@ -896,10 +933,10 @@ mod tests { Type::Struct(StructType { fields: vec![ NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid)) - .with_initial_default(Literal::Primitive(PrimitiveLiteral::UUID( + .with_initial_default(Literal::Primitive(PrimitiveLiteral::Uuid( Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb").unwrap(), ))) - .with_write_default(Literal::Primitive(PrimitiveLiteral::UUID( + .with_write_default(Literal::Primitive(PrimitiveLiteral::Uuid( Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae").unwrap(), ))) .into(), @@ -965,10 +1002,10 @@ mod tests { let struct_type = Type::Struct(StructType::new(vec![ NestedField::required(1, "id", Type::Primitive(PrimitiveType::Uuid)) - .with_initial_default(Literal::Primitive(PrimitiveLiteral::UUID( + .with_initial_default(Literal::Primitive(PrimitiveLiteral::Uuid( Uuid::parse_str("0db3e2a8-9d1d-42b9-aa7b-74ebe558dceb").unwrap(), ))) - .with_write_default(Literal::Primitive(PrimitiveLiteral::UUID( + .with_write_default(Literal::Primitive(PrimitiveLiteral::Uuid( Uuid::parse_str("ec5911be-b0a7-458c-8438-c9a3e53cffae").unwrap(), ))) .into(), @@ -1087,4 +1124,48 @@ mod tests { assert_eq!(5, Type::decimal_required_bytes(10).unwrap()); assert_eq!(16, Type::decimal_required_bytes(38).unwrap()); } + + #[test] + fn test_primitive_type_compatitable() { + let types = vec![ + PrimitiveType::Boolean, + PrimitiveType::Int, + PrimitiveType::Long, + PrimitiveType::Float, + PrimitiveType::Double, + PrimitiveType::Decimal { + precision: 9, + scale: 2, + }, + PrimitiveType::Date, + PrimitiveType::Time, + PrimitiveType::Timestamp, + PrimitiveType::Timestamptz, + PrimitiveType::String, + PrimitiveType::Uuid, + PrimitiveType::Fixed(8), + PrimitiveType::Binary, + ]; + let literals = vec![ + PrimitiveLiteral::Boolean(true), + PrimitiveLiteral::Int(1), + PrimitiveLiteral::Long(1), + PrimitiveLiteral::Float(1.0.into()), + PrimitiveLiteral::Double(1.0.into()), + PrimitiveLiteral::Decimal(1), + PrimitiveLiteral::Date(1), + PrimitiveLiteral::Time(1), + PrimitiveLiteral::Timestamp(1), + PrimitiveLiteral::Timestamptz(1), + PrimitiveLiteral::String("1".to_string()), + PrimitiveLiteral::Uuid(Uuid::new_v4()), + PrimitiveLiteral::Fixed(vec![1]), + PrimitiveLiteral::Binary(vec![1]), + ]; + for (i, t) in types.iter().enumerate() { + for (j, l) in literals.iter().enumerate() { + assert_eq!(i == j, t.compatible(l)); + } + } + } } diff --git a/crates/iceberg/src/spec/manifest.rs b/crates/iceberg/src/spec/manifest.rs index 4f51dc9cd..14b8a8000 100644 --- a/crates/iceberg/src/spec/manifest.rs +++ b/crates/iceberg/src/spec/manifest.rs @@ -94,6 +94,12 @@ impl Manifest { &self.entries } + /// Consume this Manifest, returning its constituent parts + pub fn consume(self) -> (Vec, ManifestMetadata) { + let Self { entries, metadata } = self; + (entries, metadata) + } + /// Constructor from [`ManifestMetadata`] and [`ManifestEntry`]s. 
pub fn new(metadata: ManifestMetadata, entries: Vec) -> Self { Self { diff --git a/crates/iceberg/src/spec/manifest_list.rs b/crates/iceberg/src/spec/manifest_list.rs index e81889068..3aaecf12d 100644 --- a/crates/iceberg/src/spec/manifest_list.rs +++ b/crates/iceberg/src/spec/manifest_list.rs @@ -78,6 +78,11 @@ impl ManifestList { pub fn entries(&self) -> &[ManifestFile] { &self.entries } + + /// Take ownership of the entries in the manifest list, consuming it + pub fn consume_entries(self) -> impl IntoIterator { + Box::new(self.entries.into_iter()) + } } /// A manifest list writer. diff --git a/crates/iceberg/src/spec/mod.rs b/crates/iceberg/src/spec/mod.rs index 199fc4a16..793f00d34 100644 --- a/crates/iceberg/src/spec/mod.rs +++ b/crates/iceberg/src/spec/mod.rs @@ -27,6 +27,8 @@ mod sort; mod table_metadata; mod transform; mod values; +mod view_metadata; +mod view_version; pub use datatypes::*; pub use manifest::*; @@ -38,3 +40,5 @@ pub use sort::*; pub use table_metadata::*; pub use transform::*; pub use values::*; +pub use view_metadata::*; +pub use view_version::*; diff --git a/crates/iceberg/src/spec/table_metadata.rs b/crates/iceberg/src/spec/table_metadata.rs index 087434361..cd7f046c6 100644 --- a/crates/iceberg/src/spec/table_metadata.rs +++ b/crates/iceberg/src/spec/table_metadata.rs @@ -24,7 +24,7 @@ use std::fmt::{Display, Formatter}; use std::sync::Arc; use _serde::TableMetadataEnum; -use chrono::{DateTime, TimeZone, Utc}; +use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; use serde_repr::{Deserialize_repr, Serialize_repr}; use uuid::Uuid; @@ -34,7 +34,7 @@ use super::{ PartitionSpec, PartitionSpecRef, SchemaId, SchemaRef, SnapshotRef, SortOrder, SortOrderRef, DEFAULT_PARTITION_SPEC_ID, }; -use crate::error::Result; +use crate::error::{timestamp_ms_to_utc, Result}; use crate::{Error, ErrorKind, TableCreation}; static MAIN_BRANCH: &str = "main"; @@ -143,8 +143,14 @@ impl TableMetadata { /// Returns last updated time. #[inline] - pub fn last_updated_ms(&self) -> DateTime { - Utc.timestamp_millis_opt(self.last_updated_ms).unwrap() + pub fn last_updated_timestamp(&self) -> Result> { + timestamp_ms_to_utc(self.last_updated_ms) + } + + /// Returns last updated time in milliseconds. + #[inline] + pub fn last_updated_ms(&self) -> i64 { + self.last_updated_ms } /// Returns schemas @@ -328,7 +334,7 @@ impl TableMetadataBuilder { let table_metadata = TableMetadata { format_version: FormatVersion::V2, - table_uuid: Uuid::new_v4(), + table_uuid: Uuid::now_v7(), location: location.ok_or_else(|| { Error::new( ErrorKind::DataInvalid, @@ -472,7 +478,7 @@ pub(super) mod _serde { /// Helper to serialize and deserialize the format version. 
#[derive(Debug, PartialEq, Eq)] - pub(super) struct VersionNumber; + pub(crate) struct VersionNumber; impl Serialize for TableMetadata { fn serialize(&self, serializer: S) -> Result @@ -903,8 +909,14 @@ pub struct SnapshotLog { impl SnapshotLog { /// Returns the last updated timestamp as a DateTime with millisecond precision - pub fn timestamp(self) -> DateTime { - Utc.timestamp_millis_opt(self.timestamp_ms).unwrap() + pub fn timestamp(self) -> Result> { + timestamp_ms_to_utc(self.timestamp_ms) + } + + /// Returns the timestamp in milliseconds + #[inline] + pub fn timestamp_ms(&self) -> i64 { + self.timestamp_ms } } diff --git a/crates/iceberg/src/spec/values.rs b/crates/iceberg/src/spec/values.rs index a8817fbbb..8c2e4abe3 100644 --- a/crates/iceberg/src/spec/values.rs +++ b/crates/iceberg/src/spec/values.rs @@ -76,7 +76,7 @@ pub enum PrimitiveLiteral { /// UTF-8 bytes (without length) String(String), /// 16-byte big-endian value - UUID(Uuid), + Uuid(Uuid), /// Binary value Fixed(Vec), /// Binary value (without length) @@ -278,8 +278,8 @@ impl PartialOrd for Datum { PrimitiveType::String, ) => val.partial_cmp(other_val), ( - PrimitiveLiteral::UUID(val), - PrimitiveLiteral::UUID(other_val), + PrimitiveLiteral::Uuid(val), + PrimitiveLiteral::Uuid(other_val), PrimitiveType::Uuid, PrimitiveType::Uuid, ) => val.partial_cmp(other_val), @@ -333,7 +333,7 @@ impl Display for Datum { write!(f, "{}", microseconds_to_datetimetz(*val)) } (_, PrimitiveLiteral::String(val)) => write!(f, r#""{}""#, val), - (_, PrimitiveLiteral::UUID(val)) => write!(f, "{}", val), + (_, PrimitiveLiteral::Uuid(val)) => write!(f, "{}", val), (_, PrimitiveLiteral::Fixed(val)) => display_bytes(val, f), (_, PrimitiveLiteral::Binary(val)) => display_bytes(val, f), ( @@ -410,7 +410,7 @@ impl Datum { PrimitiveLiteral::String(std::str::from_utf8(bytes)?.to_string()) } PrimitiveType::Uuid => { - PrimitiveLiteral::UUID(Uuid::from_u128(u128::from_be_bytes(bytes.try_into()?))) + PrimitiveLiteral::Uuid(Uuid::from_u128(u128::from_be_bytes(bytes.try_into()?))) } PrimitiveType::Fixed(_) => PrimitiveLiteral::Fixed(Vec::from(bytes)), PrimitiveType::Binary => PrimitiveLiteral::Binary(Vec::from(bytes)), @@ -443,7 +443,7 @@ impl Datum { PrimitiveLiteral::Timestamp(val) => ByteBuf::from(val.to_le_bytes()), PrimitiveLiteral::Timestamptz(val) => ByteBuf::from(val.to_le_bytes()), PrimitiveLiteral::String(val) => ByteBuf::from(val.as_bytes()), - PrimitiveLiteral::UUID(val) => ByteBuf::from(val.as_u128().to_be_bytes()), + PrimitiveLiteral::Uuid(val) => ByteBuf::from(val.as_u128().to_be_bytes()), PrimitiveLiteral::Fixed(val) => ByteBuf::from(val.as_slice()), PrimitiveLiteral::Binary(val) => ByteBuf::from(val.as_slice()), PrimitiveLiteral::Decimal(_) => todo!(), @@ -868,7 +868,7 @@ impl Datum { pub fn uuid(uuid: Uuid) -> Self { Self { r#type: PrimitiveType::Uuid, - literal: PrimitiveLiteral::UUID(uuid), + literal: PrimitiveLiteral::Uuid(uuid), } } @@ -977,9 +977,22 @@ impl Datum { /// Convert the datum to `target_type`. 
pub fn to(self, target_type: &Type) -> Result { - // TODO: We should allow more type conversions match target_type { - Type::Primitive(typ) if typ == &self.r#type => Ok(self), + Type::Primitive(target_primitive_type) => { + match (&self.literal, &self.r#type, target_primitive_type) { + (PrimitiveLiteral::Date(val), _, PrimitiveType::Int) => Ok(Datum::int(*val)), + (PrimitiveLiteral::Int(val), _, PrimitiveType::Date) => Ok(Datum::date(*val)), + // TODO: implement more type conversions + (_, self_type, target_type) if self_type == target_type => Ok(self), + _ => Err(Error::new( + ErrorKind::DataInvalid, + format!( + "Can't convert datum from {} type to {} type.", + self.r#type, target_primitive_type + ), + )), + } + } _ => Err(Error::new( ErrorKind::DataInvalid, format!( @@ -1408,7 +1421,7 @@ impl Literal { /// Creates uuid literal. pub fn uuid(uuid: Uuid) -> Self { - Self::Primitive(PrimitiveLiteral::UUID(uuid)) + Self::Primitive(PrimitiveLiteral::Uuid(uuid)) } /// Creates uuid from str. See [`Uuid::parse_str`]. @@ -1655,7 +1668,7 @@ impl Literal { Ok(Some(Literal::Primitive(PrimitiveLiteral::String(s)))) } (PrimitiveType::Uuid, JsonValue::String(s)) => Ok(Some(Literal::Primitive( - PrimitiveLiteral::UUID(Uuid::parse_str(&s)?), + PrimitiveLiteral::Uuid(Uuid::parse_str(&s)?), ))), (PrimitiveType::Fixed(_), JsonValue::String(_)) => todo!(), (PrimitiveType::Binary, JsonValue::String(_)) => todo!(), @@ -1793,7 +1806,7 @@ impl Literal { .to_string(), )), PrimitiveLiteral::String(val) => Ok(JsonValue::String(val.clone())), - PrimitiveLiteral::UUID(val) => Ok(JsonValue::String(val.to_string())), + PrimitiveLiteral::Uuid(val) => Ok(JsonValue::String(val.to_string())), PrimitiveLiteral::Fixed(val) => Ok(JsonValue::String(val.iter().fold( String::new(), |mut acc, x| { @@ -1882,7 +1895,7 @@ impl Literal { PrimitiveLiteral::Fixed(any) => Box::new(any), PrimitiveLiteral::Binary(any) => Box::new(any), PrimitiveLiteral::String(any) => Box::new(any), - PrimitiveLiteral::UUID(any) => Box::new(any), + PrimitiveLiteral::Uuid(any) => Box::new(any), PrimitiveLiteral::Decimal(any) => Box::new(any), }, _ => unimplemented!(), @@ -2189,7 +2202,7 @@ mod _serde { super::PrimitiveLiteral::Timestamp(v) => RawLiteralEnum::Long(v), super::PrimitiveLiteral::Timestamptz(v) => RawLiteralEnum::Long(v), super::PrimitiveLiteral::String(v) => RawLiteralEnum::String(v), - super::PrimitiveLiteral::UUID(v) => { + super::PrimitiveLiteral::Uuid(v) => { RawLiteralEnum::Bytes(ByteBuf::from(v.as_u128().to_be_bytes())) } super::PrimitiveLiteral::Fixed(v) => RawLiteralEnum::Bytes(ByteBuf::from(v)), @@ -2614,6 +2627,7 @@ mod tests { use crate::avro::schema_to_avro_schema; use crate::spec::datatypes::{ListType, MapType, NestedField, StructType}; use crate::spec::Schema; + use crate::spec::Type::Primitive; fn check_json_serde(json: &str, expected_literal: Literal, expected_type: &Type) { let raw_json_value = serde_json::from_str::(json).unwrap(); @@ -2818,7 +2832,7 @@ mod tests { check_json_serde( record, - Literal::Primitive(PrimitiveLiteral::UUID( + Literal::Primitive(PrimitiveLiteral::Uuid( Uuid::parse_str("f79c3e09-677c-4bbd-a479-3f349cb785e7").unwrap(), )), &Type::Primitive(PrimitiveType::Uuid), @@ -3364,4 +3378,26 @@ mod tests { let datum = Datum::fixed(vec![1, 2, 3, 4, 5]); test_fn(datum); } + + #[test] + fn test_datum_date_convert_to_int() { + let datum_date = Datum::date(12345); + + let result = datum_date.to(&Primitive(PrimitiveType::Int)).unwrap(); + + let expected = Datum::int(12345); + + assert_eq!(result, expected); + } + + 
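// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. Beyond the
// Date <-> Int round-trips exercised by the surrounding tests, a same-type
// conversion is a no-op and every other primitive conversion is still
// unimplemented, so `to` reports it as `ErrorKind::DataInvalid`:
//
// // identity conversion is a no-op
// assert_eq!(
//     Datum::int(42).to(&Primitive(PrimitiveType::Int)).unwrap(),
//     Datum::int(42)
// );
// // conversions that are not implemented yet (e.g. Int -> Long) are rejected
// assert!(Datum::int(42).to(&Primitive(PrimitiveType::Long)).is_err());
// ---------------------------------------------------------------------------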
#[test] + fn test_datum_int_convert_to_date() { + let datum_int = Datum::int(12345); + + let result = datum_int.to(&Primitive(PrimitiveType::Date)).unwrap(); + + let expected = Datum::date(12345); + + assert_eq!(result, expected); + } } diff --git a/crates/iceberg/src/spec/view_metadata.rs b/crates/iceberg/src/spec/view_metadata.rs new file mode 100644 index 000000000..741e38649 --- /dev/null +++ b/crates/iceberg/src/spec/view_metadata.rs @@ -0,0 +1,728 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Defines the [view metadata](https://iceberg.apache.org/view-spec/#view-metadata). +//! The main struct here is [ViewMetadata] which defines the data for a view. + +use std::cmp::Ordering; +use std::collections::HashMap; +use std::fmt::{Display, Formatter}; +use std::sync::Arc; + +use _serde::ViewMetadataEnum; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_repr::{Deserialize_repr, Serialize_repr}; +use uuid::Uuid; + +use super::view_version::{ViewVersion, ViewVersionId, ViewVersionRef}; +use super::{SchemaId, SchemaRef}; +use crate::catalog::ViewCreation; +use crate::error::{timestamp_ms_to_utc, Result}; + +/// Reference to [`ViewMetadata`]. +pub type ViewMetadataRef = Arc; + +pub(crate) static INITIAL_VIEW_VERSION_ID: i32 = 1; + +#[derive(Debug, PartialEq, Deserialize, Eq, Clone)] +#[serde(try_from = "ViewMetadataEnum", into = "ViewMetadataEnum")] +/// Fields for the version 1 of the view metadata. +/// +/// We assume that this data structure is always valid, so we will panic when invalid error happens. +/// We check the validity of this data structure when constructing. +pub struct ViewMetadata { + /// Integer Version for the format. + pub(crate) format_version: ViewFormatVersion, + /// A UUID that identifies the view, generated when the view is created. + pub(crate) view_uuid: Uuid, + /// The view's base location; used to create metadata file locations + pub(crate) location: String, + /// ID of the current version of the view (version-id) + pub(crate) current_version_id: ViewVersionId, + /// A list of known versions of the view + pub(crate) versions: HashMap, + /// A list of version log entries with the timestamp and version-id for every + /// change to current-version-id + pub(crate) version_log: Vec, + /// A list of schemas, stored as objects with schema-id. + pub(crate) schemas: HashMap, + /// A string to string map of view properties. + /// Properties are used for metadata such as comment and for settings that + /// affect view maintenance. This is not intended to be used for arbitrary metadata. + pub(crate) properties: HashMap, +} + +impl ViewMetadata { + /// Returns format version of this metadata. 
+ #[inline] + pub fn format_version(&self) -> ViewFormatVersion { + self.format_version + } + + /// Returns uuid of current view. + #[inline] + pub fn uuid(&self) -> Uuid { + self.view_uuid + } + + /// Returns view location. + #[inline] + pub fn location(&self) -> &str { + self.location.as_str() + } + + /// Returns the current version id. + #[inline] + pub fn current_version_id(&self) -> ViewVersionId { + self.current_version_id + } + + /// Returns all view versions. + #[inline] + pub fn versions(&self) -> impl Iterator { + self.versions.values() + } + + /// Lookup a view version by id. + #[inline] + pub fn version_by_id(&self, version_id: ViewVersionId) -> Option<&ViewVersionRef> { + self.versions.get(&version_id) + } + + /// Returns the current view version. + #[inline] + pub fn current_version(&self) -> &ViewVersionRef { + self.versions + .get(&self.current_version_id) + .expect("Current version id set, but not found in view versions") + } + + /// Returns schemas + #[inline] + pub fn schemas_iter(&self) -> impl Iterator { + self.schemas.values() + } + + /// Lookup schema by id. + #[inline] + pub fn schema_by_id(&self, schema_id: SchemaId) -> Option<&SchemaRef> { + self.schemas.get(&schema_id) + } + + /// Get current schema + #[inline] + pub fn current_schema(&self) -> &SchemaRef { + let schema_id = self.current_version().schema_id(); + self.schema_by_id(schema_id) + .expect("Current schema id set, but not found in view metadata") + } + + /// Returns properties of the view. + #[inline] + pub fn properties(&self) -> &HashMap { + &self.properties + } + + /// Returns view history. + #[inline] + pub fn history(&self) -> &[ViewVersionLog] { + &self.version_log + } +} + +/// Manipulating view metadata. +pub struct ViewMetadataBuilder(ViewMetadata); + +impl ViewMetadataBuilder { + /// Creates a new view metadata builder from the given view metadata. + pub fn new(origin: ViewMetadata) -> Self { + Self(origin) + } + + /// Creates a new view metadata builder from the given view creation. + pub fn from_view_creation(view_creation: ViewCreation) -> Result { + let ViewCreation { + location, + schema, + properties, + name: _, + representations, + default_catalog, + default_namespace, + summary, + } = view_creation; + let initial_version_id = super::INITIAL_VIEW_VERSION_ID; + let version = ViewVersion::builder() + .with_default_catalog(default_catalog) + .with_default_namespace(default_namespace) + .with_representations(representations) + .with_schema_id(schema.schema_id()) + .with_summary(summary) + .with_timestamp_ms(Utc::now().timestamp_millis()) + .with_version_id(initial_version_id) + .build(); + + let versions = HashMap::from_iter(vec![(initial_version_id, version.into())]); + + let view_metadata = ViewMetadata { + format_version: ViewFormatVersion::V1, + view_uuid: Uuid::now_v7(), + location, + current_version_id: initial_version_id, + versions, + version_log: Vec::new(), + schemas: HashMap::from_iter(vec![(schema.schema_id(), Arc::new(schema))]), + properties, + }; + + Ok(Self(view_metadata)) + } + + /// Changes uuid of view metadata. + pub fn assign_uuid(mut self, uuid: Uuid) -> Result { + self.0.view_uuid = uuid; + Ok(self) + } + + /// Returns the new view metadata after changes. + pub fn build(self) -> Result { + Ok(self.0) + } +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[serde(rename_all = "kebab-case")] +/// A log of when each snapshot was made. 
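// ---------------------------------------------------------------------------
// Editor's note: illustrative sketch, not part of this patch. Once a
// `ViewMetadata` has been deserialized (e.g. with `serde_json`, as in the
// tests further below), the accessors above chain naturally from the current
// version down to its SQL text; the `json` variable is an assumption.
//
// let metadata: ViewMetadata =
//     serde_json::from_str(&json).expect("valid view metadata");
// let version = metadata.current_version();
// let schema = metadata.current_schema();
// for repr in version.representations().iter() {
//     match repr {
//         ViewRepresentation::Sql(sql) => println!(
//             "{} ({} dialect, schema id {})",
//             sql.sql,
//             sql.dialect,
//             schema.schema_id()
//         ),
//     }
// }
// ---------------------------------------------------------------------------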
+pub struct ViewVersionLog { + /// ID that current-version-id was set to + version_id: ViewVersionId, + /// Timestamp when the view's current-version-id was updated (ms from epoch) + timestamp_ms: i64, +} + +impl ViewVersionLog { + #[inline] + /// Creates a new view version log. + pub fn new(version_id: ViewVersionId, timestamp: i64) -> Self { + Self { + version_id, + timestamp_ms: timestamp, + } + } + + /// Returns the version id. + #[inline] + pub fn version_id(&self) -> ViewVersionId { + self.version_id + } + + /// Returns the timestamp in milliseconds from epoch. + #[inline] + pub fn timestamp_ms(&self) -> i64 { + self.timestamp_ms + } + + /// Returns the last updated timestamp as a DateTime with millisecond precision. + pub fn timestamp(self) -> Result> { + timestamp_ms_to_utc(self.timestamp_ms) + } +} + +pub(super) mod _serde { + /// This is a helper module that defines types to help with serialization/deserialization. + /// For deserialization the input first gets read into either the [ViewMetadataV1] struct + /// and then converted into the [ViewMetadata] struct. Serialization works the other way around. + /// [ViewMetadataV1] is an internal struct that are only used for serialization and deserialization. + use std::{collections::HashMap, sync::Arc}; + + use serde::{Deserialize, Serialize}; + use uuid::Uuid; + + use super::{ViewFormatVersion, ViewVersionId, ViewVersionLog}; + use crate::spec::schema::_serde::SchemaV2; + use crate::spec::table_metadata::_serde::VersionNumber; + use crate::spec::view_version::_serde::ViewVersionV1; + use crate::spec::{ViewMetadata, ViewVersion}; + use crate::{Error, ErrorKind}; + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + #[serde(untagged)] + pub(super) enum ViewMetadataEnum { + V1(ViewMetadataV1), + } + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + #[serde(rename_all = "kebab-case")] + /// Defines the structure of a v1 view metadata for serialization/deserialization + pub(super) struct ViewMetadataV1 { + pub format_version: VersionNumber<1>, + pub(super) view_uuid: Uuid, + pub(super) location: String, + pub(super) current_version_id: ViewVersionId, + pub(super) versions: Vec, + pub(super) version_log: Vec, + pub(super) schemas: Vec, + pub(super) properties: Option>, + } + + impl Serialize for ViewMetadata { + fn serialize(&self, serializer: S) -> Result + where S: serde::Serializer { + // we must do a clone here + let metadata_enum: ViewMetadataEnum = + self.clone().try_into().map_err(serde::ser::Error::custom)?; + + metadata_enum.serialize(serializer) + } + } + + impl TryFrom for ViewMetadata { + type Error = Error; + fn try_from(value: ViewMetadataEnum) -> Result { + match value { + ViewMetadataEnum::V1(value) => value.try_into(), + } + } + } + + impl TryFrom for ViewMetadataEnum { + type Error = Error; + fn try_from(value: ViewMetadata) -> Result { + Ok(match value.format_version { + ViewFormatVersion::V1 => ViewMetadataEnum::V1(value.into()), + }) + } + } + + impl TryFrom for ViewMetadata { + type Error = Error; + fn try_from(value: ViewMetadataV1) -> Result { + let schemas = HashMap::from_iter( + value + .schemas + .into_iter() + .map(|schema| Ok((schema.schema_id, Arc::new(schema.try_into()?)))) + .collect::, Error>>()?, + ); + let versions = HashMap::from_iter( + value + .versions + .into_iter() + .map(|x| Ok((x.version_id, Arc::new(ViewVersion::from(x))))) + .collect::, Error>>()?, + ); + // Make sure at least the current schema exists + let current_version = + versions + .get(&value.current_version_id) + 
.ok_or(self::Error::new( + ErrorKind::DataInvalid, + format!( + "No version exists with the current version id {}.", + value.current_version_id + ), + ))?; + if !schemas.contains_key(¤t_version.schema_id()) { + return Err(self::Error::new( + ErrorKind::DataInvalid, + format!( + "No schema exists with the schema id {}.", + current_version.schema_id() + ), + )); + } + + Ok(ViewMetadata { + format_version: ViewFormatVersion::V1, + view_uuid: value.view_uuid, + location: value.location, + schemas, + properties: value.properties.unwrap_or_default(), + current_version_id: value.current_version_id, + versions, + version_log: value.version_log, + }) + } + } + + impl From for ViewMetadataV1 { + fn from(v: ViewMetadata) -> Self { + let schemas = v + .schemas + .into_values() + .map(|x| { + Arc::try_unwrap(x) + .unwrap_or_else(|schema| schema.as_ref().clone()) + .into() + }) + .collect(); + let versions = v + .versions + .into_values() + .map(|x| { + Arc::try_unwrap(x) + .unwrap_or_else(|version| version.as_ref().clone()) + .into() + }) + .collect(); + ViewMetadataV1 { + format_version: VersionNumber::<1>, + view_uuid: v.view_uuid, + location: v.location, + schemas, + properties: Some(v.properties), + current_version_id: v.current_version_id, + versions, + version_log: v.version_log, + } + } + } +} + +#[derive(Debug, Serialize_repr, Deserialize_repr, PartialEq, Eq, Clone, Copy)] +#[repr(u8)] +/// Iceberg format version +pub enum ViewFormatVersion { + /// Iceberg view spec version 1 + V1 = 1u8, +} + +impl PartialOrd for ViewFormatVersion { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for ViewFormatVersion { + fn cmp(&self, other: &Self) -> Ordering { + (*self as u8).cmp(&(*other as u8)) + } +} + +impl Display for ViewFormatVersion { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + match self { + ViewFormatVersion::V1 => write!(f, "v1"), + } + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + use std::fs; + use std::sync::Arc; + + use anyhow::Result; + use pretty_assertions::assert_eq; + use uuid::Uuid; + + use super::{ViewFormatVersion, ViewMetadataBuilder, ViewVersionLog}; + use crate::spec::{ + NestedField, PrimitiveType, Schema, SqlViewRepresentation, Type, ViewMetadata, + ViewRepresentations, ViewVersion, + }; + use crate::{NamespaceIdent, ViewCreation}; + + fn check_view_metadata_serde(json: &str, expected_type: ViewMetadata) { + let desered_type: ViewMetadata = serde_json::from_str(json).unwrap(); + assert_eq!(desered_type, expected_type); + + let sered_json = serde_json::to_string(&expected_type).unwrap(); + let parsed_json_value = serde_json::from_str::(&sered_json).unwrap(); + + assert_eq!(parsed_json_value, desered_type); + } + + fn get_test_view_metadata(file_name: &str) -> ViewMetadata { + let path = format!("testdata/view_metadata/{}", file_name); + let metadata: String = fs::read_to_string(path).unwrap(); + + serde_json::from_str(&metadata).unwrap() + } + + #[test] + fn test_view_data_v1() { + let data = r#" + { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", + "format-version" : 1, + "location" : "s3://bucket/warehouse/default.db/event_agg", + "current-version-id" : 1, + "properties" : { + "comment" : "Daily event counts" + }, + "versions" : [ { + "version-id" : 1, + "timestamp-ms" : 1573518431292, + "schema-id" : 1, + "default-catalog" : "prod", + "default-namespace" : [ "default" ], + "summary" : { + "engine-name" : "Spark", + "engineVersion" : "3.3.2" + }, + "representations" : [ { + "type" : 
"sql", + "sql" : "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect" : "spark" + } ] + } ], + "schemas": [ { + "schema-id": 1, + "type" : "struct", + "fields" : [ { + "id" : 1, + "name" : "event_count", + "required" : false, + "type" : "int", + "doc" : "Count of events" + } ] + } ], + "version-log" : [ { + "timestamp-ms" : 1573518431292, + "version-id" : 1 + } ] + } + "#; + + let schema = Schema::builder() + .with_schema_id(1) + .with_fields(vec![Arc::new( + NestedField::optional(1, "event_count", Type::Primitive(PrimitiveType::Int)) + .with_doc("Count of events"), + )]) + .build() + .unwrap(); + let version = ViewVersion::builder() + .with_version_id(1) + .with_timestamp_ms(1573518431292) + .with_schema_id(1) + .with_default_catalog("prod".to_string().into()) + .with_default_namespace(NamespaceIdent::from_vec(vec!["default".to_string()]).unwrap()) + .with_summary(HashMap::from_iter(vec![ + ("engineVersion".to_string(), "3.3.2".to_string()), + ("engine-name".to_string(), "Spark".to_string()), + ])) + .with_representations(ViewRepresentations(vec![SqlViewRepresentation { + sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" + .to_string(), + dialect: "spark".to_string(), + } + .into()])) + .build(); + + let expected = ViewMetadata { + format_version: ViewFormatVersion::V1, + view_uuid: Uuid::parse_str("fa6506c3-7681-40c8-86dc-e36561f83385").unwrap(), + location: "s3://bucket/warehouse/default.db/event_agg".to_string(), + current_version_id: 1, + versions: HashMap::from_iter(vec![(1, Arc::new(version))]), + version_log: vec![ViewVersionLog { + timestamp_ms: 1573518431292, + version_id: 1, + }], + schemas: HashMap::from_iter(vec![(1, Arc::new(schema))]), + properties: HashMap::from_iter(vec![( + "comment".to_string(), + "Daily event counts".to_string(), + )]), + }; + + check_view_metadata_serde(data, expected); + } + + #[test] + fn test_invalid_view_uuid() -> Result<()> { + let data = r#" + { + "format-version" : 1, + "view-uuid": "xxxx" + } + "#; + assert!(serde_json::from_str::(data).is_err()); + Ok(()) + } + + #[test] + fn test_view_builder_from_view_creation() { + let representations = ViewRepresentations(vec![SqlViewRepresentation { + sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" + .to_string(), + dialect: "spark".to_string(), + } + .into()]); + let creation = ViewCreation::builder() + .location("s3://bucket/warehouse/default.db/event_agg".to_string()) + .name("view".to_string()) + .schema(Schema::builder().build().unwrap()) + .default_namespace(NamespaceIdent::from_vec(vec!["default".to_string()]).unwrap()) + .representations(representations) + .build(); + + let metadata = ViewMetadataBuilder::from_view_creation(creation) + .unwrap() + .build() + .unwrap(); + + assert_eq!( + metadata.location(), + "s3://bucket/warehouse/default.db/event_agg" + ); + assert_eq!(metadata.current_version_id(), 1); + assert_eq!(metadata.versions().count(), 1); + assert_eq!(metadata.schemas_iter().count(), 1); + assert_eq!(metadata.properties().len(), 0); + } + + #[test] + fn test_view_metadata_v1_file_valid() { + let metadata = + fs::read_to_string("testdata/view_metadata/ViewMetadataV1Valid.json").unwrap(); + + let schema = Schema::builder() + .with_schema_id(1) + .with_fields(vec![ + Arc::new( + NestedField::optional(1, "event_count", Type::Primitive(PrimitiveType::Int)) + .with_doc("Count of events"), + ), + Arc::new(NestedField::optional( + 2, + "event_date", + Type::Primitive(PrimitiveType::Date), + )), + ]) + .build() + 
.unwrap(); + + let version = ViewVersion::builder() + .with_version_id(1) + .with_timestamp_ms(1573518431292) + .with_schema_id(1) + .with_default_catalog("prod".to_string().into()) + .with_default_namespace(NamespaceIdent::from_vec(vec!["default".to_string()]).unwrap()) + .with_summary(HashMap::from_iter(vec![ + ("engineVersion".to_string(), "3.3.2".to_string()), + ("engine-name".to_string(), "Spark".to_string()), + ])) + .with_representations(ViewRepresentations(vec![SqlViewRepresentation { + sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" + .to_string(), + dialect: "spark".to_string(), + } + .into()])) + .build(); + + let expected = ViewMetadata { + format_version: ViewFormatVersion::V1, + view_uuid: Uuid::parse_str("fa6506c3-7681-40c8-86dc-e36561f83385").unwrap(), + location: "s3://bucket/warehouse/default.db/event_agg".to_string(), + current_version_id: 1, + versions: HashMap::from_iter(vec![(1, Arc::new(version))]), + version_log: vec![ViewVersionLog { + timestamp_ms: 1573518431292, + version_id: 1, + }], + schemas: HashMap::from_iter(vec![(1, Arc::new(schema))]), + properties: HashMap::from_iter(vec![( + "comment".to_string(), + "Daily event counts".to_string(), + )]), + }; + + check_view_metadata_serde(&metadata, expected); + } + + #[test] + fn test_view_builder_assign_uuid() { + let metadata = get_test_view_metadata("ViewMetadataV1Valid.json"); + let metadata_builder = ViewMetadataBuilder::new(metadata); + let uuid = Uuid::new_v4(); + let metadata = metadata_builder.assign_uuid(uuid).unwrap().build().unwrap(); + assert_eq!(metadata.uuid(), uuid); + } + + #[test] + fn test_view_metadata_v1_unsupported_version() { + let metadata = + fs::read_to_string("testdata/view_metadata/ViewMetadataUnsupportedVersion.json") + .unwrap(); + + let desered: Result = serde_json::from_str(&metadata); + + assert_eq!( + desered.unwrap_err().to_string(), + "data did not match any variant of untagged enum ViewMetadataEnum" + ) + } + + #[test] + fn test_view_metadata_v1_version_not_found() { + let metadata = + fs::read_to_string("testdata/view_metadata/ViewMetadataV1CurrentVersionNotFound.json") + .unwrap(); + + let desered: Result = serde_json::from_str(&metadata); + + assert_eq!( + desered.unwrap_err().to_string(), + "DataInvalid => No version exists with the current version id 2." + ) + } + + #[test] + fn test_view_metadata_v1_schema_not_found() { + let metadata = + fs::read_to_string("testdata/view_metadata/ViewMetadataV1SchemaNotFound.json").unwrap(); + + let desered: Result = serde_json::from_str(&metadata); + + assert_eq!( + desered.unwrap_err().to_string(), + "DataInvalid => No schema exists with the schema id 2." 
+ ) + } + + #[test] + fn test_view_metadata_v1_missing_schema_for_version() { + let metadata = + fs::read_to_string("testdata/view_metadata/ViewMetadataV1MissingSchema.json").unwrap(); + + let desered: Result = serde_json::from_str(&metadata); + + assert_eq!( + desered.unwrap_err().to_string(), + "data did not match any variant of untagged enum ViewMetadataEnum" + ) + } + + #[test] + fn test_view_metadata_v1_missing_current_version() { + let metadata = + fs::read_to_string("testdata/view_metadata/ViewMetadataV1MissingCurrentVersion.json") + .unwrap(); + + let desered: Result = serde_json::from_str(&metadata); + + assert_eq!( + desered.unwrap_err().to_string(), + "data did not match any variant of untagged enum ViewMetadataEnum" + ) + } +} diff --git a/crates/iceberg/src/spec/view_version.rs b/crates/iceberg/src/spec/view_version.rs new file mode 100644 index 000000000..30686b5a4 --- /dev/null +++ b/crates/iceberg/src/spec/view_version.rs @@ -0,0 +1,313 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +/*! + * View Versions! + */ +use std::collections::HashMap; +use std::sync::Arc; + +use _serde::ViewVersionV1; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use typed_builder::TypedBuilder; + +use super::view_metadata::ViewVersionLog; +use crate::catalog::NamespaceIdent; +use crate::error::{timestamp_ms_to_utc, Result}; +use crate::spec::{SchemaId, SchemaRef, ViewMetadata}; +use crate::{Error, ErrorKind}; + +/// Reference to [`ViewVersion`]. +pub type ViewVersionRef = Arc; + +/// Alias for the integer type used for view version ids. +pub type ViewVersionId = i32; + +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize, TypedBuilder)] +#[serde(from = "ViewVersionV1", into = "ViewVersionV1")] +#[builder(field_defaults(setter(prefix = "with_")))] +/// A view versions represents the definition of a view at a specific point in time. +pub struct ViewVersion { + /// A unique long ID + version_id: ViewVersionId, + /// ID of the schema for the view version + schema_id: SchemaId, + /// Timestamp when the version was created (ms from epoch) + timestamp_ms: i64, + /// A string to string map of summary metadata about the version + summary: HashMap, + /// A list of representations for the view definition. + representations: ViewRepresentations, + /// Catalog name to use when a reference in the SELECT does not contain a catalog + #[builder(default = None)] + default_catalog: Option, + /// Namespace to use when a reference in the SELECT is a single identifier + default_namespace: NamespaceIdent, +} + +impl ViewVersion { + /// Get the version id of this view version. + #[inline] + pub fn version_id(&self) -> ViewVersionId { + self.version_id + } + + /// Get the schema id of this view version. 
+ #[inline] + pub fn schema_id(&self) -> SchemaId { + self.schema_id + } + + /// Get the timestamp of when the view version was created + #[inline] + pub fn timestamp(&self) -> Result> { + timestamp_ms_to_utc(self.timestamp_ms) + } + + /// Get the timestamp of when the view version was created in milliseconds since epoch + #[inline] + pub fn timestamp_ms(&self) -> i64 { + self.timestamp_ms + } + + /// Get summary of the view version + #[inline] + pub fn summary(&self) -> &HashMap { + &self.summary + } + + /// Get this views representations + #[inline] + pub fn representations(&self) -> &ViewRepresentations { + &self.representations + } + + /// Get the default catalog for this view version + #[inline] + pub fn default_catalog(&self) -> Option<&String> { + self.default_catalog.as_ref() + } + + /// Get the default namespace to use when a reference in the SELECT is a single identifier + #[inline] + pub fn default_namespace(&self) -> &NamespaceIdent { + &self.default_namespace + } + + /// Get the schema of this snapshot. + pub fn schema(&self, view_metadata: &ViewMetadata) -> Result { + let r = view_metadata + .schema_by_id(self.schema_id()) + .ok_or_else(|| { + Error::new( + ErrorKind::DataInvalid, + format!("Schema with id {} not found", self.schema_id()), + ) + }) + .cloned(); + r + } + + /// Retrieve the history log entry for this view version. + #[allow(dead_code)] + pub(crate) fn log(&self) -> ViewVersionLog { + ViewVersionLog::new(self.version_id, self.timestamp_ms) + } +} + +/// A list of view representations. +#[derive(Debug, PartialEq, Eq, Clone, Serialize, Deserialize)] +pub struct ViewRepresentations(pub(crate) Vec); + +impl ViewRepresentations { + #[inline] + /// Get the number of representations + pub fn len(&self) -> usize { + self.0.len() + } + + #[inline] + /// Check if there are no representations + pub fn is_empty(&self) -> bool { + self.0.is_empty() + } + + /// Get an iterator over the representations + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } +} + +// Iterator for ViewRepresentations +impl IntoIterator for ViewRepresentations { + type Item = ViewRepresentation; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + self.0.into_iter() + } +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[serde(tag = "type")] +/// View definitions can be represented in multiple ways. +/// Representations are documented ways to express a view definition. +// ToDo: Make unique per Dialect +pub enum ViewRepresentation { + #[serde(rename = "sql")] + /// The SQL representation stores the view definition as a SQL SELECT, + Sql(SqlViewRepresentation), +} + +#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)] +#[serde(rename_all = "kebab-case")] +/// The SQL representation stores the view definition as a SQL SELECT, +/// with metadata such as the SQL dialect. +pub struct SqlViewRepresentation { + #[serde(rename = "sql")] + /// The SQL SELECT statement that defines the view. + pub sql: String, + #[serde(rename = "dialect")] + /// The dialect of the sql SELECT statement (e.g., "trino" or "spark") + pub dialect: String, +} + +pub(super) mod _serde { + /// This is a helper module that defines types to help with serialization/deserialization. + /// For deserialization the input first gets read into the [`ViewVersionV1`] struct. + /// and then converted into the [Snapshot] struct. Serialization works the other way around. + /// [ViewVersionV1] are internal struct that are only used for serialization and deserialization. 
+ use serde::{Deserialize, Serialize}; + + use super::{ViewRepresentation, ViewRepresentations, ViewVersion}; + use crate::catalog::NamespaceIdent; + + #[derive(Debug, Serialize, Deserialize, PartialEq, Eq)] + #[serde(rename_all = "kebab-case")] + /// Defines the structure of a v1 view version for serialization/deserialization + pub(crate) struct ViewVersionV1 { + pub version_id: i32, + pub schema_id: i32, + pub timestamp_ms: i64, + pub summary: std::collections::HashMap, + pub representations: Vec, + #[serde(skip_serializing_if = "Option::is_none")] + pub default_catalog: Option, + pub default_namespace: NamespaceIdent, + } + + impl From for ViewVersion { + fn from(v1: ViewVersionV1) -> Self { + ViewVersion { + version_id: v1.version_id, + schema_id: v1.schema_id, + timestamp_ms: v1.timestamp_ms, + summary: v1.summary, + representations: ViewRepresentations(v1.representations), + default_catalog: v1.default_catalog, + default_namespace: v1.default_namespace, + } + } + } + + impl From for ViewVersionV1 { + fn from(v1: ViewVersion) -> Self { + ViewVersionV1 { + version_id: v1.version_id, + schema_id: v1.schema_id, + timestamp_ms: v1.timestamp_ms, + summary: v1.summary, + representations: v1.representations.0, + default_catalog: v1.default_catalog, + default_namespace: v1.default_namespace, + } + } + } +} + +impl From for ViewRepresentation { + fn from(sql: SqlViewRepresentation) -> Self { + ViewRepresentation::Sql(sql) + } +} + +#[cfg(test)] +mod tests { + use chrono::{TimeZone, Utc}; + + use crate::spec::view_version::ViewVersion; + use crate::spec::view_version::_serde::ViewVersionV1; + use crate::spec::ViewRepresentations; + + #[test] + fn view_version() { + let record = serde_json::json!( + { + "version-id" : 1, + "timestamp-ms" : 1573518431292i64, + "schema-id" : 1, + "default-catalog" : "prod", + "default-namespace" : [ "default" ], + "summary" : { + "engine-name" : "Spark", + "engineVersion" : "3.3.2" + }, + "representations" : [ { + "type" : "sql", + "sql" : "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect" : "spark" + } ] + } + ); + + let result: ViewVersion = serde_json::from_value::(record.clone()) + .unwrap() + .into(); + + // Roundtrip + assert_eq!(serde_json::to_value(result.clone()).unwrap(), record); + + assert_eq!(result.version_id(), 1); + assert_eq!( + result.timestamp().unwrap(), + Utc.timestamp_millis_opt(1573518431292).unwrap() + ); + assert_eq!(result.schema_id(), 1); + assert_eq!(result.default_catalog, Some("prod".to_string())); + assert_eq!(result.summary(), &{ + let mut map = std::collections::HashMap::new(); + map.insert("engine-name".to_string(), "Spark".to_string()); + map.insert("engineVersion".to_string(), "3.3.2".to_string()); + map + }); + assert_eq!( + result.representations().to_owned(), + ViewRepresentations(vec![super::ViewRepresentation::Sql( + super::SqlViewRepresentation { + sql: "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2" + .to_string(), + dialect: "spark".to_string(), + }, + )]) + ); + assert_eq!( + result.default_namespace.inner(), + vec!["default".to_string()] + ); + } +} diff --git a/crates/iceberg/src/table.rs b/crates/iceberg/src/table.rs index b9a701193..d28d6e5d6 100644 --- a/crates/iceberg/src/table.rs +++ b/crates/iceberg/src/table.rs @@ -104,6 +104,7 @@ impl Table { /// .snapshot_id(); /// # } /// ``` +#[derive(Debug, Clone)] pub struct StaticTable(Table); impl StaticTable { @@ -144,9 +145,9 @@ impl StaticTable { self.0.metadata_ref() } - /// Consumes the `StaticTable` and return it 
as a `Table` - /// Please use this method carefully as the Table it returns remains detached from a catalog - /// and can't be used to perform modifications on the table. + /// Consumes the `StaticTable` and return it as a `Table` + /// Please use this method carefully as the Table it returns remains detached from a catalog + /// and can't be used to perform modifications on the table. pub fn into_table(self) -> Table { self.0 } diff --git a/crates/iceberg/src/transform/bucket.rs b/crates/iceberg/src/transform/bucket.rs index c67051691..e19e5b841 100644 --- a/crates/iceberg/src/transform/bucket.rs +++ b/crates/iceberg/src/transform/bucket.rs @@ -229,7 +229,7 @@ impl TransformFunction for Bucket { PrimitiveLiteral::Time(v) => self.bucket_time(*v), PrimitiveLiteral::Timestamp(v) => self.bucket_timestamp(*v), PrimitiveLiteral::String(v) => self.bucket_str(v.as_str()), - PrimitiveLiteral::UUID(v) => self.bucket_bytes(v.as_ref()), + PrimitiveLiteral::Uuid(v) => self.bucket_bytes(v.as_ref()), PrimitiveLiteral::Binary(v) => self.bucket_bytes(v.as_ref()), PrimitiveLiteral::Fixed(v) => self.bucket_bytes(v.as_ref()), _ => { diff --git a/crates/iceberg/testdata/file_io_s3/docker-compose.yaml b/crates/iceberg/testdata/file_io_s3/docker-compose.yaml index 0793d225b..cbce31864 100644 --- a/crates/iceberg/testdata/file_io_s3/docker-compose.yaml +++ b/crates/iceberg/testdata/file_io_s3/docker-compose.yaml @@ -18,7 +18,7 @@ services: minio: image: minio/minio:RELEASE.2024-02-26T09-33-48Z - ports: + expose: - 9000 - 9001 environment: diff --git a/crates/iceberg/testdata/view_metadata/ViewMetadataUnsupportedVersion.json b/crates/iceberg/testdata/view_metadata/ViewMetadataUnsupportedVersion.json new file mode 100644 index 000000000..c5627b8af --- /dev/null +++ b/crates/iceberg/testdata/view_metadata/ViewMetadataUnsupportedVersion.json @@ -0,0 +1,58 @@ +{ + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", + "format-version": 2, + "location": "s3://bucket/warehouse/default.db/event_agg", + "current-version-id": 1, + "properties": { + "comment": "Daily event counts" + }, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1573518431292, + "schema-id": 1, + "default-catalog": "prod", + "default-namespace": [ + "default" + ], + "summary": { + "engine-name": "Spark", + "engineVersion": "3.3.2" + }, + "representations": [ + { + "type": "sql", + "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect": "spark" + } + ] + } + ], + "schemas": [ + { + "schema-id": 1, + "type": "struct", + "fields": [ + { + "id": 1, + "name": "event_count", + "required": false, + "type": "int", + "doc": "Count of events" + }, + { + "id": 2, + "name": "event_date", + "required": false, + "type": "date" + } + ] + } + ], + "version-log": [ + { + "timestamp-ms": 1573518431292, + "version-id": 1 + } + ] +} \ No newline at end of file diff --git a/crates/iceberg/testdata/view_metadata/ViewMetadataV1CurrentVersionNotFound.json b/crates/iceberg/testdata/view_metadata/ViewMetadataV1CurrentVersionNotFound.json new file mode 100644 index 000000000..4ba94ca4c --- /dev/null +++ b/crates/iceberg/testdata/view_metadata/ViewMetadataV1CurrentVersionNotFound.json @@ -0,0 +1,58 @@ +{ + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", + "format-version": 1, + "location": "s3://bucket/warehouse/default.db/event_agg", + "current-version-id": 2, + "properties": { + "comment": "Daily event counts" + }, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1573518431292, + "schema-id": 1, + "default-catalog": 
"prod", + "default-namespace": [ + "default" + ], + "summary": { + "engine-name": "Spark", + "engineVersion": "3.3.2" + }, + "representations": [ + { + "type": "sql", + "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect": "spark" + } + ] + } + ], + "schemas": [ + { + "schema-id": 1, + "type": "struct", + "fields": [ + { + "id": 1, + "name": "event_count", + "required": false, + "type": "int", + "doc": "Count of events" + }, + { + "id": 2, + "name": "event_date", + "required": false, + "type": "date" + } + ] + } + ], + "version-log": [ + { + "timestamp-ms": 1573518431292, + "version-id": 1 + } + ] +} \ No newline at end of file diff --git a/crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingCurrentVersion.json b/crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingCurrentVersion.json new file mode 100644 index 000000000..c21088176 --- /dev/null +++ b/crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingCurrentVersion.json @@ -0,0 +1,57 @@ +{ + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", + "format-version": 1, + "location": "s3://bucket/warehouse/default.db/event_agg", + "properties": { + "comment": "Daily event counts" + }, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1573518431292, + "schema-id": 1, + "default-catalog": "prod", + "default-namespace": [ + "default" + ], + "summary": { + "engine-name": "Spark", + "engineVersion": "3.3.2" + }, + "representations": [ + { + "type": "sql", + "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect": "spark" + } + ] + } + ], + "schemas": [ + { + "schema-id": 1, + "type": "struct", + "fields": [ + { + "id": 1, + "name": "event_count", + "required": false, + "type": "int", + "doc": "Count of events" + }, + { + "id": 2, + "name": "event_date", + "required": false, + "type": "date" + } + ] + } + ], + "version-log": [ + { + "timestamp-ms": 1573518431292, + "version-id": 1 + } + ] +} \ No newline at end of file diff --git a/crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingSchema.json b/crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingSchema.json new file mode 100644 index 000000000..b5b454bca --- /dev/null +++ b/crates/iceberg/testdata/view_metadata/ViewMetadataV1MissingSchema.json @@ -0,0 +1,56 @@ +{ + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", + "format-version": 1, + "location": "s3://bucket/warehouse/default.db/event_agg", + "properties": { + "comment": "Daily event counts" + }, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1573518431292, + "default-catalog": "prod", + "default-namespace": [ + "default" + ], + "summary": { + "engine-name": "Spark", + "engineVersion": "3.3.2" + }, + "representations": [ + { + "type": "sql", + "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect": "spark" + } + ] + } + ], + "schemas": [ + { + "schema-id": 1, + "type": "struct", + "fields": [ + { + "id": 1, + "name": "event_count", + "required": false, + "type": "int", + "doc": "Count of events" + }, + { + "id": 2, + "name": "event_date", + "required": false, + "type": "date" + } + ] + } + ], + "version-log": [ + { + "timestamp-ms": 1573518431292, + "version-id": 1 + } + ] +} \ No newline at end of file diff --git a/crates/iceberg/testdata/view_metadata/ViewMetadataV1SchemaNotFound.json b/crates/iceberg/testdata/view_metadata/ViewMetadataV1SchemaNotFound.json new file mode 100644 index 000000000..0026d223e --- /dev/null +++ 
b/crates/iceberg/testdata/view_metadata/ViewMetadataV1SchemaNotFound.json @@ -0,0 +1,58 @@ +{ + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", + "format-version": 1, + "location": "s3://bucket/warehouse/default.db/event_agg", + "current-version-id": 1, + "properties": { + "comment": "Daily event counts" + }, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1573518431292, + "schema-id": 2, + "default-catalog": "prod", + "default-namespace": [ + "default" + ], + "summary": { + "engine-name": "Spark", + "engineVersion": "3.3.2" + }, + "representations": [ + { + "type": "sql", + "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect": "spark" + } + ] + } + ], + "schemas": [ + { + "schema-id": 1, + "type": "struct", + "fields": [ + { + "id": 1, + "name": "event_count", + "required": false, + "type": "int", + "doc": "Count of events" + }, + { + "id": 2, + "name": "event_date", + "required": false, + "type": "date" + } + ] + } + ], + "version-log": [ + { + "timestamp-ms": 1573518431292, + "version-id": 1 + } + ] +} \ No newline at end of file diff --git a/crates/iceberg/testdata/view_metadata/ViewMetadataV1Valid.json b/crates/iceberg/testdata/view_metadata/ViewMetadataV1Valid.json new file mode 100644 index 000000000..5011a804f --- /dev/null +++ b/crates/iceberg/testdata/view_metadata/ViewMetadataV1Valid.json @@ -0,0 +1,58 @@ +{ + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", + "format-version": 1, + "location": "s3://bucket/warehouse/default.db/event_agg", + "current-version-id": 1, + "properties": { + "comment": "Daily event counts" + }, + "versions": [ + { + "version-id": 1, + "timestamp-ms": 1573518431292, + "schema-id": 1, + "default-catalog": "prod", + "default-namespace": [ + "default" + ], + "summary": { + "engine-name": "Spark", + "engineVersion": "3.3.2" + }, + "representations": [ + { + "type": "sql", + "sql": "SELECT\n COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2", + "dialect": "spark" + } + ] + } + ], + "schemas": [ + { + "schema-id": 1, + "type": "struct", + "fields": [ + { + "id": 1, + "name": "event_count", + "required": false, + "type": "int", + "doc": "Count of events" + }, + { + "id": 2, + "name": "event_date", + "required": false, + "type": "date" + } + ] + } + ], + "version-log": [ + { + "timestamp-ms": 1573518431292, + "version-id": 1 + } + ] +} \ No newline at end of file diff --git a/crates/iceberg/tests/file_io_s3_test.rs b/crates/iceberg/tests/file_io_s3_test.rs index efa9128a3..326fdbb29 100644 --- a/crates/iceberg/tests/file_io_s3_test.rs +++ b/crates/iceberg/tests/file_io_s3_test.rs @@ -17,6 +17,7 @@ //! Integration tests for FileIO S3. 
+use std::net::SocketAddr;
 use std::sync::RwLock;
 
 use ctor::{ctor, dtor};
@@ -26,6 +27,7 @@ use iceberg::io::{
 use iceberg_test_utils::docker::DockerCompose;
 use iceberg_test_utils::{normalize_test_name, set_up};
 
+const MINIO_PORT: u16 = 9000;
 static DOCKER_COMPOSE_ENV: RwLock<Option<DockerCompose>> = RwLock::new(None);
 
 #[ctor]
@@ -51,11 +53,11 @@ async fn get_file_io() -> FileIO {
     let guard = DOCKER_COMPOSE_ENV.read().unwrap();
     let docker_compose = guard.as_ref().unwrap();
     let container_ip = docker_compose.get_container_ip("minio");
-    let read_port = format!("{}:{}", container_ip, 9000);
+    let minio_socket_addr = SocketAddr::new(container_ip, MINIO_PORT);
 
     FileIOBuilder::new("s3")
         .with_props(vec![
-            (S3_ENDPOINT, format!("http://{}", read_port)),
+            (S3_ENDPOINT, format!("http://{}", minio_socket_addr)),
             (S3_ACCESS_KEY_ID, "admin".to_string()),
             (S3_SECRET_ACCESS_KEY, "password".to_string()),
             (S3_REGION, "us-east-1".to_string()),
diff --git a/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs b/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs
index 9ad1d401f..292cd8b3a 100644
--- a/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs
+++ b/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs
@@ -18,6 +18,7 @@
 //! Integration tests for Iceberg Datafusion with Hive Metastore.
 
 use std::collections::HashMap;
+use std::net::SocketAddr;
 use std::sync::{Arc, RwLock};
 
 use ctor::{ctor, dtor};
@@ -40,7 +41,7 @@ static DOCKER_COMPOSE_ENV: RwLock<Option<DockerCompose>> = RwLock::new(None);
 struct TestFixture {
     hms_catalog: HmsCatalog,
     props: HashMap<String, String>,
-    hms_catalog_ip: String,
+    hms_catalog_socket_addr: SocketAddr,
 }
 
 #[ctor]
@@ -63,7 +64,7 @@ fn after_all() {
 impl TestFixture {
     fn get_catalog(&self) -> HmsCatalog {
         let config = HmsCatalogConfig::builder()
-            .address(format!("{}:{}", self.hms_catalog_ip, HMS_CATALOG_PORT))
+            .address(self.hms_catalog_socket_addr.to_string())
             .thrift_transport(HmsThriftTransport::Buffered)
             .warehouse("s3a://warehouse/hive".to_string())
             .props(self.props.clone())
@@ -85,20 +86,17 @@ async fn get_test_fixture() -> TestFixture {
         )
     };
 
-    let read_port = format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT);
-    loop {
-        if !scan_port_addr(&read_port) {
-            log::info!("Waiting for 1s hms catalog to ready...");
-            sleep(std::time::Duration::from_millis(1000)).await;
-        } else {
-            break;
-        }
+    let hms_catalog_socket_addr = SocketAddr::new(hms_catalog_ip, HMS_CATALOG_PORT);
+    let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT);
+    while !scan_port_addr(hms_catalog_socket_addr) {
+        log::info!("Waiting for 1s hms catalog to ready...");
+        sleep(std::time::Duration::from_millis(1000)).await;
     }
 
     let props = HashMap::from([
         (
             S3_ENDPOINT.to_string(),
-            format!("http://{}:{}", minio_ip, MINIO_PORT),
+            format!("http://{}", minio_socket_addr),
         ),
         (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
         (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
@@ -106,7 +104,7 @@ async fn get_test_fixture() -> TestFixture {
     ]);
 
     let config = HmsCatalogConfig::builder()
-        .address(format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT))
+        .address(hms_catalog_socket_addr.to_string())
         .thrift_transport(HmsThriftTransport::Buffered)
         .warehouse("s3a://warehouse/hive".to_string())
         .props(props.clone())
@@ -117,7 +115,7 @@ async fn get_test_fixture() -> TestFixture {
     TestFixture {
         hms_catalog,
         props,
-        hms_catalog_ip,
+        hms_catalog_socket_addr,
     }
 }
diff --git a/crates/test_utils/src/cmd.rs b/crates/test_utils/src/cmd.rs
index 604d4a14d..503d63d15 100644
--- a/crates/test_utils/src/cmd.rs
+++ b/crates/test_utils/src/cmd.rs
@@ -28,14 +28,27 @@ pub fn run_command(mut cmd: Command, desc: impl ToString) {
     }
 }
 
-pub fn get_cmd_output(mut cmd: Command, desc: impl ToString) -> String {
+pub fn get_cmd_output_result(mut cmd: Command, desc: impl ToString) -> Result<String, String> {
     let desc = desc.to_string();
     log::info!("Starting to {}, command: {:?}", &desc, cmd);
-    let output = cmd.output().unwrap();
-    if output.status.success() {
-        log::info!("{} succeed!", desc);
-        String::from_utf8(output.stdout).unwrap()
-    } else {
-        panic!("{} failed: {:?}", desc, output.status);
+    let result = cmd.output();
+    match result {
+        Ok(output) => {
+            if output.status.success() {
+                log::info!("{} succeed!", desc);
+                Ok(String::from_utf8(output.stdout).unwrap())
+            } else {
+                Err(format!("{} failed with rc: {:?}", desc, output.status))
+            }
+        }
+        Err(err) => Err(format!("{} failed with error: {}", desc, { err })),
+    }
+}
+
+pub fn get_cmd_output(cmd: Command, desc: impl ToString) -> String {
+    let result = get_cmd_output_result(cmd, desc);
+    match result {
+        Ok(output_str) => output_str,
+        Err(err) => panic!("{}", err),
     }
 }
diff --git a/crates/test_utils/src/docker.rs b/crates/test_utils/src/docker.rs
index 2902c49ed..bde9737b1 100644
--- a/crates/test_utils/src/docker.rs
+++ b/crates/test_utils/src/docker.rs
@@ -15,9 +15,10 @@
 // specific language governing permissions and limitations
 // under the License.
 
+use std::net::IpAddr;
 use std::process::Command;
 
-use crate::cmd::{get_cmd_output, run_command};
+use crate::cmd::{get_cmd_output, get_cmd_output_result, run_command};
 
 /// A utility to manage the lifecycle of `docker compose`.
 ///
@@ -46,9 +47,22 @@ impl DockerCompose {
             .arg("--format")
             .arg("{{.OSType}}/{{.Architecture}}");
 
-        get_cmd_output(cmd, "Get os arch".to_string())
-            .trim()
-            .to_string()
+        let result = get_cmd_output_result(cmd, "Get os arch".to_string());
+        match result {
+            Ok(value) => value.trim().to_string(),
+            Err(_err) => {
+                // docker/podman do not consistently place OSArch info in the same json path across OS and versions
+                // Below tries an alternative path if the above path fails
+                let mut alt_cmd = Command::new("docker");
+                alt_cmd
+                    .arg("info")
+                    .arg("--format")
+                    .arg("{{.Version.OsArch}}");
+                get_cmd_output(alt_cmd, "Get os arch".to_string())
+                    .trim()
+                    .to_string()
+            }
+        }
     }
 
     pub fn run(&self) {
@@ -77,7 +91,7 @@ impl DockerCompose {
         )
     }
 
-    pub fn get_container_ip(&self, service_name: impl AsRef<str>) -> String {
+    pub fn get_container_ip(&self, service_name: impl AsRef<str>) -> IpAddr {
         let container_name = format!("{}-{}-1", self.project_name, service_name.as_ref());
         let mut cmd = Command::new("docker");
         cmd.arg("inspect")
@@ -85,9 +99,16 @@ impl DockerCompose {
             .arg("{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}")
             .arg(&container_name);
 
-        get_cmd_output(cmd, format!("Get container ip of {container_name}"))
+        let ip_result = get_cmd_output(cmd, format!("Get container ip of {container_name}"))
             .trim()
-            .to_string()
+            .parse::<IpAddr>();
+        match ip_result {
+            Ok(ip) => ip,
+            Err(e) => {
+                log::error!("Invalid IP, {e}");
+                panic!("Failed to parse IP for {container_name}")
+            }
+        }
     }
 }
diff --git a/docs/contributing/podman.md b/docs/contributing/podman.md
new file mode 100644
index 000000000..3281ad4da
--- /dev/null
+++ b/docs/contributing/podman.md
@@ -0,0 +1,85 @@
+
+
+# Using Podman instead of Docker
+
+Iceberg-rust does not require containerization, except for integration tests, where "docker" and "docker-compose" are used to start containers for minio and various catalogs. The instructions below set up "rootful podman" and Docker's official docker-compose plugin to run integration tests as an alternative to Docker or OrbStack.
+
+1. Have podman v4 or newer.
+   ```console
+   $ podman --version
+   podman version 4.9.4-rhel
+   ```
+
+2. Open the file `/usr/bin/docker` and add the contents below:
+   ```bash
+   #!/bin/sh
+   [ -e /etc/containers/nodocker ] || \
+   echo "Emulate Docker CLI using podman. Create /etc/containers/nodocker to quiet msg." >&2
+   exec sudo /usr/bin/podman "$@"
+   ```
+
+3. Install the [docker compose plugin](https://docs.docker.com/compose/install/linux). Check for successful installation.
+   ```console
+   $ docker compose version
+   Docker Compose version v2.28.1
+   ```
+
+4. Append the following to `~/.bashrc` or equivalent shell config:
+   ```bash
+   export DOCKER_HOST=unix:///run/podman/podman.sock
+   ```
+
+5. Start the "rootful" podman socket.
+   ```shell
+   sudo systemctl start podman.socket
+   sudo systemctl status podman.socket
+   ```
+
+6. Check that the following symlink exists.
+   ```console
+   $ ls -al /var/run/docker.sock
+   lrwxrwxrwx 1 root root 27 Jul 24 12:18 /var/run/docker.sock -> /var/run/podman/podman.sock
+   ```
+   If the symlink does not exist, create it.
+   ```shell
+   sudo ln -s /var/run/podman/podman.sock /var/run/docker.sock
+   ```
+
+7. Check that the docker socket is working.
+   ```shell
+   sudo curl -H "Content-Type: application/json" --unix-socket /var/run/docker.sock http://localhost/_ping
+   ```
+
+8. Try some integration tests!
+   ```shell
+   cargo test -p iceberg --test file_io_s3_test
+   ```
+
+# References
+
+*
+*
+
+# Note on rootless containers
+
+As of podman v4, ["To be succinct and simple, when running rootless containers, the container itself does not have an IP address"](https://www.redhat.com/sysadmin/container-ip-address-podman). This causes issues with iceberg-rust's integration tests, which rely upon IP-addressable containers via docker-compose. As a result, podman "rootful" containers are required throughout to ensure containers have IP addresses. Perhaps as a future work or with updates to default podman networking, the need for "rootful" podman containers can be eliminated.
+
+*
+*
diff --git a/rustfmt.toml b/rustfmt.toml
index 49be5742b..91d924daf 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -23,4 +23,4 @@ group_imports = "StdExternalCrate"
 imports_granularity = "Module"
 overflow_delimited_expr = true
 trailing_comma = "Vertical"
-where_single_line = true
\ No newline at end of file
+where_single_line = true
diff --git a/website/src/SUMMARY.md b/website/src/SUMMARY.md
index c9ed76d2f..e2a07ba9c 100644
--- a/website/src/SUMMARY.md
+++ b/website/src/SUMMARY.md
@@ -32,4 +32,5 @@
 
 # Reference
 
+- [Using Podman instead of Docker](./reference/podman.md)
 - [Setup GPG key](./reference/setup_gpg.md)
\ No newline at end of file
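The `test_utils` changes above make `DockerCompose::get_container_ip` return a typed `IpAddr`, which the integration tests pair with `SocketAddr` to build service endpoints. A minimal sketch of how a test might use the revised helper, assuming the `iceberg_test_utils` API shown in this diff (the `"minio"` service name, port constant, and `service_endpoint` helper are illustrative only, not part of the patch):

```rust
use std::net::SocketAddr;

use iceberg_test_utils::docker::DockerCompose;

// Illustrative constant; mirrors the MINIO_PORT added in file_io_s3_test.rs.
const MINIO_PORT: u16 = 9000;

/// Build an HTTP endpoint for a compose service from its container IP.
fn service_endpoint(compose: &DockerCompose, service: &str, port: u16) -> String {
    // get_container_ip now returns IpAddr (and panics on unparseable
    // `docker inspect` output), so the endpoint is built from a typed
    // SocketAddr instead of ad-hoc string concatenation.
    let addr = SocketAddr::new(compose.get_container_ip(service), port);
    format!("http://{}", addr)
}

// Usage inside a test fixture (sketch):
// let endpoint = service_endpoint(&docker_compose, "minio", MINIO_PORT);
```

Parsing the IP once at the `DockerCompose` boundary keeps malformed inspect output from surfacing later as a confusing connection error inside a test.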