diff --git a/.gitignore b/.gitignore
index 25b803228..341f1ca92 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,3 +21,6 @@
 .vscode
 **/.DS_Store
 dist/*
+crates/iceberg/testdata/performance/raw_data/*
+
+crates/iceberg/testdata/performance/warehouse
diff --git a/crates/iceberg/Cargo.toml b/crates/iceberg/Cargo.toml
index de5b7cdc5..667e6f3c0 100644
--- a/crates/iceberg/Cargo.toml
+++ b/crates/iceberg/Cargo.toml
@@ -79,8 +79,15 @@ url = { workspace = true }
 uuid = { workspace = true }
 
 [dev-dependencies]
+criterion = { version = "0.3", features = ["async_tokio", "async_futures"] }
 ctor = { workspace = true }
+futures-util = "0.3"
+iceberg-catalog-rest = { path = "../catalog/rest" }
 iceberg_test_utils = { path = "../test_utils", features = ["tests"] }
 pretty_assertions = { workspace = true }
 tempfile = { workspace = true }
 tera = { workspace = true }
+
+[[bench]]
+name = "table_scan_plan_files"
+harness = false
diff --git a/crates/iceberg/benches/table_scan_plan_files.rs b/crates/iceberg/benches/table_scan_plan_files.rs
new file mode 100644
index 000000000..bdf8f7ad6
--- /dev/null
+++ b/crates/iceberg/benches/table_scan_plan_files.rs
@@ -0,0 +1,152 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
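+
+//! Benchmarks for `TableScan::plan_files`, run with Criterion against the
+//! NYC taxi table served by the docker compose stack in
+//! `crates/iceberg/testdata/performance`. The environment must already be
+//! running and populated; the repository `justfile` wires this up
+//! (`just perf-start`, then `just perf-run`).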
+
+use criterion::*;
+use futures_util::StreamExt;
+use tokio::runtime::Runtime;
+
+mod utils;
+use utils::build_catalog;
+
+use iceberg::expr::Reference;
+use iceberg::spec::Datum;
+use iceberg::table::Table;
+use iceberg::Catalog;
+
+async fn setup_async() -> Table {
+    let catalog = build_catalog().await;
+    let namespaces = catalog.list_namespaces(None).await.unwrap();
+    let table_idents = catalog.list_tables(&namespaces[0]).await.unwrap();
+    catalog.load_table(&table_idents[0]).await.unwrap()
+}
+
+fn setup(runtime: &Runtime) -> Table {
+    runtime.block_on(setup_async())
+}
+
+// Unfiltered scan: plans every data file in the table.
+async fn all_files_all_rows(table: &Table) {
+    let scan = table.scan().build().unwrap();
+    let mut stream = scan.plan_files().await.unwrap();
+
+    while let Some(item) = stream.next().await {
+        black_box(item.unwrap());
+    }
+}
+
+// Partition-pruned scan: a one-day pickup-time range that matches a single
+// daily partition.
+async fn one_file_all_rows(table: &Table) {
+    let scan = table
+        .scan()
+        .with_filter(
+            Reference::new("tpep_pickup_datetime")
+                .greater_than_or_equal_to(
+                    Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
+                )
+                .and(Reference::new("tpep_pickup_datetime").less_than(
+                    Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
+                )),
+        )
+        .build()
+        .unwrap();
+    let mut stream = scan.plan_files().await.unwrap();
+
+    while let Some(item) = stream.next().await {
+        black_box(item.unwrap());
+    }
+}
+
+// Row-selective scan: the predicate matches rows in every file, so no file
+// can be pruned.
+async fn all_files_some_rows(table: &Table) {
+    let scan = table
+        .scan()
+        .with_filter(Reference::new("passenger_count").equal_to(Datum::double(1.0)))
+        .build()
+        .unwrap();
+    let mut stream = scan.plan_files().await.unwrap();
+
+    while let Some(item) = stream.next().await {
+        black_box(item.unwrap());
+    }
+}
+
+// Combined: partition pruning down to one file plus a row-level predicate.
+async fn one_file_some_rows(table: &Table) {
+    let scan = table
+        .scan()
+        .with_filter(
+            Reference::new("tpep_pickup_datetime")
+                .greater_than_or_equal_to(
+                    Datum::timestamptz_from_str("2024-02-01T00:00:00.000 UTC").unwrap(),
+                )
+                .and(Reference::new("tpep_pickup_datetime").less_than(
+                    Datum::timestamptz_from_str("2024-02-02T00:00:00.000 UTC").unwrap(),
+                ))
+                .and(Reference::new("passenger_count").equal_to(Datum::double(1.0))),
+        )
+        .build()
+        .unwrap();
+    let mut stream = scan.plan_files().await.unwrap();
+
+    while let Some(item) = stream.next().await {
+        black_box(item.unwrap());
+    }
+}
+
+pub fn bench_all_files_all_rows(c: &mut Criterion) {
+    let runtime = Runtime::new().unwrap();
+    let table = setup(&runtime);
+    println!("setup complete");
+
+    c.bench_function("all_files_all_rows", |b| {
+        b.to_async(&runtime).iter(|| all_files_all_rows(&table))
+    });
+}
+
+pub fn bench_one_file_all_rows(c: &mut Criterion) {
+    let runtime = Runtime::new().unwrap();
+    let table = setup(&runtime);
+    println!("setup complete");
+
+    c.bench_function("one_file_all_rows", |b| {
+        b.to_async(&runtime).iter(|| one_file_all_rows(&table))
+    });
+}
+
+pub fn bench_all_files_some_rows(c: &mut Criterion) {
+    let runtime = Runtime::new().unwrap();
+    let table = setup(&runtime);
+    println!("setup complete");
+
+    c.bench_function("all_files_some_rows", |b| {
+        b.to_async(&runtime).iter(|| all_files_some_rows(&table))
+    });
+}
+
+pub fn bench_one_file_some_rows(c: &mut Criterion) {
+    let runtime = Runtime::new().unwrap();
+    let table = setup(&runtime);
+    println!("setup complete");
+
+    c.bench_function("one_file_some_rows", |b| {
+        b.to_async(&runtime).iter(|| one_file_some_rows(&table))
+    });
+}
+
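+// Each iteration issues live REST catalog and S3 requests through the
+// rate-limiting haproxy, so the sample size is kept small to bound the
+// suite's wall-clock time.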
+criterion_group! {
+    name = benches;
+    config = Criterion::default().sample_size(10);
+    targets = bench_all_files_all_rows, bench_all_files_some_rows, bench_one_file_all_rows, bench_one_file_some_rows
+}
+
+criterion_main!(benches);
diff --git a/crates/iceberg/benches/utils.rs b/crates/iceberg/benches/utils.rs
new file mode 100644
index 000000000..ccefe2b7d
--- /dev/null
+++ b/crates/iceberg/benches/utils.rs
@@ -0,0 +1,59 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use iceberg::io::{S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY};
+use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
+use iceberg_test_utils::docker::DockerCompose;
+use std::collections::HashMap;
+
+pub fn get_docker_compose() -> DockerCompose {
+    DockerCompose::new(
+        "iceberg-rust-performance",
+        format!(
+            "{}/../iceberg/testdata/performance",
+            env!("CARGO_MANIFEST_DIR")
+        ),
+    )
+}
+
+pub async fn build_catalog() -> RestCatalog {
+    let docker_compose = get_docker_compose();
+
+    // determine which host ports docker mapped to the containers' exposed ports
+    let rest_api_host_port = docker_compose.get_host_port("rest", 8181);
+    let haproxy_host_port = docker_compose.get_host_port("haproxy", 9080);
+
+    let catalog_uri = format!("http://localhost:{}", rest_api_host_port);
+    // S3 traffic is routed through haproxy so that reads are bandwidth-limited
+    let haproxy_uri = format!("http://localhost:{}", haproxy_host_port);
+
+    let user_props = HashMap::from_iter(vec![
+        (S3_ENDPOINT.to_string(), haproxy_uri),
+        (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
+        (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
+        (S3_REGION.to_string(), "us-east-1".to_string()),
+    ]);
+
+    RestCatalog::new(
+        RestCatalogConfig::builder()
+            .uri(catalog_uri)
+            .props(user_props)
+            .build(),
+    )
+}
diff --git a/crates/iceberg/testdata/performance/docker-compose.yaml b/crates/iceberg/testdata/performance/docker-compose.yaml
new file mode 100644
index 000000000..13d7c90ef
--- /dev/null
+++ b/crates/iceberg/testdata/performance/docker-compose.yaml
@@ -0,0 +1,112 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
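+
+# Performance-test environment: Spark (loads the NYC taxi data), a REST
+# catalog backed by a SQLite warehouse, MinIO for S3 storage, mc to create
+# the warehouse bucket, and haproxy as a bandwidth-limiting S3 proxy.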
+
+version: "3"
+
+services:
+  spark-iceberg:
+    image: tabulario/spark-iceberg
+    build: spark/
+    networks:
+      iceberg_perf:
+    depends_on:
+      - rest
+      - minio
+    volumes:
+      - ./warehouse:/home/iceberg/warehouse
+      - ./notebooks:/home/iceberg/notebooks/notebooks
+      - ./raw_data:/home/iceberg/raw_data
+      - ./spark_scripts:/home/iceberg/spark_scripts
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+    ports:
+      - 8888:8888
+      - 8080:8080
+      - 10000:10000
+      - 10001:10001
+
+  rest:
+    image: tabulario/iceberg-rest
+    networks:
+      iceberg_perf:
+    ports:
+      - 8181:8181
+    volumes:
+      - ./warehouse:/warehouse
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+      - CATALOG_WAREHOUSE=s3://warehouse/
+      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
+      - CATALOG_S3_ENDPOINT=http://minio:9000
+      - CATALOG_URI=jdbc:sqlite:file:/warehouse/catalog.sqlite
+
+  minio:
+    image: minio/minio
+    environment:
+      - MINIO_ROOT_USER=admin
+      - MINIO_ROOT_PASSWORD=password
+      - MINIO_DOMAIN=minio
+    networks:
+      iceberg_perf:
+        aliases:
+          - warehouse.minio
+    volumes:
+      - ./warehouse:/warehouse
+    ports:
+      - 9001:9001
+      - 9000:9000
+    command: ["server", "/warehouse", "--console-address", ":9001"]
+
+  mc:
+    depends_on:
+      - minio
+    image: minio/mc
+    container_name: mc
+    networks:
+      iceberg_perf:
+    environment:
+      - AWS_ACCESS_KEY_ID=admin
+      - AWS_SECRET_ACCESS_KEY=password
+      - AWS_REGION=us-east-1
+    entrypoint: >
+      /bin/sh -c "
+      until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+      /usr/bin/mc mb minio/warehouse;
+      /usr/bin/mc policy set public minio/warehouse;
+      tail -f /dev/null
+      "
+
+  haproxy:
+    image: haproxy:lts-bookworm
+    networks:
+      iceberg_perf:
+    ports:
+      - 9080
+    volumes:
+      - type: bind
+        source: ./haproxy.cfg
+        target: /usr/local/etc/haproxy/haproxy.cfg
+        read_only: true
+    depends_on:
+      - minio
+
+networks:
+  iceberg_perf:
diff --git a/crates/iceberg/testdata/performance/haproxy.cfg b/crates/iceberg/testdata/performance/haproxy.cfg
new file mode 100644
index 000000000..2bb47af0f
--- /dev/null
+++ b/crates/iceberg/testdata/performance/haproxy.cfg
@@ -0,0 +1,39 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
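+
+# haproxy sits between the benchmarks and MinIO to make reads behave more
+# like a remote object store: WAIT_END delays each request by the 100ms
+# inspect-delay, and the bwlim-out filter caps egress at 625000 bytes per
+# 1s period (~5 Mbit/s).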
+
+global
+    log stdout format raw local0 debug
+    fd-hard-limit 50000
+
+defaults
+    mode http
+    timeout client 10s
+    timeout connect 5s
+    timeout server 10s
+    timeout http-request 10s
+    log global
+
+frontend slowio
+    bind :9080
+    default_backend minio
+    tcp-request inspect-delay 100ms
+    tcp-request content accept if WAIT_END
+    filter bwlim-out mylimit default-limit 625000 default-period 1s
+
+backend minio
+#    server s1 minio.iceberg-performance-tests.orb.local:9000 check maxconn 4
+    server s1 minio:9000 check maxconn 4
diff --git a/crates/iceberg/testdata/performance/spark_scripts/setup.sql b/crates/iceberg/testdata/performance/spark_scripts/setup.sql
new file mode 100644
index 000000000..730e94987
--- /dev/null
+++ b/crates/iceberg/testdata/performance/spark_scripts/setup.sql
@@ -0,0 +1,50 @@
+-- Licensed to the Apache Software Foundation (ASF) under one
+-- or more contributor license agreements. See the NOTICE file
+-- distributed with this work for additional information
+-- regarding copyright ownership. The ASF licenses this file
+-- to you under the Apache License, Version 2.0 (the
+-- "License"); you may not use this file except in compliance
+-- with the License. You may obtain a copy of the License at
+
+--   http://www.apache.org/licenses/LICENSE-2.0
+
+-- Unless required by applicable law or agreed to in writing,
+-- software distributed under the License is distributed on an
+-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+-- KIND, either express or implied. See the License for the
+-- specific language governing permissions and limitations
+-- under the License.
+
+CREATE DATABASE IF NOT EXISTS nyc;
+
+CREATE TABLE IF NOT EXISTS nyc.taxis (
+    VendorID bigint,
+    tpep_pickup_datetime timestamp,
+    tpep_dropoff_datetime timestamp,
+    passenger_count double,
+    trip_distance double,
+    RatecodeID double,
+    store_and_fwd_flag string,
+    PULocationID bigint,
+    DOLocationID bigint,
+    payment_type bigint,
+    fare_amount double,
+    extra double,
+    mta_tax double,
+    tip_amount double,
+    tolls_amount double,
+    improvement_surcharge double,
+    total_amount double,
+    congestion_surcharge double,
+    airport_fee double
+)
+USING iceberg
+PARTITIONED BY (days(tpep_pickup_datetime));
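+
+-- The table is partitioned by pickup day, so the one-day filters used by the
+-- "one_file" benchmarks should prune the scan to a single partition.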
+
+CREATE TEMPORARY VIEW parquetTable
+USING org.apache.spark.sql.parquet
+OPTIONS (
+    path "/home/iceberg/raw_data/yellow_tripdata_*.parquet"
+);
+
+INSERT INTO nyc.taxis SELECT * FROM parquetTable;
diff --git a/crates/iceberg/tests/file_io_s3_test.rs b/crates/iceberg/tests/file_io_s3_test.rs
index efa9128a3..3938bc22e 100644
--- a/crates/iceberg/tests/file_io_s3_test.rs
+++ b/crates/iceberg/tests/file_io_s3_test.rs
@@ -50,12 +50,12 @@ async fn get_file_io() -> FileIO {
     let guard = DOCKER_COMPOSE_ENV.read().unwrap();
     let docker_compose = guard.as_ref().unwrap();
 
-    let container_ip = docker_compose.get_container_ip("minio");
-    let read_port = format!("{}:{}", container_ip, 9000);
+    let host_port = docker_compose.get_host_port("minio", 9000);
+    let ip_and_port = format!("localhost:{}", host_port);
 
     FileIOBuilder::new("s3")
         .with_props(vec![
-            (S3_ENDPOINT, format!("http://{}", read_port)),
+            (S3_ENDPOINT, format!("http://{}", ip_and_port)),
             (S3_ACCESS_KEY_ID, "admin".to_string()),
             (S3_SECRET_ACCESS_KEY, "password".to_string()),
             (S3_REGION, "us-east-1".to_string()),
diff --git a/crates/iceberg/tests/read_performance_test.rs b/crates/iceberg/tests/read_performance_test.rs
new file mode 100644
index 000000000..e670f74a8
--- /dev/null
+++ b/crates/iceberg/tests/read_performance_test.rs
@@ -0,0 +1,139 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! Performance tests for FileIO S3 table scans
+
+use ctor::{ctor, dtor};
+use futures_util::stream::TryStreamExt;
+use iceberg::io::{
+    FileIO, FileIOBuilder, S3_ACCESS_KEY_ID, S3_ENDPOINT, S3_REGION, S3_SECRET_ACCESS_KEY,
+};
+use iceberg::Catalog;
+use iceberg_catalog_rest::{RestCatalog, RestCatalogConfig};
+use iceberg_test_utils::docker::DockerCompose;
+use iceberg_test_utils::{normalize_test_name, set_up};
+use std::collections::HashMap;
+use std::sync::RwLock;
+
+static DOCKER_COMPOSE_ENV: RwLock<Option<DockerCompose>> = RwLock::new(None);
+
+#[ctor]
+fn before_all() {
+    let mut guard = DOCKER_COMPOSE_ENV.write().unwrap();
+    let docker_compose = DockerCompose::new(
+        normalize_test_name(module_path!()),
+        format!("{}/testdata/performance", env!("CARGO_MANIFEST_DIR")),
+    );
+    docker_compose.run();
+
+    guard.replace(docker_compose);
+}
+
+#[dtor]
+fn after_all() {
+    let mut guard = DOCKER_COMPOSE_ENV.write().unwrap();
+    guard.take();
+}
+
+async fn get_file_io() -> FileIO {
+    set_up();
+
+    let guard = DOCKER_COMPOSE_ENV.read().unwrap();
+    let docker_compose = guard.as_ref().unwrap();
+    let host_port = docker_compose.get_host_port("minio", 9000);
+    let ip_and_port = format!("localhost:{}", host_port);
+
+    FileIOBuilder::new("s3")
+        .with_props(vec![
+            (S3_ENDPOINT, format!("http://{}", ip_and_port)),
+            (S3_ACCESS_KEY_ID, "admin".to_string()),
+            (S3_SECRET_ACCESS_KEY, "password".to_string()),
+            (S3_REGION, "us-east-1".to_string()),
+        ])
+        .build()
+        .unwrap()
+}
+
+async fn get_rest_catalog() -> RestCatalog {
+    set_up();
+
+    let guard = DOCKER_COMPOSE_ENV.read().unwrap();
+    let docker_compose = guard.as_ref().unwrap();
+    let rest_api_host_port = docker_compose.get_host_port("rest", 8181);
+    let minio_host_port = docker_compose.get_host_port("minio", 9000);
+
+    let catalog_uri = format!("http://localhost:{}", rest_api_host_port);
+    let minio_socket_addr = format!("localhost:{}", minio_host_port);
+
+    let user_props = HashMap::from_iter(vec![
+        (
+            S3_ENDPOINT.to_string(),
+            format!("http://{}", minio_socket_addr),
+        ),
+        (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()),
+        (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()),
+        (S3_REGION.to_string(), "us-east-1".to_string()),
+    ]);
+
+    RestCatalog::new(
+        RestCatalogConfig::builder()
+            .uri(catalog_uri)
+            .props(user_props)
+            .build(),
+    )
+}
+
+#[tokio::test]
+async fn test_file_io_s3_is_exist() {
+    let file_io = get_file_io().await;
+    assert!(file_io.is_exist("s3://warehouse/").await.unwrap());
+}
+
+#[tokio::test]
+async fn test_file_io_rest_catalog_connection() {
+    let catalog = get_rest_catalog().await;
+
+    let namespaces = catalog.list_namespaces(None).await.unwrap();
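+    // setup.sql creates a single namespace, "nyc", which holds the taxis table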
+    assert_eq!(namespaces[0].join("."), "nyc");
+}
+
+#[tokio::test]
+async fn test_query_all() {
+    let catalog = get_rest_catalog().await;
+    let namespaces = catalog.list_namespaces(None).await.unwrap();
+    let table_idents = catalog.list_tables(&namespaces[0]).await.unwrap();
+    let table = catalog.load_table(&table_idents[0]).await.unwrap();
+
+    let scan = table.scan().build().unwrap();
+    let record_batches = scan
+        .to_arrow()
+        .await
+        .unwrap()
+        .try_collect::<Vec<_>>()
+        .await
+        .unwrap();
+
+    let record_count = record_batches.iter().fold(0, |acc, rb| acc + rb.num_rows());
+
+    assert_eq!(record_count, 2_964_624);
+}
diff --git a/crates/test_utils/Cargo.toml b/crates/test_utils/Cargo.toml
index d4f6e1696..075a8a2eb 100644
--- a/crates/test_utils/Cargo.toml
+++ b/crates/test_utils/Cargo.toml
@@ -26,8 +26,15 @@ repository = { workspace = true }
 license = { workspace = true }
 
 [dependencies]
+anyhow = "1"
 env_logger = { workspace = true }
 log = "0.4.20"
+tokio = { version = "1.39.1", features = ["macros", "rt-multi-thread"] }
 
 [features]
 tests = []
+
+[lib]
+name = "iceberg_test_utils"
+path = "src/lib.rs"
diff --git a/crates/test_utils/src/docker.rs b/crates/test_utils/src/docker.rs
index 2902c49ed..7d381b069 100644
--- a/crates/test_utils/src/docker.rs
+++ b/crates/test_utils/src/docker.rs
@@ -77,8 +77,12 @@ impl DockerCompose {
         )
     }
 
+    pub fn get_container_name(&self, service_name: impl AsRef<str>) -> String {
+        format!("{}-{}-1", self.project_name, service_name.as_ref())
+    }
+
     pub fn get_container_ip(&self, service_name: impl AsRef<str>) -> String {
-        let container_name = format!("{}-{}-1", self.project_name, service_name.as_ref());
+        let container_name = self.get_container_name(service_name.as_ref());
         let mut cmd = Command::new("docker");
         cmd.arg("inspect")
             .arg("-f")
@@ -89,10 +93,62 @@ impl DockerCompose {
             .trim()
             .to_string()
     }
-}
 
-impl Drop for DockerCompose {
-    fn drop(&mut self) {
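+    /// Returns the host port that docker mapped to `container_port` for the
+    /// given service. This parses `docker port` output such as
+    /// `0.0.0.0:32768`, taking the first reported mapping; note that an
+    /// IPv6-only mapping like `[::]:32768` would not split cleanly on `:`.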
+    pub fn get_host_port(&self, service_name: impl AsRef<str>, container_port: u16) -> String {
+        let container_name = self.get_container_name(service_name.as_ref());
+        let mut cmd = Command::new("docker");
+
+        cmd.arg("port")
+            .arg(&container_name)
+            .arg(format!("{}/tcp", container_port));
+
+        let binding = get_cmd_output(cmd, format!("Get host port for {container_name}"))
+            .trim()
+            .to_string();
+        let output = binding.lines().next().unwrap();
+
+        let ip_and_port = output.split(":").collect::<Vec<_>>();
+
+        ip_and_port[1].to_string()
+    }
+
+    pub fn exec_on_container(
+        &self,
+        service_name: String,
+        command: String,
+        args: &[String],
+        description: String,
+    ) {
+        let container_name = self.get_container_name(service_name);
+
+        let mut cmd = Command::new("docker");
+        cmd.arg("exec").arg(&container_name).arg(command).args(args);
+
+        run_command(
+            cmd,
+            format!(
+                "Executing command on container {}, project name: {}: '{}'",
+                &container_name, self.project_name, description
+            ),
+        )
+    }
+
+    pub fn stop(&mut self) {
+        let mut cmd = Command::new("docker");
+        cmd.current_dir(&self.docker_compose_dir);
+
+        cmd.args(vec!["compose", "-p", self.project_name.as_str(), "stop"]);
+
+        run_command(
+            cmd,
+            format!(
+                "Stopping docker compose in {}, project name: {}",
+                self.docker_compose_dir, self.project_name
+            ),
+        )
+    }
+
+    pub fn remove(&mut self) {
         let mut cmd = Command::new("docker");
         cmd.current_dir(&self.docker_compose_dir);
diff --git a/crates/test_utils/src/lib.rs b/crates/test_utils/src/lib.rs
index 4f63b8dd7..56a282ffc 100644
--- a/crates/test_utils/src/lib.rs
+++ b/crates/test_utils/src/lib.rs
@@ -19,15 +19,9 @@
 //!
 //! It's not intended for use outside of `iceberg-rust`.
 
-#[cfg(feature = "tests")]
 mod cmd;
-#[cfg(feature = "tests")]
 pub mod docker;
-
-#[cfg(feature = "tests")]
 pub use common::*;
-
-#[cfg(feature = "tests")]
 mod common {
     use std::sync::Once;
diff --git a/justfile b/justfile
new file mode 100644
index 000000000..610d12330
--- /dev/null
+++ b/justfile
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+default:
+    just --list --unsorted
+
+# start the performance-testing docker-compose project
+perf-start:
+    docker-compose -p iceberg-rust-performance --project-directory ./crates/iceberg/testdata/performance up -d
+    # initialize the perf test environment if it is clean
+    if [ ! -f ./crates/iceberg/testdata/performance/warehouse/catalog.sqlite ]; then just perf-init; fi
+
+# stop the performance-testing docker-compose project
+perf-stop:
+    docker-compose -p iceberg-rust-performance --project-directory ./crates/iceberg/testdata/performance down
+
+# performance testing: initialize the tables and data
+perf-init: perf-download-data
+    docker exec iceberg-rust-performance-spark-iceberg-1 spark-sql --driver-memory 4G --executor-memory 4G -f /home/iceberg/spark_scripts/setup.sql
+
+# stop the performance-testing docker-compose project and clean up its docker images and data
+perf-clean:
+    docker-compose -p iceberg-rust-performance --project-directory ./crates/iceberg/testdata/performance down --remove-orphans -v --rmi all
+    rm ./crates/iceberg/testdata/performance/raw_data/*.parquet
+    rm ./crates/iceberg/testdata/performance/warehouse/catalog.sqlite
+    rm -rf ./crates/iceberg/testdata/performance/warehouse/warehouse
+    rm -rf ./crates/iceberg/testdata/performance/warehouse/.minio.sys
+
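+# note: `cargo criterion` requires the cargo-criterion plugin
+# (`cargo install cargo-criterion`)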
+# run the performance test suite
+perf-run:
+    # start the perf testing env if it is not running
+    docker container inspect iceberg-rust-performance-rest-1 >/dev/null || just perf-start
+    cargo criterion --benches
+
+# download the "NYC Taxi" data required to populate the performance test suite
+perf-download-data:
+    if [ ! -f ./crates/iceberg/testdata/performance/raw_data/yellow_tripdata_2024-01.parquet ]; then wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-01.parquet -P crates/iceberg/testdata/performance/raw_data; fi
+    if [ ! -f ./crates/iceberg/testdata/performance/raw_data/yellow_tripdata_2024-02.parquet ]; then wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-02.parquet -P crates/iceberg/testdata/performance/raw_data; fi
+    if [ ! -f ./crates/iceberg/testdata/performance/raw_data/yellow_tripdata_2024-03.parquet ]; then wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-03.parquet -P crates/iceberg/testdata/performance/raw_data; fi
+    if [ ! -f ./crates/iceberg/testdata/performance/raw_data/yellow_tripdata_2024-04.parquet ]; then wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-04.parquet -P crates/iceberg/testdata/performance/raw_data; fi
+    if [ ! -f ./crates/iceberg/testdata/performance/raw_data/yellow_tripdata_2024-05.parquet ]; then wget https://d37ci6vzurychx.cloudfront.net/trip-data/yellow_tripdata_2024-05.parquet -P crates/iceberg/testdata/performance/raw_data; fi
\ No newline at end of file