diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8fae0bb15..019bd03fb 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -110,14 +110,16 @@ $ cargo version cargo 1.69.0 (6e9a83356 2023-04-12) ``` -#### Install docker +#### Install Docker or Podman -Currently, iceberg-rust uses docker to set up environment for integration tests. +Currently, iceberg-rust uses Docker to set up environment for integration tests. Podman is also supported. -You can learn how to install docker from [here](https://docs.docker.com/get-docker/). +You can learn how to install Docker from [here](https://docs.docker.com/get-docker/). For macos users, you can install [OrbStack](https://orbstack.dev/) as a docker alternative. +For podman users, refer to [Using Podman instead of Docker](docs/contributing/podman.md) + ## Build * To compile the project: `make build` diff --git a/crates/catalog/glue/tests/glue_catalog_test.rs b/crates/catalog/glue/tests/glue_catalog_test.rs index 3edd8cdaf..d9c5b4e0b 100644 --- a/crates/catalog/glue/tests/glue_catalog_test.rs +++ b/crates/catalog/glue/tests/glue_catalog_test.rs @@ -18,6 +18,7 @@ //! Integration tests for glue catalog. 
use std::collections::HashMap; +use std::net::SocketAddr; use std::sync::RwLock; use ctor::{ctor, dtor}; @@ -64,14 +65,11 @@ async fn get_catalog() -> GlueCatalog { docker_compose.get_container_ip("minio"), ) }; - let read_port = format!("{}:{}", glue_catalog_ip, GLUE_CATALOG_PORT); - loop { - if !scan_port_addr(&read_port) { - log::info!("Waiting for 1s glue catalog to ready..."); - sleep(std::time::Duration::from_millis(1000)).await; - } else { - break; - } + let glue_socket_addr = SocketAddr::new(glue_catalog_ip, GLUE_CATALOG_PORT); + let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT); + while !scan_port_addr(glue_socket_addr) { + log::info!("Waiting for 1s glue catalog to ready..."); + sleep(std::time::Duration::from_millis(1000)).await; } let props = HashMap::from([ @@ -83,7 +81,7 @@ async fn get_catalog() -> GlueCatalog { (AWS_REGION_NAME.to_string(), "us-east-1".to_string()), ( S3_ENDPOINT.to_string(), - format!("http://{}:{}", minio_ip, MINIO_PORT), + format!("http://{}", minio_socket_addr), ), (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), @@ -91,7 +89,7 @@ async fn get_catalog() -> GlueCatalog { ]); let config = GlueCatalogConfig::builder() - .uri(format!("http://{}:{}", glue_catalog_ip, GLUE_CATALOG_PORT)) + .uri(format!("http://{}", glue_socket_addr)) .warehouse("s3a://warehouse/hive".to_string()) .props(props.clone()) .build(); diff --git a/crates/catalog/hms/tests/hms_catalog_test.rs b/crates/catalog/hms/tests/hms_catalog_test.rs index e4974171f..5b8004439 100644 --- a/crates/catalog/hms/tests/hms_catalog_test.rs +++ b/crates/catalog/hms/tests/hms_catalog_test.rs @@ -18,6 +18,7 @@ //! Integration tests for hms catalog. 
use std::collections::HashMap; +use std::net::SocketAddr; use std::sync::RwLock; use ctor::{ctor, dtor}; @@ -63,22 +64,18 @@ async fn get_catalog() -> HmsCatalog { docker_compose.get_container_ip("minio"), ) }; - - let read_port = format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT); - loop { - if !scan_port_addr(&read_port) { - log::info!("scan read_port {} check", read_port); - log::info!("Waiting for 1s hms catalog to ready..."); - sleep(std::time::Duration::from_millis(1000)).await; - } else { - break; - } + let hms_socket_addr = SocketAddr::new(hms_catalog_ip, HMS_CATALOG_PORT); + let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT); + while !scan_port_addr(hms_socket_addr) { + log::info!("scan hms_socket_addr {} check", hms_socket_addr); + log::info!("Waiting for 1s hms catalog to ready..."); + sleep(std::time::Duration::from_millis(1000)).await; } let props = HashMap::from([ ( S3_ENDPOINT.to_string(), - format!("http://{}:{}", minio_ip, MINIO_PORT), + format!("http://{}", minio_socket_addr), ), (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), @@ -86,7 +83,7 @@ async fn get_catalog() -> HmsCatalog { ]); let config = HmsCatalogConfig::builder() - .address(format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT)) + .address(hms_socket_addr.to_string()) .thrift_transport(HmsThriftTransport::Buffered) .warehouse("s3a://warehouse/hive".to_string()) .props(props) diff --git a/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml b/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml index b49b6c6c1..34ba3c874 100644 --- a/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml +++ b/crates/catalog/rest/testdata/rest_catalog/docker-compose.yaml @@ -15,6 +15,9 @@ # specific language governing permissions and limitations # under the License. 
+networks: + rest_bridge: + services: rest: image: tabulario/iceberg-rest:0.10.0 @@ -29,8 +32,10 @@ services: - CATALOG_S3_ENDPOINT=http://minio:9000 depends_on: - minio - links: - - minio:icebergdata.minio + networks: + rest_bridge: + aliases: + - icebergdata.minio expose: - 8181 @@ -40,10 +45,13 @@ services: - MINIO_ROOT_USER=admin - MINIO_ROOT_PASSWORD=password - MINIO_DOMAIN=minio + hostname: icebergdata.minio + networks: + rest_bridge: expose: - 9001 - 9000 - command: [ "server", "/data", "--console-address", ":9001" ] + command: ["server", "/data", "--console-address", ":9001"] mc: depends_on: @@ -55,3 +63,5 @@ services: - AWS_REGION=us-east-1 entrypoint: > /bin/sh -c " until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done; /usr/bin/mc rm -r --force minio/icebergdata; /usr/bin/mc mb minio/icebergdata; /usr/bin/mc policy set public minio/icebergdata; tail -f /dev/null " + networks: + rest_bridge: diff --git a/crates/catalog/rest/tests/rest_catalog_test.rs b/crates/catalog/rest/tests/rest_catalog_test.rs index 3c9ec6937..e98890a86 100644 --- a/crates/catalog/rest/tests/rest_catalog_test.rs +++ b/crates/catalog/rest/tests/rest_catalog_test.rs @@ -18,6 +18,7 @@ //! Integration tests for rest catalog. 
use std::collections::HashMap; +use std::net::SocketAddr; use std::sync::RwLock; use ctor::{ctor, dtor}; @@ -59,18 +60,14 @@ async fn get_catalog() -> RestCatalog { docker_compose.get_container_ip("rest") }; - let read_port = format!("{}:{}", rest_catalog_ip, REST_CATALOG_PORT); - loop { - if !scan_port_addr(&read_port) { - log::info!("Waiting for 1s rest catalog to ready..."); - sleep(std::time::Duration::from_millis(1000)).await; - } else { - break; - } + let rest_socket_addr = SocketAddr::new(rest_catalog_ip, REST_CATALOG_PORT); + while !scan_port_addr(rest_socket_addr) { + log::info!("Waiting for 1s rest catalog to ready..."); + sleep(std::time::Duration::from_millis(1000)).await; } let config = RestCatalogConfig::builder() - .uri(format!("http://{}:{}", rest_catalog_ip, REST_CATALOG_PORT)) + .uri(format!("http://{}", rest_socket_addr)) .build(); RestCatalog::new(config) } diff --git a/crates/iceberg/testdata/file_io_s3/docker-compose.yaml b/crates/iceberg/testdata/file_io_s3/docker-compose.yaml index 0793d225b..cbce31864 100644 --- a/crates/iceberg/testdata/file_io_s3/docker-compose.yaml +++ b/crates/iceberg/testdata/file_io_s3/docker-compose.yaml @@ -18,7 +18,7 @@ services: minio: image: minio/minio:RELEASE.2024-02-26T09-33-48Z - ports: + expose: - 9000 - 9001 environment: diff --git a/crates/iceberg/tests/file_io_s3_test.rs b/crates/iceberg/tests/file_io_s3_test.rs index efa9128a3..326fdbb29 100644 --- a/crates/iceberg/tests/file_io_s3_test.rs +++ b/crates/iceberg/tests/file_io_s3_test.rs @@ -17,6 +17,7 @@ //! Integration tests for FileIO S3. 
+use std::net::SocketAddr; use std::sync::RwLock; use ctor::{ctor, dtor}; @@ -26,6 +27,7 @@ use iceberg::io::{ use iceberg_test_utils::docker::DockerCompose; use iceberg_test_utils::{normalize_test_name, set_up}; +const MINIO_PORT: u16 = 9000; static DOCKER_COMPOSE_ENV: RwLock> = RwLock::new(None); #[ctor] @@ -51,11 +53,11 @@ async fn get_file_io() -> FileIO { let guard = DOCKER_COMPOSE_ENV.read().unwrap(); let docker_compose = guard.as_ref().unwrap(); let container_ip = docker_compose.get_container_ip("minio"); - let read_port = format!("{}:{}", container_ip, 9000); + let minio_socket_addr = SocketAddr::new(container_ip, MINIO_PORT); FileIOBuilder::new("s3") .with_props(vec![ - (S3_ENDPOINT, format!("http://{}", read_port)), + (S3_ENDPOINT, format!("http://{}", minio_socket_addr)), (S3_ACCESS_KEY_ID, "admin".to_string()), (S3_SECRET_ACCESS_KEY, "password".to_string()), (S3_REGION, "us-east-1".to_string()), diff --git a/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs b/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs index 9ad1d401f..292cd8b3a 100644 --- a/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs +++ b/crates/integrations/datafusion/tests/integration_datafusion_hms_test.rs @@ -18,6 +18,7 @@ //! Integration tests for Iceberg Datafusion with Hive Metastore. 
use std::collections::HashMap; +use std::net::SocketAddr; use std::sync::{Arc, RwLock}; use ctor::{ctor, dtor}; @@ -40,7 +41,7 @@ static DOCKER_COMPOSE_ENV: RwLock> = RwLock::new(None); struct TestFixture { hms_catalog: HmsCatalog, props: HashMap, - hms_catalog_ip: String, + hms_catalog_socket_addr: SocketAddr, } #[ctor] @@ -63,7 +64,7 @@ fn after_all() { impl TestFixture { fn get_catalog(&self) -> HmsCatalog { let config = HmsCatalogConfig::builder() - .address(format!("{}:{}", self.hms_catalog_ip, HMS_CATALOG_PORT)) + .address(self.hms_catalog_socket_addr.to_string()) .thrift_transport(HmsThriftTransport::Buffered) .warehouse("s3a://warehouse/hive".to_string()) .props(self.props.clone()) @@ -85,20 +86,17 @@ async fn get_test_fixture() -> TestFixture { ) }; - let read_port = format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT); - loop { - if !scan_port_addr(&read_port) { - log::info!("Waiting for 1s hms catalog to ready..."); - sleep(std::time::Duration::from_millis(1000)).await; - } else { - break; - } + let hms_catalog_socket_addr = SocketAddr::new(hms_catalog_ip, HMS_CATALOG_PORT); + let minio_socket_addr = SocketAddr::new(minio_ip, MINIO_PORT); + while !scan_port_addr(hms_catalog_socket_addr) { + log::info!("Waiting for 1s hms catalog to ready..."); + sleep(std::time::Duration::from_millis(1000)).await; } let props = HashMap::from([ ( S3_ENDPOINT.to_string(), - format!("http://{}:{}", minio_ip, MINIO_PORT), + format!("http://{}", minio_socket_addr), ), (S3_ACCESS_KEY_ID.to_string(), "admin".to_string()), (S3_SECRET_ACCESS_KEY.to_string(), "password".to_string()), @@ -106,7 +104,7 @@ async fn get_test_fixture() -> TestFixture { ]); let config = HmsCatalogConfig::builder() - .address(format!("{}:{}", hms_catalog_ip, HMS_CATALOG_PORT)) + .address(hms_catalog_socket_addr.to_string()) .thrift_transport(HmsThriftTransport::Buffered) .warehouse("s3a://warehouse/hive".to_string()) .props(props.clone()) @@ -117,7 +115,7 @@ async fn get_test_fixture() -> TestFixture { 
TestFixture { hms_catalog, props, - hms_catalog_ip, + hms_catalog_socket_addr, } } diff --git a/crates/test_utils/src/cmd.rs b/crates/test_utils/src/cmd.rs index 604d4a14d..503d63d15 100644 --- a/crates/test_utils/src/cmd.rs +++ b/crates/test_utils/src/cmd.rs @@ -28,14 +28,27 @@ pub fn run_command(mut cmd: Command, desc: impl ToString) { } } -pub fn get_cmd_output(mut cmd: Command, desc: impl ToString) -> String { +pub fn get_cmd_output_result(mut cmd: Command, desc: impl ToString) -> Result { let desc = desc.to_string(); log::info!("Starting to {}, command: {:?}", &desc, cmd); - let output = cmd.output().unwrap(); - if output.status.success() { - log::info!("{} succeed!", desc); - String::from_utf8(output.stdout).unwrap() - } else { - panic!("{} failed: {:?}", desc, output.status); + let result = cmd.output(); + match result { + Ok(output) => { + if output.status.success() { + log::info!("{} succeed!", desc); + Ok(String::from_utf8(output.stdout).unwrap()) + } else { + Err(format!("{} failed with rc: {:?}", desc, output.status)) + } + } + Err(err) => Err(format!("{} failed with error: {}", desc, { err })), + } +} + +pub fn get_cmd_output(cmd: Command, desc: impl ToString) -> String { + let result = get_cmd_output_result(cmd, desc); + match result { + Ok(output_str) => output_str, + Err(err) => panic!("{}", err), } } diff --git a/crates/test_utils/src/docker.rs b/crates/test_utils/src/docker.rs index 2902c49ed..bde9737b1 100644 --- a/crates/test_utils/src/docker.rs +++ b/crates/test_utils/src/docker.rs @@ -15,9 +15,10 @@ // specific language governing permissions and limitations // under the License. +use std::net::IpAddr; use std::process::Command; -use crate::cmd::{get_cmd_output, run_command}; +use crate::cmd::{get_cmd_output, get_cmd_output_result, run_command}; /// A utility to manage the lifecycle of `docker compose`. 
/// @@ -46,9 +47,22 @@ impl DockerCompose { .arg("--format") .arg("{{.OSType}}/{{.Architecture}}"); - get_cmd_output(cmd, "Get os arch".to_string()) - .trim() - .to_string() + let result = get_cmd_output_result(cmd, "Get os arch".to_string()); + match result { + Ok(value) => value.trim().to_string(), + Err(_err) => { + // docker/podman do not consistently place OSArch info in the same json path across OS and versions + // Below tries an alternative path if the above path fails + let mut alt_cmd = Command::new("docker"); + alt_cmd + .arg("info") + .arg("--format") + .arg("{{.Version.OsArch}}"); + get_cmd_output(alt_cmd, "Get os arch".to_string()) + .trim() + .to_string() + } + } } pub fn run(&self) { @@ -77,7 +91,7 @@ impl DockerCompose { ) } - pub fn get_container_ip(&self, service_name: impl AsRef) -> String { + pub fn get_container_ip(&self, service_name: impl AsRef) -> IpAddr { let container_name = format!("{}-{}-1", self.project_name, service_name.as_ref()); let mut cmd = Command::new("docker"); cmd.arg("inspect") @@ -85,9 +99,16 @@ impl DockerCompose { .arg("{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}") .arg(&container_name); - get_cmd_output(cmd, format!("Get container ip of {container_name}")) + let ip_result = get_cmd_output(cmd, format!("Get container ip of {container_name}")) .trim() - .to_string() + .parse::(); + match ip_result { + Ok(ip) => ip, + Err(e) => { + log::error!("Invalid IP, {e}"); + panic!("Failed to parse IP for {container_name}") + } + } } } diff --git a/docs/contributing/podman.md b/docs/contributing/podman.md new file mode 100644 index 000000000..3281ad4da --- /dev/null +++ b/docs/contributing/podman.md @@ -0,0 +1,85 @@ + + +# Using Podman instead of Docker + +Iceberg-rust does not require containerization, except for integration tests, where "docker" and "docker-compose" are used to start containers for minio and various catalogs. 
The instructions below set up "rootful podman" and docker's official docker-compose plugin to run integration tests as an alternative to docker or OrbStack. + +1. Have podman v4 or newer. + ```console + $ podman --version + podman version 4.9.4-rhel + ``` + +2. Open file `/usr/bin/docker` and add the below contents: + ```bash + #!/bin/sh + [ -e /etc/containers/nodocker ] || \ + echo "Emulate Docker CLI using podman. Create /etc/containers/nodocker to quiet msg." >&2 + exec sudo /usr/bin/podman "$@" + ``` + +3. Install the [docker compose plugin](https://docs.docker.com/compose/install/linux). Check for successful installation. + ```console + $ docker compose version + Docker Compose version v2.28.1 + ``` + +4. Append the below to `~/.bashrc` or equivalent shell config: + ```bash + export DOCKER_HOST=unix:///run/podman/podman.sock + ``` + +5. Start the "rootful" podman socket. + ```shell + sudo systemctl start podman.socket + sudo systemctl status podman.socket + ``` + +6. Check that the following symlink exists. + ```console + $ ls -al /var/run/docker.sock + lrwxrwxrwx 1 root root 27 Jul 24 12:18 /var/run/docker.sock -> /var/run/podman/podman.sock + ``` + If the symlink does not exist, create it. + ```shell + sudo ln -s /var/run/podman/podman.sock /var/run/docker.sock + ``` + +7. Check that the docker socket is working. + ```shell + sudo curl -H "Content-Type: application/json" --unix-socket /var/run/docker.sock http://localhost/_ping + ``` + +8. Try some integration tests! + ```shell + cargo test -p iceberg --test file_io_s3_test + ``` + +# References + +* +* + +# Note on rootless containers + +As of podman v4, ["To be succinct and simple, when running rootless containers, the container itself does not have an IP address"](https://www.redhat.com/sysadmin/container-ip-address-podman). This causes issues with iceberg-rust's integration tests, which rely upon IP-addressable containers via docker-compose. 
As a result, podman "rootful" containers are required throughout to ensure containers have IP addresses. Perhaps as future work or with updates to default podman networking, the need for "rootful" podman containers can be eliminated. + +* +* diff --git a/website/src/SUMMARY.md b/website/src/SUMMARY.md index c9ed76d2f..e2a07ba9c 100644 --- a/website/src/SUMMARY.md +++ b/website/src/SUMMARY.md @@ -32,4 +32,5 @@ # Reference +- [Using Podman instead of Docker](./reference/podman.md) - [Setup GPG key](./reference/setup_gpg.md) \ No newline at end of file