From ef2655ad5dc6c738bc60e7d6100ff06794ce6643 Mon Sep 17 00:00:00 2001 From: David Rabinowitz Date: Wed, 25 Oct 2023 20:06:39 -0700 Subject: [PATCH] Fixed nested field predicate pushdown (#1104) --- CHANGES.md | 1 + .../spark/bigquery/SparkFilterUtils.java | 8 +++++++- .../spark/bigquery/SparkFilterUtilsTest.java | 7 +++++++ .../integration/ReadIntegrationTestBase.java | 19 +++++++++++++++++++ 4 files changed, 34 insertions(+), 1 deletion(-) diff --git a/CHANGES.md b/CHANGES.md index 083429d9d..02bba507d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -5,6 +5,7 @@ * PR #1057: Enable async writes for greater throughput * PR #1094: CVE-2023-5072: Upgrading the org.json:json dependency * PR #1095: CVE-2023-4586: Upgrading the netty dependencies +* PR #1104: Fixed nested field predicate pushdown * Issue #1099: Fixing the usage of ExternalAccountCredentials * BigQuery API has been upgraded to version 2.33.2 * BigQuery Storage API has been upgraded to version 2.44.0 diff --git a/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkFilterUtils.java b/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkFilterUtils.java index ce8400703..3016e2147 100644 --- a/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkFilterUtils.java +++ b/spark-bigquery-connector-common/src/main/java/com/google/cloud/spark/bigquery/SparkFilterUtils.java @@ -337,7 +337,13 @@ static String escape(String value) { return value.replace("'", "\\'"); } + // Converting BigQuery field name into the quoted field name for BigQuery SQL + // foo -> `foo` + // foo.bar -> `foo`.`bar` + // foo.bar.baz -> `foo`.`bar`.`baz` static String quote(String value) { - return "`" + value + "`"; + return value.contains(".") + ? Arrays.stream(value.split("\\.")).collect(Collectors.joining("`.`", "`", "`")) + : "`" + value + "`"; } } diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkFilterUtilsTest.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkFilterUtilsTest.java index c969abb1a..df60b2b5c 100644 --- a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkFilterUtilsTest.java +++ b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/SparkFilterUtilsTest.java @@ -339,4 +339,11 @@ public void testFiltersWithNestedOrAnd_3() { part2, part3); } + + @Test + public void testQuote() { + assertThat(SparkFilterUtils.quote("foo")).isEqualTo("`foo`"); + assertThat(SparkFilterUtils.quote("foo.bar")).isEqualTo("`foo`.`bar`"); + assertThat(SparkFilterUtils.quote("foo.bar.baz")).isEqualTo("`foo`.`bar`.`baz`"); + } } diff --git a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/ReadIntegrationTestBase.java b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/ReadIntegrationTestBase.java index 9759115df..76531490a 100644 --- a/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/ReadIntegrationTestBase.java +++ b/spark-bigquery-connector-common/src/test/java/com/google/cloud/spark/bigquery/integration/ReadIntegrationTestBase.java @@ -31,6 +31,8 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; import java.util.stream.Stream; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Encoders; @@ -548,4 +550,21 @@ public void testCreateReadSessionTimeoutWithLessTimeOnHugeData() { .collect(); }); } + + @Test + public void testNestedFieldProjection() throws Exception { + Dataset githubNestedDF = + spark.read().format("bigquery").load("bigquery-public-data:samples.github_nested"); + List repositoryUrlRows = + githubNestedDF + .filter( + "repository.has_downloads = true AND url = 'https://github.com/googleapi/googleapi'") + .select("repository.url") + .collectAsList(); + assertThat(repositoryUrlRows).hasSize(4); + Set uniqueUrls = + repositoryUrlRows.stream().map(row -> row.getString(0)).collect(Collectors.toSet()); + assertThat(uniqueUrls).hasSize(1); + assertThat(uniqueUrls).contains("https://github.com/googleapi/googleapi"); + } }