Skip to content

Commit

Permalink
Fixed nested field predicate pushdown (GoogleCloudDataproc#1104)
Browse files Browse the repository at this point in the history
  • Loading branch information
davidrabinowitz authored Oct 26, 2023
1 parent e16669c commit ef2655a
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* PR #1057: Enable async writes for greater throughput
* PR #1094: CVE-2023-5072: Upgrading the org.json:json dependency
* PR #1095: CVE-2023-4586: Upgrading the netty dependencies
* PR #1104: Fixed nested field predicate pushdown
* Issue #1099: Fixing the usage of ExternalAccountCredentials
* BigQuery API has been upgraded to version 2.33.2
* BigQuery Storage API has been upgraded to version 2.44.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,13 @@ static String escape(String value) {
return value.replace("'", "\\'");
}

// Converting BigQuery field name into the quoted field name for BigQuery SQL
// foo -> `foo`
// foo.bar -> `foo`.`bar`
// foo.bar.baz -> `foo`.`bar`.`baz`
static String quote(String value) {
return "`" + value + "`";
return value.contains(".")
? Arrays.stream(value.split("\\.")).collect(Collectors.joining("`.`", "`", "`"))
: "`" + value + "`";
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -339,4 +339,11 @@ public void testFiltersWithNestedOrAnd_3() {
part2,
part3);
}

@Test
public void testQuote() {
assertThat(SparkFilterUtils.quote("foo")).isEqualTo("`foo`");
assertThat(SparkFilterUtils.quote("foo.bar")).isEqualTo("`foo`.`bar`");
assertThat(SparkFilterUtils.quote("foo.bar.baz")).isEqualTo("`foo`.`bar`.`baz`");
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Encoders;
Expand Down Expand Up @@ -548,4 +550,21 @@ public void testCreateReadSessionTimeoutWithLessTimeOnHugeData() {
.collect();
});
}

@Test
public void testNestedFieldProjection() throws Exception {
Dataset<Row> githubNestedDF =
spark.read().format("bigquery").load("bigquery-public-data:samples.github_nested");
List<Row> repositoryUrlRows =
githubNestedDF
.filter(
"repository.has_downloads = true AND url = 'https://github.com/googleapi/googleapi'")
.select("repository.url")
.collectAsList();
assertThat(repositoryUrlRows).hasSize(4);
Set<String> uniqueUrls =
repositoryUrlRows.stream().map(row -> row.getString(0)).collect(Collectors.toSet());
assertThat(uniqueUrls).hasSize(1);
assertThat(uniqueUrls).contains("https://github.com/googleapi/googleapi");
}
}

0 comments on commit ef2655a

Please sign in to comment.