From cee73114d4db81f0298ee75009b5bf5cfb471ce7 Mon Sep 17 00:00:00 2001 From: Tanner Clary Date: Mon, 11 Sep 2023 19:00:07 -0700 Subject: [PATCH] [CALCITE-6001] Add useUtf8AsDefaultCharset flag to SqlConformanceEnum to allow encoding of non-ISO-8859-1 characters --- core/src/main/codegen/templates/Parser.jj | 3 ++- .../apache/calcite/sql/fun/SqlCastFunction.java | 12 ------------ .../calcite/sql/fun/SqlStdOperatorTable.java | 4 ++++ .../sql/validate/SqlAbstractConformance.java | 4 ++++ .../calcite/sql/validate/SqlConformance.java | 14 +++++++++++++- .../calcite/sql/validate/SqlConformanceEnum.java | 9 +++++++++ .../sql/validate/SqlDelegatingConformance.java | 2 ++ .../calcite/rel/rel2sql/RelToSqlConverterTest.java | 5 ++++- .../apache/calcite/sql/parser/SqlParserTest.java | 10 +++++++++- 9 files changed, 47 insertions(+), 16 deletions(-) diff --git a/core/src/main/codegen/templates/Parser.jj b/core/src/main/codegen/templates/Parser.jj index fffe2a4dcac..b09ca2d7a1e 100644 --- a/core/src/main/codegen/templates/Parser.jj +++ b/core/src/main/codegen/templates/Parser.jj @@ -177,6 +177,7 @@ public class ${parser.class} extends SqlAbstractParserImpl ((SourceStringReader) reader).getSourceString(); parser.setOriginalSql(sql); } + parser.setConformance(SqlConformanceEnum.DEFAULT); return parser; } }; @@ -4518,7 +4519,7 @@ SqlNode StringLiteral() : String p; final List frags; char unicodeEscapeChar = 0; - String charSet = null; + String charSet = this.conformance.useUtf8AsDefaultCharset() ? 
"UTF-8" : null; SqlCharStringLiteral literal; } { diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlCastFunction.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlCastFunction.java index 81da37c7e70..63e52eefcaf 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlCastFunction.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlCastFunction.java @@ -227,18 +227,6 @@ private static RelDataType createTypeWithNullabilityFromExpr(RelDataTypeFactory } return false; } - if (SqlTypeUtil.areCharacterSetsMismatched( - validatedNodeType, - returnType)) { - if (throwOnFailure) { - // Include full type string to indicate character - // set mismatch. - throw callBinding.newError( - RESOURCE.cannotCastValue(validatedNodeType.getFullTypeString(), - returnType.getFullTypeString())); - } - return false; - } return true; } diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java index 530368d2786..d1f54bff339 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlStdOperatorTable.java @@ -2708,4 +2708,8 @@ public static SqlOperator floorCeil(boolean floor, SqlConformance conformance) { return floor ? 
SqlStdOperatorTable.FLOOR : SqlStdOperatorTable.CEIL; } } + + public static String testCharset(SqlConformance conformance) { + return "UTF-8"; + } } diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java index 3205a48b409..28468b3fe6e 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlAbstractConformance.java @@ -136,6 +136,10 @@ public abstract class SqlAbstractConformance implements SqlConformance { @Override public boolean isValueAllowed() { return SqlConformanceEnum.DEFAULT.isValueAllowed(); } + + @Override public boolean useUtf8AsDefaultCharset() { + return SqlConformanceEnum.DEFAULT.useUtf8AsDefaultCharset(); + } @Override public SqlLibrary semantics() { return SqlConformanceEnum.DEFAULT.semantics(); diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java index b8501f02f14..833435295d1 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformance.java @@ -537,7 +537,19 @@ public interface SqlConformance { */ boolean isValueAllowed(); - /** + /** + * Whether {@code UTF-8} should be used as the default charset in + * the parser. + * + *

<p>Among the built-in conformance levels, true in + * {@link SqlConformanceEnum#BIG_QUERY}, + * where string literals default to the + * {@code UTF-8} charset; + * false otherwise. + */ + boolean useUtf8AsDefaultCharset(); + + /** * Controls the behavior of operators that are part of Standard SQL but * nevertheless have different behavior in different databases. * diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java index a610f19f169..3bf31803674 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlConformanceEnum.java @@ -416,6 +416,15 @@ public enum SqlConformanceEnum implements SqlConformance { return false; } } + + @Override public boolean useUtf8AsDefaultCharset() { + switch (this) { + case BIG_QUERY: + return true; + default: + return false; + } + } @Override public SqlLibrary semantics() { switch (this) { diff --git a/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java b/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java index c8e2f7cdd2e..bcc268a8603 100644 --- a/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java +++ b/core/src/main/java/org/apache/calcite/sql/validate/SqlDelegatingConformance.java @@ -143,6 +143,8 @@ protected SqlDelegatingConformance(SqlConformance delegate) { return delegate.isValueAllowed(); } + @Override public boolean useUtf8AsDefaultCharset() { return delegate.useUtf8AsDefaultCharset(); } + @Override public SqlLibrary semantics() { return delegate.semantics(); } diff --git a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java index 50e9808a84d..59c61842c36 100644 --- 
a/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java +++ b/core/src/test/java/org/apache/calcite/rel/rel2sql/RelToSqlConverterTest.java @@ -543,7 +543,7 @@ private static String toSql(RelNode root, SqlDialect dialect, sql.withSql(query).ok("SELECT TIMESTAMP_SECONDS(CAST(CEIL(3) AS INT64)) AS " + "created_thing\nFROM foodmart.product"); } - + @Test void testBigQueryFloorPreservesCast() { final String query = "SELECT TIMESTAMP_SECONDS(CAST(FLOOR(CAST(3 AS BIGINT)) AS BIGINT)) " + "as created_thing\n FROM `foodmart`.`product`"; @@ -1486,6 +1486,9 @@ private static String toSql(RelNode root, SqlDialect dialect, assertThat(toSql(root), isLinux(expectedSql)); } + + + /** Test case for * [CALCITE-5394] * RelToSql converter fails when semi-join is under a join node. */ diff --git a/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java b/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java index 4434c33e4fe..ca1d0fc9dca 100644 --- a/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java +++ b/testkit/src/main/java/org/apache/calcite/sql/parser/SqlParserTest.java @@ -1792,6 +1792,14 @@ void checkPeriodPredicate(Checker checker) { expr("cast('foo' as bar)") .ok("CAST('foo' AS `BAR`)"); } + + @Test void testParsingNonIsoCharacter() { + String sql = "select 'ק' "; + sql(sql).ok("SELECT u&'\\05e7'"); + // BigQuery conformance should set charset to UTF-8 and be able to properly encode character + sql(sql).withConformance(SqlConformanceEnum.BIG_QUERY) + .ok("SELECT _UTF-8'ק'"); + } @Test void testCastFails() { expr("cast(x as time with ^time^ zone)") @@ -5156,7 +5164,7 @@ void checkPeriodPredicate(Checker checker) { .withDialect(MYSQL) .fails("(?s)Encountered \"\\\\\"\" at .*") .withDialect(BIG_QUERY) - .ok("SELECT deptno AS d, 'deptno' AS d2\n" + .ok("SELECT deptno AS d, _UTF-8'deptno' AS d2\n" + "FROM emp"); // MySQL uses single-quotes as escapes; BigQuery uses backslashes