Skip to content

Commit

Permalink
[CALCITE-6001] Add useUtf8AsDefaultCharset flag to SqlConformanceEnum…
Browse files Browse the repository at this point in the history
… to allow encoding of non-ISO-8859-1 characters
  • Loading branch information
tanclary committed Sep 14, 2023
1 parent 9c75894 commit cee7311
Show file tree
Hide file tree
Showing 9 changed files with 47 additions and 16 deletions.
3 changes: 2 additions & 1 deletion core/src/main/codegen/templates/Parser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ public class ${parser.class} extends SqlAbstractParserImpl
((SourceStringReader) reader).getSourceString();
parser.setOriginalSql(sql);
}
parser.setConformance(SqlConformanceEnum.DEFAULT);
return parser;
}
};
Expand Down Expand Up @@ -4518,7 +4519,7 @@ SqlNode StringLiteral() :
String p;
final List<SqlLiteral> frags;
char unicodeEscapeChar = 0;
String charSet = null;
String charSet = this.conformance.useUtf8AsDefaultCharset() ? "UTF-8" : null;
SqlCharStringLiteral literal;
}
{
Expand Down
12 changes: 0 additions & 12 deletions core/src/main/java/org/apache/calcite/sql/fun/SqlCastFunction.java
Original file line number Diff line number Diff line change
Expand Up @@ -227,18 +227,6 @@ private static RelDataType createTypeWithNullabilityFromExpr(RelDataTypeFactory
}
return false;
}
if (SqlTypeUtil.areCharacterSetsMismatched(
validatedNodeType,
returnType)) {
if (throwOnFailure) {
// Include full type string to indicate character
// set mismatch.
throw callBinding.newError(
RESOURCE.cannotCastValue(validatedNodeType.getFullTypeString(),
returnType.getFullTypeString()));
}
return false;
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2708,4 +2708,8 @@ public static SqlOperator floorCeil(boolean floor, SqlConformance conformance) {
return floor ? SqlStdOperatorTable.FLOOR : SqlStdOperatorTable.CEIL;
}
}

/**
 * Returns the charset name {@code "UTF-8"}.
 *
 * <p>The {@code conformance} argument is not consulted; the same name is
 * returned for every input.
 *
 * @param conformance SQL conformance level (currently unused)
 * @return the literal string {@code "UTF-8"}
 */
public static String testCharset(SqlConformance conformance) {
  final String charsetName = "UTF-8";
  return charsetName;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,10 @@ public abstract class SqlAbstractConformance implements SqlConformance {
// Answers with whatever the DEFAULT conformance level reports.
@Override public boolean isValueAllowed() {
  final boolean defaultAnswer = SqlConformanceEnum.DEFAULT.isValueAllowed();
  return defaultAnswer;
}

// Answers with whatever the DEFAULT conformance level reports.
@Override public boolean useUtf8AsDefaultCharset() {
  final boolean defaultAnswer =
      SqlConformanceEnum.DEFAULT.useUtf8AsDefaultCharset();
  return defaultAnswer;
}

@Override public SqlLibrary semantics() {
return SqlConformanceEnum.DEFAULT.semantics();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,19 @@ public interface SqlConformance {
*/
boolean isValueAllowed();

/**
 * Whether {@code UTF-8} should be used as the default charset in
 * the parser.
 *
 * <p>Among the built-in conformance levels, true in
 * {@link SqlConformanceEnum#BIG_QUERY};
 * false otherwise.
 */
boolean useUtf8AsDefaultCharset();

/**
* Controls the behavior of operators that are part of Standard SQL but
* nevertheless have different behavior in different databases.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -416,6 +416,15 @@ public enum SqlConformanceEnum implements SqlConformance {
return false;
}
}

// Only the BIG_QUERY level opts in to UTF-8 as the default charset;
// every other level answers false.
@Override public boolean useUtf8AsDefaultCharset() {
  return this == BIG_QUERY;
}

@Override public SqlLibrary semantics() {
switch (this) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,8 @@ protected SqlDelegatingConformance(SqlConformance delegate) {
return delegate.isValueAllowed();
}

// Forwards the question to the wrapped conformance.
@Override public boolean useUtf8AsDefaultCharset() {
  return delegate.useUtf8AsDefaultCharset();
}

// Forwards the question to the wrapped conformance.
@Override public SqlLibrary semantics() {
  final SqlLibrary delegateSemantics = delegate.semantics();
  return delegateSemantics;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -543,7 +543,7 @@ private static String toSql(RelNode root, SqlDialect dialect,
sql.withSql(query).ok("SELECT TIMESTAMP_SECONDS(CAST(CEIL(3) AS INT64)) AS "
+ "created_thing\nFROM foodmart.product");
}

@Test void testBigQueryFloorPreservesCast() {
final String query = "SELECT TIMESTAMP_SECONDS(CAST(FLOOR(CAST(3 AS BIGINT)) AS BIGINT)) "
+ "as created_thing\n FROM `foodmart`.`product`";
Expand Down Expand Up @@ -1486,6 +1486,9 @@ private static String toSql(RelNode root, SqlDialect dialect,
assertThat(toSql(root), isLinux(expectedSql));
}




/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-5394">[CALCITE-5394]
* RelToSql converter fails when semi-join is under a join node</a>. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1792,6 +1792,14 @@ void checkPeriodPredicate(Checker checker) {
expr("cast('foo' as bar)")
.ok("CAST('foo' AS `BAR`)");
}

/** Checks how a string literal containing a non-ISO-8859-1 character is
 * unparsed, first without and then with BigQuery conformance. */
@Test void testParsingNonIsoCharacter() {
  final String query = "select 'ק' ";

  // Without an explicit conformance the literal is expected to come back
  // as a Unicode escape.
  sql(query).ok("SELECT u&'\\05e7'");

  // With BigQuery conformance the literal is expected to carry the UTF-8
  // charset prefix and keep the character as written.
  sql(query)
      .withConformance(SqlConformanceEnum.BIG_QUERY)
      .ok("SELECT _UTF-8'ק'");
}

@Test void testCastFails() {
expr("cast(x as time with ^time^ zone)")
Expand Down Expand Up @@ -5156,7 +5164,7 @@ void checkPeriodPredicate(Checker checker) {
.withDialect(MYSQL)
.fails("(?s)Encountered \"\\\\\"\" at .*")
.withDialect(BIG_QUERY)
.ok("SELECT deptno AS d, 'deptno' AS d2\n"
.ok("SELECT deptno AS d, _UTF-8'deptno' AS d2\n"
+ "FROM emp");

// MySQL uses single-quotes as escapes; BigQuery uses backslashes
Expand Down

0 comments on commit cee7311

Please sign in to comment.