Skip to content

Commit

Permalink
[CALCITE-6001] Add dialect-specific encoding for string literals
Browse files Browse the repository at this point in the history
  • Loading branch information
tanclary committed Oct 19, 2023
1 parent 5151168 commit 5e8c158
Show file tree
Hide file tree
Showing 17 changed files with 162 additions and 47 deletions.
4 changes: 2 additions & 2 deletions babel/src/test/resources/sql/redshift.iq
Original file line number Diff line number Diff line change
Expand Up @@ -1777,7 +1777,7 @@ SELECT "LENGTH"('ily')

-- returns 8 (cf OCTET_LENGTH)
select length('français');
SELECT "LENGTH"(u&'fran\00e7ais')
SELECT "LENGTH"('français')
!explain-validated-on calcite

# LOWER
Expand Down Expand Up @@ -1824,7 +1824,7 @@ f7415e33f972c03abd4f3fed36748f7a
# OCTET_LENGTH
-- returns 9 (cf LENGTH)
select octet_length('français');
SELECT OCTET_LENGTH(CAST(u&'fran\00e7ais' AS VARBINARY))
SELECT OCTET_LENGTH(CAST('français' AS VARBINARY))
!explain-validated-on calcite

# POSITION is a synonym for STRPOS
Expand Down
10 changes: 8 additions & 2 deletions core/src/main/codegen/templates/Parser.jj
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ public class ${parser.class} extends SqlAbstractParserImpl

private Casing unquotedCasing;
private Casing quotedCasing;
private String charset;
private int identifierMaxLength;
private SqlConformance conformance;

Expand All @@ -178,6 +179,7 @@ public class ${parser.class} extends SqlAbstractParserImpl
((SourceStringReader) reader).getSourceString();
parser.setOriginalSql(sql);
}
parser.setConformance(SqlConformanceEnum.DEFAULT);
return parser;
}
};
Expand Down Expand Up @@ -222,6 +224,10 @@ public class ${parser.class} extends SqlAbstractParserImpl
this.unquotedCasing = unquotedCasing;
}

/**
 * Sets the character set name kept by this parser instance.
 *
 * <p>{@code StringLiteral()} reads the stored value as the initial
 * character set of the literal it is parsing.
 *
 * @param charset character set name to store; it is stored as given,
 *     without validation
 */
public void setCharset(String charset) {
this.charset = charset;
}

/**
 * Stores the maximum identifier length on this parser instance.
 *
 * @param identifierMaxLength value to store; no range check is applied here
 */
public void setIdentifierMaxLength(int identifierMaxLength) {
this.identifierMaxLength = identifierMaxLength;
}
Expand Down Expand Up @@ -4519,7 +4525,7 @@ SqlNode StringLiteral() :
String p;
final List<SqlLiteral> frags;
char unicodeEscapeChar = 0;
String charSet = null;
String charSet = this.charset;
SqlCharStringLiteral literal;
}
{
Expand Down Expand Up @@ -4653,7 +4659,7 @@ SqlNode StringLiteral() :
p = SqlParserUtil.stripQuotes(getToken(0).image, DQ, DQ, "\\\"",
Casing.UNCHANGED);
try {
return SqlLiteral.createCharString(p, charSet, getPos());
return literal = SqlLiteral.createCharString(p, charSet, getPos());
} catch (java.nio.charset.UnsupportedCharsetException e) {
throw SqlUtil.newContextException(getPos(),
RESOURCE.unknownCharacterSet(charSet));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ private NlsString getValueNonNull() {
writer.literal(
writer.getDialect().quoteStringLiteral(stringValue));
}

writer.literal(nlsString.asSql(true, true, writer.getDialect()));
}

Expand Down
67 changes: 47 additions & 20 deletions core/src/main/java/org/apache/calcite/sql/SqlDialect.java
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,7 @@ public class SqlDialect {
private final Casing unquotedCasing;
private final Casing quotedCasing;
private final boolean caseSensitive;
private final String charset;

//~ Constructors -----------------------------------------------------------

Expand Down Expand Up @@ -238,14 +239,15 @@ public SqlDialect(Context context) {
this.unquotedCasing = requireNonNull(context.unquotedCasing());
this.quotedCasing = requireNonNull(context.quotedCasing());
this.caseSensitive = context.caseSensitive();
this.charset = context.charset();
}

//~ Methods ----------------------------------------------------------------

/** Creates an empty context. Use {@link #EMPTY_CONTEXT} to reference the instance. */
private static Context emptyContext() {
return new ContextImpl(DatabaseProduct.UNKNOWN, null, null, -1, -1,
"'", "''", null, null,
"'", "''", null, null, "ISO-8859-1",
Casing.UNCHANGED, Casing.TO_UPPER, true, SqlConformanceEnum.DEFAULT,
NullCollation.HIGH, RelDataTypeSystemImpl.DEFAULT,
JethroDataSqlDialect.JethroInfo.EMPTY);
Expand Down Expand Up @@ -433,7 +435,9 @@ public void quoteStringLiteral(StringBuilder buf, @Nullable String charsetName,
if (containsNonAscii(val) && charsetName == null) {
quoteStringLiteralUnicode(buf, val);
} else {
if (charsetName != null) {
// Omit the charset prefix when it equals the dialect's default charset;
// e.g. BigQuery should not emit _UTF-8 because UTF-8 is its default.
if (charsetName != null && !charsetName.equals(getCharset())) {
buf.append("_");
buf.append(charsetName);
}
Expand Down Expand Up @@ -1180,7 +1184,8 @@ public SqlParser.Config configureParser(SqlParser.Config config) {
.withUnquotedCasing(getUnquotedCasing())
.withCaseSensitive(isCaseSensitive())
.withConformance(getConformance())
.withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD));
.withCharLiteralStyles(ImmutableSet.of(CharLiteralStyle.STANDARD))
.withCharset(getCharset());
}

@Deprecated // to be removed before 2.0
Expand Down Expand Up @@ -1240,6 +1245,11 @@ public Casing getQuotedCasing() {
return quotedCasing;
}

/**
 * Returns the character set name of this dialect, as supplied by the
 * {@code Context} the dialect was created from.
 *
 * <p>{@code quoteStringLiteral} omits the {@code _charset} prefix of a
 * literal whose character set equals this value, and
 * {@code configureParser} passes the value on to the parser configuration.
 */
public String getCharset() {
return charset;
}

/** Returns whether matching of identifiers is case-sensitive. */
public boolean isCaseSensitive() {
return caseSensitive;
Expand Down Expand Up @@ -1428,6 +1438,8 @@ Context withLiteralEscapedQuoteString(
@Nullable String identifierEscapedQuoteString();
Context withIdentifierEscapedQuoteString(
@Nullable String identifierEscapedQuoteString);
String charset();
Context withCharset(String charset);
Casing unquotedCasing();
Context withUnquotedCasing(Casing unquotedCasing);
Casing quotedCasing();
Expand Down Expand Up @@ -1455,6 +1467,7 @@ private static class ContextImpl implements Context {
private final String literalEscapedQuoteString;
private final @Nullable String identifierQuoteString;
private final @Nullable String identifierEscapedQuoteString;
private final String charset;
private final Casing unquotedCasing;
private final Casing quotedCasing;
private final boolean caseSensitive;
Expand All @@ -1468,7 +1481,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
int databaseMajorVersion, int databaseMinorVersion,
String literalQuoteString, String literalEscapedQuoteString,
@Nullable String identifierQuoteString,
@Nullable String identifierEscapedQuoteString,
@Nullable String identifierEscapedQuoteString, String charset,
Casing quotedCasing, Casing unquotedCasing, boolean caseSensitive,
SqlConformance conformance, NullCollation nullCollation,
RelDataTypeSystem dataTypeSystem,
Expand All @@ -1482,6 +1495,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
this.literalEscapedQuoteString = literalEscapedQuoteString;
this.identifierQuoteString = identifierQuoteString;
this.identifierEscapedQuoteString = identifierEscapedQuoteString;
this.charset = charset;
this.quotedCasing = requireNonNull(quotedCasing, "quotedCasing");
this.unquotedCasing = requireNonNull(unquotedCasing, "unquotedCasing");
this.caseSensitive = caseSensitive;
Expand All @@ -1501,7 +1515,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1514,7 +1528,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1527,7 +1541,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1540,7 +1554,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1553,7 +1567,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1566,7 +1580,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1580,7 +1594,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1594,7 +1608,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1608,7 +1622,20 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

/** Returns the character set name held by this context. */
@Override public String charset() {
return charset;
}

/**
 * Returns a new context whose character set is {@code charset} and whose
 * other settings are copied from this context.
 */
@Override public Context withCharset(String charset) {
// "charset" below is the method parameter, which shadows the field of the
// same name; every other argument is this context's current field value.
return new ContextImpl(databaseProduct, databaseProductName,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1621,7 +1648,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1634,7 +1661,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1647,7 +1674,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1660,7 +1687,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1674,7 +1701,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1687,7 +1714,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}

Expand All @@ -1700,7 +1727,7 @@ private ContextImpl(DatabaseProduct databaseProduct,
databaseVersion, databaseMajorVersion, databaseMinorVersion,
literalQuoteString, literalEscapedQuoteString,
identifierQuoteString, identifierEscapedQuoteString,
quotedCasing, unquotedCasing, caseSensitive,
charset, quotedCasing, unquotedCasing, caseSensitive,
conformance, nullCollation, dataTypeSystem, jethroInfo);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ public class BigQuerySqlDialect extends SqlDialect {
.withLiteralEscapedQuoteString("\\'")
.withIdentifierQuoteString("`")
.withIdentifierEscapedQuoteString("\\`")
.withCharset("UTF-8")
.withNullCollation(NullCollation.LOW)
.withUnquotedCasing(Casing.UNCHANGED)
.withQuotedCasing(Casing.UNCHANGED)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
public class HiveSqlDialect extends SqlDialect {
public static final SqlDialect.Context DEFAULT_CONTEXT = SqlDialect.EMPTY_CONTEXT
.withDatabaseProduct(SqlDialect.DatabaseProduct.HIVE)
.withCharset("UTF-8")
.withNullCollation(NullCollation.LOW);

public static final SqlDialect DEFAULT = new HiveSqlDialect(DEFAULT_CONTEXT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ public class MysqlSqlDialect extends SqlDialect {
.withIdentifierQuoteString("`")
.withDataTypeSystem(MYSQL_TYPE_SYSTEM)
.withUnquotedCasing(Casing.UNCHANGED)
.withCharset("UTF-8")
.withNullCollation(NullCollation.LOW);

public static final SqlDialect DEFAULT = new MysqlSqlDialect(DEFAULT_CONTEXT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ public class RedshiftSqlDialect extends SqlDialect {
.withQuotedCasing(Casing.TO_LOWER)
.withUnquotedCasing(Casing.TO_LOWER)
.withCaseSensitive(false)
.withCharset("UTF-8")
.withDataTypeSystem(TYPE_SYSTEM);

public static final SqlDialect DEFAULT = new RedshiftSqlDialect(DEFAULT_CONTEXT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
public class SparkSqlDialect extends SqlDialect {
public static final SqlDialect.Context DEFAULT_CONTEXT = SqlDialect.EMPTY_CONTEXT
.withDatabaseProduct(SqlDialect.DatabaseProduct.SPARK)
.withCharset("UTF-8")
.withNullCollation(NullCollation.LOW);

public static final SqlDialect DEFAULT = new SparkSqlDialect(DEFAULT_CONTEXT);
Expand Down
12 changes: 0 additions & 12 deletions core/src/main/java/org/apache/calcite/sql/fun/SqlCastFunction.java
Original file line number Diff line number Diff line change
Expand Up @@ -227,18 +227,6 @@ private static RelDataType createTypeWithNullabilityFromExpr(RelDataTypeFactory
}
return false;
}
if (SqlTypeUtil.areCharacterSetsMismatched(
validatedNodeType,
returnType)) {
if (throwOnFailure) {
// Include full type string to indicate character
// set mismatch.
throw callBinding.newError(
RESOURCE.cannotCastValue(validatedNodeType.getFullTypeString(),
returnType.getFullTypeString()));
}
return false;
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,6 @@ private static boolean argTypesValid(SqlCallBinding callBinding) {
if (operand.i == 0) {
collation = nls.getCollation();

// print with prefix
writer.literal(nls.asSql(true, false, writer.getDialect()));
} else {
// print without prefix
Expand Down
Loading

0 comments on commit 5e8c158

Please sign in to comment.