Skip to content

Commit

Permalink
[CALCITE-6156] Add ENDSWITH, STARTSWITH functions (enabled in Postgre…
Browse files Browse the repository at this point in the history
…s, Snowflake libraries)
  • Loading branch information
tanclary committed Dec 5, 2023
1 parent bd7d4e8 commit 78d0c9c
Show file tree
Hide file tree
Showing 8 changed files with 187 additions and 56 deletions.
6 changes: 6 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/SqlKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,12 @@ public enum SqlKind {
/** {@code SUBSTR} function (PostgreSQL semantics). */
SUBSTR_POSTGRESQL,

/** {@code ENDS_WITH} function. */
ENDS_WITH,

/** {@code STARTS_WITH} function. */
STARTS_WITH,

/** Call to a function using JDBC function syntax. */
JDBC_FN,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,10 @@
package org.apache.calcite.sql.dialect;

import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlWriter;

/**
* A <code>SqlDialect</code> implementation for the Snowflake database.
Expand All @@ -36,6 +39,30 @@ public SnowflakeSqlDialect(Context context) {
super(context);
}

/**
 * Unparses a call, sending {@code ENDS_WITH} and {@code STARTS_WITH} calls to
 * {@link #unparseEndsStartsWith} and every other kind of call to the parent
 * dialect.
 */
@Override public void unparseCall(final SqlWriter writer, final SqlCall call, final int leftPrec,
    final int rightPrec) {
  final SqlKind kind = call.getKind();
  if (kind != SqlKind.ENDS_WITH && kind != SqlKind.STARTS_WITH) {
    // Not one of the renamed functions: keep the inherited behavior.
    super.unparseCall(writer, call, leftPrec, rightPrec);
    return;
  }
  unparseEndsStartsWith(writer, call);
}

/**
 * Unparses a {@code STARTS_WITH} or {@code ENDS_WITH} call using the Snowflake
 * spelling, which has no underscore: {@code STARTSWITH(a, b)} or
 * {@code ENDSWITH(a, b)}.
 *
 * @param writer Writer that receives the generated SQL
 * @param call Call whose kind is {@code ENDS_WITH} or {@code STARTS_WITH}; its
 *     first two operands are written as the function arguments
 */
private static void unparseEndsStartsWith(final SqlWriter writer, final SqlCall call) {
  final String name = call.getKind() == SqlKind.ENDS_WITH ? "ENDSWITH" : "STARTSWITH";
  final SqlWriter.Frame frame = writer.startFunCall(name);
  call.operand(0).unparse(writer, 0, 0);
  // The operands are unparsed one at a time, so the argument separator must be
  // written explicitly. Passing "true" asks the writer to print the comma even
  // though this is the first sep() call inside the frame.
  writer.sep(",", true);
  call.operand(1).unparse(writer, 0, 0);
  writer.endFunCall(frame);
}

/** Always returns {@code true} for this dialect.
 *
 * <p>NOTE(review): presumably this tells the SQL generator that Snowflake has
 * an approximate distinct-count function; confirm against the contract of the
 * overridden method in {@code SqlDialect}. */
@Override public boolean supportsApproxCountDistinct() {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,9 @@ public enum SqlLibrary {
POSTGRESQL("p", "postgresql"),
/** A collection of operators that are in Apache Spark but not in standard
* SQL. */
SPARK("s", "spark");
SPARK("s", "spark"),
/** A collection of operators that are in Snowflake but not in standard SQL.
 * The abbreviation is "f" because "s" already belongs to {@link #SPARK}. */
SNOWFLAKE("f", "snowflake");

/** Map from {@link Enum#name() name} and {@link #fun} to library. */
public static final Map<String, SqlLibrary> MAP;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import static org.apache.calcite.sql.fun.SqlLibrary.MYSQL;
import static org.apache.calcite.sql.fun.SqlLibrary.ORACLE;
import static org.apache.calcite.sql.fun.SqlLibrary.POSTGRESQL;
import static org.apache.calcite.sql.fun.SqlLibrary.SNOWFLAKE;
import static org.apache.calcite.sql.fun.SqlLibrary.SPARK;
import static org.apache.calcite.util.Static.RESOURCE;

Expand Down Expand Up @@ -360,17 +361,25 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
OperandTypes.STRING_INTEGER_OPTIONAL_INTEGER,
SqlFunctionCategory.STRING);

/** The "ENDS_WITH(value1, value2)" function (BigQuery, PostgreSQL).
 *
 * <p>Declared as a {@link SqlBasicFunction} so that {@link #ENDSWITH} can be
 * derived from it with {@code withName}. */
@LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL})
public static final SqlBasicFunction ENDS_WITH =
    SqlBasicFunction.create(SqlKind.ENDS_WITH, ReturnTypes.BOOLEAN_NULLABLE,
        OperandTypes.STRING_SAME_SAME);

/** The "ENDSWITH(value1, value2)" function (Snowflake); a renamed copy of
 * {@link #ENDS_WITH}. */
@LibraryOperator(libraries = {SNOWFLAKE})
public static final SqlFunction ENDSWITH = ENDS_WITH.withName("ENDSWITH");

/** The "STARTS_WITH(value1, value2)" function (BigQuery, PostgreSQL).
 *
 * <p>Declared as a {@link SqlBasicFunction} so that {@link #STARTSWITH} can be
 * derived from it with {@code withName}. */
@LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL})
public static final SqlBasicFunction STARTS_WITH =
    SqlBasicFunction.create(SqlKind.STARTS_WITH, ReturnTypes.BOOLEAN_NULLABLE,
        OperandTypes.STRING_SAME_SAME);

/** The "STARTSWITH(value1, value2)" function (Snowflake); a renamed copy of
 * {@link #STARTS_WITH}. */
@LibraryOperator(libraries = {SNOWFLAKE})
public static final SqlFunction STARTSWITH = STARTS_WITH.withName("STARTSWITH");

/** BigQuery's "SUBSTR(string, position [, substringLength ])" function. */
@LibraryOperator(libraries = {BIG_QUERY})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ private StandardConvertletTable() {
addAlias(SqlStdOperatorTable.PERCENT_REMAINDER, SqlStdOperatorTable.MOD);
addAlias(SqlLibraryOperators.IFNULL, SqlLibraryOperators.NVL);
addAlias(SqlLibraryOperators.REGEXP_SUBSTR, SqlLibraryOperators.REGEXP_EXTRACT);
addAlias(SqlLibraryOperators.ENDSWITH, SqlLibraryOperators.ENDS_WITH);
addAlias(SqlLibraryOperators.STARTSWITH, SqlLibraryOperators.STARTS_WITH);

// Register convertlets for specific objects.
registerOp(SqlStdOperatorTable.CAST, this::convertCast);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6557,6 +6557,34 @@ private void checkLiteral2(String expression, String expected) {
sql(sql).withMysql().ok(expectedMysql);
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6156">[CALCITE-6156]
 * Add ENDSWITH, STARTSWITH functions (enabled in Postgres, Snowflake libraries)</a>.
 *
 * <p>Both BigQuery's {@code STARTS_WITH} and Snowflake's {@code STARTSWITH}
 * are expected to unparse to {@code STARTSWITH(a, b)} in the Snowflake
 * dialect. */
@Test void testSnowflakeStartsWith() {
  final String bigQuerySql = "select starts_with(\"brand_name\", 'a')\n"
      + "from \"product\"";
  final String snowflakeSql = "select startswith(\"brand_name\", 'a')\n"
      + "from \"product\"";
  // Function arguments must be comma-separated in the generated SQL.
  final String expected = "SELECT STARTSWITH(\"brand_name\", 'a')\n"
      + "FROM \"foodmart\".\"product\"";
  sql(bigQuerySql).withLibrary(SqlLibrary.BIG_QUERY).withSnowflake().ok(expected);
  sql(snowflakeSql).withLibrary(SqlLibrary.SNOWFLAKE).withSnowflake().ok(expected);
}

/** Test case for
 * <a href="https://issues.apache.org/jira/browse/CALCITE-6156">[CALCITE-6156]
 * Add ENDSWITH, STARTSWITH functions (enabled in Postgres, Snowflake libraries)</a>.
 *
 * <p>Both BigQuery's {@code ENDS_WITH} and Snowflake's {@code ENDSWITH} are
 * expected to unparse to {@code ENDSWITH(a, b)} in the Snowflake dialect. */
@Test void testSnowflakeEndsWith() {
  final String bigQuerySql = "select ends_with(\"brand_name\", 'a')\n"
      + "from \"product\"";
  final String snowflakeSql = "select endswith(\"brand_name\", 'a')\n"
      + "from \"product\"";
  // Function arguments must be comma-separated in the generated SQL.
  final String expected = "SELECT ENDSWITH(\"brand_name\", 'a')\n"
      + "FROM \"foodmart\".\"product\"";
  sql(bigQuerySql).withLibrary(SqlLibrary.BIG_QUERY).withSnowflake().ok(expected);
  sql(snowflakeSql).withLibrary(SqlLibrary.SNOWFLAKE).withSnowflake().ok(expected);
}

@Test void testSubstringInSpark() {
final String query = "select substring(\"brand_name\" from 2) "
+ "from \"product\"\n";
Expand Down
9 changes: 6 additions & 3 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2633,7 +2633,8 @@ The 'C' (compatibility) column contains value:
* 'q' for Microsoft SQL Server ('fun=mssql' in the connect string),
* 'o' for Oracle ('fun=oracle' in the connect string),
* 'p' for PostgreSQL ('fun=postgresql' in the connect string),
* 's' for Apache Spark ('fun=spark' in the connect string).
* 's' for Apache Spark ('fun=spark' in the connect string),
* 'f' for Snowflake ('fun=snowflake' in the connect string).

One operator name may correspond to multiple SQL dialects, but with different
semantics.
Expand Down Expand Up @@ -2727,7 +2728,8 @@ BigQuery's type system uses confusingly different names for types and functions:
| b | DATE_TRUNC(date, timeUnit) | Truncates *date* to the granularity of *timeUnit*, rounding to the beginning of the unit
| o | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) | Compares *value* to each *valueN* value one by one; if *value* is equal to a *valueN*, returns the corresponding *resultN*, else returns *default*, or NULL if *default* is not specified
| p | DIFFERENCE(string, string) | Returns a measure of the similarity of two strings, namely the number of character positions that their `SOUNDEX` values have in common: 4 if the `SOUNDEX` values are same and 0 if the `SOUNDEX` values are totally different
| b | ENDS_WITH(string1, string2) | Returns whether *string2* is a suffix of *string1*
| f | ENDSWITH(string1, string2) | Returns whether *string2* is a suffix of *string1*
| b p | ENDS_WITH(string1, string2) | Equivalent to `ENDSWITH(string1, string2)`
| o | EXTRACT(xml, xpath, [, namespaces ]) | Returns the XML fragment of the element or elements matched by the XPath expression. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression
| o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression.
| m | EXTRACTVALUE(xml, xpathExpr)) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression.
Expand Down Expand Up @@ -2824,7 +2826,8 @@ BigQuery's type system uses confusingly different names for types and functions:
| s | SOUNDEX(string) | Returns the phonetic representation of *string*; return original *string* if *string* is encoded with multi-byte encoding such as UTF-8
| m | SPACE(integer) | Returns a string of *integer* spaces; returns an empty string if *integer* is less than 1
| b | SPLIT(string [, delimiter ]) | Returns the string array of *string* split at *delimiter* (if omitted, default is comma). If the *string* is empty it returns an empty array, otherwise, if the *delimiter* is empty, it returns an array containing the original *string*.
| b | STARTS_WITH(string1, string2) | Returns whether *string2* is a prefix of *string1*
| f | STARTSWITH(string1, string2) | Returns whether *string2* is a prefix of *string1*
| b p | STARTS_WITH(string1, string2) | Equivalent to `STARTSWITH(string1, string2)`
| m | STRCMP(string, string) | Returns 0 if both of the strings are same and returns -1 when the first argument is smaller than the second and 1 when the second one is smaller than the first one
| b p | STRPOS(string, substring) | Equivalent to `POSITION(substring IN string)`
| b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion of *string*, beginning at character *position*, *substringLength* characters long. SUBSTR calculates lengths using characters as defined by the input character set
Expand Down
138 changes: 96 additions & 42 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -4962,7 +4962,7 @@ private static void checkIf(SqlOperatorFixture f) {
f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1, 3)");
f.checkQuery("select regexp_replace('a b c', 'b', 'X', 1, 3, 'i')");
};
f0.forEachLibrary(list(SqlLibrary.MYSQL, SqlLibrary.ORACLE, SqlLibrary.BIG_QUERY), consumer);
f0.forEachLibrary(list(SqlLibrary.BIG_QUERY, SqlLibrary.MYSQL, SqlLibrary.ORACLE), consumer);

// Tests for double-backslash indexed capturing groups for regexp_replace in BQ
final SqlOperatorFixture f1 =
Expand Down Expand Up @@ -8878,51 +8878,105 @@ private void testCurrentDateFunc(Pair<String, Hook.Closeable> pair) {
}

/** Tests the {@code STARTS_WITH} function once per library that enables it
 * (BigQuery and PostgreSQL). */
@Test void testStartsWithFunction() {
  final SqlOperatorFixture f0 = fixture();
  f0.setFor(SqlLibraryOperators.STARTS_WITH);
  // The checks live in a consumer so that the same assertions run against
  // every library in the list below.
  final Consumer<SqlOperatorFixture> consumer = f -> {
    f.checkBoolean("starts_with('12345', '123')", true);
    f.checkBoolean("starts_with('12345', '1243')", false);
    f.checkBoolean("starts_with(x'11', x'11')", true);
    f.checkBoolean("starts_with(x'112211', x'33')", false);
    f.checkFails("^starts_with('aabbcc', x'aa')^",
        "Cannot apply 'STARTS_WITH' to arguments of type "
            + "'STARTS_WITH\\(<CHAR\\(6\\)>, <BINARY\\(1\\)>\\)'\\. Supported "
            + "form\\(s\\): 'STARTS_WITH\\(<STRING>, <STRING>\\)'",
        false);
    f.checkNull("starts_with(null, null)");
    f.checkNull("starts_with('12345', null)");
    f.checkNull("starts_with(null, '123')");
    f.checkBoolean("starts_with('', '123')", false);
    f.checkBoolean("starts_with('', '')", true);
    f.checkNull("starts_with(x'aa', null)");
    f.checkNull("starts_with(null, x'aa')");
    f.checkBoolean("starts_with(x'1234', x'')", true);
    f.checkBoolean("starts_with(x'', x'123456')", false);
    f.checkBoolean("starts_with(x'', x'')", true);
  };
  f0.forEachLibrary(list(SqlLibrary.BIG_QUERY, SqlLibrary.POSTGRESQL), consumer);
}

/** Exercises Snowflake's {@code STARTSWITH} function with character and
 * binary operands, a mixed-type call, and null and empty arguments. */
@Test void testSnowflakeStartsWithFunction() {
  final SqlOperatorFixture snowflake = fixture().withLibrary(SqlLibrary.SNOWFLAKE);
  snowflake.setFor(SqlLibraryOperators.STARTSWITH);

  // Plain matches and mismatches, for character and binary strings.
  snowflake.checkBoolean("startswith('12345', '123')", true);
  snowflake.checkBoolean("startswith('12345', '1243')", false);
  snowflake.checkBoolean("startswith(x'11', x'11')", true);
  snowflake.checkBoolean("startswith(x'112211', x'33')", false);

  // A character string paired with a binary string is expected to fail.
  final String mismatchError = "Cannot apply 'STARTSWITH' to arguments of type "
      + "'STARTSWITH\\(<CHAR\\(6\\)>, <BINARY\\(1\\)>\\)'\\. Supported "
      + "form\\(s\\): 'STARTSWITH\\(<STRING>, <STRING>\\)'";
  snowflake.checkFails("^startswith('aabbcc', x'aa')^", mismatchError, false);

  // Null and empty character operands.
  snowflake.checkNull("startswith(null, null)");
  snowflake.checkNull("startswith('12345', null)");
  snowflake.checkNull("startswith(null, '123')");
  snowflake.checkBoolean("startswith('', '123')", false);
  snowflake.checkBoolean("startswith('', '')", true);

  // Null and empty binary operands.
  snowflake.checkNull("startswith(x'aa', null)");
  snowflake.checkNull("startswith(null, x'aa')");
  snowflake.checkBoolean("startswith(x'1234', x'')", true);
  snowflake.checkBoolean("startswith(x'', x'123456')", false);
  snowflake.checkBoolean("startswith(x'', x'')", true);
}

/** Tests the {@code ENDS_WITH} function once per library that enables it
 * (BigQuery and PostgreSQL). */
@Test void testEndsWithFunction() {
  final SqlOperatorFixture f0 = fixture();
  f0.setFor(SqlLibraryOperators.ENDS_WITH);
  final Consumer<SqlOperatorFixture> consumer = f -> {
    f.checkBoolean("ends_with('12345', '345')", true);
    f.checkBoolean("ends_with('12345', '123')", false);
    f.checkBoolean("ends_with(x'11', x'11')", true);
    f.checkBoolean("ends_with(x'112211', x'33')", false);
    f.checkFails("^ends_with('aabbcc', x'aa')^",
        "Cannot apply 'ENDS_WITH' to arguments of type "
            + "'ENDS_WITH\\(<CHAR\\(6\\)>, <BINARY\\(1\\)>\\)'\\. Supported "
            + "form\\(s\\): 'ENDS_WITH\\(<STRING>, <STRING>\\)'",
        false);
    f.checkNull("ends_with(null, null)");
    f.checkNull("ends_with('12345', null)");
    f.checkNull("ends_with(null, '123')");
    f.checkBoolean("ends_with('', '123')", false);
    f.checkBoolean("ends_with('', '')", true);
    f.checkNull("ends_with(x'aa', null)");
    f.checkNull("ends_with(null, x'aa')");
    f.checkBoolean("ends_with(x'1234', x'')", true);
    f.checkBoolean("ends_with(x'', x'123456')", false);
    f.checkBoolean("ends_with(x'', x'')", true);
  };
  // Without this call the consumer is never invoked and the test asserts
  // nothing.
  f0.forEachLibrary(list(SqlLibrary.BIG_QUERY, SqlLibrary.POSTGRESQL), consumer);
}

/** Tests for Snowflake's {@code ENDSWITH} function.
 *
 * <p>Only the underscore-free spelling is exercised: the fixture enables
 * just the Snowflake library, and {@code ENDS_WITH} is registered for
 * BigQuery and PostgreSQL only. */
@Test void testSnowflakeEndsWithFunction() {
  final SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.SNOWFLAKE);
  f.setFor(SqlLibraryOperators.ENDSWITH);
  f.checkBoolean("endswith('12345', '345')", true);
  f.checkBoolean("endswith('12345', '123')", false);
  f.checkBoolean("endswith(x'11', x'11')", true);
  f.checkBoolean("endswith(x'112211', x'33')", false);
  f.checkFails("^endswith('aabbcc', x'aa')^",
      "Cannot apply 'ENDSWITH' to arguments of type "
          + "'ENDSWITH\\(<CHAR\\(6\\)>, <BINARY\\(1\\)>\\)'\\. Supported "
          + "form\\(s\\): 'ENDSWITH\\(<STRING>, <STRING>\\)'",
      false);
  f.checkNull("endswith(null, null)");
  f.checkNull("endswith('12345', null)");
  f.checkNull("endswith(null, '123')");
  f.checkBoolean("endswith('', '123')", false);
  f.checkBoolean("endswith('', '')", true);
  f.checkNull("endswith(x'aa', null)");
  f.checkNull("endswith(null, x'aa')");
  f.checkBoolean("endswith(x'1234', x'')", true);
  f.checkBoolean("endswith(x'', x'123456')", false);
  f.checkBoolean("endswith(x'', x'')", true);
}

/** Tests the {@code SPLIT} operator. */
Expand Down

0 comments on commit 78d0c9c

Please sign in to comment.