Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[CALCITE-6156] Add ENDSWITH, STARTSWITH functions (enabled in Postgres, Snowflake libraries) #3565

Merged
merged 1 commit into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions core/src/main/java/org/apache/calcite/sql/SqlKind.java
Original file line number Diff line number Diff line change
Expand Up @@ -797,6 +797,12 @@ public enum SqlKind {
/** {@code SUBSTR} function (PostgreSQL semantics). */
SUBSTR_POSTGRESQL,

/** {@code ENDS_WITH} function. */
ENDS_WITH,

/** {@code STARTS_WITH} function. */
STARTS_WITH,

/** Call to a function using JDBC function syntax. */
JDBC_FN,

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,11 @@
package org.apache.calcite.sql.dialect;

import org.apache.calcite.avatica.util.Casing;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlDialect;
import org.apache.calcite.sql.SqlWriter;
import org.apache.calcite.sql.fun.SqlLibraryOperators;
import org.apache.calcite.sql.parser.SqlParserPos;

/**
* A <code>SqlDialect</code> implementation for the Snowflake database.
Expand All @@ -36,6 +40,24 @@ public SnowflakeSqlDialect(Context context) {
super(context);
}

@Override public void unparseCall(final SqlWriter writer, final SqlCall call, final int leftPrec,
final int rightPrec) {
switch (call.getKind()) {
case ENDS_WITH:
SqlCall endsWithCall = SqlLibraryOperators.ENDSWITH
.createCall(SqlParserPos.ZERO, call.getOperandList());
super.unparseCall(writer, endsWithCall, leftPrec, rightPrec);
break;
case STARTS_WITH:
SqlCall startsWithCall = SqlLibraryOperators.STARTSWITH
.createCall(SqlParserPos.ZERO, call.getOperandList());
super.unparseCall(writer, startsWithCall, leftPrec, rightPrec);
break;
default:
super.unparseCall(writer, call, leftPrec, rightPrec);
}
}

@Override public boolean supportsApproxCountDistinct() {
return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ public enum SqlLibrary {
/** A collection of operators that are in PostgreSQL but not in standard
* SQL. */
POSTGRESQL("p", "postgresql"),
/** A collection of operators that are in Snowflake but not in standard SQL. */
SNOWFLAKE("f", "snowflake"),
/** A collection of operators that are in Apache Spark but not in standard
* SQL. */
SPARK("s", "spark");
Expand All @@ -97,7 +99,7 @@ public List<SqlLibrary> children() {
switch (this) {
case ALL:
return ImmutableList.of(BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE,
POSTGRESQL, SPARK);
POSTGRESQL, SNOWFLAKE, SPARK);
default:
return ImmutableList.of();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
import static org.apache.calcite.sql.fun.SqlLibrary.MYSQL;
import static org.apache.calcite.sql.fun.SqlLibrary.ORACLE;
import static org.apache.calcite.sql.fun.SqlLibrary.POSTGRESQL;
import static org.apache.calcite.sql.fun.SqlLibrary.SNOWFLAKE;
import static org.apache.calcite.sql.fun.SqlLibrary.SPARK;
import static org.apache.calcite.util.Static.RESOURCE;

Expand Down Expand Up @@ -360,17 +361,25 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
OperandTypes.STRING_INTEGER_OPTIONAL_INTEGER,
SqlFunctionCategory.STRING);

/** The "ENDS_WITH(value1, value2)" function (BigQuery). */
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction ENDS_WITH =
SqlBasicFunction.create("ENDS_WITH", ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME, SqlFunctionCategory.STRING);
/** The "ENDS_WITH(value1, value2)" function (BigQuery, PostgreSQL). */
@LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL})
public static final SqlBasicFunction ENDS_WITH =
SqlBasicFunction.create(SqlKind.ENDS_WITH, ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME);

/** The "STARTS_WITH(value1, value2)" function (BigQuery). */
@LibraryOperator(libraries = {BIG_QUERY})
public static final SqlFunction STARTS_WITH =
SqlBasicFunction.create("STARTS_WITH", ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME, SqlFunctionCategory.STRING);
/** The "ENDSWITH(value1, value2)" function (Snowflake). */
@LibraryOperator(libraries = {SNOWFLAKE})
public static final SqlFunction ENDSWITH = ENDS_WITH.withName("ENDSWITH");

/** The "STARTS_WITH(value1, value2)" function (BigQuery, PostgreSQL). */
@LibraryOperator(libraries = {BIG_QUERY, POSTGRESQL})
public static final SqlBasicFunction STARTS_WITH =
SqlBasicFunction.create(SqlKind.STARTS_WITH, ReturnTypes.BOOLEAN_NULLABLE,
OperandTypes.STRING_SAME_SAME);

/** The "STARTSWITH(value1, value2)" function (Snowflake). */
@LibraryOperator(libraries = {SNOWFLAKE})
public static final SqlFunction STARTSWITH = STARTS_WITH.withName("STARTSWITH");

/** BigQuery's "SUBSTR(string, position [, substringLength ])" function. */
@LibraryOperator(libraries = {BIG_QUERY})
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,8 @@ private StandardConvertletTable() {
addAlias(SqlStdOperatorTable.PERCENT_REMAINDER, SqlStdOperatorTable.MOD);
addAlias(SqlLibraryOperators.IFNULL, SqlLibraryOperators.NVL);
addAlias(SqlLibraryOperators.REGEXP_SUBSTR, SqlLibraryOperators.REGEXP_EXTRACT);
addAlias(SqlLibraryOperators.ENDSWITH, SqlLibraryOperators.ENDS_WITH);
addAlias(SqlLibraryOperators.STARTSWITH, SqlLibraryOperators.STARTS_WITH);

// Register convertlets for specific objects.
registerOp(SqlStdOperatorTable.CAST, this::convertCast);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6557,6 +6557,40 @@ private void checkLiteral2(String expression, String expected) {
sql(sql).withMysql().ok(expectedMysql);
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6156">[CALCITE-6156]
* Add ENDSWITH, STARTSWITH functions (enabled in Postgres, Snowflake libraries)</a>. */
@Test void testSnowflakeStartsWith() {
final String query = "select startswith(\"brand_name\", 'a')\n"
+ "from \"product\"";
final String expectedBigQuery = "SELECT STARTS_WITH(brand_name, 'a')\n"
+ "FROM foodmart.product";
final String expectedPostgres = "SELECT STARTS_WITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
final String expectedSnowflake = "SELECT STARTSWITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withBigQuery().ok(expectedBigQuery);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withPostgresql().ok(expectedPostgres);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withSnowflake().ok(expectedSnowflake);
}

/** Test case for
* <a href="https://issues.apache.org/jira/browse/CALCITE-6156">[CALCITE-6156]
* Add ENDSWITH, STARTSWITH functions (enabled in Postgres, Snowflake libraries)</a>. */
@Test void testSnowflakeEndsWith() {
final String query = "select endswith(\"brand_name\", 'a')\n"
+ "from \"product\"";
final String expectedBigQuery = "SELECT ENDS_WITH(brand_name, 'a')\n"
+ "FROM foodmart.product";
final String expectedPostgres = "SELECT ENDS_WITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
final String expectedSnowflake = "SELECT ENDSWITH(\"brand_name\", 'a')\n"
+ "FROM \"foodmart\".\"product\"";
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withBigQuery().ok(expectedBigQuery);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withPostgresql().ok(expectedPostgres);
sql(query).withLibrary(SqlLibrary.SNOWFLAKE).withSnowflake().ok(expectedSnowflake);
}

@Test void testSubstringInSpark() {
final String query = "select substring(\"brand_name\" from 2) "
+ "from \"product\"\n";
Expand Down
8 changes: 4 additions & 4 deletions core/src/test/java/org/apache/calcite/util/UtilTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -951,16 +951,16 @@ private List<Integer> makeConsList(int start, int end) {

assertThat(SqlLibrary.expand(ImmutableList.of(a)),
hasToString("[ALL, BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(a, c)),
hasToString("[ALL, BIG_QUERY, CALCITE, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(c, a)),
hasToString("[CALCITE, ALL, BIG_QUERY, HIVE, MSSQL, MYSQL, ORACLE, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(c, o, a)),
hasToString("[CALCITE, ORACLE, ALL, BIG_QUERY, HIVE, MSSQL, MYSQL, "
+ "POSTGRESQL, SPARK]"));
+ "POSTGRESQL, SNOWFLAKE, SPARK]"));
assertThat(SqlLibrary.expand(ImmutableList.of(o, c, o)),
hasToString("[ORACLE, CALCITE]"));

Expand Down
7 changes: 5 additions & 2 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2628,6 +2628,7 @@ The 'C' (compatibility) column contains value:
* '*' for all libraries,
* 'b' for Google BigQuery ('fun=bigquery' in the connect string),
* 'c' for Apache Calcite ('fun=calcite' in the connect string),
* 'f' for Snowflake ('fun=snowflake' in the connect string),
* 'h' for Apache Hive ('fun=hive' in the connect string),
* 'm' for MySQL ('fun=mysql' in the connect string),
* 'q' for Microsoft SQL Server ('fun=mssql' in the connect string),
Expand Down Expand Up @@ -2727,7 +2728,8 @@ BigQuery's type system uses confusingly different names for types and functions:
| b | DATE_TRUNC(date, timeUnit) | Truncates *date* to the granularity of *timeUnit*, rounding to the beginning of the unit
| o | DECODE(value, value1, result1 [, valueN, resultN ]* [, default ]) | Compares *value* to each *valueN* value one by one; if *value* is equal to a *valueN*, returns the corresponding *resultN*, else returns *default*, or NULL if *default* is not specified
| p | DIFFERENCE(string, string) | Returns a measure of the similarity of two strings, namely the number of character positions that their `SOUNDEX` values have in common: 4 if the `SOUNDEX` values are same and 0 if the `SOUNDEX` values are totally different
| b | ENDS_WITH(string1, string2) | Returns whether *string2* is a suffix of *string1*
| f | ENDSWITH(string1, string2) | Returns whether *string2* is a suffix of *string1*
| b p | ENDS_WITH(string1, string2) | Equivalent to `ENDSWITH(string1, string2)`
| o | EXTRACT(xml, xpath, [, namespaces ]) | Returns the XML fragment of the element or elements matched by the XPath expression. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression
| o | EXISTSNODE(xml, xpath, [, namespaces ]) | Determines whether traversal of a XML document using a specified xpath results in any nodes. Returns 0 if no nodes remain after applying the XPath traversal on the document fragment of the element or elements matched by the XPath expression. Returns 1 if any nodes remain. The optional namespace value that specifies a default mapping or namespace mapping for prefixes, which is used when evaluating the XPath expression.
| m | EXTRACTVALUE(xml, xpathExpr)) | Returns the text of the first text node which is a child of the element or elements matched by the XPath expression.
Expand Down Expand Up @@ -2824,7 +2826,8 @@ BigQuery's type system uses confusingly different names for types and functions:
| s | SOUNDEX(string) | Returns the phonetic representation of *string*; return original *string* if *string* is encoded with multi-byte encoding such as UTF-8
| m | SPACE(integer) | Returns a string of *integer* spaces; returns an empty string if *integer* is less than 1
| b | SPLIT(string [, delimiter ]) | Returns the string array of *string* split at *delimiter* (if omitted, default is comma). If the *string* is empty it returns an empty array, otherwise, if the *delimiter* is empty, it returns an array containing the original *string*.
| b | STARTS_WITH(string1, string2) | Returns whether *string2* is a prefix of *string1*
| f | STARTSWITH(string1, string2) | Returns whether *string2* is a prefix of *string1*
| b p | STARTS_WITH(string1, string2) | Equivalent to `STARTSWITH(string1, string2)`
tanclary marked this conversation as resolved.
Show resolved Hide resolved
| m | STRCMP(string, string) | Returns 0 if both of the strings are same and returns -1 when the first argument is smaller than the second and 1 when the second one is smaller than the first one
| b p | STRPOS(string, substring) | Equivalent to `POSITION(substring IN string)`
| b m o p | SUBSTR(string, position [, substringLength ]) | Returns a portion of *string*, beginning at character *position*, *substringLength* characters long. SUBSTR calculates lengths using characters as defined by the input character set
Expand Down
Loading
Loading