From a1eda17c319139f49be57bd8cc1e74690806935a Mon Sep 17 00:00:00 2001 From: Runkang He Date: Sat, 15 Jul 2023 07:39:17 +0800 Subject: [PATCH] [CALCITE-5825] Add URL_ENCODE and URL_DECODE function (enabled in Spark library) --- .../adapter/enumerable/RexImpTable.java | 6 ++ .../apache/calcite/runtime/UrlFunctions.java | 58 +++++++++++++++++++ .../calcite/sql/fun/SqlLibraryOperators.java | 16 +++++ .../apache/calcite/util/BuiltInMethod.java | 3 + site/_docs/reference.md | 2 + .../apache/calcite/test/SqlOperatorTest.java | 42 ++++++++++++++ 6 files changed, 127 insertions(+) create mode 100644 core/src/main/java/org/apache/calcite/runtime/UrlFunctions.java diff --git a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java index 43242677b6ae..c0fae13eba53 100644 --- a/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java +++ b/core/src/main/java/org/apache/calcite/adapter/enumerable/RexImpTable.java @@ -252,6 +252,8 @@ import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MICROS; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MILLIS; import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_SECONDS; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.URL_DECODE; +import static org.apache.calcite.sql.fun.SqlLibraryOperators.URL_ENCODE; import static org.apache.calcite.sql.fun.SqlLibraryOperators.XML_TRANSFORM; import static org.apache.calcite.sql.fun.SqlStdOperatorTable.ABS; import static org.apache.calcite.sql.fun.SqlStdOperatorTable.ACOS; @@ -824,6 +826,10 @@ Builder populate2() { // Compression Operators defineMethod(COMPRESS, BuiltInMethod.COMPRESS.method, NullPolicy.ARG0); + // Url Operators + defineMethod(URL_ENCODE, BuiltInMethod.URL_ENCODE.method, NullPolicy.STRICT); + defineMethod(URL_DECODE, BuiltInMethod.URL_DECODE.method, NullPolicy.STRICT); + // Xml Operators defineMethod(EXTRACT_VALUE, 
BuiltInMethod.EXTRACT_VALUE.method, NullPolicy.ARG0); defineMethod(XML_TRANSFORM, BuiltInMethod.XML_TRANSFORM.method, NullPolicy.ARG0); diff --git a/core/src/main/java/org/apache/calcite/runtime/UrlFunctions.java b/core/src/main/java/org/apache/calcite/runtime/UrlFunctions.java new file mode 100644 index 000000000000..c05c801a7edf --- /dev/null +++ b/core/src/main/java/org/apache/calcite/runtime/UrlFunctions.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to you under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.calcite.runtime; + +import java.io.UnsupportedEncodingException; +import java.net.URLDecoder; +import java.net.URLEncoder; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; + +import static org.apache.calcite.util.Static.RESOURCE; + +/** + * A collection of functions used in Url processing. + */ +public class UrlFunctions { + + private UrlFunctions() { + } + + private static final Charset UTF_8 = StandardCharsets.UTF_8; + + /** The "URL_DECODE(string)" function. 
*/ + public static String urlDecode(String value) { + String url; + try { + url = URLDecoder.decode(value, UTF_8.name()); + } catch (UnsupportedEncodingException e) { + throw RESOURCE.charsetEncoding(value, UTF_8.name()).ex(); + } + return url; + } + + /** The "URL_ENCODE(string)" function. */ + public static String urlEncode(String url) { + String value; + try { + value = URLEncoder.encode(url, UTF_8.name()); + } catch (UnsupportedEncodingException e) { + throw RESOURCE.charsetEncoding(url, UTF_8.name()).ex(); + } + return value; + } +} diff --git a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java index 18abd1997532..534294746094 100644 --- a/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java +++ b/core/src/main/java/org/apache/calcite/sql/fun/SqlLibraryOperators.java @@ -473,6 +473,22 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding, ReturnTypes.VARBINARY_NULLABLE, OperandTypes.STRING, SqlFunctionCategory.STRING); + /** The "URL_DECODE(string)" function. */ + @LibraryOperator(libraries = {SPARK}) + public static final SqlFunction URL_DECODE = + SqlBasicFunction.create("URL_DECODE", + ReturnTypes.VARCHAR_NULLABLE, + OperandTypes.STRING, + SqlFunctionCategory.STRING); + + /** The "URL_ENCODE(string)" function. 
*/ + @LibraryOperator(libraries = {SPARK}) + public static final SqlFunction URL_ENCODE = + SqlBasicFunction.create("URL_ENCODE", + ReturnTypes.VARCHAR_NULLABLE, + OperandTypes.STRING, + SqlFunctionCategory.STRING); + @LibraryOperator(libraries = {MYSQL}) public static final SqlFunction EXTRACT_VALUE = SqlBasicFunction.create("EXTRACTVALUE", diff --git a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java index b02870f3515b..30d2d76e35f8 100644 --- a/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java +++ b/core/src/main/java/org/apache/calcite/util/BuiltInMethod.java @@ -97,6 +97,7 @@ import org.apache.calcite.runtime.SpatialTypeFunctions; import org.apache.calcite.runtime.SqlFunctions; import org.apache.calcite.runtime.SqlFunctions.FlatProductInputType; +import org.apache.calcite.runtime.UrlFunctions; import org.apache.calcite.runtime.Utilities; import org.apache.calcite.runtime.XmlFunctions; import org.apache.calcite.schema.FilterableTable; @@ -374,6 +375,8 @@ public enum BuiltInMethod { SHA512(SqlFunctions.class, "sha512", String.class), THROW_UNLESS(SqlFunctions.class, "throwUnless", boolean.class, String.class), COMPRESS(CompressionFunctions.class, "compress", String.class), + URL_DECODE(UrlFunctions.class, "urlDecode", String.class), + URL_ENCODE(UrlFunctions.class, "urlEncode", String.class), EXTRACT_VALUE(XmlFunctions.class, "extractValue", String.class, String.class), XML_TRANSFORM(XmlFunctions.class, "xmlTransform", String.class, String.class), EXTRACT_XML(XmlFunctions.class, "extractXml", String.class, String.class, String.class), diff --git a/site/_docs/reference.md b/site/_docs/reference.md index 12f6467fc1da..dce5e89785a0 100644 --- a/site/_docs/reference.md +++ b/site/_docs/reference.md @@ -2830,6 +2830,8 @@ BigQuery's type system uses confusingly different names for types and functions: | b | UNIX_MILLIS(timestamp) | Returns the number of milliseconds since 
1970-01-01 00:00:00 | b | UNIX_SECONDS(timestamp) | Returns the number of seconds since 1970-01-01 00:00:00 | b | UNIX_DATE(date) | Returns the number of days since 1970-01-01 +| s | URL_DECODE(string) | Decodes a *string* in 'application/x-www-form-urlencoded' format using a specific encoding scheme +| s | URL_ENCODE(string) | Translates a *string* into 'application/x-www-form-urlencoded' format using a specific encoding scheme | o | XMLTRANSFORM(xml, xslt) | Applies XSLT transform *xslt* to XML string *xml* and returns the result Note: diff --git a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java index 7771cf1948ba..ef1b7a1a55a5 100644 --- a/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java +++ b/testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java @@ -5107,6 +5107,48 @@ private static void checkIf(SqlOperatorFixture f) { "07000000789c4bad48cc2dc84905000bc002ed", "VARBINARY NOT NULL"); } + @Test void testUrlDecode() { + final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.URL_DECODE); + f0.checkFails("^URL_DECODE('https://calcite.apache.org')^", + "No match found for function signature URL_DECODE\\(<CHARACTER>\\)", + false); + final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.SPARK); + f.checkString("URL_DECODE('https%3A%2F%2Fcalcite.apache.org')", + "https://calcite.apache.org", + "VARCHAR NOT NULL"); + f.checkString("URL_DECODE('http%3A%2F%2Ftest%3Fa%3Db%26c%3Dd')", + "http://test?a=b&c=d", + "VARCHAR NOT NULL"); + f.checkString("URL_DECODE('http%3A%2F%2F%E4%BD%A0%E5%A5%BD')", + "http://\u4F60\u597D", + "VARCHAR NOT NULL"); + f.checkString("URL_DECODE('test')", + "test", + "VARCHAR NOT NULL"); + f.checkNull("URL_DECODE(cast(null as varchar))"); + } + + @Test void testUrlEncode() { + final SqlOperatorFixture f0 = fixture().setFor(SqlLibraryOperators.URL_ENCODE); + f0.checkFails("^URL_ENCODE('https://calcite.apache.org')^", + "No match 
found for function signature URL_ENCODE\\(<CHARACTER>\\)", + false); + final SqlOperatorFixture f = f0.withLibrary(SqlLibrary.SPARK); + f.checkString("URL_ENCODE('https://calcite.apache.org')", + "https%3A%2F%2Fcalcite.apache.org", + "VARCHAR NOT NULL"); + f.checkString("URL_ENCODE('http://test?a=b&c=d')", + "http%3A%2F%2Ftest%3Fa%3Db%26c%3Dd", + "VARCHAR NOT NULL"); + f.checkString("URL_ENCODE(_UTF8'http://\u4F60\u597D')", + "http%3A%2F%2F%E4%BD%A0%E5%A5%BD", + "VARCHAR NOT NULL"); + f.checkString("URL_ENCODE('test')", + "test", + "VARCHAR NOT NULL"); + f.checkNull("URL_ENCODE(cast(null as varchar))"); + } + @Test void testExtractValue() { SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.MYSQL); f.checkNull("ExtractValue(NULL, '//b')");