Skip to content

Commit

Permalink
[CALCITE-5825] Add URL_ENCODE and URL_DECODE function (enabled in Spa…
Browse files Browse the repository at this point in the history
…rk library)
  • Loading branch information
herunkang2018 authored and JiajunBernoulli committed Oct 27, 2023
1 parent bdafeec commit ad2e843
Show file tree
Hide file tree
Showing 6 changed files with 140 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MICROS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_MILLIS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.UNIX_SECONDS;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.URL_DECODE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.URL_ENCODE;
import static org.apache.calcite.sql.fun.SqlLibraryOperators.XML_TRANSFORM;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.ABS;
import static org.apache.calcite.sql.fun.SqlStdOperatorTable.ACOS;
Expand Down Expand Up @@ -903,6 +905,10 @@ Builder populate2() {
// Compression Operators
defineMethod(COMPRESS, BuiltInMethod.COMPRESS.method, NullPolicy.ARG0);

// Url Operators
defineMethod(URL_ENCODE, BuiltInMethod.URL_ENCODE.method, NullPolicy.ARG0);
defineMethod(URL_DECODE, BuiltInMethod.URL_DECODE.method, NullPolicy.ARG0);

// Xml Operators
defineMethod(EXTRACT_VALUE, BuiltInMethod.EXTRACT_VALUE.method, NullPolicy.ARG0);
defineMethod(XML_TRANSFORM, BuiltInMethod.XML_TRANSFORM.method, NullPolicy.ARG0);
Expand Down
59 changes: 59 additions & 0 deletions core/src/main/java/org/apache/calcite/runtime/UrlFunctions.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.calcite.runtime;

import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import static org.apache.calcite.util.Static.RESOURCE;

/**
 * Static helpers that implement the URL-related SQL functions
 * ({@code URL_DECODE} and {@code URL_ENCODE}).
 *
 * <p>Both helpers work in the {@code application/x-www-form-urlencoded}
 * format handled by {@link URLDecoder} and {@link URLEncoder}, and always use
 * the UTF-8 charset. The class only holds static methods and cannot be
 * instantiated.
 */
public class UrlFunctions {

  /** Charset whose name is handed to the JDK encoder and decoder. */
  private static final Charset UTF_8 = StandardCharsets.UTF_8;

  private UrlFunctions() {
  }

  /**
   * The "URL_DECODE(string)" function for Hive and Spark.
   *
   * <p>If decoding fails with a {@link RuntimeException} (for example on a
   * malformed {@code %} escape) the input is handed back unchanged instead
   * of propagating the error.
   *
   * @param value the encoded text
   * @return the decoded text, or {@code value} itself when it cannot be
   *     decoded
   */
  public static String urlDecode(String value) {
    final String charsetName = UTF_8.name();
    try {
      return URLDecoder.decode(value, charsetName);
    } catch (UnsupportedEncodingException unsupported) {
      // The JDK rejected the charset name; report it as a charset error.
      throw RESOURCE.charsetEncoding(value, charsetName).ex();
    } catch (RuntimeException undecodable) {
      // Deliberate best-effort: undecodable input is returned as-is.
      return value;
    }
  }

  /**
   * The "URL_ENCODE(string)" function for Hive and Spark.
   *
   * @param url the text to encode
   * @return {@code url} translated by {@link URLEncoder} using UTF-8
   */
  public static String urlEncode(String url) {
    final String charsetName = UTF_8.name();
    try {
      return URLEncoder.encode(url, charsetName);
    } catch (UnsupportedEncodingException unsupported) {
      // The JDK rejected the charset name; report it as a charset error.
      throw RESOURCE.charsetEncoding(url, charsetName).ex();
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -528,6 +528,22 @@ static RelDataType deriveTypeSplit(SqlOperatorBinding operatorBinding,
ReturnTypes.VARBINARY_NULLABLE,
OperandTypes.STRING, SqlFunctionCategory.STRING);

/** The "URL_DECODE(string)" function, enabled in the Spark library.
 *
 * <p>Operands are checked with {@link OperandTypes#STRING}, the return type
 * is inferred by {@link ReturnTypes#VARCHAR_NULLABLE}, and the function is
 * filed under {@link SqlFunctionCategory#STRING}. */
@LibraryOperator(libraries = {SPARK})
public static final SqlFunction URL_DECODE =
SqlBasicFunction.create("URL_DECODE",
ReturnTypes.VARCHAR_NULLABLE,
OperandTypes.STRING,
SqlFunctionCategory.STRING);

/** The "URL_ENCODE(string)" function, enabled in the Spark library.
 *
 * <p>Operands are checked with {@link OperandTypes#STRING}, the return type
 * is inferred by {@link ReturnTypes#VARCHAR_NULLABLE}, and the function is
 * filed under {@link SqlFunctionCategory#STRING}. */
@LibraryOperator(libraries = {SPARK})
public static final SqlFunction URL_ENCODE =
SqlBasicFunction.create("URL_ENCODE",
ReturnTypes.VARCHAR_NULLABLE,
OperandTypes.STRING,
SqlFunctionCategory.STRING);

@LibraryOperator(libraries = {MYSQL})
public static final SqlFunction EXTRACT_VALUE =
SqlBasicFunction.create("EXTRACTVALUE",
Expand Down
3 changes: 3 additions & 0 deletions core/src/main/java/org/apache/calcite/util/BuiltInMethod.java
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
import org.apache.calcite.runtime.SpatialTypeFunctions;
import org.apache.calcite.runtime.SqlFunctions;
import org.apache.calcite.runtime.SqlFunctions.FlatProductInputType;
import org.apache.calcite.runtime.UrlFunctions;
import org.apache.calcite.runtime.Utilities;
import org.apache.calcite.runtime.XmlFunctions;
import org.apache.calcite.schema.FilterableTable;
Expand Down Expand Up @@ -391,6 +392,8 @@ public enum BuiltInMethod {
SHA512(SqlFunctions.class, "sha512", String.class),
THROW_UNLESS(SqlFunctions.class, "throwUnless", boolean.class, String.class),
COMPRESS(CompressionFunctions.class, "compress", String.class),
URL_DECODE(UrlFunctions.class, "urlDecode", String.class),
URL_ENCODE(UrlFunctions.class, "urlEncode", String.class),
EXTRACT_VALUE(XmlFunctions.class, "extractValue", String.class, String.class),
XML_TRANSFORM(XmlFunctions.class, "xmlTransform", String.class, String.class),
EXTRACT_XML(XmlFunctions.class, "extractXml", String.class, String.class, String.class),
Expand Down
2 changes: 2 additions & 0 deletions site/_docs/reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -2856,6 +2856,8 @@ BigQuery's type system uses confusingly different names for types and functions:
| b | UNIX_MILLIS(timestamp) | Returns the number of milliseconds since 1970-01-01 00:00:00
| b | UNIX_SECONDS(timestamp) | Returns the number of seconds since 1970-01-01 00:00:00
| b | UNIX_DATE(date) | Returns the number of days since 1970-01-01
| s | URL_DECODE(string) | Decodes a *string* in 'application/x-www-form-urlencoded' format using the UTF-8 encoding scheme; returns the original *string* if a decoding error occurs
| s | URL_ENCODE(string) | Translates a *string* into 'application/x-www-form-urlencoded' format using the UTF-8 encoding scheme
| o | XMLTRANSFORM(xml, xslt) | Applies XSLT transform *xslt* to XML string *xml* and returns the result

Note:
Expand Down
54 changes: 54 additions & 0 deletions testkit/src/main/java/org/apache/calcite/test/SqlOperatorTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -5619,6 +5619,60 @@ private static void checkIf(SqlOperatorFixture f) {
"07000000789c4bad48cc2dc84905000bc002ed", "VARBINARY NOT NULL");
}

/** Tests the {@code URL_DECODE} function, which is only available when the
 * Spark library is enabled. */
@Test void testUrlDecode() {
  final SqlOperatorFixture base = fixture().setFor(SqlLibraryOperators.URL_DECODE);

  // Without the Spark library the function must not be resolvable.
  base.checkFails("^URL_DECODE('https://calcite.apache.org')^",
      "No match found for function signature URL_DECODE\\(<CHARACTER>\\)",
      false);

  final SqlOperatorFixture spark = base.withLibrary(SqlLibrary.SPARK);

  // Each row is {SQL expression, expected result}; every result has the
  // same expected type.
  final String[][] cases = {
      // plain percent-encoded URLs
      {"URL_DECODE('https%3A%2F%2Fcalcite.apache.org')",
          "https://calcite.apache.org"},
      {"URL_DECODE('http%3A%2F%2Ftest%3Fa%3Db%26c%3Dd')",
          "http://test?a=b&c=d"},
      // multi-byte UTF-8 sequences
      {"URL_DECODE('http%3A%2F%2F%E4%BD%A0%E5%A5%BD')",
          "http://\u4F60\u597D"},
      // nothing to decode
      {"URL_DECODE('test')", "test"},
      {"URL_DECODE('')", ""},
      // malformed escapes: the input is expected back unchanged
      {"URL_DECODE('https%%3A%2F%2Fcalcite.apache.org')",
          "https%%3A%2F%2Fcalcite.apache.org"},
      {"URL_DECODE('https%3A%2F%2Fcalcite.apache.org%')",
          "https%3A%2F%2Fcalcite.apache.org%"},
  };
  for (String[] c : cases) {
    spark.checkString(c[0], c[1], "VARCHAR NOT NULL");
  }

  spark.checkNull("URL_DECODE(cast(null as varchar))");
}

/** Tests the {@code URL_ENCODE} function, which is only available when the
 * Spark library is enabled. */
@Test void testUrlEncode() {
  final SqlOperatorFixture base = fixture().setFor(SqlLibraryOperators.URL_ENCODE);

  // Without the Spark library the function must not be resolvable.
  base.checkFails("^URL_ENCODE('https://calcite.apache.org')^",
      "No match found for function signature URL_ENCODE\\(<CHARACTER>\\)",
      false);

  final SqlOperatorFixture spark = base.withLibrary(SqlLibrary.SPARK);

  // Each row is {SQL expression, expected result}; every result has the
  // same expected type.
  final String[][] cases = {
      // reserved characters are percent-encoded
      {"URL_ENCODE('https://calcite.apache.org')",
          "https%3A%2F%2Fcalcite.apache.org"},
      {"URL_ENCODE('http://test?a=b&c=d')",
          "http%3A%2F%2Ftest%3Fa%3Db%26c%3Dd"},
      // non-ASCII characters are encoded as UTF-8 bytes
      {"URL_ENCODE(_UTF8'http://\u4F60\u597D')",
          "http%3A%2F%2F%E4%BD%A0%E5%A5%BD"},
      // nothing to encode
      {"URL_ENCODE('test')", "test"},
      {"URL_ENCODE('')", ""},
  };
  for (String[] c : cases) {
    spark.checkString(c[0], c[1], "VARCHAR NOT NULL");
  }

  spark.checkNull("URL_ENCODE(cast(null as varchar))");
}

@Test void testExtractValue() {
SqlOperatorFixture f = fixture().withLibrary(SqlLibrary.MYSQL);
f.checkNull("ExtractValue(NULL, '//b')");
Expand Down

0 comments on commit ad2e843

Please sign in to comment.