Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[func](jsonb)support json_depth function #24801

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 50 additions & 0 deletions be/src/util/jsonb_document.h
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,9 @@ class JsonbValue {
//Get the number of jsonbvalue elements
int length() const;

//Get the nesting depth of jsonbvalue: 1 for a scalar or an empty object/array,
//1 plus the depth of the deepest child for a non-empty object/array,
//and 0 for an unrecognized type tag
int depth() const;
LemonLiTree marked this conversation as resolved.
Show resolved Hide resolved

//Whether to include the jsonbvalue rhs
bool contains(JsonbValue* rhs) const;

Expand Down Expand Up @@ -1269,6 +1272,53 @@ inline int JsonbValue::length() const {
}
}

inline int JsonbValue::depth() const {
switch (type_) {
case JsonbType::T_Int8:
case JsonbType::T_Int16:
case JsonbType::T_Int32:
case JsonbType::T_Int64:
case JsonbType::T_Double:
case JsonbType::T_Float:
case JsonbType::T_Int128:
case JsonbType::T_String:
case JsonbType::T_Binary:
case JsonbType::T_Null:
case JsonbType::T_True:
case JsonbType::T_False: {
return 1;
}
case JsonbType::T_Object: {
int base_depth = 1;
int numElem = ((ObjectVal*)this)->numElem();
if (numElem == 0) return base_depth;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: statement should be inside braces [readability-braces-around-statements]

Suggested change
if (numElem == 0) return base_depth;
if (numElem == 0) { return base_depth;
}


int max_depth = base_depth;

for (int i = 0; i < numElem; ++i) {
JsonbKeyValue* key = ((ObjectVal*)this)->getJsonbKeyValue(i);
JsonbValue* value = ((ObjectVal*)this)->find(key->getKeyStr(), key->klen());
int current_depth = base_depth + value->depth();
if (current_depth > max_depth) max_depth = current_depth;
LemonLiTree marked this conversation as resolved.
Show resolved Hide resolved
}
return max_depth;
}
case JsonbType::T_Array: {
int base_depth = 1;
int numElem = ((ArrayVal*)this)->numElem();
if (numElem == 0) return base_depth;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: statement should be inside braces [readability-braces-around-statements]

Suggested change
if (numElem == 0) return base_depth;
if (numElem == 0) { return base_depth;
}

int max_depth = base_depth;
for (int i = 0; i < numElem; ++i) {
int current_depth = base_depth + ((ArrayVal*)this)->get(i)->depth();
if (current_depth > max_depth) max_depth = current_depth;
LemonLiTree marked this conversation as resolved.
Show resolved Hide resolved
}
return max_depth;
}
default:
return 0;
}
}

inline bool JsonbValue::contains(JsonbValue* rhs) const {
switch (type_) {
case JsonbType::T_Int8:
Expand Down
49 changes: 48 additions & 1 deletion be/src/vec/functions/function_jsonb.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1167,7 +1167,6 @@ class FunctionJsonbContains : public IFunction {
size_t get_number_of_arguments() const override {
return get_variadic_argument_types_impl().size();
}

bool use_default_implementation_for_nulls() const override { return false; }

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
Expand Down Expand Up @@ -1289,6 +1288,53 @@ struct JsonbContainsAndPathImpl {
}
};

class FunctionJsonbDepth : public IFunction {
public:
static constexpr auto name = "json_depth";
String get_name() const override { return name; }
LemonLiTree marked this conversation as resolved.
Show resolved Hide resolved
static FunctionPtr create() { return std::make_shared<FunctionJsonbDepth>(); }

DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
LemonLiTree marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method 'get_return_type_impl' can be made static [readability-convert-member-functions-to-static]

Suggested change
DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
static DataTypePtr get_return_type_impl(const DataTypes& arguments) override {

return std::make_shared<DataTypeInt32>();
}

size_t get_number_of_arguments() const override { return 1; }
LemonLiTree marked this conversation as resolved.
Show resolved Hide resolved

Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method 'execute_impl' can be made static [readability-convert-member-functions-to-static]

Suggested change
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,

size_t result, size_t input_rows_count) const override {
Comment on lines +1303 to +1304
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

warning: method 'execute_impl' can be made static [readability-convert-member-functions-to-static]

Suggested change
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) const override {
static Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
size_t result, size_t input_rows_count) override {

DCHECK_GE(arguments.size(), 1);
auto jsonb_data_column = block.get_by_position(arguments[0]).column;

auto null_map = ColumnUInt8::create(input_rows_count, 0);
auto return_type = block.get_data_type(result);
auto col_result = ColumnVector<Int32>::create();
col_result->resize(input_rows_count);
auto& res_data = col_result->get_data();

for (size_t i = 0; i < input_rows_count; ++i) {
if (jsonb_data_column->is_null_at(i)) {
null_map->get_data()[i] = 1;
res_data[i] = 0;
continue;
}

auto jsonb_value = jsonb_data_column->get_data_at(i);
// doc is NOT necessary to be deleted since JsonbDocument will not allocate memory
JsonbDocument* doc = JsonbDocument::createDocument(jsonb_value.data, jsonb_value.size);
JsonbValue* value = doc->getValue();
if (UNLIKELY(jsonb_value.size == 0 || !value)) {
null_map->get_data()[i] = 1;
res_data[i] = 0;
continue;
}
res_data[i] = value->depth();
}

block.replace_by_position(result, std::move(col_result));
return Status::OK();
}
};

void register_function_jsonb(SimpleFunctionFactory& factory) {
factory.register_function<FunctionJsonbParse>(FunctionJsonbParse::name);
factory.register_alias(FunctionJsonbParse::name, FunctionJsonbParse::alias);
Expand Down Expand Up @@ -1354,6 +1400,7 @@ void register_function_jsonb(SimpleFunctionFactory& factory) {
factory.register_function<FunctionJsonbLength<JsonbLengthAndPathImpl>>();
factory.register_function<FunctionJsonbContains<JsonbContainsImpl>>();
factory.register_function<FunctionJsonbContains<JsonbContainsAndPathImpl>>();
factory.register_function<FunctionJsonbDepth>();
}

} // namespace doris::vectorized
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv6String;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArray;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonDepth;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonExtract;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonInsert;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonLength;
Expand Down Expand Up @@ -656,6 +657,7 @@ public class BuiltinScalarFunctions implements FunctionHelper {
scalar(ToIpv6OrDefault.class, "to_ipv6_or_default"),
scalar(ToIpv6OrNull.class, "to_ipv6_or_null"),
scalar(JsonArray.class, "json_array"),
scalar(JsonDepth.class, "json_depth"),
scalar(JsonObject.class, "json_object"),
scalar(JsonQuote.class, "json_quote"),
scalar(JsonUnQuote.class, "json_unquote"),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

package org.apache.doris.nereids.trees.expressions.functions.scalar;

import org.apache.doris.catalog.FunctionSignature;
import org.apache.doris.nereids.trees.expressions.Expression;
import org.apache.doris.nereids.trees.expressions.functions.ExplicitlyCastableSignature;
import org.apache.doris.nereids.trees.expressions.functions.PropagateNullable;
import org.apache.doris.nereids.trees.expressions.shape.BinaryExpression;
import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression;
import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor;
import org.apache.doris.nereids.types.IntegerType;
import org.apache.doris.nereids.types.JsonType;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;

import java.util.List;

/**
* ScalarFunction 'json_depth'. This class is generated by GenerateFunction.
*/
public class JsonDepth extends ScalarFunction
implements BinaryExpression, ExplicitlyCastableSignature, PropagateNullable {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

jsonDepth should be UnaryExpression


public static final List<FunctionSignature> SIGNATURES = ImmutableList.of(
FunctionSignature.ret(IntegerType.INSTANCE).args(JsonType.INSTANCE)
);

/**
* constructor with 1 arguments.
*/
public JsonDepth(Expression arg0) {
super("json_depth", arg0);
}

/**
* withChildren.
*/
@Override
public JsonDepth withChildren(List<Expression> children) {
Preconditions.checkArgument(children.size() == 1);
return new JsonDepth(children.get(0));
}

@Override
public List<FunctionSignature> getSignatures() {
return SIGNATURES;
}

@Override
public <R, C> R accept(ExpressionVisitor<R, C> visitor, C context) {
return visitor.visitJsonDepth(this, context);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@
import org.apache.doris.nereids.trees.expressions.functions.scalar.IsIpv6String;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonArray;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonContains;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonDepth;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonExtract;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonInsert;
import org.apache.doris.nereids.trees.expressions.functions.scalar.JsonLength;
Expand Down Expand Up @@ -1415,6 +1416,10 @@ default R visitJsonLength(JsonLength jsonLength, C context) {
return visitScalarFunction(jsonLength, context);
}

// Visitor hook for JsonDepth expressions; the default forwards to visitScalarFunction.
default R visitJsonDepth(JsonDepth jsonDepth, C context) {
return visitScalarFunction(jsonDepth, context);
}

default R visitJsonContains(JsonContains jsonContains, C context) {
return visitScalarFunction(jsonContains, context);
}
Expand Down
1 change: 1 addition & 0 deletions gensrc/script/doris_builtins_functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -1787,6 +1787,7 @@
[['json_contains'], 'BOOLEAN', ['JSONB', 'JSONB'], 'ALWAYS_NULLABLE'],
[['json_contains'], 'BOOLEAN', ['JSONB', 'JSONB', 'VARCHAR'], 'ALWAYS_NULLABLE'],
[['json_contains'], 'BOOLEAN', ['JSONB', 'JSONB', 'STRING'], 'ALWAYS_NULLABLE'],
[['json_depth'], 'INT', ['JSONB'], 'DEPEND_ON_ARGUMENT'],

# Json functions
[['get_json_int'], 'INT', ['VARCHAR', 'VARCHAR'], 'ALWAYS_NULLABLE'],
Expand Down
62 changes: 62 additions & 0 deletions regression-test/data/jsonb_p0/test_jsonb_load_and_function.out
Original file line number Diff line number Diff line change
Expand Up @@ -7726,3 +7726,65 @@ true
30 -9223372036854775808 false
31 18446744073709551615 false

-- !sql_json_depth --
\N

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
2

-- !sql_json_depth --
3

-- !sql_json_depth --
3

-- !sql_json_depth --
3

-- !sql_json_depth --
2

-- !sql_json_depth --
3

-- !sql_json_depth --
1 \N \N
2 null 1
3 true 1
4 false 1
5 100 1
6 10000 1
7 1000000000 1
8 1152921504606846976 1
9 6.18 1
10 "abcd" 1
11 {} 1
12 {"k1":"v31","k2":300} 2
13 [] 1
14 [123,456] 2
15 ["abc","def"] 2
16 [null,true,false,100,6.18,"abc"] 2
17 [{"k1":"v41","k2":400},1,"a",3.14] 3
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} 4
26 \N \N
27 {"k1":"v1","k2":200} 2
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} 3
29 12524337771678448270 1
30 -9223372036854775808 1
31 18446744073709551615 1

Original file line number Diff line number Diff line change
Expand Up @@ -6105,3 +6105,62 @@ true
27 {"k1":"v1","k2":200} false
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} false

-- !sql_json_depth --
\N

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
1

-- !sql_json_depth --
2

-- !sql_json_depth --
3

-- !sql_json_depth --
3

-- !sql_json_depth --
3

-- !sql_json_depth --
2

-- !sql_json_depth --
3

-- !sql_json_depth --
1 \N \N
2 null 1
3 true 1
4 false 1
5 100 1
6 10000 1
7 1000000000 1
8 1152921504606846976 1
9 6.18 1
10 "abcd" 1
11 {} 1
12 {"k1":"v31","k2":300} 2
13 [] 1
14 [123,456] 2
15 ["abc","def"] 2
16 [null,true,false,100,6.18,"abc"] 2
17 [{"k1":"v41","k2":400},1,"a",3.14] 3
18 {"k1":"v31","k2":300,"a1":[{"k1":"v41","k2":400},1,"a",3.14]} 4
26 \N \N
27 {"k1":"v1","k2":200} 2
28 {"a.b.c":{"k1.a1":"v31","k2":300},"a":"niu"} 3

Original file line number Diff line number Diff line change
Expand Up @@ -572,4 +572,18 @@ suite("test_jsonb_load_and_function", "p0") {
qt_select_json_contains """SELECT id, j, json_contains(j, cast('{"k2":300}' as json)) FROM ${testTable} ORDER BY id"""
qt_select_json_contains """SELECT id, j, json_contains(j, cast('{"k1":"v41","k2":400}' as json), '\$.a1') FROM ${testTable} ORDER BY id"""
qt_select_json_contains """SELECT id, j, json_contains(j, cast('[123,456]' as json)) FROM ${testTable} ORDER BY id"""

qt_sql_json_depth """SELECT json_depth(NULL)"""
qt_sql_json_depth """SELECT json_depth('true')"""
qt_sql_json_depth """SELECT json_depth('null')"""
qt_sql_json_depth """SELECT json_depth('"abc"')"""
qt_sql_json_depth """SELECT json_depth('[]')"""
qt_sql_json_depth """SELECT json_depth('{}')"""
qt_sql_json_depth """SELECT json_depth('[1, 2]')"""
qt_sql_json_depth """SELECT json_depth('[1, {"x": 2}]')"""
qt_sql_json_depth """SELECT json_depth('{"x": 1, "y": [1, 2]}')"""
qt_sql_json_depth """SELECT json_depth('[1, [2, 3]]')"""
qt_sql_json_depth """SELECT json_depth('{"x": 1}')"""
qt_sql_json_depth """SELECT json_depth('{"x": {"y": 1}}')"""
qt_sql_json_depth """SELECT id, j, json_depth(j) FROM ${testTable} ORDER BY id"""
}
Loading
Loading