Skip to content

Commit

Permalink
[VL] Port PR #6661 #6707 for bug fixing in rc1 (#6792)
Browse files Browse the repository at this point in the history
* [VL] Skip UTF-8 validation in JSON parsing (#6661)

* [VL] Fix high precision rounding (#6707)

---------

Co-authored-by: PHILO-HE <[email protected]>
Co-authored-by: Arnav Balyan <[email protected]>
  • Loading branch information
3 people authored Aug 13, 2024
1 parent 415c722 commit 0cddc4d
Show file tree
Hide file tree
Showing 6 changed files with 39 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -217,20 +217,28 @@ class ScalarFunctionsValidateSuite extends FunctionsValidateTest {
}
}

test("Test get_json_object datatab function") {
test("get_json_object") {
runQueryAndCompare(
"SELECT get_json_object(string_field1, '$.a') " +
"from datatab limit 1;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
}

test("Test get_json_object lineitem function") {
runQueryAndCompare(
"SELECT l_orderkey, get_json_object('{\"a\":\"b\"}', '$.a') " +
"from lineitem limit 1;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}

// Invalid UTF-8 encoding.
spark.sql(
"CREATE TABLE t USING parquet SELECT concat('{\"a\": 2, \"'," +
" string(X'80'), '\": 3, \"c\": 100}') AS c1")
withTable("t") {
runQueryAndCompare("SELECT get_json_object(c1, '$.c') FROM t;") {
checkGlutenOperatorMatch[ProjectExecTransformer]
}
}
}

ignore("json_array_length") {
Expand Down
11 changes: 7 additions & 4 deletions cpp/velox/operators/functions/Arithmetic.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <folly/CPortability.h>
#include <stdint.h>
#include <cmath>
#include <limits>
#include <type_traits>

namespace gluten {
Expand All @@ -38,14 +39,16 @@ struct RoundFunction {
return number;
}

double factor = std::pow(10, decimals);
// Using long double for high precision during intermediate calculations.
// TODO: Make this more efficient with Boost to support arbitrarily high precision at runtime.
long double factor = std::pow(10.0L, static_cast<long double>(decimals));
static const TNum kInf = std::numeric_limits<TNum>::infinity();

if (number < 0) {
return (std::round(std::nextafter(number, -kInf) * factor * -1) / factor) * -1;
return static_cast<TNum>((std::round(std::nextafter(number, -kInf) * factor * -1) / factor) * -1);
}
return std::round(std::nextafter(number, kInf) * factor) / factor;
return static_cast<TNum>(std::round(std::nextafter(number, kInf) * factor) / factor);
}

template <typename TInput>
FOLLY_ALWAYS_INLINE void call(TInput& result, const TInput& a, const int32_t b = 0) {
result = round(a, b);
Expand Down
11 changes: 11 additions & 0 deletions ep/build-velox/src/modify_velox.patch
Original file line number Diff line number Diff line change
Expand Up @@ -177,3 +177,14 @@ index 2cabfc29a..54329ce23 100644

add_library(
velox_dwio_arrow_parquet_writer_test_lib
diff --git a/CMake/resolve_dependency_modules/simdjson.cmake b/CMake/resolve_dependency_modules/simdjson.cmake
index 69e7f2044..777eb5ec1 100644
--- a/CMake/resolve_dependency_modules/simdjson.cmake
+++ b/CMake/resolve_dependency_modules/simdjson.cmake
@@ -29,4 +29,6 @@ FetchContent_Declare(
URL ${VELOX_SIMDJSON_SOURCE_URL}
URL_HASH ${VELOX_SIMDJSON_BUILD_SHA256_CHECKSUM})

+set(SIMDJSON_SKIPUTF8VALIDATION ON)
+
FetchContent_MakeAvailable(simdjson)
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ class GlutenMathExpressionsSuite extends MathExpressionsSuite with GlutenTestsTr
checkEvaluation(Round(-3.5, 0), -4.0)
checkEvaluation(Round(-0.35, 1), -0.4)
checkEvaluation(Round(-35, -1), -40)
checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
checkEvaluation(BRound(2.5, 0), 2.0)
checkEvaluation(BRound(3.5, 0), 4.0)
checkEvaluation(BRound(-2.5, 0), -2.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,10 @@ class GlutenMathExpressionsSuite extends MathExpressionsSuite with GlutenTestsTr
checkEvaluation(Round(-3.5, 0), -4.0)
checkEvaluation(Round(-0.35, 1), -0.4)
checkEvaluation(Round(-35, -1), -40)
checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
checkEvaluation(Round(-35, -1), -40)
checkEvaluation(Round(BigDecimal("45.00"), -1), BigDecimal(50))
checkEvaluation(BRound(2.5, 0), 2.0)
checkEvaluation(BRound(3.5, 0), 4.0)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,9 @@ class GlutenMathExpressionsSuite extends MathExpressionsSuite with GlutenTestsTr
checkEvaluation(BRound(-3.5, 0), -4.0)
checkEvaluation(BRound(-0.35, 1), -0.4)
checkEvaluation(BRound(-35, -1), -40)
checkEvaluation(Round(1.12345678901234567, 8), 1.12345679)
checkEvaluation(Round(-0.98765432109876543, 5), -0.98765)
checkEvaluation(Round(12345.67890123456789, 6), 12345.678901)
checkEvaluation(BRound(BigDecimal("45.00"), -1), BigDecimal(40))
checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(2.5), Literal(0))), Decimal(2))
checkEvaluation(checkDataTypeAndCast(RoundFloor(Literal(3.5), Literal(0))), Decimal(3))
Expand Down

0 comments on commit 0cddc4d

Please sign in to comment.