Fix null on overflow and multiply, and support cast varchar to decimal (

#169) * Fix null on overflow and multiply as spark precision. * Fix cast double to decimal. * Support casting from varchar to decimal.
oap-project · Mar 31, 2023 · d2535c3 · d2535c3
1 parent fdd6d95
commit d2535c3
Show file tree

Hide file tree

Showing 9 changed files with 684 additions and 139 deletions.
diff --git a/velox/common/base/BitUtil.h b/velox/common/base/BitUtil.h
@@ -693,6 +693,13 @@ inline int32_t countLeadingZeros(uint64_t word) {
   return __builtin_clzll(word);
 }
 
+inline int32_t countLeadingZerosUint128(__uint128_t word) {
+  uint64_t hi = word >> 64;
+  uint64_t lo = static_cast<uint64_t>(word);
+  return (hi == 0) ? 64 + bits::countLeadingZeros(lo)
+                   : bits::countLeadingZeros(hi);
+}
+
 inline uint64_t nextPowerOfTwo(uint64_t size) {
   if (size == 0) {
     return 0;

diff --git a/velox/expression/CastExpr.cpp b/velox/expression/CastExpr.cpp
@@ -26,6 +26,7 @@
 #include "velox/expression/StringWriter.h"
 #include "velox/external/date/tz.h"
 #include "velox/functions/lib/RowsTranslationUtil.h"
+#include "velox/type/DecimalUtilOp.h"
 #include "velox/vector/ComplexVector.h"
 #include "velox/vector/FunctionVector.h"
 #include "velox/vector/SelectivityVector.h"
@@ -201,6 +202,30 @@ void applyDoubleToDecimalCastKernel(
     }
   });
 }
+
+template <typename TOutput>
+void applyVarCharToDecimalCastKernel(
+    const SelectivityVector& rows,
+    const BaseVector& input,
+    exec::EvalCtx& context,
+    const TypePtr& toType,
+    VectorPtr castResult) {
+  auto sourceVector = input.as<SimpleVector<StringView>>();
+  auto castResultRawBuffer =
+      castResult->asUnchecked<FlatVector<TOutput>>()->mutableRawValues();
+  const auto& toPrecisionScale = getDecimalPrecisionScale(*toType);
+  context.applyToSelectedNoThrow(rows, [&](vector_size_t row) {
+    auto rescaledValue = DecimalUtilOp::rescaleVarchar<TOutput>(
+        sourceVector->valueAt(row),
+        toPrecisionScale.first,
+        toPrecisionScale.second);
+    if (rescaledValue.has_value()) {
+      castResultRawBuffer[row] = rescaledValue.value();
+    } else {
+      castResult->setNull(row, true);
+    }
+  });
+}
 } // namespace
 
 template <typename To, typename From>
@@ -635,6 +660,16 @@ VectorPtr CastExpr::applyDecimal(
       }
       break;
     }
+    case TypeKind::VARCHAR: {
+      if (toType->kind() == TypeKind::SHORT_DECIMAL) {
+        applyVarCharToDecimalCastKernel<UnscaledShortDecimal>(
+            rows, input, context, toType, castResult);
+      } else {
+        applyVarCharToDecimalCastKernel<UnscaledLongDecimal>(
+            rows, input, context, toType, castResult);
+      }
+      break;
+    }
     default:
       VELOX_UNSUPPORTED(
           "Cast from {} to {} is not supported",

diff --git a/velox/expression/tests/CastExprTest.cpp b/velox/expression/tests/CastExprTest.cpp
@@ -826,6 +826,26 @@ TEST_F(CastExprTest, bigintToDecimal) {
       "Cannot cast BIGINT '100' to DECIMAL(17,16)");
 }
 
+TEST_F(CastExprTest, varcharToDecimal) {
+  // varchar to short decimal
+//   auto input = makeFlatVector<StringView>({"-3", "177"});
+//   testComplexCast(
+//       "c0", input, makeShortDecimalFlatVector({-300, 17700}, DECIMAL(6, 2)));
+
+//   // varchar to long decimal
+//   auto input2 = makeFlatVector<StringView>(
+//       {"-300000001234567891234.5", "1771234.5678912345678"});
+//   testComplexCast(
+//       "c0", input2, makeLongDecimalFlatVector({-300, 17700}, DECIMAL(32, 7)));
+
+  auto input3 = makeFlatVector<StringView>({"9999999999.99", "9999999999.99"});
+  testComplexCast(
+      "c0", input3, makeLongDecimalFlatVector(
+          {-30'000'000'000,
+           -20'000'000'000},
+          DECIMAL(12, 2)));
+}
+
 TEST_F(CastExprTest, castInTry) {
   // Test try(cast(array(varchar) as array(bigint))) whose input vector is
   // wrapped in dictinary encoding. The row of ["2a"] should trigger an error