Skip to content

Commit

Permalink
fix!: statistical functions should return null when provided a vect…
Browse files Browse the repository at this point in the history
…or of only `null` values (#5606)

* Updated Numeric calculations and tests, corrected median and percentile to ignore NULL values.

* Correct Numeric avg() function and tests.

* UpdateBy corrections for all-NULL vectors

* Agg corrections for avg,var,std

* Better testing and coverage, fix to wvar() and misc cleanup.
  • Loading branch information
lbooker42 authored Jun 19, 2024
1 parent f3cd3b4 commit 83f62db
Show file tree
Hide file tree
Showing 44 changed files with 703 additions and 177 deletions.
212 changes: 194 additions & 18 deletions engine/function/src/templates/Numeric.ftl

Large diffs are not rendered by default.

157 changes: 144 additions & 13 deletions engine/function/src/templates/TestNumeric.ftl

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusLong;
import static io.deephaven.engine.util.NullSafeAddition.minusLong;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative average operator.
Expand Down Expand Up @@ -92,7 +93,7 @@ private boolean addChunk(ByteChunk<? extends Values> values, long destination, i
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);
} else if (nonNullCount.onlyNullsUnsafe(destination)) {
resultColumn.set(destination, Double.NaN);
resultColumn.set(destination, NULL_DOUBLE);
} else {
return false;
}
Expand All @@ -110,8 +111,11 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
final long newCount = nonNullCount.addNonNullUnsafe(destination, -chunkNonNull.get());
final long newSum = minusLong(runningSum.getUnsafe(destination), chunkSum);
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);

if (newCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, (double) newSum / newCount);
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// @formatter:off
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand All @@ -23,6 +24,7 @@

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusDouble;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative variance operator.
Expand Down Expand Up @@ -86,21 +88,27 @@ private boolean addChunk(ByteChunk<? extends Values> values, long destination, i
final double sum = SumByteChunk.sum2ByteChunk(values, chunkStart, chunkSize, chunkNonNull, sum2);

if (chunkNonNull.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final double newSum = plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
} else if (nonNullCounter.getCountUnsafe(destination) <= 1) {
resultColumn.set(destination, Double.NaN);
} else {
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
}
}
return true;
}
Expand All @@ -114,12 +122,12 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
return false;
}

final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());

final double newSum;
final double newSum2;

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
newSum = newSum2 = 0;
} else {
newSum = plusDouble(sumSource.getUnsafe(destination), -sum);
Expand All @@ -129,12 +137,16 @@ private boolean removeChunk(ByteChunk<? extends Values> values, long destination
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
}
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
}

final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusLong;
import static io.deephaven.engine.util.NullSafeAddition.minusLong;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative average operator.
Expand Down Expand Up @@ -88,7 +89,7 @@ private boolean addChunk(CharChunk<? extends Values> values, long destination, i
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);
} else if (nonNullCount.onlyNullsUnsafe(destination)) {
resultColumn.set(destination, Double.NaN);
resultColumn.set(destination, NULL_DOUBLE);
} else {
return false;
}
Expand All @@ -106,8 +107,11 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
final long newCount = nonNullCount.addNonNullUnsafe(destination, -chunkNonNull.get());
final long newSum = minusLong(runningSum.getUnsafe(destination), chunkSum);
runningSum.set(destination, newSum);
resultColumn.set(destination, (double) newSum / newCount);

if (newCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, (double) newSum / newCount);
}
return true;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
//
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand All @@ -19,6 +20,7 @@

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusDouble;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative variance operator.
Expand Down Expand Up @@ -82,21 +84,27 @@ private boolean addChunk(CharChunk<? extends Values> values, long destination, i
final double sum = SumCharChunk.sum2CharChunk(values, chunkStart, chunkSize, chunkNonNull, sum2);

if (chunkNonNull.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNonNull.get());
final double newSum = plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
} else if (nonNullCounter.getCountUnsafe(destination) <= 1) {
resultColumn.set(destination, Double.NaN);
} else {
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
}
}
return true;
}
Expand All @@ -110,12 +118,12 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
return false;
}

final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, -chunkNonNull.get());

final double newSum;
final double newSum2;

if (nonNullCount == 0) {
if (totalNormalCount == 0) {
newSum = newSum2 = 0;
} else {
newSum = plusDouble(sumSource.getUnsafe(destination), -sum);
Expand All @@ -125,12 +133,16 @@ private boolean removeChunk(CharChunk<? extends Values> values, long destination
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (nonNullCount <= 1) {
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
}
if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
}

final double variance = (newSum2 - (newSum * newSum / nonNullCount)) / (nonNullCount - 1);
final double variance = (newSum2 - (newSum * newSum / totalNormalCount)) / (totalNormalCount - 1);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.engine.util.NullSafeAddition.plusDouble;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

class DoubleChunkedAvgOperator extends FpChunkedNonNormalCounter implements IterativeChunkedAggregationOperator {
private final String name;
Expand Down Expand Up @@ -141,19 +142,23 @@ private void updateResultWithNewSum(long destination, long totalNormal, long tot
resultColumn.set(destination, Double.POSITIVE_INFINITY);
} else if (totalNegativeInfinityCount > 0) {
resultColumn.set(destination, Double.NEGATIVE_INFINITY);
} else if (totalNormal == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, newSum / totalNormal);
}
}

private void updateResultSumUnchanged(long destination, long totalNormal, long totalNanCount,
long totalInfinityCount, long totalNegativeInfinityCount) {
if (totalNanCount > 0 || totalNormal == 0 || (totalInfinityCount > 0 && totalNegativeInfinityCount > 0)) {
if (totalNanCount > 0 || (totalInfinityCount > 0 && totalNegativeInfinityCount > 0)) {
resultColumn.set(destination, Double.NaN);
} else if (totalInfinityCount > 0) {
resultColumn.set(destination, Double.POSITIVE_INFINITY);
} else if (totalNegativeInfinityCount > 0) {
resultColumn.set(destination, Double.NEGATIVE_INFINITY);
} else if (totalNormal == 0) {
resultColumn.set(destination, NULL_DOUBLE);
} else {
resultColumn.set(destination, runningSum.getUnsafe(destination) / totalNormal);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
import java.util.Collections;
import java.util.Map;

import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative average operator.
*/
Expand Down Expand Up @@ -127,12 +129,14 @@ private boolean updateResult(long destination) {

private boolean updateResult(long destination, long nncValue, long nanValue, long picValue, long nicValue,
double sumSumValue) {
if (nanValue > 0 || (picValue > 0 && nicValue > 0) || nncValue == 0) {
if (nanValue > 0 || (picValue > 0 && nicValue > 0)) {
return !Double.isNaN(resultColumn.getAndSetUnsafe(destination, Double.NaN));
} else if (picValue > 0) {
return resultColumn.getAndSetUnsafe(destination, Double.POSITIVE_INFINITY) != Double.POSITIVE_INFINITY;
} else if (nicValue > 0) {
return resultColumn.getAndSetUnsafe(destination, Double.NEGATIVE_INFINITY) != Double.NEGATIVE_INFINITY;
} else if (nncValue == 0) {
return resultColumn.getAndSetUnsafe(destination, NULL_DOUBLE) != NULL_DOUBLE;
} else {
final double newValue = (double) (sumSumValue / nncValue);
return resultColumn.getAndSetUnsafe(destination, newValue) != newValue;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
// @formatter:off
package io.deephaven.engine.table.impl.by;

import io.deephaven.base.verify.Assert;
import io.deephaven.chunk.attributes.ChunkLengths;
import io.deephaven.chunk.attributes.ChunkPositions;
import io.deephaven.chunk.attributes.Values;
Expand All @@ -23,6 +24,7 @@
import java.util.Map;

import static io.deephaven.engine.table.impl.by.RollupConstants.*;
import static io.deephaven.util.QueryConstants.NULL_DOUBLE;

/**
* Iterative variance operator.
Expand Down Expand Up @@ -95,31 +97,40 @@ private boolean addChunk(DoubleChunk<? extends Values> values, long destination,
final boolean forceNanResult = totalNegativeInfinities > 0 || totalPositiveInfinities > 0 || totalNanCount > 0;

if (chunkNormalCount.get() > 0) {
final long nonNullCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final long totalNormalCount = nonNullCounter.addNonNullUnsafe(destination, chunkNormalCount.get());
final double newSum = NullSafeAddition.plusDouble(sumSource.getUnsafe(destination), sum);
final double newSum2 = NullSafeAddition.plusDouble(sum2Source.getUnsafe(destination), sum2.doubleValue());

sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);

if (forceNanResult || nonNullCount <= 1) {
Assert.neqZero(totalNormalCount, "totalNormalCount");
if (forceNanResult || totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
} else {
// If the sum or sumSquared has reached +/-Infinity, we are stuck with NaN forever.
if (Double.isInfinite(newSum) || Double.isInfinite(newSum2)) {
resultColumn.set(destination, Double.NaN);
return true;
}
final double variance = computeVariance(nonNullCount, newSum, newSum2);
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);
}
return true;
} else if (forceNanResult || (nonNullCounter.getCountUnsafe(destination) <= 1)) {
}
if (forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
}
final long totalNormalCount = nonNullCounter.getCountUnsafe(destination);
if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
} else if (totalNormalCount == 1) {
resultColumn.set(destination, Double.NaN);
return true;
} else {
return false;
}
return false;
}

private static double computeVariance(long nonNullCount, double newSum, double newSum2) {
Expand Down Expand Up @@ -165,16 +176,17 @@ private boolean removeChunk(DoubleChunk<? extends Values> values, long destinati
}
sumSource.set(destination, newSum);
sum2Source.set(destination, newSum2);
} else if (totalNormalCount <= 1 || forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
} else {
newSum = sumSource.getUnsafe(destination);
newSum2 = sum2Source.getUnsafe(destination);
}
if (totalNormalCount <= 1) {

if (totalNormalCount == 1 || forceNanResult) {
resultColumn.set(destination, Double.NaN);
return true;
} else if (totalNormalCount == 0) {
resultColumn.set(destination, NULL_DOUBLE);
return true;
}

// If the sum has reach +/-Infinity, we are stuck with NaN forever.
Expand All @@ -186,6 +198,7 @@ private boolean removeChunk(DoubleChunk<? extends Values> values, long destinati
// Perform the calculation in a way that minimizes the impact of FP error.
final double variance = computeVariance(totalNormalCount, newSum, newSum2);
resultColumn.set(destination, std ? Math.sqrt(variance) : variance);

return true;
}

Expand Down
Loading

0 comments on commit 83f62db

Please sign in to comment.