Skip to content

Commit

Permalink
[MINOR] Fix compression statistic logging for frames
Browse files Browse the repository at this point in the history
Logging of frame statistics for compression is misleading when
samples are used to estimate the number of distinct elements.
Therefore, this commit changes the logging message to reflect the
approximate nature of the distinct counts.
  • Loading branch information
Baunsgaard committed Apr 5, 2024
1 parent 3e6e462 commit 6b23ea4
Showing 1 changed file with 8 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@
package org.apache.sysds.runtime.frame.data.compress;

import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.conf.ConfigurationManager;
import org.apache.sysds.conf.DMLConfig;
import org.apache.sysds.runtime.frame.data.columns.ArrayFactory.FrameArrayType;

public class ArrayCompressionStatistics {
Expand Down Expand Up @@ -48,8 +50,12 @@ public ArrayCompressionStatistics(int bytePerValue, int nUnique, boolean shouldC
/**
 * Formats the compression statistics as a single log line.
 *
 * The distinct-value count is labelled {@code EstUnique} when the configured
 * {@code DMLConfig.COMPRESSED_SAMPLING_RATIO} is below 1, and {@code Unique}
 * otherwise. A {@code null} best type is printed as {@code None}.
 *
 * @return the formatted statistics line
 */
@Override
public String toString() {
	// Emit the statistics exactly once; only the label of the distinct count depends on the sampling ratio.
	final boolean sampled = ConfigurationManager.getDMLConfig()
		.getDoubleValue(DMLConfig.COMPRESSED_SAMPLING_RATIO) < 1;
	final String uniqueLabel = sampled ? "EstUnique" : "Unique";
	final String use = bestType == null ? "None" : bestType.toString();
	return String.format("Compressed Stats: size:%8d->%8d, Use:%10s, %s:%6d, ValueType:%7s", originalSize,
		compressedSizeEstimate, use, uniqueLabel, nUnique, valueType);
}
}

0 comments on commit 6b23ea4

Please sign in to comment.