Skip to content

Commit

Permalink
Squashed commit of the following:
Browse files Browse the repository at this point in the history
Author: Yuri Astrakhan <[email protected]>
Date:   Sat May 11 03:53:23 2024 -0400

    Clean up bit_cost

    * remove unused arrays and other dead code
    * make `ShannonEntropy` return a `(entropy, total)` tuple rather than write the total through an output param (output params are discouraged)
    * use `if cfg!(...)` in place of `#[cfg]`-gated bool constants for feature selection
  • Loading branch information
danielrh committed May 27, 2024
1 parent 8950401 commit 85196be
Show file tree
Hide file tree
Showing 2 changed files with 79 additions and 111 deletions.
172 changes: 73 additions & 99 deletions src/enc/bit_cost.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use alloc::SliceWrapperMut;

use core::cmp::{max, min};

use super::super::alloc::SliceWrapper;
Expand All @@ -6,53 +8,41 @@ use super::util::{FastLog2, FastLog2u16};
use super::vectorization::Mem256i;
use crate::enc::floatX;

pub fn ShannonEntropy(mut population: &[u32], size: usize, total: &mut usize) -> floatX {
let mut sum: usize = 0usize;

const BROTLI_REPEAT_ZERO_CODE_LENGTH: usize = 17;
const BROTLI_CODE_LENGTH_CODES: usize = BROTLI_REPEAT_ZERO_CODE_LENGTH + 1;

pub fn ShannonEntropy(mut population: &[u32], size: usize) -> (floatX, usize) {
let mut sum: usize = 0;
let mut retval: floatX = 0.0;
let mut p: usize;
if size & 1 != 0 && !population.is_empty() {
p = population[0] as usize;

if (size & 1) != 0 && !population.is_empty() {
let p = population[0] as usize;
population = population.split_at(1).1;
sum = sum.wrapping_add(p);
retval -= (p as floatX) * FastLog2u16(p as u16);
retval -= p as floatX * FastLog2u16(p as u16);
}
for pop_iter in population.split_at((size >> 1) << 1).0 {
p = *pop_iter as usize;
let p = *pop_iter as usize;
sum = sum.wrapping_add(p);
retval -= (p as floatX) * FastLog2u16(p as u16);
retval -= p as floatX * FastLog2u16(p as u16);
}
if sum != 0 {
retval += (sum as floatX) * FastLog2(sum as u64); // not sure it's 16 bit
retval += sum as floatX * FastLog2(sum as u64); // not sure it's 16 bit
}
*total = sum;
retval

(retval, sum)
}

#[inline(always)]
pub fn BitsEntropy(population: &[u32], size: usize) -> floatX {
let mut sum: usize = 0;
let retval = ShannonEntropy(population, size, &mut sum);
floatX::max(retval, sum as floatX)
let (mut retval, sum) = ShannonEntropy(population, size);
if retval < sum as floatX {
retval = sum as floatX;
}
retval
}

const BROTLI_REPEAT_ZERO_CODE_LENGTH: usize = 17;
const BROTLI_CODE_LENGTH_CODES: usize = BROTLI_REPEAT_ZERO_CODE_LENGTH + 1;
/*
use std::io::{self, Error, ErrorKind, Read, Write};
macro_rules! println_stderr(
($($val:tt)*) => { {
writeln!(&mut ::std::io::stderr(), $($val)*).unwrap();
} }
);
*/

#[cfg(feature = "vector_scratch_space")]
const vectorize_population_cost: bool = true;

#[cfg(not(feature = "vector_scratch_space"))]
const vectorize_population_cost: bool = false;

#[allow(clippy::excessive_precision)]
fn CostComputation<T: SliceWrapper<Mem256i>>(
depth_histo: &mut [u32; BROTLI_CODE_LENGTH_CODES],
Expand Down Expand Up @@ -85,8 +75,6 @@ fn CostComputation<T: SliceWrapper<Mem256i>>(
bits
}

use alloc::SliceWrapperMut;

pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
histogram: &HistogramType,
nnz_data: &mut HistogramType::i32vec,
Expand All @@ -95,72 +83,68 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
static kTwoSymbolHistogramCost: floatX = 20.0;
static kThreeSymbolHistogramCost: floatX = 28.0;
static kFourSymbolHistogramCost: floatX = 37.0;

let data_size: usize = histogram.slice().len();
let mut count: i32 = 0i32;
let mut count = 0;
let mut s: [usize; 5] = [0; 5];

let mut bits: floatX = 0.0;
let mut i: usize;
if histogram.total_count() == 0usize {

if histogram.total_count() == 0 {
return kOneSymbolHistogramCost;
}
i = 0usize;
'break1: while i < data_size {
{
if histogram.slice()[i] > 0u32 {
s[count as usize] = i;
count += 1;
if count > 4i32 {
break 'break1;
}
for i in 0..data_size {
if histogram.slice()[i] > 0 {
s[count] = i;
count += 1;
if count > 4 {
break;
}
}
i = i.wrapping_add(1);
}
if count == 1i32 {
return kOneSymbolHistogramCost;
}
if count == 2i32 {
return kTwoSymbolHistogramCost + (histogram.total_count() as floatX);
}
if count == 3i32 {
let histo0: u32 = histogram.slice()[s[0]];
let histo1: u32 = histogram.slice()[s[1]];
let histo2: u32 = histogram.slice()[s[2]];
let histomax: u32 = max(histo0, max(histo1, histo2));
return kThreeSymbolHistogramCost
+ ((2u32).wrapping_mul(histo0.wrapping_add(histo1).wrapping_add(histo2)) as floatX)
- (histomax as floatX);
}
if count == 4i32 {
let mut histo: [u32; 4] = [0; 4];

for i in 0usize..4usize {
histo[i] = histogram.slice()[s[i]];
match count {
1 => return kOneSymbolHistogramCost,
2 => return kTwoSymbolHistogramCost + histogram.total_count() as floatX,
3 => {
let histo0: u32 = histogram.slice()[s[0]];
let histo1: u32 = histogram.slice()[s[1]];
let histo2: u32 = histogram.slice()[s[2]];
let histomax: u32 = max(histo0, max(histo1, histo2));
return kThreeSymbolHistogramCost
+ (2u32).wrapping_mul(histo0.wrapping_add(histo1).wrapping_add(histo2)) as floatX
- histomax as floatX;
}
for i in 0..4 {
for j in i + 1..4 {
if histo[j] > histo[i] {
histo.swap(j, i);
4 => {
let mut histo: [u32; 4] = [0; 4];

for i in 0..4 {
histo[i] = histogram.slice()[s[i]];
}
for i in 0..4 {
for j in i + 1..4 {
if histo[j] > histo[i] {
histo.swap(j, i);
}
}
}
let h23: u32 = histo[2].wrapping_add(histo[3]);
let histomax: u32 = max(h23, histo[0]);
return kFourSymbolHistogramCost
+ (3u32).wrapping_mul(h23) as floatX
+ (2u32).wrapping_mul(histo[0].wrapping_add(histo[1])) as floatX
- histomax as floatX;
}
let h23: u32 = histo[2].wrapping_add(histo[3]);
let histomax: u32 = max(h23, histo[0]);
return kFourSymbolHistogramCost
+ ((3u32).wrapping_mul(h23) as floatX)
+ ((2u32).wrapping_mul(histo[0].wrapping_add(histo[1])) as floatX)
- (histomax as floatX);
_ => {}
}
if vectorize_population_cost {

if cfg!(feature = "vector_scratch_space") {
// vectorization failed: it's faster to do things inline than split into two loops
let mut nnz: usize = 0;
let mut depth_histo = [0u32; 18];
let total_count = histogram.total_count() as floatX;
let log2total = FastLog2(histogram.total_count() as u64);
i = 0usize;
let mut i: usize = 0;
while i < data_size {
if histogram.slice()[i] > 0u32 {
if histogram.slice()[i] > 0 {
let nnz_val = &mut nnz_data.slice_mut()[nnz >> 3];
nnz_val[nnz & 7] = histogram.slice()[i] as i32;
i += 1;
Expand All @@ -178,14 +162,14 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
break;
}
if reps < 3 {
depth_histo[0] += reps
depth_histo[0] += reps;
} else {
reps -= 2;
let mut depth_histo_adds: u32 = 0;
while reps > 0u32 {
while reps > 0 {
depth_histo_adds += 1;
bits += 3.0;
reps >>= 3i32;
reps >>= 3;
}
depth_histo[BROTLI_REPEAT_ZERO_CODE_LENGTH] += depth_histo_adds;
}
Expand All @@ -195,7 +179,7 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
} else {
let mut max_depth: usize = 1;
let mut depth_histo = [0u32; 18];
let log2total = FastLog2(histogram.total_count() as u64); // 64 bit here
let log2total: floatX = FastLog2(histogram.total_count() as u64); // 64 bit here
let mut reps: u32 = 0;
for histo in histogram.slice()[..data_size].iter() {
if *histo != 0 {
Expand All @@ -204,7 +188,7 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
depth_histo[0] += reps;
} else {
reps -= 2;
while reps > 0u32 {
while reps > 0 {
depth_histo[17] += 1;
bits += 3.0;
reps >>= 3;
Expand All @@ -213,8 +197,8 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
reps = 0;
}
let log2p = log2total - FastLog2u16(*histo as u16);
let mut depth: usize = (log2p + 0.5) as usize;
bits += (*histo as floatX) * log2p;
let mut depth = (log2p + 0.5) as usize;
bits += *histo as floatX * log2p;
depth = min(depth, 15);
max_depth = max(depth, max_depth);
depth_histo[depth] += 1;
Expand All @@ -223,17 +207,7 @@ pub fn BrotliPopulationCost<HistogramType: SliceWrapper<u32> + CostAccessors>(
}
}
bits += (18usize).wrapping_add((2usize).wrapping_mul(max_depth)) as floatX;
bits += BitsEntropy(&depth_histo[..], 18usize);
bits += BitsEntropy(&depth_histo[..], 18);
}
bits
}
/*
fn HistogramDataSizeCommand() -> usize {
704usize
}*/

/*
fn HistogramDataSizeDistance() -> usize {
520usize
}
*/
18 changes: 6 additions & 12 deletions src/enc/encode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1724,7 +1724,6 @@ fn ChooseContextMap(
let mut two_prefix_histo = [0u32; 6];

let mut i: usize;
let mut dummy: usize = 0;
let mut entropy = [0.0 as floatX; 4];
i = 0usize;
while i < 9usize {
Expand All @@ -1742,16 +1741,12 @@ fn ChooseContextMap(
}
i = i.wrapping_add(1);
}
entropy[1] = ShannonEntropy(&monogram_histo[..], 3usize, &mut dummy);
entropy[2] = ShannonEntropy(&two_prefix_histo[..], 3usize, &mut dummy)
+ ShannonEntropy(&two_prefix_histo[3..], 3usize, &mut dummy);
entropy[1] = ShannonEntropy(&monogram_histo[..], 3).0;
entropy[2] =
ShannonEntropy(&two_prefix_histo[..], 3).0 + ShannonEntropy(&two_prefix_histo[3..], 3).0;
entropy[3] = 0.0;
for i in 0usize..3usize {
entropy[3] += ShannonEntropy(
&bigram_histo[(3usize).wrapping_mul(i)..],
3usize,
&mut dummy,
);
entropy[3] += ShannonEntropy(&bigram_histo[(3usize).wrapping_mul(i)..], 3).0;
}
let total: usize = monogram_histo[0]
.wrapping_add(monogram_histo[1])
Expand Down Expand Up @@ -1818,7 +1813,6 @@ fn ShouldUseComplexStaticContextMap(
let mut context_histo: [[u32; 32]; 13] = [[0; 32]; 13];
let mut total = 0u32;
let mut entropy = [0.0 as floatX; 3];
let mut dummy = 0usize;
let utf8_lut = BROTLI_CONTEXT_LUT(ContextType::CONTEXT_UTF8);
while start_pos + 64 <= end_pos {
let stride_end_pos = start_pos + 64;
Expand All @@ -1840,11 +1834,11 @@ fn ShouldUseComplexStaticContextMap(
}
start_pos += 4096;
}
entropy[1] = ShannonEntropy(&combined_histo[..], 32, &mut dummy);
entropy[1] = ShannonEntropy(&combined_histo[..], 32).0;
entropy[2] = 0.0;
for i in 0..13 {
assert!(i < 13);
entropy[2] += ShannonEntropy(&context_histo[i][..], 32, &mut dummy);
entropy[2] += ShannonEntropy(&context_histo[i][..], 32).0;
}
entropy[0] = 1.0 / (total as floatX);
entropy[1] *= entropy[0];
Expand Down

0 comments on commit 85196be

Please sign in to comment.