From 5e5076f64cae3fec2d9789af357ea54a5ef13135 Mon Sep 17 00:00:00 2001
From: Thibaut Vandervelden <thvdveld@vub.be>
Date: Mon, 15 Jul 2024 13:00:43 +0200
Subject: [PATCH] Use LUT for linear to sRGB conversion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Using a LUT for the linear to sRGB conversion improves decoding
performance for the decode_into benchmark:

```
decode_into LGF5]+Yk^6#M@-5c,1J5@[or[Q6./200
    time:   [766.58 µs 769.86 µs 776.05 µs]
    change: [-62.967% -62.784% -62.602%] (p = 0.00 < 0.05)
    Performance has improved.
```

However, the LUT holds 8,192 `u8` entries (8 KiB). To keep that cost
optional, a `fast-linear-to-srgb` feature flag is added and enabled by
default. When binary size matters more than performance, the feature
can be disabled.

A note on accuracy: the lookup table quantizes its input, so the
conversion is slightly less accurate than the original implementation.
All current tests still pass, and higher accuracy can be achieved by
increasing the LUT size.
---
 Cargo.toml  |  3 ++-
 build.rs    | 36 ++++++++++++++++++++++++++++++++++--
 src/util.rs | 10 ++++++++++
 3 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 320bc83..5c53e6a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,9 +19,10 @@ image = ">= 0.23, <= 0.25"
 criterion = "0.5"
 
 [features]
-default = []
+default = ["fast-linear-to-srgb"]
 image = [ "dep:image" ]
 gdk-pixbuf = [ "dep:gdk-pixbuf" ]
+fast-linear-to-srgb = []
 
 [[bench]]
 name = "decode"
diff --git a/build.rs b/build.rs
index 7601ba3..a58f6cd 100644
--- a/build.rs
+++ b/build.rs
@@ -1,5 +1,25 @@
 use std::io::Write;
 
+const LINEAR_TO_SRGB_LOOKUP_SIZE: usize = 8192;
+
+/// Linear 0.0-1.0 float (clamped) to sRGB 0-255 integer; used to fill the table.
+fn linear_to_srgb(value: f32) -> u8 {
+    let scaled = match value.clamp(0., 1.) {
+        lin if lin <= 0.003_130_8 => lin * 12.92 * 255. + 0.5,
+        lin => (1.055 * 255.) * lin.powf(1. / 2.4) - (0.055 * 255. - 0.5),
+    };
+    scaled.round() as u8
+}
+
+/// Builds the linear-to-sRGB table: entry `i` is the conversion of `i / (SIZE - 1)`.
+fn generate_linear_to_srgb_lookup() -> [u8; LINEAR_TO_SRGB_LOOKUP_SIZE] {
+    let mut lookup = [0u8; LINEAR_TO_SRGB_LOOKUP_SIZE];
+    for (i, entry) in lookup.iter_mut().enumerate() {
+        *entry = linear_to_srgb(i as f32 / (LINEAR_TO_SRGB_LOOKUP_SIZE - 1) as f32);
+    }
+    lookup
+}
+
 /// srgb 0-255 integer to linear 0.0-1.0 floating point conversion.
 pub fn srgb_to_linear(value: u8) -> f32 {
     let v = value as f32 / 255.;
@@ -19,8 +39,20 @@ fn generate_srgb_lookup() -> [f32; 256] {
 }
 
 fn write_srgb(f: &mut std::fs::File) {
-    let table = generate_srgb_lookup();
-    writeln!(f, "static SRGB_LOOKUP: [f32; 256] = {:?};", table).unwrap();
+    writeln!(
+        f,
+        "
+        static SRGB_LOOKUP: [f32; 256] = {:?};
+        #[cfg(feature = \"fast-linear-to-srgb\")]
+        const LINEAR_TO_SRGB_LOOKUP_SIZE: usize = {};
+        #[cfg(feature = \"fast-linear-to-srgb\")]
+        static LINEAR_TO_SRGB_LOOKUP: [u8; LINEAR_TO_SRGB_LOOKUP_SIZE] = {:?};
+        ",
+        generate_srgb_lookup(),
+        LINEAR_TO_SRGB_LOOKUP_SIZE,
+        generate_linear_to_srgb_lookup() // always built; cfg-gated in the generated file
+    )
+    .unwrap(); // panics if writing to `f` fails
 }
 
 fn write_base83(f: &mut std::fs::File) {
diff --git a/src/util.rs b/src/util.rs
index 8cb9db2..2c4d5c4 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,6 +1,7 @@
 include!(concat!(env!("OUT_DIR"), "/srgb_lookup.rs"));
 
 /// linear 0.0-1.0 floating point to srgb 0-255 integer conversion.
+#[cfg(not(feature = "fast-linear-to-srgb"))]
 pub fn linear_to_srgb(value: f32) -> u8 {
     let v = value.clamp(0., 1.);
     if v <= 0.003_130_8 {
@@ -13,6 +14,15 @@ pub fn linear_to_srgb(value: f32) -> u8 {
     }
 }
 
+/// linear 0.0-1.0 floating point (clamped) to srgb 0-255 integer conversion via lookup.
+#[cfg(feature = "fast-linear-to-srgb")]
+pub fn linear_to_srgb(value: f32) -> u8 {
+    // Table entry `i` was generated for the input `i / (SIZE - 1)`, so scale by
+    // `SIZE - 1` and add 0.5 before truncating to pick the nearest entry.
+    let last = LINEAR_TO_SRGB_LOOKUP_SIZE - 1;
+    LINEAR_TO_SRGB_LOOKUP[((value.clamp(0.0, 1.0) * last as f32 + 0.5) as usize).min(last)]
+}
+
 /// srgb 0-255 integer to linear 0.0-1.0 floating point conversion.
 pub fn srgb_to_linear(value: u8) -> f32 {
     SRGB_LOOKUP[value as usize]