SIMD ColorLUT. Fix color overflow

uploadcare · Sep 23, 2024 · 6c188c0 · 6c188c0
1 parent ff3158d
commit 6c188c0
Show file tree

Hide file tree

Showing 4 changed files with 14 additions and 8 deletions.
diff --git a/CHANGES.SIMD.rst b/CHANGES.SIMD.rst
@@ -1,13 +1,17 @@
 Changelog (Pillow-SIMD)
 =======================
 
+9.5.0.post2 & 9.4.0.post2 & 9.3.0.post2 & 9.2.0.post2 & 9.1.1.post2 & 9.0.0.post3
+---------------------------------------------------------------------------------
+
+- Fixed color overflow in LUT processing
+
 9.0.0.post1
 -----------
 
 - Fixed possible overflow in LUT processing
 - Restored compatibility with Visual C Compiler
 
-
 7.0.0.post4
 -----------
 

diff --git a/src/PIL/_version.py b/src/PIL/_version.py
@@ -1,2 +1,2 @@
 # Master version for Pillow
-__version__ = "9.5.0.post1"
+__version__ = "9.5.0.post2"
diff --git a/src/_imaging.c b/src/_imaging.c
@@ -760,6 +760,7 @@ _prepare_lut_table(PyObject *table, Py_ssize_t table_size) {
 
 /* NOTE: This value should be the same as in ColorLUT.c */
 #define PRECISION_BITS (16 - 8 - 2)
+#define PRECISION_ROUNDING (1 << (PRECISION_BITS - 1))
 
     const char *wrong_size =
         ("The table should have table_channels * "
@@ -824,8 +825,8 @@ _prepare_lut_table(PyObject *table, Py_ssize_t table_size) {
                 break;
         }
         /* Max value for INT16 */
-        if (item >= (0x7fff - 0.5) / (255 << PRECISION_BITS)) {
-            prepared[i] = 0x7fff;
+        if (item >= (0x7fff - 0.5 - PRECISION_ROUNDING) / (255 << PRECISION_BITS)) {
+            prepared[i] = 0x7fff - PRECISION_ROUNDING;
             continue;
         }
         /* Min value for INT16 */
@@ -841,6 +842,7 @@ _prepare_lut_table(PyObject *table, Py_ssize_t table_size) {
     }
 
 #undef PRECISION_BITS
+#undef PRECISION_ROUNDING
     if (free_table_data) {
         free(table_data);
     }

diff --git a/src/libImaging/ColorLUT.c b/src/libImaging/ColorLUT.c
@@ -233,7 +233,7 @@ ImagingColorLUT3D_linear(
             __m128i source = _mm_loadu_si128((__m128i *) &rowIn[x]);
             // scale up to 16 bits, but scale * 255 * 256 up to 31 bits
             // bi, gi and ri - 6 bits index
-            // rs, rs and rs - 9 bits shift
+            // bs, gs and rs - 9 bits shift
             // 00 bi3.bs3 gi3.gs3 ri3.rs3 00 bi2.bs2 gi2.gs2 ri2.rs2
             // 00 bi1.bs1 gi1.gs1 ri1.rs1 00 bi0.bs0 gi0.gs0 ri0.rs0
             __m256i index = _mm256_mulhi_epu16(scale256,
@@ -248,7 +248,7 @@ ImagingColorLUT3D_linear(
                 __m128i next_source = _mm_loadu_si128((__m128i *) &rowIn[x + 4]);
                 // scale up to 16 bits, but scale * 255 * 256 up to 31 bits
                 // bi, gi and ri - 6 bits index
-                // rs, rs and rs - 9 bits shift
+                // bs, gs and rs - 9 bits shift
                 // 00 bi3.bs3 gi3.gs3 ri3.rs3 00 bi2.bs2 gi2.gs2 ri2.rs2
                 // 00 bi1.bs1 gi1.gs1 ri1.rs1 00 bi0.bs0 gi0.gs0 ri0.rs0
                 __m256i next_index = _mm256_mulhi_epu16(scale256,
@@ -332,7 +332,7 @@ ImagingColorLUT3D_linear(
             __m128i source = _mm_loadl_epi64((__m128i *) &rowIn[x]);
             // scale up to 16 bits, but scale * 255 * 256 up to 31 bits
             // bi, gi and ri - 6 bits index
-            // rs, rs and rs - 9 bits shift
+            // bs, gs and rs - 9 bits shift
             // 00 bi1.bs1 gi1.gs1 ri1.rs1 00 bi0.bs0 gi0.gs0 ri0.rs0
             __m128i index = _mm_mulhi_epu16(scale,
                 _mm_unpacklo_epi8(_mm_setzero_si128(), source));
@@ -402,7 +402,7 @@ ImagingColorLUT3D_linear(
             __m128i source = _mm_cvtsi32_si128(rowIn[x]);
             // scale up to 16 bits, but scale * 255 * 256 up to 31 bits
             // bi, gi and ri - 6 bits index
-            // rs, rs and rs - 9 bits shift
+            // bs, gs and rs - 9 bits shift
             // 00 00 00 00 00 bi.bs gi.gs ri.rs
             __m128i index = _mm_mulhi_epu16(scale,
                 _mm_unpacklo_epi8(_mm_setzero_si128(), source));