From 8c47d7412089aa45c47c52b0048d37421876b580 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sun, 15 Oct 2023 16:39:11 -0700 Subject: [PATCH 1/6] `fn generate_scaling`: Deduplicate w/ generics. --- lib.rs | 1 + src/fg_apply.rs | 87 +++++++++++++++++++++++++++++++++++++++++ src/fg_apply_tmpl_16.rs | 76 +++-------------------------------- src/fg_apply_tmpl_8.rs | 50 +++-------------------- 4 files changed, 98 insertions(+), 116 deletions(-) create mode 100644 src/fg_apply.rs diff --git a/lib.rs b/lib.rs index 0653cd433..e7e7a985e 100644 --- a/lib.rs +++ b/lib.rs @@ -48,6 +48,7 @@ pub mod src { mod dequant_tables; mod env; pub(crate) mod error; + mod fg_apply; #[cfg(feature = "bitdepth_16")] mod fg_apply_tmpl_16; #[cfg(feature = "bitdepth_8")] diff --git a/src/fg_apply.rs b/src/fg_apply.rs new file mode 100644 index 000000000..d92876a33 --- /dev/null +++ b/src/fg_apply.rs @@ -0,0 +1,87 @@ +use crate::include::common::bitdepth::BitDepth; +use crate::include::common::bitdepth::BPC; +use libc::memset; +use std::ffi::c_int; +use std::ffi::c_void; + +// TODO(kkysen) temporarily pub until mod is deduplicated +pub(crate) unsafe fn generate_scaling( + bitdepth: c_int, + points: *const [u8; 2], + num: c_int, + scaling: *mut u8, +) { + let (shift_x, scaling_size) = match BD::BPC { + BPC::BPC8 => (0, 256), + BPC::BPC16 => { + if !(bitdepth > 8) { + unreachable!(); + } + let shift_x = bitdepth - 8; + let scaling_size = (1 as c_int) << bitdepth; + (shift_x, scaling_size) + } + }; + if num == 0 { + memset(scaling as *mut c_void, 0 as c_int, scaling_size as usize); + return; + } + memset( + scaling as *mut c_void, + (*points.offset(0))[1] as c_int, + (((*points.offset(0))[0] as c_int) << shift_x) as usize, + ); + let mut i = 0; + while i < num - 1 { + let bx = (*points.offset(i as isize))[0] as c_int; + let by = (*points.offset(i as isize))[1] as c_int; + let ex = (*points.offset((i + 1) as isize))[0] as c_int; + let ey = (*points.offset((i + 1) as isize))[1] as c_int; + let dx = ex - bx; + let dy = ey - by; + if !(dx > 0) { + unreachable!(); + } + let delta = dy * ((0x10000 + (dx >> 1)) / dx); + let mut x = 0; + let mut d = 0x8000 as c_int; + while x < dx { + *scaling.offset((bx + x << shift_x) as isize) = (by + (d >> 16)) as u8; + d += delta; + x += 1; + } + i += 1; + } + let n = ((*points.offset((num - 1) as isize))[0] as c_int) << shift_x; + memset( + &mut *scaling.offset(n as isize) as *mut u8 as *mut c_void, + (*points.offset((num - 1) as isize))[1] as c_int, + (scaling_size - n) as usize, + ); + + if BD::BPC != BPC::BPC8 { + let pad = (1 as c_int) << shift_x; + let rnd = pad >> 1; + let mut i_0 = 0; + while i_0 < num - 1 { + let bx_0 = ((*points.offset(i_0 as isize))[0] as c_int) << shift_x; + let ex_0 = ((*points.offset((i_0 + 1) as isize))[0] as c_int) << shift_x; + let dx_0 = ex_0 - bx_0; + let mut x_0 = 0; + while x_0 < dx_0 { + let range = *scaling.offset((bx_0 + x_0 + pad) as isize) as c_int + - *scaling.offset((bx_0 + x_0) as isize) as c_int; + let mut n_0 = 1; + let mut r = rnd; + while n_0 < pad { + r += range; + *scaling.offset((bx_0 + x_0 + n_0) as isize) = + (*scaling.offset((bx_0 + x_0) as isize) as c_int + (r >> shift_x)) as u8; + n_0 += 1; + } + x_0 += pad; + } + i_0 += 1; + } + } +} diff --git a/src/fg_apply_tmpl_16.rs b/src/fg_apply_tmpl_16.rs index 8b39a31a8..55f22379c 100644 --- a/src/fg_apply_tmpl_16.rs +++ b/src/fg_apply_tmpl_16.rs @@ -1,3 +1,4 @@ +use crate::include::common::bitdepth::BitDepth16; use crate::include::dav1d::headers::Dav1dFilmGrainData; use crate::include::dav1d::headers::Rav1dFilmGrainData; use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; @@ -7,10 +8,10 @@ use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; use crate::src::align::Align1; use crate::src::align::Align16; +use crate::src::fg_apply::generate_scaling; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use libc::intptr_t; use libc::memcpy; -use libc::memset; use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; @@ -28,73 +29,6 @@ unsafe fn PXSTRIDE(x: ptrdiff_t) -> ptrdiff_t { return x >> 1; } -unsafe fn generate_scaling(bitdepth: c_int, points: *const [u8; 2], num: c_int, scaling: *mut u8) { - if !(bitdepth > 8) { - unreachable!(); - } - let shift_x = bitdepth - 8; - let scaling_size = (1 as c_int) << bitdepth; - if num == 0 { - memset(scaling as *mut c_void, 0 as c_int, scaling_size as usize); - return; - } - memset( - scaling as *mut c_void, - (*points.offset(0))[1] as c_int, - (((*points.offset(0))[0] as c_int) << shift_x) as usize, - ); - let mut i = 0; - while i < num - 1 { - let bx = (*points.offset(i as isize))[0] as c_int; - let by = (*points.offset(i as isize))[1] as c_int; - let ex = (*points.offset((i + 1) as isize))[0] as c_int; - let ey = (*points.offset((i + 1) as isize))[1] as c_int; - let dx = ex - bx; - let dy = ey - by; - if !(dx > 0) { - unreachable!(); - } - let delta = dy * ((0x10000 + (dx >> 1)) / dx); - let mut x = 0; - let mut d = 0x8000 as c_int; - while x < dx { - *scaling.offset((bx + x << shift_x) as isize) = (by + (d >> 16)) as u8; - d += delta; - x += 1; - } - i += 1; - } - let n = ((*points.offset((num - 1) as isize))[0] as c_int) << shift_x; - memset( - &mut *scaling.offset(n as isize) as *mut u8 as *mut c_void, - (*points.offset((num - 1) as isize))[1] as c_int, - (scaling_size - n) as usize, - ); - let pad = (1 as c_int) << shift_x; - let rnd = pad >> 1; - let mut i_0 = 0; - while i_0 < num - 1 { - let bx_0 = ((*points.offset(i_0 as isize))[0] as c_int) << shift_x; - let ex_0 = ((*points.offset((i_0 + 1) as isize))[0] as c_int) << shift_x; - let dx_0 = ex_0 - bx_0; - let mut x_0 = 0; - while x_0 < dx_0 { - let range = *scaling.offset((bx_0 + x_0 + pad) as isize) as c_int - - *scaling.offset((bx_0 + x_0) as isize) as c_int; - let mut n_0 = 1; - let mut r = rnd; - while n_0 < pad { - r += range; - *scaling.offset((bx_0 + x_0 + n_0) as isize) = - (*scaling.offset((bx_0 + x_0) as isize) as c_int + (r >> shift_x)) as u8; - n_0 += 1; - } - x_0 += pad; - } - i_0 += 1; - } -} - pub(crate) unsafe fn rav1d_prep_grain_16bpc( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, @@ -132,7 +66,7 @@ pub(crate) unsafe fn rav1d_prep_grain_16bpc( ); } if (*data).num_y_points != 0 || (*data).chroma_scaling_from_luma != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).y_points).as_ptr(), (*data).num_y_points, @@ -140,7 +74,7 @@ pub(crate) unsafe fn rav1d_prep_grain_16bpc( ); } if (*data).num_uv_points[0] != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).uv_points[0]).as_ptr(), (*data).num_uv_points[0], @@ -148,7 +82,7 @@ pub(crate) unsafe fn rav1d_prep_grain_16bpc( ); } if (*data).num_uv_points[1] != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).uv_points[1]).as_ptr(), (*data).num_uv_points[1], diff --git a/src/fg_apply_tmpl_8.rs b/src/fg_apply_tmpl_8.rs index 92b647073..3d12aa032 100644 --- a/src/fg_apply_tmpl_8.rs +++ b/src/fg_apply_tmpl_8.rs @@ -1,3 +1,4 @@ +use crate::include::common::bitdepth::BitDepth8; use crate::include::dav1d::headers::Dav1dFilmGrainData; use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; @@ -5,11 +6,11 @@ use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; use crate::src::align::Align16; +use crate::src::fg_apply::generate_scaling; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use cfg_if::cfg_if; use libc::intptr_t; use libc::memcpy; -use libc::memset; use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; @@ -19,47 +20,6 @@ use std::ffi::c_void; pub type pixel = u8; pub type entry = i8; -unsafe fn generate_scaling(_bitdepth: c_int, points: *const [u8; 2], num: c_int, scaling: *mut u8) { - let shift_x = 0; - let scaling_size = 256; - if num == 0 { - memset(scaling as *mut c_void, 0 as c_int, scaling_size as usize); - return; - } - memset( - scaling as *mut c_void, - (*points.offset(0))[1] as c_int, - (((*points.offset(0))[0] as c_int) << shift_x) as usize, - ); - let mut i = 0; - while i < num - 1 { - let bx = (*points.offset(i as isize))[0] as c_int; - let by = (*points.offset(i as isize))[1] as c_int; - let ex = (*points.offset((i + 1) as isize))[0] as c_int; - let ey = (*points.offset((i + 1) as isize))[1] as c_int; - let dx = ex - bx; - let dy = ey - by; - if !(dx > 0) { - unreachable!(); - } - let delta = dy * ((0x10000 + (dx >> 1)) / dx); - let mut x = 0; - let mut d = 0x8000 as c_int; - while x < dx { - *scaling.offset((bx + x << shift_x) as isize) = (by + (d >> 16)) as u8; - d += delta; - x += 1; - } - i += 1; - } - let n = ((*points.offset((num - 1) as isize))[0] as c_int) << shift_x; - memset( - &mut *scaling.offset(n as isize) as *mut u8 as *mut c_void, - (*points.offset((num - 1) as isize))[1] as c_int, - (scaling_size - n) as usize, - ); -} - pub(crate) unsafe fn rav1d_prep_grain_8bpc( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, @@ -96,7 +56,7 @@ pub(crate) unsafe fn rav1d_prep_grain_8bpc( ); } if (*data).num_y_points != 0 || (*data).chroma_scaling_from_luma != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).y_points).as_ptr(), (*data).num_y_points, @@ -104,7 +64,7 @@ pub(crate) unsafe fn rav1d_prep_grain_8bpc( ); } if (*data).num_uv_points[0] != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).uv_points[0]).as_ptr(), (*data).num_uv_points[0], @@ -112,7 +72,7 @@ pub(crate) unsafe fn rav1d_prep_grain_8bpc( ); } if (*data).num_uv_points[1] != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).uv_points[1]).as_ptr(), (*data).num_uv_points[1], From beafe6d62b1195f976f1005b90f512b9ad554856 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sun, 15 Oct 2023 16:55:43 -0700 Subject: [PATCH 2/6] `trait BitDepth`: Fix `Entry`, `Scaling`, `SCALING_SIZE` associated `type`s/`const`s to use correct names and bounds. --- include/common/bitdepth.rs | 30 +++++++++++++++--------------- src/internal.rs | 2 +- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/include/common/bitdepth.rs b/include/common/bitdepth.rs index 73817e9b1..20c16063c 100644 --- a/include/common/bitdepth.rs +++ b/include/common/bitdepth.rs @@ -108,6 +108,11 @@ pub trait BitDepth: Clone + Copy { + Add + Display; + type Entry; + + type Scaling: AsRef<[u8]> + AsMut<[u8]>; + const SCALING_SIZE: usize; + type BitDepthMax; type DisplayPixel: Display; @@ -174,11 +179,6 @@ pub trait BitDepth: Clone + Copy { T: BitDepthDependentType, T::T: Copy, T::T: Copy; - - type GrainLut; - type Scaling; - - const SCALING_LEN: usize; } #[derive(Clone, Copy)] @@ -195,6 +195,11 @@ impl BitDepth for BitDepth8 { type Coef = i16; + type Entry = i8; + + type Scaling = [u8; Self::SCALING_SIZE]; + const SCALING_SIZE: usize = 256; + type BitDepthMax = (); type DisplayPixel = DisplayPixel8; @@ -256,11 +261,6 @@ impl BitDepth for BitDepth8 { { bd.bpc8 } - - type GrainLut = i8; - type Scaling = [u8; Self::SCALING_LEN]; - - const SCALING_LEN: usize = 256; } #[derive(Clone, Copy)] @@ -276,6 +276,11 @@ impl BitDepth for BitDepth16 { type Coef = i32; + type Entry = i16; + + type Scaling = [u8; Self::SCALING_SIZE]; + const SCALING_SIZE: usize = 4096; + type BitDepthMax = Self::Pixel; type DisplayPixel = DisplayPixel16; @@ -341,11 +346,6 @@ impl BitDepth for BitDepth16 { { bd.bpc16 } - - type GrainLut = i16; - type Scaling = [u8; Self::SCALING_LEN]; - - const SCALING_LEN: usize = 4096; } pub struct DisplayPixel8(::Pixel); diff --git a/src/internal.rs b/src/internal.rs index d1bc3ff74..6338a8442 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -116,7 +116,7 @@ pub(crate) struct Rav1dContext_frame_thread { #[derive(Clone, Copy)] #[repr(C)] pub struct GrainLutScalingBD { - pub grain_lut: Align16<[[[BD::GrainLut; 82]; 73 + 1]; 3]>, + pub grain_lut: Align16<[[[BD::Entry; 82]; 73 + 1]; 3]>, // TODO(kkysen) can use `BD::SCALING_LEN`` directly with `#![feature(generic_const_exprs)]` when stabilized pub scaling: Align64<[BD::Scaling; 3]>, } From 3db7e8f702aae6dacabfdc7cf86e16b3d542958a Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sun, 15 Oct 2023 16:55:59 -0700 Subject: [PATCH 3/6] `fn rav1d_prep_grain`: Deduplicate w/ generics. --- src/fg_apply.rs | 136 +++++++++++++++++++++++++++++++++++++++- src/fg_apply_tmpl_16.rs | 133 +-------------------------------------- src/fg_apply_tmpl_8.rs | 131 +------------------------------------- src/thread_task.rs | 14 ++--- 4 files changed, 143 insertions(+), 271 deletions(-) diff --git a/src/fg_apply.rs b/src/fg_apply.rs index d92876a33..9676903f1 100644 --- a/src/fg_apply.rs +++ b/src/fg_apply.rs @@ -1,11 +1,19 @@ use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BPC; +use crate::include::dav1d::headers::Rav1dFilmGrainData; +use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; +use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; +use crate::include::dav1d::picture::Rav1dPicture; +use crate::src::filmgrain::Rav1dFilmGrainDSPContext; +use libc::intptr_t; +use libc::memcpy; use libc::memset; +use libc::ptrdiff_t; use std::ffi::c_int; +use std::ffi::c_uint; use std::ffi::c_void; -// TODO(kkysen) temporarily pub until mod is deduplicated -pub(crate) unsafe fn generate_scaling( +unsafe fn generate_scaling( bitdepth: c_int, points: *const [u8; 2], num: c_int, @@ -85,3 +93,127 @@ pub(crate) unsafe fn generate_scaling( } } } + +pub(crate) unsafe fn rav1d_prep_grain( + dsp: *const Rav1dFilmGrainDSPContext, + out: *mut Rav1dPicture, + in_0: *const Rav1dPicture, + scaling: *mut BD::Scaling, + grain_lut: *mut [[BD::Entry; 82]; 74], +) { + let data: *const Rav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; + let bitdepth_max = ((1 as c_int) << (*out).p.bpc) - 1; + ((*dsp).generate_grain_y).expect("non-null function pointer")( + (*grain_lut.offset(0)).as_mut_ptr().cast(), + data, + bitdepth_max, + ); + if (*data).num_uv_points[0] != 0 || (*data).chroma_scaling_from_luma != 0 { + ((*dsp).generate_grain_uv + [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) + .expect("non-null function pointer")( + (*grain_lut.offset(1)).as_mut_ptr().cast(), + (*grain_lut.offset(0)).as_mut_ptr().cast(), + data, + 0 as c_int as intptr_t, + bitdepth_max, + ); + } + if (*data).num_uv_points[1] != 0 || (*data).chroma_scaling_from_luma != 0 { + ((*dsp).generate_grain_uv + [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) + .expect("non-null function pointer")( + (*grain_lut.offset(2)).as_mut_ptr().cast(), + (*grain_lut.offset(0)).as_mut_ptr().cast(), + data, + 1 as c_int as intptr_t, + bitdepth_max, + ); + } + if (*data).num_y_points != 0 || (*data).chroma_scaling_from_luma != 0 { + generate_scaling::( + (*in_0).p.bpc, + ((*data).y_points).as_ptr(), + (*data).num_y_points, + (*scaling.offset(0)).as_mut().as_mut_ptr(), + ); + } + if (*data).num_uv_points[0] != 0 { + generate_scaling::( + (*in_0).p.bpc, + ((*data).uv_points[0]).as_ptr(), + (*data).num_uv_points[0], + (*scaling.offset(1)).as_mut().as_mut_ptr(), + ); + } + if (*data).num_uv_points[1] != 0 { + generate_scaling::( + (*in_0).p.bpc, + ((*data).uv_points[1]).as_ptr(), + (*data).num_uv_points[1], + (*scaling.offset(2)).as_mut().as_mut_ptr(), + ); + } + if !((*out).stride[0] == (*in_0).stride[0]) { + unreachable!(); + } + if (*data).num_y_points == 0 { + let stride: ptrdiff_t = (*out).stride[0]; + let sz: ptrdiff_t = (*out).p.h as isize * stride; + if sz < 0 { + memcpy( + ((*out).data[0] as *mut u8) + .offset(sz as isize) + .offset(-(stride as isize)) as *mut c_void, + ((*in_0).data[0] as *mut u8) + .offset(sz as isize) + .offset(-(stride as isize)) as *const c_void, + -sz as usize, + ); + } else { + memcpy((*out).data[0], (*in_0).data[0], sz as usize); + } + } + if (*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I400 as c_int as c_uint + && (*data).chroma_scaling_from_luma == 0 + { + if !((*out).stride[1] == (*in_0).stride[1]) { + unreachable!(); + } + let ss_ver = + ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; + let stride_0: ptrdiff_t = (*out).stride[1]; + let sz_0: ptrdiff_t = ((*out).p.h + ss_ver >> ss_ver) as isize * stride_0; + if sz_0 < 0 { + if (*data).num_uv_points[0] == 0 { + memcpy( + ((*out).data[1] as *mut u8) + .offset(sz_0 as isize) + .offset(-(stride_0 as isize)) as *mut c_void, + ((*in_0).data[1] as *mut u8) + .offset(sz_0 as isize) + .offset(-(stride_0 as isize)) as *const c_void, + -sz_0 as usize, + ); + } + if (*data).num_uv_points[1] == 0 { + memcpy( + ((*out).data[2] as *mut u8) + .offset(sz_0 as isize) + .offset(-(stride_0 as isize)) as *mut c_void, + ((*in_0).data[2] as *mut u8) + .offset(sz_0 as isize) + .offset(-(stride_0 as isize)) as *const c_void, + -sz_0 as usize, + ); + } + } else { + if (*data).num_uv_points[0] == 0 { + memcpy((*out).data[1], (*in_0).data[1], sz_0 as usize); + } + if (*data).num_uv_points[1] == 0 { + memcpy((*out).data[2], (*in_0).data[2], sz_0 as usize); + } + } + } +} diff --git a/src/fg_apply_tmpl_16.rs b/src/fg_apply_tmpl_16.rs index 55f22379c..d98e266c2 100644 --- a/src/fg_apply_tmpl_16.rs +++ b/src/fg_apply_tmpl_16.rs @@ -1,22 +1,17 @@ use crate::include::common::bitdepth::BitDepth16; use crate::include::dav1d::headers::Dav1dFilmGrainData; -use crate::include::dav1d::headers::Rav1dFilmGrainData; use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; use crate::src::align::Align1; use crate::src::align::Align16; -use crate::src::fg_apply::generate_scaling; +use crate::src::fg_apply::rav1d_prep_grain; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; -use libc::intptr_t; -use libc::memcpy; use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; -use std::ffi::c_void; pub type pixel = u16; pub type entry = i16; @@ -29,130 +24,6 @@ unsafe fn PXSTRIDE(x: ptrdiff_t) -> ptrdiff_t { return x >> 1; } -pub(crate) unsafe fn rav1d_prep_grain_16bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, - scaling: *mut [u8; 4096], - grain_lut: *mut [[entry; 82]; 74], -) { - let data: *const Rav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; - let bitdepth_max = ((1 as c_int) << (*out).p.bpc) - 1; - ((*dsp).generate_grain_y).expect("non-null function pointer")( - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - bitdepth_max, - ); - if (*data).num_uv_points[0] != 0 || (*data).chroma_scaling_from_luma != 0 { - ((*dsp).generate_grain_uv - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - (*grain_lut.offset(1)).as_mut_ptr().cast(), - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 0 as c_int as intptr_t, - bitdepth_max, - ); - } - if (*data).num_uv_points[1] != 0 || (*data).chroma_scaling_from_luma != 0 { - ((*dsp).generate_grain_uv - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - (*grain_lut.offset(2)).as_mut_ptr().cast(), - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 1 as c_int as intptr_t, - bitdepth_max, - ); - } - if (*data).num_y_points != 0 || (*data).chroma_scaling_from_luma != 0 { - generate_scaling::( - (*in_0).p.bpc, - ((*data).y_points).as_ptr(), - (*data).num_y_points, - (*scaling.offset(0)).as_mut_ptr(), - ); - } - if (*data).num_uv_points[0] != 0 { - generate_scaling::( - (*in_0).p.bpc, - ((*data).uv_points[0]).as_ptr(), - (*data).num_uv_points[0], - (*scaling.offset(1)).as_mut_ptr(), - ); - } - if (*data).num_uv_points[1] != 0 { - generate_scaling::( - (*in_0).p.bpc, - ((*data).uv_points[1]).as_ptr(), - (*data).num_uv_points[1], - (*scaling.offset(2)).as_mut_ptr(), - ); - } - if !((*out).stride[0] == (*in_0).stride[0]) { - unreachable!(); - } - if (*data).num_y_points == 0 { - let stride: ptrdiff_t = (*out).stride[0]; - let sz: ptrdiff_t = (*out).p.h as isize * stride; - if sz < 0 { - memcpy( - ((*out).data[0] as *mut u8) - .offset(sz as isize) - .offset(-(stride as isize)) as *mut c_void, - ((*in_0).data[0] as *mut u8) - .offset(sz as isize) - .offset(-(stride as isize)) as *const c_void, - -sz as usize, - ); - } else { - memcpy((*out).data[0], (*in_0).data[0], sz as usize); - } - } - if (*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I400 as c_int as c_uint - && (*data).chroma_scaling_from_luma == 0 - { - if !((*out).stride[1] == (*in_0).stride[1]) { - unreachable!(); - } - let ss_ver = - ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; - let stride_0: ptrdiff_t = (*out).stride[1]; - let sz_0: ptrdiff_t = ((*out).p.h + ss_ver >> ss_ver) as isize * stride_0; - if sz_0 < 0 { - if (*data).num_uv_points[0] == 0 { - memcpy( - ((*out).data[1] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *mut c_void, - ((*in_0).data[1] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *const c_void, - -sz_0 as usize, - ); - } - if (*data).num_uv_points[1] == 0 { - memcpy( - ((*out).data[2] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *mut c_void, - ((*in_0).data[2] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *const c_void, - -sz_0 as usize, - ); - } - } else { - if (*data).num_uv_points[0] == 0 { - memcpy((*out).data[1], (*in_0).data[1], sz_0 as usize); - } - if (*data).num_uv_points[1] == 0 { - memcpy((*out).data[2], (*in_0).data[2], sz_0 as usize); - } - } - } -} - pub(crate) unsafe fn rav1d_apply_grain_row_16bpc( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, @@ -270,7 +141,7 @@ pub(crate) unsafe fn rav1d_apply_grain_16bpc( let mut grain_lut = Align16([[[0; 82]; 74]; 3]); let mut scaling = Align1([[0; 4096]; 3]); let rows = (*out).p.h + 31 >> 5; - rav1d_prep_grain_16bpc( + rav1d_prep_grain::( dsp, out, in_0, diff --git a/src/fg_apply_tmpl_8.rs b/src/fg_apply_tmpl_8.rs index 3d12aa032..1f9fb51d4 100644 --- a/src/fg_apply_tmpl_8.rs +++ b/src/fg_apply_tmpl_8.rs @@ -1,148 +1,21 @@ use crate::include::common::bitdepth::BitDepth8; use crate::include::dav1d::headers::Dav1dFilmGrainData; use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; use crate::src::align::Align16; -use crate::src::fg_apply::generate_scaling; +use crate::src::fg_apply::rav1d_prep_grain; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use cfg_if::cfg_if; -use libc::intptr_t; -use libc::memcpy; use libc::ptrdiff_t; use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; -use std::ffi::c_void; pub type pixel = u8; pub type entry = i8; -pub(crate) unsafe fn rav1d_prep_grain_8bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, - scaling: *mut [u8; 256], - grain_lut: *mut [[entry; 82]; 74], -) { - let data: *const Dav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; - ((*dsp).generate_grain_y).expect("non-null function pointer")( - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 8, - ); - if (*data).num_uv_points[0] != 0 || (*data).chroma_scaling_from_luma != 0 { - ((*dsp).generate_grain_uv - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - (*grain_lut.offset(1)).as_mut_ptr().cast(), - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 0 as c_int as intptr_t, - 8, - ); - } - if (*data).num_uv_points[1] != 0 || (*data).chroma_scaling_from_luma != 0 { - ((*dsp).generate_grain_uv - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - (*grain_lut.offset(2)).as_mut_ptr().cast(), - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 1 as c_int as intptr_t, - 8, - ); - } - if (*data).num_y_points != 0 || (*data).chroma_scaling_from_luma != 0 { - generate_scaling::( - (*in_0).p.bpc, - ((*data).y_points).as_ptr(), - (*data).num_y_points, - (*scaling.offset(0)).as_mut_ptr(), - ); - } - if (*data).num_uv_points[0] != 0 { - generate_scaling::( - (*in_0).p.bpc, - ((*data).uv_points[0]).as_ptr(), - (*data).num_uv_points[0], - (*scaling.offset(1)).as_mut_ptr(), - ); - } - if (*data).num_uv_points[1] != 0 { - generate_scaling::( - (*in_0).p.bpc, - ((*data).uv_points[1]).as_ptr(), - (*data).num_uv_points[1], - (*scaling.offset(2)).as_mut_ptr(), - ); - } - if !((*out).stride[0] == (*in_0).stride[0]) { - unreachable!(); - } - if (*data).num_y_points == 0 { - let stride: ptrdiff_t = (*out).stride[0]; - let sz: ptrdiff_t = (*out).p.h as isize * stride; - if sz < 0 { - memcpy( - ((*out).data[0] as *mut u8) - .offset(sz as isize) - .offset(-(stride as isize)) as *mut c_void, - ((*in_0).data[0] as *mut u8) - .offset(sz as isize) - .offset(-(stride as isize)) as *const c_void, - -sz as usize, - ); - } else { - memcpy((*out).data[0], (*in_0).data[0], sz as usize); - } - } - if (*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I400 as c_int as c_uint - && (*data).chroma_scaling_from_luma == 0 - { - if !((*out).stride[1] == (*in_0).stride[1]) { - unreachable!(); - } - let ss_ver = - ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; - let stride_0: ptrdiff_t = (*out).stride[1]; - let sz_0: ptrdiff_t = ((*out).p.h + ss_ver >> ss_ver) as isize * stride_0; - if sz_0 < 0 { - if (*data).num_uv_points[0] == 0 { - memcpy( - ((*out).data[1] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *mut c_void, - ((*in_0).data[1] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *const c_void, - -sz_0 as usize, - ); - } - if (*data).num_uv_points[1] == 0 { - memcpy( - ((*out).data[2] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *mut c_void, - ((*in_0).data[2] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *const c_void, - -sz_0 as usize, - ); - } - } else { - if (*data).num_uv_points[0] == 0 { - memcpy((*out).data[1], (*in_0).data[1], sz_0 as usize); - } - if (*data).num_uv_points[1] == 0 { - memcpy((*out).data[2], (*in_0).data[2], sz_0 as usize); - } - } - } -} - pub(crate) unsafe fn rav1d_apply_grain_row_8bpc( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, @@ -269,7 +142,7 @@ pub(crate) unsafe fn rav1d_apply_grain_8bpc( } } let rows = (*out).p.h + 31 >> 5; - rav1d_prep_grain_8bpc( + rav1d_prep_grain::( dsp, out, in_0, diff --git a/src/thread_task.rs b/src/thread_task.rs index 209c4b553..8dbb27197 100644 --- a/src/thread_task.rs +++ b/src/thread_task.rs @@ -16,6 +16,7 @@ use crate::src::error::Rav1dError::EGeneric; use crate::src::error::Rav1dError::EINVAL; use crate::src::error::Rav1dError::ENOMEM; use crate::src::error::Rav1dResult; +use crate::src::fg_apply::rav1d_prep_grain; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameContext; use crate::src::internal::Rav1dTask; @@ -52,15 +53,10 @@ use std::ffi::c_void; use std::process::abort; #[cfg(feature = "bitdepth_8")] -use crate::{ - src::fg_apply_tmpl_8::rav1d_apply_grain_row_8bpc, src::fg_apply_tmpl_8::rav1d_prep_grain_8bpc, -}; +use crate::src::fg_apply_tmpl_8::rav1d_apply_grain_row_8bpc; #[cfg(feature = "bitdepth_16")] -use crate::{ - src::fg_apply_tmpl_16::rav1d_apply_grain_row_16bpc, - src::fg_apply_tmpl_16::rav1d_prep_grain_16bpc, -}; +use crate::src::fg_apply_tmpl_16::rav1d_apply_grain_row_16bpc; #[cfg(target_os = "linux")] use libc::prctl; @@ -740,7 +736,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 8 => { let grain_lut_scaling = BitDepth8::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_prep_grain_8bpc( + rav1d_prep_grain::( &(*((*c).dsp).as_ptr().offset(0)).fg, out, in_0, @@ -752,7 +748,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 10 | 12 => { let grain_lut_scaling = BitDepth16::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_prep_grain_16bpc( + rav1d_prep_grain::( &(*((*c).dsp).as_ptr().offset(off as isize)).fg, out, in_0, From 1ca33da77e1505d2dd01d120ea95f2d6e21d2398 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sun, 15 Oct 2023 17:05:09 -0700 Subject: [PATCH 4/6] `fn rav1d_apply_grain_row`: Deduplicate w/ generics. --- src/fg_apply.rs | 118 ++++++++++++++++++++++++++++++++++++ src/fg_apply_tmpl_16.rs | 129 +--------------------------------------- src/fg_apply_tmpl_8.rs | 120 +------------------------------------ src/thread_task.rs | 11 +--- 4 files changed, 125 insertions(+), 253 deletions(-) diff --git a/src/fg_apply.rs b/src/fg_apply.rs index 9676903f1..18e086493 100644 --- a/src/fg_apply.rs +++ b/src/fg_apply.rs @@ -1,14 +1,18 @@ use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BPC; +use crate::include::dav1d::headers::Dav1dFilmGrainData; use crate::include::dav1d::headers::Rav1dFilmGrainData; +use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; +use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use libc::intptr_t; use libc::memcpy; use libc::memset; use libc::ptrdiff_t; +use std::cmp; use std::ffi::c_int; use std::ffi::c_uint; use std::ffi::c_void; @@ -217,3 +221,117 @@ pub(crate) unsafe fn rav1d_prep_grain( } } } + +pub(crate) unsafe fn rav1d_apply_grain_row( + dsp: *const Rav1dFilmGrainDSPContext, + out: *mut Rav1dPicture, + in_0: *const Rav1dPicture, + scaling: *const BD::Scaling, + grain_lut: *const [[BD::Entry; 82]; 74], + row: c_int, +) { + let data: *const Dav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; + let ss_y = ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; + let ss_x = ((*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I444 as c_int as c_uint) as c_int; + let cpw = (*out).p.w + ss_x >> ss_x; + let is_id = ((*(*out).seq_hdr).mtrx as c_uint == RAV1D_MC_IDENTITY as c_int as c_uint) as c_int; + let luma_src: *mut BD::Pixel = ((*in_0).data[0] as *mut BD::Pixel) + .offset(((row * 32) as isize * BD::pxstride((*in_0).stride[0] as usize) as isize) as isize); + let bitdepth_max = ((1 as c_int) << (*out).p.bpc) - 1; + if (*data).num_y_points != 0 { + let bh = cmp::min((*out).p.h - row * 32, 32 as c_int); + ((*dsp).fgy_32x32xn).expect("non-null function pointer")( + ((*out).data[0] as *mut BD::Pixel) + .offset( + ((row * 32) as isize * BD::pxstride((*out).stride[0] as usize) as isize) + as isize, + ) + .cast(), + luma_src.cast(), + (*out).stride[0], + data, + (*out).p.w as usize, + (*scaling.offset(0)).as_ref().as_ptr(), + (*grain_lut.offset(0)).as_ptr().cast(), + bh, + row, + bitdepth_max, + ); + } + if (*data).num_uv_points[0] == 0 + && (*data).num_uv_points[1] == 0 + && (*data).chroma_scaling_from_luma == 0 + { + return; + } + let bh_0 = cmp::min((*out).p.h - row * 32, 32 as c_int) + ss_y >> ss_y; + if (*out).p.w & ss_x != 0 { + let mut ptr: *mut BD::Pixel = luma_src; + let mut y = 0; + while y < bh_0 { + *ptr.offset((*out).p.w as isize) = *ptr.offset(((*out).p.w - 1) as isize); + ptr = + ptr.offset(((BD::pxstride((*in_0).stride[0] as usize) as isize) << ss_y) as isize); + y += 1; + } + } + let uv_off: ptrdiff_t = + (row * 32) as isize * BD::pxstride((*out).stride[1] as usize) as isize >> ss_y; + if (*data).chroma_scaling_from_luma != 0 { + let mut pl = 0; + while pl < 2 { + ((*dsp).fguv_32x32xn + [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) + .expect("non-null function pointer")( + ((*out).data[(1 + pl) as usize] as *mut BD::Pixel) + .offset(uv_off as isize) + .cast(), + ((*in_0).data[(1 + pl) as usize] as *const BD::Pixel) + .offset(uv_off as isize) + .cast(), + (*in_0).stride[1], + data, + cpw as usize, + (*scaling.offset(0)).as_ref().as_ptr(), + (*grain_lut.offset((1 + pl) as isize)).as_ptr().cast(), + bh_0, + row, + luma_src.cast(), + (*in_0).stride[0], + pl, + is_id, + bitdepth_max, + ); + pl += 1; + } + } else { + let mut pl_0 = 0; + while pl_0 < 2 { + if (*data).num_uv_points[pl_0 as usize] != 0 { + ((*dsp).fguv_32x32xn + [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) + .expect("non-null function pointer")( + ((*out).data[(1 + pl_0) as usize] as *mut BD::Pixel) + .offset(uv_off as isize) + .cast(), + ((*in_0).data[(1 + pl_0) as usize] as *const BD::Pixel) + .offset(uv_off as isize) + .cast(), + (*in_0).stride[1], + data, + cpw as usize, + (*scaling.offset((1 + pl_0) as isize)).as_ref().as_ptr(), + (*grain_lut.offset((1 + pl_0) as isize)).as_ptr().cast(), + bh_0, + row, + luma_src.cast(), + (*in_0).stride[0], + pl_0, + is_id, + bitdepth_max, + ); + } + pl_0 += 1; + } + }; +} diff --git a/src/fg_apply_tmpl_16.rs b/src/fg_apply_tmpl_16.rs index d98e266c2..faaa95228 100644 --- a/src/fg_apply_tmpl_16.rs +++ b/src/fg_apply_tmpl_16.rs @@ -1,138 +1,13 @@ use crate::include::common::bitdepth::BitDepth16; -use crate::include::dav1d::headers::Dav1dFilmGrainData; -use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; use crate::src::align::Align1; use crate::src::align::Align16; +use crate::src::fg_apply::rav1d_apply_grain_row; use crate::src::fg_apply::rav1d_prep_grain; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; -use libc::ptrdiff_t; -use std::cmp; -use std::ffi::c_int; -use std::ffi::c_uint; -pub type pixel = u16; pub type entry = i16; -#[inline] -unsafe fn PXSTRIDE(x: ptrdiff_t) -> ptrdiff_t { - if x & 1 != 0 { - unreachable!(); - } - return x >> 1; -} - -pub(crate) unsafe fn rav1d_apply_grain_row_16bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, - scaling: *const [u8; 4096], - grain_lut: *const [[entry; 82]; 74], - row: c_int, -) { - let data: *const Dav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; - let ss_y = ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; - let ss_x = ((*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I444 as c_int as c_uint) as c_int; - let cpw = (*out).p.w + ss_x >> ss_x; - let is_id = ((*(*out).seq_hdr).mtrx as c_uint == RAV1D_MC_IDENTITY as c_int as c_uint) as c_int; - let luma_src: *mut pixel = ((*in_0).data[0] as *mut pixel) - .offset(((row * 32) as isize * PXSTRIDE((*in_0).stride[0])) as isize); - let bitdepth_max = ((1 as c_int) << (*out).p.bpc) - 1; - if (*data).num_y_points != 0 { - let bh = cmp::min((*out).p.h - row * 32, 32 as c_int); - ((*dsp).fgy_32x32xn).expect("non-null function pointer")( - ((*out).data[0] as *mut pixel) - .offset(((row * 32) as isize * PXSTRIDE((*out).stride[0])) as isize) - .cast(), - luma_src.cast(), - (*out).stride[0], - data, - (*out).p.w as usize, - (*scaling.offset(0)).as_ptr(), - (*grain_lut.offset(0)).as_ptr().cast(), - bh, - row, - bitdepth_max, - ); - } - if (*data).num_uv_points[0] == 0 - && (*data).num_uv_points[1] == 0 - && (*data).chroma_scaling_from_luma == 0 - { - return; - } - let bh_0 = cmp::min((*out).p.h - row * 32, 32 as c_int) + ss_y >> ss_y; - if (*out).p.w & ss_x != 0 { - let mut ptr: *mut pixel = luma_src; - let mut y = 0; - while y < bh_0 { - *ptr.offset((*out).p.w as isize) = *ptr.offset(((*out).p.w - 1) as isize); - ptr = ptr.offset((PXSTRIDE((*in_0).stride[0]) << ss_y) as isize); - y += 1; - } - } - let uv_off: ptrdiff_t = (row * 32) as isize * PXSTRIDE((*out).stride[1]) >> ss_y; - if (*data).chroma_scaling_from_luma != 0 { - let mut pl = 0; - while pl < 2 { - ((*dsp).fguv_32x32xn - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - ((*out).data[(1 + pl) as usize] as *mut pixel) - .offset(uv_off as isize) - .cast(), - ((*in_0).data[(1 + pl) as usize] as *const pixel) - .offset(uv_off as isize) - .cast(), - (*in_0).stride[1], - data, - cpw as usize, - (*scaling.offset(0)).as_ptr(), - (*grain_lut.offset((1 + pl) as isize)).as_ptr().cast(), - bh_0, - row, - luma_src.cast(), - (*in_0).stride[0], - pl, - is_id, - bitdepth_max, - ); - pl += 1; - } - } else { - let mut pl_0 = 0; - while pl_0 < 2 { - if (*data).num_uv_points[pl_0 as usize] != 0 { - ((*dsp).fguv_32x32xn - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - ((*out).data[(1 + pl_0) as usize] as *mut pixel) - .offset(uv_off as isize) - .cast(), - ((*in_0).data[(1 + pl_0) as usize] as *const pixel) - .offset(uv_off as isize) - .cast(), - (*in_0).stride[1], - data, - cpw as usize, - (*scaling.offset((1 + pl_0) as isize)).as_ptr(), - (*grain_lut.offset((1 + pl_0) as isize)).as_ptr().cast(), - bh_0, - row, - luma_src.cast(), - (*in_0).stride[0], - pl_0, - is_id, - bitdepth_max, - ); - } - pl_0 += 1; - } - }; -} - pub(crate) unsafe fn rav1d_apply_grain_16bpc( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, @@ -150,7 +25,7 @@ pub(crate) unsafe fn rav1d_apply_grain_16bpc( ); let mut row = 0; while row < rows { - rav1d_apply_grain_row_16bpc( + rav1d_apply_grain_row::( dsp, out, in_0, diff --git a/src/fg_apply_tmpl_8.rs b/src/fg_apply_tmpl_8.rs index 1f9fb51d4..359afdef4 100644 --- a/src/fg_apply_tmpl_8.rs +++ b/src/fg_apply_tmpl_8.rs @@ -1,129 +1,13 @@ use crate::include::common::bitdepth::BitDepth8; -use crate::include::dav1d::headers::Dav1dFilmGrainData; -use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; use crate::src::align::Align16; +use crate::src::fg_apply::rav1d_apply_grain_row; use crate::src::fg_apply::rav1d_prep_grain; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use cfg_if::cfg_if; -use libc::ptrdiff_t; -use std::cmp; -use std::ffi::c_int; -use std::ffi::c_uint; -pub type pixel = u8; pub type entry = i8; -pub(crate) unsafe fn rav1d_apply_grain_row_8bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, - scaling: *const [u8; 256], - grain_lut: *const [[entry; 82]; 74], - row: c_int, -) { - let data: *const Dav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; - let ss_y = ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; - let ss_x = ((*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I444 as c_int as c_uint) as c_int; - let cpw = (*out).p.w + ss_x >> ss_x; - let is_id = ((*(*out).seq_hdr).mtrx as c_uint == RAV1D_MC_IDENTITY as c_int as c_uint) as c_int; - let luma_src: *mut pixel = - ((*in_0).data[0] as *mut pixel).offset(((row * 32) as isize * (*in_0).stride[0]) as isize); - if (*data).num_y_points != 0 { - let bh = cmp::min((*out).p.h - row * 32, 32 as c_int); - ((*dsp).fgy_32x32xn).expect("non-null function pointer")( - ((*out).data[0] as *mut pixel) - .offset(((row * 32) as isize * (*out).stride[0]) as isize) - .cast(), - luma_src.cast(), - (*out).stride[0], - data, - (*out).p.w as usize, - (*scaling.offset(0)).as_ptr(), - (*grain_lut.offset(0)).as_ptr().cast(), - bh, - row, - 8, - ); - } - if (*data).num_uv_points[0] == 0 - && (*data).num_uv_points[1] == 0 - && (*data).chroma_scaling_from_luma == 0 - { - return; - } - let bh_0 = cmp::min((*out).p.h - row * 32, 32 as c_int) + ss_y >> ss_y; - if (*out).p.w & ss_x != 0 { - let mut ptr: *mut pixel = luma_src; - let mut y = 0; - while y < bh_0 { - *ptr.offset((*out).p.w as isize) = *ptr.offset(((*out).p.w - 1) as isize); - ptr = ptr.offset(((*in_0).stride[0] << ss_y) as isize); - y += 1; - } - } - let uv_off: ptrdiff_t = (row * 32) as isize * (*out).stride[1] >> ss_y; - if (*data).chroma_scaling_from_luma != 0 { - let mut pl = 0; - while pl < 2 { - ((*dsp).fguv_32x32xn - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - ((*out).data[(1 + pl) as usize] as *mut pixel) - .offset(uv_off as isize) - .cast(), - ((*in_0).data[(1 + pl) as usize] as *const pixel) - .offset(uv_off as isize) - .cast(), - (*in_0).stride[1], - data, - cpw as usize, - (*scaling.offset(0)).as_ptr(), - (*grain_lut.offset((1 + pl) as isize)).as_ptr().cast(), - bh_0, - row, - luma_src.cast(), - (*in_0).stride[0], - pl, - is_id, - 8, - ); - pl += 1; - } - } else { - let mut pl_0 = 0; - while pl_0 < 2 { - if (*data).num_uv_points[pl_0 as usize] != 0 { - ((*dsp).fguv_32x32xn - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - ((*out).data[(1 + pl_0) as usize] as *mut pixel) - .offset(uv_off as isize) - .cast(), - ((*in_0).data[(1 + pl_0) as usize] as *const pixel) - .offset(uv_off as isize) - .cast(), - (*in_0).stride[1], - data, - cpw as usize, - (*scaling.offset((1 + pl_0) as isize)).as_ptr(), - (*grain_lut.offset((1 + pl_0) as isize)).as_ptr().cast(), - bh_0, - row, - luma_src.cast(), - (*in_0).stride[0], - pl_0, - is_id, - 8, - ); - } - pl_0 += 1; - } - }; -} - pub(crate) unsafe fn rav1d_apply_grain_8bpc( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, @@ -151,7 +35,7 @@ pub(crate) unsafe fn rav1d_apply_grain_8bpc( ); let mut row = 0; while row < rows { - rav1d_apply_grain_row_8bpc( + rav1d_apply_grain_row::( dsp, out, in_0, diff --git a/src/thread_task.rs b/src/thread_task.rs index 8dbb27197..31335ec89 100644 --- a/src/thread_task.rs +++ b/src/thread_task.rs @@ -16,6 +16,7 @@ use crate::src::error::Rav1dError::EGeneric; use crate::src::error::Rav1dError::EINVAL; use crate::src::error::Rav1dError::ENOMEM; use crate::src::error::Rav1dResult; +use crate::src::fg_apply::rav1d_apply_grain_row; use crate::src::fg_apply::rav1d_prep_grain; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameContext; @@ -52,12 +53,6 @@ use std::ffi::c_uint; use std::ffi::c_void; use std::process::abort; -#[cfg(feature = "bitdepth_8")] -use crate::src::fg_apply_tmpl_8::rav1d_apply_grain_row_8bpc; - -#[cfg(feature = "bitdepth_16")] -use crate::src::fg_apply_tmpl_16::rav1d_apply_grain_row_16bpc; - #[cfg(target_os = "linux")] use libc::prctl; @@ -791,7 +786,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 8 => { let grain_lut_scaling = BitDepth8::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_apply_grain_row_8bpc( + rav1d_apply_grain_row::( &(*((*c).dsp).as_ptr().offset(0)).fg, out, in_0, @@ -804,7 +799,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 10 | 12 => { let grain_lut_scaling = BitDepth16::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_apply_grain_row_16bpc( + rav1d_apply_grain_row::( &(*((*c).dsp).as_ptr().offset(off as isize)).fg, out, in_0, From 2074269687f92c505e6125dd64be51872aa9a9fd Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sun, 15 Oct 2023 17:51:25 -0700 Subject: [PATCH 5/6] `fn rav1d_apply_grain`: Deduplicate w/ generics. --- include/common/bitdepth.rs | 14 +++++++++-- lib.rs | 4 ---- src/fg_apply.rs | 35 +++++++++++++++++++++++++++ src/fg_apply_tmpl_16.rs | 38 ------------------------------ src/fg_apply_tmpl_8.rs | 48 -------------------------------------- src/lib.rs | 16 ++++++------- 6 files changed, 55 insertions(+), 100 deletions(-) delete mode 100644 src/fg_apply_tmpl_16.rs delete mode 100644 src/fg_apply_tmpl_8.rs diff --git a/include/common/bitdepth.rs b/include/common/bitdepth.rs index 20c16063c..e54a6bf2f 100644 --- a/include/common/bitdepth.rs +++ b/include/common/bitdepth.rs @@ -77,6 +77,16 @@ impl_FromPrimitive!(isize => {, ...}); impl_FromPrimitive!(f32 => {, ...}); impl_FromPrimitive!(f64 => {, ...}); +pub trait ArrayDefault { + fn default() -> Self; +} + +impl ArrayDefault for [T; N] { + fn default() -> Self { + [T::default(); N] + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BPC { BPC8, @@ -108,9 +118,9 @@ pub trait BitDepth: Clone + Copy { + Add + Display; - type Entry; + type Entry: Copy + Default; - type Scaling: AsRef<[u8]> + AsMut<[u8]>; + type Scaling: AsRef<[u8]> + AsMut<[u8]> + ArrayDefault + Copy; const SCALING_SIZE: usize; type BitDepthMax; diff --git a/lib.rs b/lib.rs index e7e7a985e..37d595cc0 100644 --- a/lib.rs +++ b/lib.rs @@ -49,10 +49,6 @@ pub mod src { mod env; pub(crate) mod error; mod fg_apply; - #[cfg(feature = "bitdepth_16")] - mod fg_apply_tmpl_16; - #[cfg(feature = "bitdepth_8")] - mod fg_apply_tmpl_8; mod filmgrain; #[cfg(feature = "bitdepth_16")] mod filmgrain_tmpl_16; diff --git a/src/fg_apply.rs b/src/fg_apply.rs index 18e086493..d64d52b41 100644 --- a/src/fg_apply.rs +++ b/src/fg_apply.rs @@ -1,3 +1,4 @@ +use crate::include::common::bitdepth::ArrayDefault; use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BPC; use crate::include::dav1d::headers::Dav1dFilmGrainData; @@ -7,6 +8,8 @@ use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; +use crate::src::align::Align16; +use crate::src::align::Align64; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use libc::intptr_t; use libc::memcpy; @@ -335,3 +338,35 @@ pub(crate) unsafe fn rav1d_apply_grain_row( } }; } + +pub(crate) unsafe fn rav1d_apply_grain( + dsp: *const Rav1dFilmGrainDSPContext, + out: *mut Rav1dPicture, + in_0: *const Rav1dPicture, +) { + let mut grain_lut = Align16([[[Default::default(); 82]; 74]; 3]); + // Only `x86_64` [`BitDepth8`] needs [`Align64`], + // but it shouldn't be a problem to over-align. + // [`GrainLutScaling::scaling`] over-aligns, for example. + let mut scaling = Align64([ArrayDefault::default(); 3]); + let rows = (*out).p.h + 31 >> 5; + rav1d_prep_grain::( + dsp, + out, + in_0, + scaling.0.as_mut_ptr(), + grain_lut.0.as_mut_ptr(), + ); + let mut row = 0; + while row < rows { + rav1d_apply_grain_row::( + dsp, + out, + in_0, + scaling.0.as_mut_ptr() as *const BD::Scaling, + grain_lut.0.as_mut_ptr() as *const [[BD::Entry; 82]; 74], + row, + ); + row += 1; + } +} diff --git a/src/fg_apply_tmpl_16.rs b/src/fg_apply_tmpl_16.rs deleted file mode 100644 index faaa95228..000000000 --- a/src/fg_apply_tmpl_16.rs +++ /dev/null @@ -1,38 +0,0 @@ -use crate::include::common::bitdepth::BitDepth16; -use crate::include::dav1d::picture::Rav1dPicture; -use crate::src::align::Align1; -use crate::src::align::Align16; -use crate::src::fg_apply::rav1d_apply_grain_row; -use crate::src::fg_apply::rav1d_prep_grain; -use crate::src::filmgrain::Rav1dFilmGrainDSPContext; - -pub type entry = i16; - -pub(crate) unsafe fn rav1d_apply_grain_16bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, -) { - let mut grain_lut = Align16([[[0; 82]; 74]; 3]); - let mut scaling = Align1([[0; 4096]; 3]); - let rows = (*out).p.h + 31 >> 5; - rav1d_prep_grain::( - dsp, - out, - in_0, - scaling.0.as_mut_ptr(), - grain_lut.0.as_mut_ptr(), - ); - let mut row = 0; - while row < rows { - rav1d_apply_grain_row::( - dsp, - out, - in_0, - scaling.0.as_mut_ptr() as *const [u8; 4096], - grain_lut.0.as_mut_ptr() as *const [[entry; 82]; 74], - row, - ); - row += 1; - } -} diff --git a/src/fg_apply_tmpl_8.rs b/src/fg_apply_tmpl_8.rs deleted file mode 100644 index 359afdef4..000000000 --- a/src/fg_apply_tmpl_8.rs +++ /dev/null @@ -1,48 +0,0 @@ -use crate::include::common::bitdepth::BitDepth8; -use crate::include::dav1d::picture::Rav1dPicture; -use crate::src::align::Align16; -use crate::src::fg_apply::rav1d_apply_grain_row; -use crate::src::fg_apply::rav1d_prep_grain; -use crate::src::filmgrain::Rav1dFilmGrainDSPContext; -use cfg_if::cfg_if; - -pub type entry = i8; - -pub(crate) unsafe fn rav1d_apply_grain_8bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, -) { - let mut grain_lut = Align16([[[0; 82]; 74]; 3]); - cfg_if! { - if #[cfg(target_arch = "x86_64")] { - use crate::src::align::Align64; - - let mut scaling = Align64([[0; 256]; 3]); - } else { - use crate::src::align::Align1; - - let mut scaling = Align1([[0; 256]; 3]); - } - } - let rows = (*out).p.h + 31 >> 5; - rav1d_prep_grain::( - dsp, - out, - in_0, - scaling.0.as_mut_ptr(), - grain_lut.0.as_mut_ptr(), - ); - let mut row = 0; - while row < rows { - rav1d_apply_grain_row::( - dsp, - out, - in_0, - scaling.0.as_mut_ptr() as *const [u8; 256], - grain_lut.0.as_mut_ptr() as *const [[entry; 82]; 74], - row, - ); - row += 1; - } -} diff --git a/src/lib.rs b/src/lib.rs index 00748d267..9ea4f40e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; +use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynCoef; use crate::include::common::validate::validate_input; use crate::include::dav1d::common::Dav1dDataProps; @@ -49,6 +50,7 @@ use crate::src::error::Rav1dError::EINVAL; use crate::src::error::Rav1dError::ENOENT; use crate::src::error::Rav1dError::ENOMEM; use crate::src::error::Rav1dResult; +use crate::src::fg_apply; use crate::src::internal::CodedBlockInfo; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameContext; @@ -117,12 +119,6 @@ use std::process::abort; use std::ptr::NonNull; use std::sync::Once; -#[cfg(feature = "bitdepth_8")] -use crate::src::fg_apply_tmpl_8::rav1d_apply_grain_8bpc; - -#[cfg(feature = "bitdepth_16")] -use crate::src::fg_apply_tmpl_16::rav1d_apply_grain_16bpc; - #[cfg(target_os = "linux")] use libc::dlsym; @@ -878,11 +874,15 @@ pub(crate) unsafe fn rav1d_apply_grain( match out.p.bpc { #[cfg(feature = "bitdepth_8")] 8 => { - rav1d_apply_grain_8bpc(&mut (*(c.dsp).as_mut_ptr().offset(0)).fg, out, in_0); + fg_apply::rav1d_apply_grain::( + &mut (*(c.dsp).as_mut_ptr().offset(0)).fg, + out, + in_0, + ); } #[cfg(feature = "bitdepth_16")] 10 | 12 => { - rav1d_apply_grain_16bpc( + fg_apply::rav1d_apply_grain::( &mut (*(c.dsp).as_mut_ptr().offset(((out.p.bpc >> 1) - 4) as isize)).fg, out, in_0, From b98a747b2d54f05198fd2e4a371230c2db248677 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Sun, 15 Oct 2023 17:54:27 -0700 Subject: [PATCH 6/6] `trait ArrayDefault`: Add docs. --- include/common/bitdepth.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/include/common/bitdepth.rs b/include/common/bitdepth.rs index e54a6bf2f..97fc2426f 100644 --- a/include/common/bitdepth.rs +++ b/include/common/bitdepth.rs @@ -77,6 +77,9 @@ impl_FromPrimitive!(isize => {, ...}); impl_FromPrimitive!(f32 => {, ...}); impl_FromPrimitive!(f64 => {, ...}); +/// [`Default`] isn't `impl`emented for all arrays `[T; N]` +/// because they were implemented before `const` generics +/// and thus only for low values of `N`. pub trait ArrayDefault { fn default() -> Self; }