diff --git a/include/common/bitdepth.rs b/include/common/bitdepth.rs index 73817e9b1..97fc2426f 100644 --- a/include/common/bitdepth.rs +++ b/include/common/bitdepth.rs @@ -77,6 +77,19 @@ impl_FromPrimitive!(isize => {, ...}); impl_FromPrimitive!(f32 => {, ...}); impl_FromPrimitive!(f64 => {, ...}); +/// [`Default`] isn't `impl`emented for all arrays `[T; N]` +/// because they were implemented before `const` generics +/// and thus only for low values of `N`. +pub trait ArrayDefault { + fn default() -> Self; +} + +impl ArrayDefault for [T; N] { + fn default() -> Self { + [T::default(); N] + } +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum BPC { BPC8, @@ -108,6 +121,11 @@ pub trait BitDepth: Clone + Copy { + Add + Display; + type Entry: Copy + Default; + + type Scaling: AsRef<[u8]> + AsMut<[u8]> + ArrayDefault + Copy; + const SCALING_SIZE: usize; + type BitDepthMax; type DisplayPixel: Display; @@ -174,11 +192,6 @@ pub trait BitDepth: Clone + Copy { T: BitDepthDependentType, T::T: Copy, T::T: Copy; - - type GrainLut; - type Scaling; - - const SCALING_LEN: usize; } #[derive(Clone, Copy)] @@ -195,6 +208,11 @@ impl BitDepth for BitDepth8 { type Coef = i16; + type Entry = i8; + + type Scaling = [u8; Self::SCALING_SIZE]; + const SCALING_SIZE: usize = 256; + type BitDepthMax = (); type DisplayPixel = DisplayPixel8; @@ -256,11 +274,6 @@ impl BitDepth for BitDepth8 { { bd.bpc8 } - - type GrainLut = i8; - type Scaling = [u8; Self::SCALING_LEN]; - - const SCALING_LEN: usize = 256; } #[derive(Clone, Copy)] @@ -276,6 +289,11 @@ impl BitDepth for BitDepth16 { type Coef = i32; + type Entry = i16; + + type Scaling = [u8; Self::SCALING_SIZE]; + const SCALING_SIZE: usize = 4096; + type BitDepthMax = Self::Pixel; type DisplayPixel = DisplayPixel16; @@ -341,11 +359,6 @@ impl BitDepth for BitDepth16 { { bd.bpc16 } - - type GrainLut = i16; - type Scaling = [u8; Self::SCALING_LEN]; - - const SCALING_LEN: usize = 4096; } pub struct DisplayPixel8(::Pixel); diff --git a/lib.rs b/lib.rs index 0653cd433..37d595cc0 100644 --- a/lib.rs +++ b/lib.rs @@ -48,10 +48,7 @@ pub mod src { mod dequant_tables; mod env; pub(crate) mod error; - #[cfg(feature = "bitdepth_16")] - mod fg_apply_tmpl_16; - #[cfg(feature = "bitdepth_8")] - mod fg_apply_tmpl_8; + mod fg_apply; mod filmgrain; #[cfg(feature = "bitdepth_16")] mod filmgrain_tmpl_16; diff --git a/src/fg_apply_tmpl_16.rs b/src/fg_apply.rs similarity index 72% rename from src/fg_apply_tmpl_16.rs rename to src/fg_apply.rs index 8b39a31a8..d64d52b41 100644 --- a/src/fg_apply_tmpl_16.rs +++ b/src/fg_apply.rs @@ -1,3 +1,6 @@ +use crate::include::common::bitdepth::ArrayDefault; +use crate::include::common::bitdepth::BitDepth; +use crate::include::common::bitdepth::BPC; use crate::include::dav1d::headers::Dav1dFilmGrainData; use crate::include::dav1d::headers::Rav1dFilmGrainData; use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; @@ -5,8 +8,8 @@ use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; use crate::include::dav1d::picture::Rav1dPicture; -use crate::src::align::Align1; use crate::src::align::Align16; +use crate::src::align::Align64; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use libc::intptr_t; use libc::memcpy; @@ -17,23 +20,23 @@ use std::ffi::c_int; use std::ffi::c_uint; use std::ffi::c_void; -pub type pixel = u16; -pub type entry = i16; - -#[inline] -unsafe fn PXSTRIDE(x: ptrdiff_t) -> ptrdiff_t { - if x & 1 != 0 { - unreachable!(); - } - return x >> 1; -} - -unsafe fn generate_scaling(bitdepth: c_int, points: *const [u8; 2], num: c_int, scaling: *mut u8) { - if !(bitdepth > 8) { - unreachable!(); - } - let shift_x = bitdepth - 8; - let scaling_size = (1 as c_int) << bitdepth; +unsafe fn generate_scaling( + bitdepth: c_int, + points: *const [u8; 2], + num: c_int, + scaling: *mut u8, +) { + let (shift_x, scaling_size) = match BD::BPC { + BPC::BPC8 => (0, 256), + BPC::BPC16 => { + if !(bitdepth > 8) { + unreachable!(); + } + let shift_x = bitdepth - 8; + let scaling_size = (1 as c_int) << bitdepth; + (shift_x, scaling_size) + } + }; if num == 0 { memset(scaling as *mut c_void, 0 as c_int, scaling_size as usize); return; @@ -70,37 +73,40 @@ unsafe fn generate_scaling(bitdepth: c_int, points: *const [u8; 2], num: c_int, (*points.offset((num - 1) as isize))[1] as c_int, (scaling_size - n) as usize, ); - let pad = (1 as c_int) << shift_x; - let rnd = pad >> 1; - let mut i_0 = 0; - while i_0 < num - 1 { - let bx_0 = ((*points.offset(i_0 as isize))[0] as c_int) << shift_x; - let ex_0 = ((*points.offset((i_0 + 1) as isize))[0] as c_int) << shift_x; - let dx_0 = ex_0 - bx_0; - let mut x_0 = 0; - while x_0 < dx_0 { - let range = *scaling.offset((bx_0 + x_0 + pad) as isize) as c_int - - *scaling.offset((bx_0 + x_0) as isize) as c_int; - let mut n_0 = 1; - let mut r = rnd; - while n_0 < pad { - r += range; - *scaling.offset((bx_0 + x_0 + n_0) as isize) = - (*scaling.offset((bx_0 + x_0) as isize) as c_int + (r >> shift_x)) as u8; - n_0 += 1; + + if BD::BPC != BPC::BPC8 { + let pad = (1 as c_int) << shift_x; + let rnd = pad >> 1; + let mut i_0 = 0; + while i_0 < num - 1 { + let bx_0 = ((*points.offset(i_0 as isize))[0] as c_int) << shift_x; + let ex_0 = ((*points.offset((i_0 + 1) as isize))[0] as c_int) << shift_x; + let dx_0 = ex_0 - bx_0; + let mut x_0 = 0; + while x_0 < dx_0 { + let range = *scaling.offset((bx_0 + x_0 + pad) as isize) as c_int + - *scaling.offset((bx_0 + x_0) as isize) as c_int; + let mut n_0 = 1; + let mut r = rnd; + while n_0 < pad { + r += range; + *scaling.offset((bx_0 + x_0 + n_0) as isize) = + (*scaling.offset((bx_0 + x_0) as isize) as c_int + (r >> shift_x)) as u8; + n_0 += 1; + } + x_0 += pad; } - x_0 += pad; + i_0 += 1; } - i_0 += 1; } } -pub(crate) unsafe fn rav1d_prep_grain_16bpc( +pub(crate) unsafe fn rav1d_prep_grain( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, in_0: *const Rav1dPicture, - scaling: *mut [u8; 4096], - grain_lut: *mut [[entry; 82]; 74], + scaling: *mut BD::Scaling, + grain_lut: *mut [[BD::Entry; 82]; 74], ) { let data: *const Rav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; let bitdepth_max = ((1 as c_int) << (*out).p.bpc) - 1; @@ -132,27 +138,27 @@ pub(crate) unsafe fn rav1d_prep_grain_16bpc( ); } if (*data).num_y_points != 0 || (*data).chroma_scaling_from_luma != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).y_points).as_ptr(), (*data).num_y_points, - (*scaling.offset(0)).as_mut_ptr(), + (*scaling.offset(0)).as_mut().as_mut_ptr(), ); } if (*data).num_uv_points[0] != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).uv_points[0]).as_ptr(), (*data).num_uv_points[0], - (*scaling.offset(1)).as_mut_ptr(), + (*scaling.offset(1)).as_mut().as_mut_ptr(), ); } if (*data).num_uv_points[1] != 0 { - generate_scaling( + generate_scaling::( (*in_0).p.bpc, ((*data).uv_points[1]).as_ptr(), (*data).num_uv_points[1], - (*scaling.offset(2)).as_mut_ptr(), + (*scaling.offset(2)).as_mut().as_mut_ptr(), ); } if !((*out).stride[0] == (*in_0).stride[0]) { @@ -219,12 +225,12 @@ pub(crate) unsafe fn rav1d_prep_grain_16bpc( } } -pub(crate) unsafe fn rav1d_apply_grain_row_16bpc( +pub(crate) unsafe fn rav1d_apply_grain_row( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, in_0: *const Rav1dPicture, - scaling: *const [u8; 4096], - grain_lut: *const [[entry; 82]; 74], + scaling: *const BD::Scaling, + grain_lut: *const [[BD::Entry; 82]; 74], row: c_int, ) { let data: *const Dav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; @@ -232,20 +238,23 @@ pub(crate) unsafe fn rav1d_apply_grain_row_16bpc( let ss_x = ((*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I444 as c_int as c_uint) as c_int; let cpw = (*out).p.w + ss_x >> ss_x; let is_id = ((*(*out).seq_hdr).mtrx as c_uint == RAV1D_MC_IDENTITY as c_int as c_uint) as c_int; - let luma_src: *mut pixel = ((*in_0).data[0] as *mut pixel) - .offset(((row * 32) as isize * PXSTRIDE((*in_0).stride[0])) as isize); + let luma_src: *mut BD::Pixel = ((*in_0).data[0] as *mut BD::Pixel) + .offset(((row * 32) as isize * BD::pxstride((*in_0).stride[0] as usize) as isize) as isize); let bitdepth_max = ((1 as c_int) << (*out).p.bpc) - 1; if (*data).num_y_points != 0 { let bh = cmp::min((*out).p.h - row * 32, 32 as c_int); ((*dsp).fgy_32x32xn).expect("non-null function pointer")( - ((*out).data[0] as *mut pixel) - .offset(((row * 32) as isize * PXSTRIDE((*out).stride[0])) as isize) + ((*out).data[0] as *mut BD::Pixel) + .offset( + ((row * 32) as isize * BD::pxstride((*out).stride[0] as usize) as isize) + as isize, + ) .cast(), luma_src.cast(), (*out).stride[0], data, (*out).p.w as usize, - (*scaling.offset(0)).as_ptr(), + (*scaling.offset(0)).as_ref().as_ptr(), (*grain_lut.offset(0)).as_ptr().cast(), bh, row, @@ -260,31 +269,33 @@ pub(crate) unsafe fn rav1d_apply_grain_row_16bpc( } let bh_0 = cmp::min((*out).p.h - row * 32, 32 as c_int) + ss_y >> ss_y; if (*out).p.w & ss_x != 0 { - let mut ptr: *mut pixel = luma_src; + let mut ptr: *mut BD::Pixel = luma_src; let mut y = 0; while y < bh_0 { *ptr.offset((*out).p.w as isize) = *ptr.offset(((*out).p.w - 1) as isize); - ptr = ptr.offset((PXSTRIDE((*in_0).stride[0]) << ss_y) as isize); + ptr = + ptr.offset(((BD::pxstride((*in_0).stride[0] as usize) as isize) << ss_y) as isize); y += 1; } } - let uv_off: ptrdiff_t = (row * 32) as isize * PXSTRIDE((*out).stride[1]) >> ss_y; + let uv_off: ptrdiff_t = + (row * 32) as isize * BD::pxstride((*out).stride[1] as usize) as isize >> ss_y; if (*data).chroma_scaling_from_luma != 0 { let mut pl = 0; while pl < 2 { ((*dsp).fguv_32x32xn [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) .expect("non-null function pointer")( - ((*out).data[(1 + pl) as usize] as *mut pixel) + ((*out).data[(1 + pl) as usize] as *mut BD::Pixel) .offset(uv_off as isize) .cast(), - ((*in_0).data[(1 + pl) as usize] as *const pixel) + ((*in_0).data[(1 + pl) as usize] as *const BD::Pixel) .offset(uv_off as isize) .cast(), (*in_0).stride[1], data, cpw as usize, - (*scaling.offset(0)).as_ptr(), + (*scaling.offset(0)).as_ref().as_ptr(), (*grain_lut.offset((1 + pl) as isize)).as_ptr().cast(), bh_0, row, @@ -303,16 +314,16 @@ pub(crate) unsafe fn rav1d_apply_grain_row_16bpc( ((*dsp).fguv_32x32xn [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) .expect("non-null function pointer")( - ((*out).data[(1 + pl_0) as usize] as *mut pixel) + ((*out).data[(1 + pl_0) as usize] as *mut BD::Pixel) .offset(uv_off as isize) .cast(), - ((*in_0).data[(1 + pl_0) as usize] as *const pixel) + ((*in_0).data[(1 + pl_0) as usize] as *const BD::Pixel) .offset(uv_off as isize) .cast(), (*in_0).stride[1], data, cpw as usize, - (*scaling.offset((1 + pl_0) as isize)).as_ptr(), + (*scaling.offset((1 + pl_0) as isize)).as_ref().as_ptr(), (*grain_lut.offset((1 + pl_0) as isize)).as_ptr().cast(), bh_0, row, @@ -328,15 +339,18 @@ pub(crate) unsafe fn rav1d_apply_grain_row_16bpc( }; } -pub(crate) unsafe fn rav1d_apply_grain_16bpc( +pub(crate) unsafe fn rav1d_apply_grain( dsp: *const Rav1dFilmGrainDSPContext, out: *mut Rav1dPicture, in_0: *const Rav1dPicture, ) { - let mut grain_lut = Align16([[[0; 82]; 74]; 3]); - let mut scaling = Align1([[0; 4096]; 3]); + let mut grain_lut = Align16([[[Default::default(); 82]; 74]; 3]); + // Only `x86_64` [`BitDepth8`] needs [`Align64`], + // but it shouldn't be a problem to over-align. + // [`GrainLutScaling::scaling`] over-aligns, for example. + let mut scaling = Align64([ArrayDefault::default(); 3]); let rows = (*out).p.h + 31 >> 5; - rav1d_prep_grain_16bpc( + rav1d_prep_grain::( dsp, out, in_0, @@ -345,12 +359,12 @@ pub(crate) unsafe fn rav1d_apply_grain_16bpc( ); let mut row = 0; while row < rows { - rav1d_apply_grain_row_16bpc( + rav1d_apply_grain_row::( dsp, out, in_0, - scaling.0.as_mut_ptr() as *const [u8; 4096], - grain_lut.0.as_mut_ptr() as *const [[entry; 82]; 74], + scaling.0.as_mut_ptr() as *const BD::Scaling, + grain_lut.0.as_mut_ptr() as *const [[BD::Entry; 82]; 74], row, ); row += 1; diff --git a/src/fg_apply_tmpl_8.rs b/src/fg_apply_tmpl_8.rs deleted file mode 100644 index 92b647073..000000000 --- a/src/fg_apply_tmpl_8.rs +++ /dev/null @@ -1,331 +0,0 @@ -use crate::include::dav1d::headers::Dav1dFilmGrainData; -use crate::include::dav1d::headers::RAV1D_MC_IDENTITY; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I400; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I420; -use crate::include::dav1d::headers::RAV1D_PIXEL_LAYOUT_I444; -use crate::include::dav1d::picture::Rav1dPicture; -use crate::src::align::Align16; -use crate::src::filmgrain::Rav1dFilmGrainDSPContext; -use cfg_if::cfg_if; -use libc::intptr_t; -use libc::memcpy; -use libc::memset; -use libc::ptrdiff_t; -use std::cmp; -use std::ffi::c_int; -use std::ffi::c_uint; -use std::ffi::c_void; - -pub type pixel = u8; -pub type entry = i8; - -unsafe fn generate_scaling(_bitdepth: c_int, points: *const [u8; 2], num: c_int, scaling: *mut u8) { - let shift_x = 0; - let scaling_size = 256; - if num == 0 { - memset(scaling as *mut c_void, 0 as c_int, scaling_size as usize); - return; - } - memset( - scaling as *mut c_void, - (*points.offset(0))[1] as c_int, - (((*points.offset(0))[0] as c_int) << shift_x) as usize, - ); - let mut i = 0; - while i < num - 1 { - let bx = (*points.offset(i as isize))[0] as c_int; - let by = (*points.offset(i as isize))[1] as c_int; - let ex = (*points.offset((i + 1) as isize))[0] as c_int; - let ey = (*points.offset((i + 1) as isize))[1] as c_int; - let dx = ex - bx; - let dy = ey - by; - if !(dx > 0) { - unreachable!(); - } - let delta = dy * ((0x10000 + (dx >> 1)) / dx); - let mut x = 0; - let mut d = 0x8000 as c_int; - while x < dx { - *scaling.offset((bx + x << shift_x) as isize) = (by + (d >> 16)) as u8; - d += delta; - x += 1; - } - i += 1; - } - let n = ((*points.offset((num - 1) as isize))[0] as c_int) << shift_x; - memset( - &mut *scaling.offset(n as isize) as *mut u8 as *mut c_void, - (*points.offset((num - 1) as isize))[1] as c_int, - (scaling_size - n) as usize, - ); -} - -pub(crate) unsafe fn rav1d_prep_grain_8bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, - scaling: *mut [u8; 256], - grain_lut: *mut [[entry; 82]; 74], -) { - let data: *const Dav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; - ((*dsp).generate_grain_y).expect("non-null function pointer")( - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 8, - ); - if (*data).num_uv_points[0] != 0 || (*data).chroma_scaling_from_luma != 0 { - ((*dsp).generate_grain_uv - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - (*grain_lut.offset(1)).as_mut_ptr().cast(), - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 0 as c_int as intptr_t, - 8, - ); - } - if (*data).num_uv_points[1] != 0 || (*data).chroma_scaling_from_luma != 0 { - ((*dsp).generate_grain_uv - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - (*grain_lut.offset(2)).as_mut_ptr().cast(), - (*grain_lut.offset(0)).as_mut_ptr().cast(), - data, - 1 as c_int as intptr_t, - 8, - ); - } - if (*data).num_y_points != 0 || (*data).chroma_scaling_from_luma != 0 { - generate_scaling( - (*in_0).p.bpc, - ((*data).y_points).as_ptr(), - (*data).num_y_points, - (*scaling.offset(0)).as_mut_ptr(), - ); - } - if (*data).num_uv_points[0] != 0 { - generate_scaling( - (*in_0).p.bpc, - ((*data).uv_points[0]).as_ptr(), - (*data).num_uv_points[0], - (*scaling.offset(1)).as_mut_ptr(), - ); - } - if (*data).num_uv_points[1] != 0 { - generate_scaling( - (*in_0).p.bpc, - ((*data).uv_points[1]).as_ptr(), - (*data).num_uv_points[1], - (*scaling.offset(2)).as_mut_ptr(), - ); - } - if !((*out).stride[0] == (*in_0).stride[0]) { - unreachable!(); - } - if (*data).num_y_points == 0 { - let stride: ptrdiff_t = (*out).stride[0]; - let sz: ptrdiff_t = (*out).p.h as isize * stride; - if sz < 0 { - memcpy( - ((*out).data[0] as *mut u8) - .offset(sz as isize) - .offset(-(stride as isize)) as *mut c_void, - ((*in_0).data[0] as *mut u8) - .offset(sz as isize) - .offset(-(stride as isize)) as *const c_void, - -sz as usize, - ); - } else { - memcpy((*out).data[0], (*in_0).data[0], sz as usize); - } - } - if (*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I400 as c_int as c_uint - && (*data).chroma_scaling_from_luma == 0 - { - if !((*out).stride[1] == (*in_0).stride[1]) { - unreachable!(); - } - let ss_ver = - ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; - let stride_0: ptrdiff_t = (*out).stride[1]; - let sz_0: ptrdiff_t = ((*out).p.h + ss_ver >> ss_ver) as isize * stride_0; - if sz_0 < 0 { - if (*data).num_uv_points[0] == 0 { - memcpy( - ((*out).data[1] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *mut c_void, - ((*in_0).data[1] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *const c_void, - -sz_0 as usize, - ); - } - if (*data).num_uv_points[1] == 0 { - memcpy( - ((*out).data[2] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *mut c_void, - ((*in_0).data[2] as *mut u8) - .offset(sz_0 as isize) - .offset(-(stride_0 as isize)) as *const c_void, - -sz_0 as usize, - ); - } - } else { - if (*data).num_uv_points[0] == 0 { - memcpy((*out).data[1], (*in_0).data[1], sz_0 as usize); - } - if (*data).num_uv_points[1] == 0 { - memcpy((*out).data[2], (*in_0).data[2], sz_0 as usize); - } - } - } -} - -pub(crate) unsafe fn rav1d_apply_grain_row_8bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, - scaling: *const [u8; 256], - grain_lut: *const [[entry; 82]; 74], - row: c_int, -) { - let data: *const Dav1dFilmGrainData = &mut (*(*out).frame_hdr).film_grain.data; - let ss_y = ((*in_0).p.layout as c_uint == RAV1D_PIXEL_LAYOUT_I420 as c_int as c_uint) as c_int; - let ss_x = ((*in_0).p.layout as c_uint != RAV1D_PIXEL_LAYOUT_I444 as c_int as c_uint) as c_int; - let cpw = (*out).p.w + ss_x >> ss_x; - let is_id = ((*(*out).seq_hdr).mtrx as c_uint == RAV1D_MC_IDENTITY as c_int as c_uint) as c_int; - let luma_src: *mut pixel = - ((*in_0).data[0] as *mut pixel).offset(((row * 32) as isize * (*in_0).stride[0]) as isize); - if (*data).num_y_points != 0 { - let bh = cmp::min((*out).p.h - row * 32, 32 as c_int); - ((*dsp).fgy_32x32xn).expect("non-null function pointer")( - ((*out).data[0] as *mut pixel) - .offset(((row * 32) as isize * (*out).stride[0]) as isize) - .cast(), - luma_src.cast(), - (*out).stride[0], - data, - (*out).p.w as usize, - (*scaling.offset(0)).as_ptr(), - (*grain_lut.offset(0)).as_ptr().cast(), - bh, - row, - 8, - ); - } - if (*data).num_uv_points[0] == 0 - && (*data).num_uv_points[1] == 0 - && (*data).chroma_scaling_from_luma == 0 - { - return; - } - let bh_0 = cmp::min((*out).p.h - row * 32, 32 as c_int) + ss_y >> ss_y; - if (*out).p.w & ss_x != 0 { - let mut ptr: *mut pixel = luma_src; - let mut y = 0; - while y < bh_0 { - *ptr.offset((*out).p.w as isize) = *ptr.offset(((*out).p.w - 1) as isize); - ptr = ptr.offset(((*in_0).stride[0] << ss_y) as isize); - y += 1; - } - } - let uv_off: ptrdiff_t = (row * 32) as isize * (*out).stride[1] >> ss_y; - if (*data).chroma_scaling_from_luma != 0 { - let mut pl = 0; - while pl < 2 { - ((*dsp).fguv_32x32xn - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - ((*out).data[(1 + pl) as usize] as *mut pixel) - .offset(uv_off as isize) - .cast(), - ((*in_0).data[(1 + pl) as usize] as *const pixel) - .offset(uv_off as isize) - .cast(), - (*in_0).stride[1], - data, - cpw as usize, - (*scaling.offset(0)).as_ptr(), - (*grain_lut.offset((1 + pl) as isize)).as_ptr().cast(), - bh_0, - row, - luma_src.cast(), - (*in_0).stride[0], - pl, - is_id, - 8, - ); - pl += 1; - } - } else { - let mut pl_0 = 0; - while pl_0 < 2 { - if (*data).num_uv_points[pl_0 as usize] != 0 { - ((*dsp).fguv_32x32xn - [((*in_0).p.layout as c_uint).wrapping_sub(1 as c_int as c_uint) as usize]) - .expect("non-null function pointer")( - ((*out).data[(1 + pl_0) as usize] as *mut pixel) - .offset(uv_off as isize) - .cast(), - ((*in_0).data[(1 + pl_0) as usize] as *const pixel) - .offset(uv_off as isize) - .cast(), - (*in_0).stride[1], - data, - cpw as usize, - (*scaling.offset((1 + pl_0) as isize)).as_ptr(), - (*grain_lut.offset((1 + pl_0) as isize)).as_ptr().cast(), - bh_0, - row, - luma_src.cast(), - (*in_0).stride[0], - pl_0, - is_id, - 8, - ); - } - pl_0 += 1; - } - }; -} - -pub(crate) unsafe fn rav1d_apply_grain_8bpc( - dsp: *const Rav1dFilmGrainDSPContext, - out: *mut Rav1dPicture, - in_0: *const Rav1dPicture, -) { - let mut grain_lut = Align16([[[0; 82]; 74]; 3]); - cfg_if! { - if #[cfg(target_arch = "x86_64")] { - use crate::src::align::Align64; - - let mut scaling = Align64([[0; 256]; 3]); - } else { - use crate::src::align::Align1; - - let mut scaling = Align1([[0; 256]; 3]); - } - } - let rows = (*out).p.h + 31 >> 5; - rav1d_prep_grain_8bpc( - dsp, - out, - in_0, - scaling.0.as_mut_ptr(), - grain_lut.0.as_mut_ptr(), - ); - let mut row = 0; - while row < rows { - rav1d_apply_grain_row_8bpc( - dsp, - out, - in_0, - scaling.0.as_mut_ptr() as *const [u8; 256], - grain_lut.0.as_mut_ptr() as *const [[entry; 82]; 74], - row, - ); - row += 1; - } -} diff --git a/src/internal.rs b/src/internal.rs index d1bc3ff74..6338a8442 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -116,7 +116,7 @@ pub(crate) struct Rav1dContext_frame_thread { #[derive(Clone, Copy)] #[repr(C)] pub struct GrainLutScalingBD { - pub grain_lut: Align16<[[[BD::GrainLut; 82]; 73 + 1]; 3]>, + pub grain_lut: Align16<[[[BD::Entry; 82]; 73 + 1]; 3]>, // TODO(kkysen) can use `BD::SCALING_LEN`` directly with `#![feature(generic_const_exprs)]` when stabilized pub scaling: Align64<[BD::Scaling; 3]>, } diff --git a/src/lib.rs b/src/lib.rs index 00748d267..9ea4f40e7 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ use crate::include::common::bitdepth::BitDepth; use crate::include::common::bitdepth::BitDepth16; +use crate::include::common::bitdepth::BitDepth8; use crate::include::common::bitdepth::DynCoef; use crate::include::common::validate::validate_input; use crate::include::dav1d::common::Dav1dDataProps; @@ -49,6 +50,7 @@ use crate::src::error::Rav1dError::EINVAL; use crate::src::error::Rav1dError::ENOENT; use crate::src::error::Rav1dError::ENOMEM; use crate::src::error::Rav1dResult; +use crate::src::fg_apply; use crate::src::internal::CodedBlockInfo; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameContext; @@ -117,12 +119,6 @@ use std::process::abort; use std::ptr::NonNull; use std::sync::Once; -#[cfg(feature = "bitdepth_8")] -use crate::src::fg_apply_tmpl_8::rav1d_apply_grain_8bpc; - -#[cfg(feature = "bitdepth_16")] -use crate::src::fg_apply_tmpl_16::rav1d_apply_grain_16bpc; - #[cfg(target_os = "linux")] use libc::dlsym; @@ -878,11 +874,15 @@ pub(crate) unsafe fn rav1d_apply_grain( match out.p.bpc { #[cfg(feature = "bitdepth_8")] 8 => { - rav1d_apply_grain_8bpc(&mut (*(c.dsp).as_mut_ptr().offset(0)).fg, out, in_0); + fg_apply::rav1d_apply_grain::( + &mut (*(c.dsp).as_mut_ptr().offset(0)).fg, + out, + in_0, + ); } #[cfg(feature = "bitdepth_16")] 10 | 12 => { - rav1d_apply_grain_16bpc( + fg_apply::rav1d_apply_grain::( &mut (*(c.dsp).as_mut_ptr().offset(((out.p.bpc >> 1) - 4) as isize)).fg, out, in_0, diff --git a/src/thread_task.rs b/src/thread_task.rs index 209c4b553..31335ec89 100644 --- a/src/thread_task.rs +++ b/src/thread_task.rs @@ -16,6 +16,8 @@ use crate::src::error::Rav1dError::EGeneric; use crate::src::error::Rav1dError::EINVAL; use crate::src::error::Rav1dError::ENOMEM; use crate::src::error::Rav1dResult; +use crate::src::fg_apply::rav1d_apply_grain_row; +use crate::src::fg_apply::rav1d_prep_grain; use crate::src::internal::Rav1dContext; use crate::src::internal::Rav1dFrameContext; use crate::src::internal::Rav1dTask; @@ -51,17 +53,6 @@ use std::ffi::c_uint; use std::ffi::c_void; use std::process::abort; -#[cfg(feature = "bitdepth_8")] -use crate::{ - src::fg_apply_tmpl_8::rav1d_apply_grain_row_8bpc, src::fg_apply_tmpl_8::rav1d_prep_grain_8bpc, -}; - -#[cfg(feature = "bitdepth_16")] -use crate::{ - src::fg_apply_tmpl_16::rav1d_apply_grain_row_16bpc, - src::fg_apply_tmpl_16::rav1d_prep_grain_16bpc, -}; - #[cfg(target_os = "linux")] use libc::prctl; @@ -740,7 +731,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 8 => { let grain_lut_scaling = BitDepth8::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_prep_grain_8bpc( + rav1d_prep_grain::( &(*((*c).dsp).as_ptr().offset(0)).fg, out, in_0, @@ -752,7 +743,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 10 | 12 => { let grain_lut_scaling = BitDepth16::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_prep_grain_16bpc( + rav1d_prep_grain::( &(*((*c).dsp).as_ptr().offset(off as isize)).fg, out, in_0, @@ -795,7 +786,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 8 => { let grain_lut_scaling = BitDepth8::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_apply_grain_row_8bpc( + rav1d_apply_grain_row::( &(*((*c).dsp).as_ptr().offset(0)).fg, out, in_0, @@ -808,7 +799,7 @@ unsafe fn delayed_fg_task(c: *const Rav1dContext, ttd: *mut TaskThreadData) { 10 | 12 => { let grain_lut_scaling = BitDepth16::select_mut(&mut (*ttd).delayed_fg.grain_lut_scaling); - rav1d_apply_grain_row_16bpc( + rav1d_apply_grain_row::( &(*((*c).dsp).as_ptr().offset(off as isize)).fg, out, in_0,