From ebf331a63c0e37ee829d4c1b37cf01c801cc7453 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 18 Sep 2023 23:30:08 -0700 Subject: [PATCH 1/3] `struct WedgeMasks`: Encapsulate `static wedge_masks_*`s and `fn fill2d_16x2` in a `struct` with separated building and slicing/referencing `fn`s. --- src/wedge.rs | 299 +++++++++++++++++++++------------------------------ 1 file changed, 122 insertions(+), 177 deletions(-) diff --git a/src/wedge.rs b/src/wedge.rs index cfecf0fc7..1637bab7f 100644 --- a/src/wedge.rs +++ b/src/wedge.rs @@ -104,43 +104,6 @@ static wedge_codebook_16_heqw: [wedge_code_type; 16] = [ wedge_code_type::new(6, 4, WEDGE_OBLIQUE117), ]; -static mut wedge_masks_444_32x32: Align64<[[[u8; 32 * 32]; 16]; 2]> = - Align64([[[0; 32 * 32]; 16]; 2]); -static mut wedge_masks_444_32x16: Align64<[[[u8; 32 * 16]; 16]; 2]> = - Align64([[[0; 32 * 16]; 16]; 2]); -static mut wedge_masks_444_32x8: Align64<[[[u8; 32 * 8]; 16]; 2]> = Align64([[[0; 32 * 8]; 16]; 2]); -static mut wedge_masks_444_16x32: Align64<[[[u8; 16 * 32]; 16]; 2]> = - Align64([[[0; 16 * 32]; 16]; 2]); -static mut wedge_masks_444_16x16: Align64<[[[u8; 16 * 16]; 16]; 2]> = - Align64([[[0; 16 * 16]; 16]; 2]); -static mut wedge_masks_444_16x8: Align64<[[[u8; 16 * 8]; 16]; 2]> = Align64([[[0; 16 * 8]; 16]; 2]); -static mut wedge_masks_444_8x32: Align64<[[[u8; 8 * 32]; 16]; 2]> = Align64([[[0; 8 * 32]; 16]; 2]); -static mut wedge_masks_444_8x16: Align64<[[[u8; 8 * 16]; 16]; 2]> = Align64([[[0; 8 * 16]; 16]; 2]); -static mut wedge_masks_444_8x8: Align64<[[[u8; 8 * 8]; 16]; 2]> = Align64([[[0; 8 * 8]; 16]; 2]); - -static mut wedge_masks_422_16x32: Align64<[[[u8; 16 * 32]; 16]; 2]> = - Align64([[[0; 16 * 32]; 16]; 2]); -static mut wedge_masks_422_16x16: Align64<[[[u8; 16 * 16]; 16]; 2]> = - Align64([[[0; 16 * 16]; 16]; 2]); -static mut wedge_masks_422_16x8: Align64<[[[u8; 16 * 8]; 16]; 2]> = Align64([[[0; 16 * 8]; 16]; 2]); -static mut wedge_masks_422_8x32: Align64<[[[u8; 8 * 32]; 16]; 2]> = Align64([[[0; 8 
* 32]; 16]; 2]); -static mut wedge_masks_422_8x16: Align64<[[[u8; 8 * 16]; 16]; 2]> = Align64([[[0; 8 * 16]; 16]; 2]); -static mut wedge_masks_422_8x8: Align64<[[[u8; 8 * 8]; 16]; 2]> = Align64([[[0; 8 * 8]; 16]; 2]); -static mut wedge_masks_422_4x32: Align64<[[[u8; 4 * 32]; 16]; 2]> = Align64([[[0; 4 * 32]; 16]; 2]); -static mut wedge_masks_422_4x16: Align64<[[[u8; 4 * 16]; 16]; 2]> = Align64([[[0; 4 * 16]; 16]; 2]); -static mut wedge_masks_422_4x8: Align64<[[[u8; 4 * 8]; 16]; 2]> = Align64([[[0; 4 * 8]; 16]; 2]); - -static mut wedge_masks_420_16x16: Align64<[[[u8; 16 * 16]; 16]; 2]> = - Align64([[[0; 16 * 16]; 16]; 2]); -static mut wedge_masks_420_16x8: Align64<[[[u8; 16 * 8]; 16]; 2]> = Align64([[[0; 16 * 8]; 16]; 2]); -static mut wedge_masks_420_16x4: Align64<[[[u8; 16 * 4]; 16]; 2]> = Align64([[[0; 16 * 4]; 16]; 2]); -static mut wedge_masks_420_8x16: Align64<[[[u8; 8 * 16]; 16]; 2]> = Align64([[[0; 8 * 16]; 16]; 2]); -static mut wedge_masks_420_8x8: Align64<[[[u8; 8 * 8]; 16]; 2]> = Align64([[[0; 8 * 8]; 16]; 2]); -static mut wedge_masks_420_8x4: Align64<[[[u8; 8 * 4]; 16]; 2]> = Align64([[[0; 8 * 4]; 16]; 2]); -static mut wedge_masks_420_4x16: Align64<[[[u8; 4 * 16]; 16]; 2]> = Align64([[[0; 4 * 16]; 16]; 2]); -static mut wedge_masks_420_4x8: Align64<[[[u8; 4 * 8]; 16]; 2]> = Align64([[[0; 4 * 8]; 16]; 2]); -static mut wedge_masks_420_4x4: Align64<[[[u8; 4 * 4]; 16]; 2]> = Align64([[[0; 4 * 4]; 16]; 2]); - pub static mut dav1d_wedge_masks: [[[[&'static [u8]; 16]; 2]; 3]; N_BS_SIZES] = [[[[&[]; 16]; 2]; 3]; N_BS_SIZES]; @@ -258,61 +221,88 @@ const fn init_chroma( chroma } -#[cold] -fn fill2d_16x2<const LEN_444: usize, const LEN_422: usize, const LEN_420: usize>( - w: usize, - h: usize, - master: &[[[u8; 64]; 64]; N_WEDGE_DIRECTIONS], - cb: &[wedge_code_type; 16], - masks_444: &'static mut [[[u8; LEN_444]; 16]; 2], - masks_422: &'static mut [[[u8; LEN_422]; 16]; 2], - masks_420: &'static mut [[[u8; LEN_420]; 16]; 2], +struct WedgeMasks<const LEN_444: usize, const LEN_422: usize, const LEN_420: usize> { + masks_444: Align64<[[[u8; LEN_444]; 16]; 2]>, + masks_422: Align64<[[[u8; LEN_422]; 
16]; 2]>, + masks_420: Align64<[[[u8; LEN_420]; 16]; 2]>, signs: u16, -) -> [[[&'static [u8]; 16]; 2]; 3] { - assert!(LEN_444 == (w * h) >> 0); - assert!(LEN_422 == (w * h) >> 1); - assert!(LEN_420 == (w * h) >> 2); - - const_for!(n in 0..16 => { - masks_444[0][n] = copy2d( - &master[cb[n].direction as usize], - w, - h, - 32 - (w * cb[n].x_offset as usize >> 3), - 32 - (h * cb[n].y_offset as usize >> 3), - ); - }); - const_for!(n in 0..16 => { - masks_444[1][n] = invert(&masks_444[0][n], w, h); - }); +} - const_for!(n in 0..16 => { - let sign = (signs >> n & 1) != 0; - let luma = &masks_444[sign as usize][n]; +impl<const LEN_444: usize, const LEN_422: usize, const LEN_420: usize> + WedgeMasks<LEN_444, LEN_422, LEN_420> +{ + const fn fill2d_16x2( + w: usize, + h: usize, + master: &[[[u8; 64]; 64]; N_WEDGE_DIRECTIONS], + cb: &[wedge_code_type; 16], + signs: u16, + ) -> Self { + assert!(LEN_444 == (w * h) >> 0); + assert!(LEN_422 == (w * h) >> 1); + assert!(LEN_420 == (w * h) >> 2); + + let mut masks_444 = [[[0; LEN_444]; 16]; 2]; + let mut masks_422 = [[[0; LEN_422]; 16]; 2]; + let mut masks_420 = [[[0; LEN_420]; 16]; 2]; + + const_for!(n in 0..16 => { + masks_444[0][n] = copy2d( + &master[cb[n].direction as usize], + w, + h, + 32 - (w * cb[n].x_offset as usize >> 3), + 32 - (h * cb[n].y_offset as usize >> 3), + ); + }); + const_for!(n in 0..16 => { + masks_444[1][n] = invert(&masks_444[0][n], w, h); + }); - masks_422[sign as usize][n] = init_chroma(luma, false, w, h, false); - masks_422[!sign as usize][n] = init_chroma(luma, true, w, h, false); - masks_420[sign as usize][n] = init_chroma(luma, false, w, h, true); - masks_420[!sign as usize][n] = init_chroma(luma, true, w, h, true); - }); + const_for!(n in 0..16 => { + let sign = (signs >> n & 1) != 0; + let luma = &masks_444[sign as usize][n]; - let mut masks = [[[&[] as &'static [u8]; 16]; 2]; 3]; + masks_422[sign as usize][n] = init_chroma(luma, false, w, h, false); + masks_422[!sign as usize][n] = init_chroma(luma, true, w, h, false); + masks_420[sign as usize][n] = init_chroma(luma, false, w, h, true); 
+ masks_420[!sign as usize][n] = init_chroma(luma, true, w, h, true); + }); - // assign pointers in externally visible array - const_for!(n in 0..16 => { - let sign = (signs >> n & 1) != 0; + Self { + masks_444: Align64(masks_444), + masks_422: Align64(masks_422), + masks_420: Align64(masks_420), + signs, + } + } - masks[0][0][n] = &masks_444[sign as usize][n]; - // not using !sign is intentional here, since 444 does not require - // any rounding since no chroma subsampling is applied. - masks[0][1][n] = &masks_444[sign as usize][n]; - masks[1][0][n] = &masks_422[sign as usize][n]; - masks[1][1][n] = &masks_422[!sign as usize][n]; - masks[2][0][n] = &masks_420[sign as usize][n]; - masks[2][1][n] = &masks_420[!sign as usize][n]; - }); + const fn slice<'a>(&'a self) -> [[[&'a [u8]; 16]; 2]; 3] { + let Self { + masks_444: Align64(masks_444), + masks_422: Align64(masks_422), + masks_420: Align64(masks_420), + signs, + } = self; + + let mut masks = [[[&[] as &'static [u8]; 16]; 2]; 3]; + + // assign pointers in externally visible array + const_for!(n in 0..16 => { + let sign = (*signs >> n & 1) != 0; + + masks[0][0][n] = &masks_444[sign as usize][n]; + // not using !sign is intentional here, since 444 does not require + // any rounding since no chroma subsampling is applied. 
+ masks[0][1][n] = &masks_444[sign as usize][n]; + masks[1][0][n] = &masks_422[sign as usize][n]; + masks[1][1][n] = &masks_422[!sign as usize][n]; + masks[2][0][n] = &masks_420[sign as usize][n]; + masks[2][1][n] = &masks_420[!sign as usize][n]; + }); - masks + masks + } } const fn build_master() -> [[[u8; 64]; 64]; N_WEDGE_DIRECTIONS] { @@ -366,98 +356,53 @@ const fn build_master() -> [[[u8; 64]; 64]; N_WEDGE_DIRECTIONS] { pub unsafe fn dav1d_init_wedge_masks() { // This function is guaranteed to be called only once - let master = build_master(); - - dav1d_wedge_masks[BS_32x32 as usize] = fill2d_16x2( - 32, - 32, - &master, - &wedge_codebook_16_heqw, - &mut wedge_masks_444_32x32.0, - &mut wedge_masks_422_16x32.0, - &mut wedge_masks_420_16x16.0, - 0x7bfb, - ); - dav1d_wedge_masks[BS_32x16 as usize] = fill2d_16x2( - 32, - 16, - &master, - &wedge_codebook_16_hltw, - &mut wedge_masks_444_32x16.0, - &mut wedge_masks_422_16x16.0, - &mut wedge_masks_420_16x8.0, - 0x7beb, - ); - dav1d_wedge_masks[BS_32x8 as usize] = fill2d_16x2( - 32, - 8, - &master, - &wedge_codebook_16_hltw, - &mut wedge_masks_444_32x8.0, - &mut wedge_masks_422_16x8.0, - &mut wedge_masks_420_16x4.0, - 0x6beb, - ); - dav1d_wedge_masks[BS_16x32 as usize] = fill2d_16x2( - 16, - 32, - &master, - &wedge_codebook_16_hgtw, - &mut wedge_masks_444_16x32.0, - &mut wedge_masks_422_8x32.0, - &mut wedge_masks_420_8x16.0, - 0x7beb, - ); - dav1d_wedge_masks[BS_16x16 as usize] = fill2d_16x2( - 16, - 16, - &master, - &wedge_codebook_16_heqw, - &mut wedge_masks_444_16x16.0, - &mut wedge_masks_422_8x16.0, - &mut wedge_masks_420_8x8.0, - 0x7bfb, - ); - dav1d_wedge_masks[BS_16x8 as usize] = fill2d_16x2( - 16, - 8, - &master, - &wedge_codebook_16_hltw, - &mut wedge_masks_444_16x8.0, - &mut wedge_masks_422_8x8.0, - &mut wedge_masks_420_8x4.0, - 0x7beb, - ); - dav1d_wedge_masks[BS_8x32 as usize] = fill2d_16x2( - 8, - 32, - &master, - &wedge_codebook_16_hgtw, - &mut wedge_masks_444_8x32.0, - &mut wedge_masks_422_4x32.0, - &mut 
wedge_masks_420_4x16.0, - 0x7aeb, - ); - dav1d_wedge_masks[BS_8x16 as usize] = fill2d_16x2( - 8, - 16, - &master, - &wedge_codebook_16_hgtw, - &mut wedge_masks_444_8x16.0, - &mut wedge_masks_422_4x16.0, - &mut wedge_masks_420_4x8.0, - 0x7beb, - ); - dav1d_wedge_masks[BS_8x8 as usize] = fill2d_16x2( - 8, - 8, - &master, - &wedge_codebook_16_heqw, - &mut wedge_masks_444_8x8.0, - &mut wedge_masks_422_4x8.0, - &mut wedge_masks_420_4x4.0, - 0x7bfb, - ); + static master: [[[u8; 64]; 64]; N_WEDGE_DIRECTIONS] = build_master(); + + { + static wedge_masks: WedgeMasks<{ 32 * 32 }, { (32 / 2) * 32 }, { (32 / 2) * (32 / 2) }> = + WedgeMasks::fill2d_16x2(32, 32, &master, &wedge_codebook_16_heqw, 0x7bfb); + dav1d_wedge_masks[BS_32x32 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 32 * 16 }, { (32 / 2) * 16 }, { (32 / 2) * (16 / 2) }> = + WedgeMasks::fill2d_16x2(32, 16, &master, &wedge_codebook_16_hltw, 0x7beb); + dav1d_wedge_masks[BS_32x16 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 32 * 8 }, { (32 / 2) * 8 }, { (32 / 2) * (8 / 2) }> = + WedgeMasks::fill2d_16x2(32, 8, &master, &wedge_codebook_16_hltw, 0x6beb); + dav1d_wedge_masks[BS_32x8 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 16 * 32 }, { (16 / 2) * 32 }, { (16 / 2) * (32 / 2) }> = + WedgeMasks::fill2d_16x2(16, 32, &master, &wedge_codebook_16_hgtw, 0x7beb); + dav1d_wedge_masks[BS_16x32 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 16 * 16 }, { (16 / 2) * 16 }, { (16 / 2) * (16 / 2) }> = + WedgeMasks::fill2d_16x2(16, 16, &master, &wedge_codebook_16_heqw, 0x7bfb); + dav1d_wedge_masks[BS_16x16 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 16 * 8 }, { (16 / 2) * 8 }, { (16 / 2) * (8 / 2) }> = + WedgeMasks::fill2d_16x2(16, 8, &master, &wedge_codebook_16_hltw, 0x7beb); + dav1d_wedge_masks[BS_16x8 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 8 * 
32 }, { (8 / 2) * 32 }, { (8 / 2) * (32 / 2) }> = + WedgeMasks::fill2d_16x2(8, 32, &master, &wedge_codebook_16_hgtw, 0x7aeb); + dav1d_wedge_masks[BS_8x32 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 8 * 16 }, { (8 / 2) * 16 }, { (8 / 2) * (16 / 2) }> = + WedgeMasks::fill2d_16x2(8, 16, &master, &wedge_codebook_16_hgtw, 0x7beb); + dav1d_wedge_masks[BS_8x16 as usize] = wedge_masks.slice(); + }; + { + static wedge_masks: WedgeMasks<{ 8 * 8 }, { (8 / 2) * 8 }, { (8 / 2) * (8 / 2) }> = + WedgeMasks::fill2d_16x2(8, 8, &master, &wedge_codebook_16_heqw, 0x7bfb); + dav1d_wedge_masks[BS_8x8 as usize] = wedge_masks.slice(); + }; } static ii_dc_mask: Align64<[u8; 32 * 32]> = Align64([32; 32 * 32]); From 067b67aa66acfcf4a0314dfb49974cd473838812 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 18 Sep 2023 23:43:46 -0700 Subject: [PATCH 2/3] `fn WedgeMasks::slice`: Elide lifetimes. --- src/wedge.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/wedge.rs b/src/wedge.rs index 1637bab7f..34d96d38f 100644 --- a/src/wedge.rs +++ b/src/wedge.rs @@ -277,7 +277,7 @@ impl } } - const fn slice<'a>(&'a self) -> [[[&'a [u8]; 16]; 2]; 3] { + const fn slice(&self) -> [[[&[u8]; 16]; 2]; 3] { let Self { masks_444: Align64(masks_444), masks_422: Align64(masks_422), From ead1fea90c86a8fcee94ed2232af25de89e34fea Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Mon, 18 Sep 2023 23:31:21 -0700 Subject: [PATCH 3/3] `fn dav1d_init_wedge_masks`: Translate `fill!` macro, approximately. 
--- src/wedge.rs | 67 +++++++++++++++++----------------------------------- 1 file changed, 22 insertions(+), 45 deletions(-) diff --git a/src/wedge.rs b/src/wedge.rs index 34d96d38f..6e855e48b 100644 --- a/src/wedge.rs +++ b/src/wedge.rs @@ -358,51 +358,28 @@ pub unsafe fn dav1d_init_wedge_masks() { static master: [[[u8; 64]; 64]; N_WEDGE_DIRECTIONS] = build_master(); - { - static wedge_masks: WedgeMasks<{ 32 * 32 }, { (32 / 2) * 32 }, { (32 / 2) * (32 / 2) }> = - WedgeMasks::fill2d_16x2(32, 32, &master, &wedge_codebook_16_heqw, 0x7bfb); - dav1d_wedge_masks[BS_32x32 as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 32 * 16 }, { (32 / 2) * 16 }, { (32 / 2) * (16 / 2) }> = - WedgeMasks::fill2d_16x2(32, 16, &master, &wedge_codebook_16_hltw, 0x7beb); - dav1d_wedge_masks[BS_32x16 as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 32 * 8 }, { (32 / 2) * 8 }, { (32 / 2) * (8 / 2) }> = - WedgeMasks::fill2d_16x2(32, 8, &master, &wedge_codebook_16_hltw, 0x6beb); - dav1d_wedge_masks[BS_32x8 as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 16 * 32 }, { (16 / 2) * 32 }, { (16 / 2) * (32 / 2) }> = - WedgeMasks::fill2d_16x2(16, 32, &master, &wedge_codebook_16_hgtw, 0x7beb); - dav1d_wedge_masks[BS_16x32 as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 16 * 16 }, { (16 / 2) * 16 }, { (16 / 2) * (16 / 2) }> = - WedgeMasks::fill2d_16x2(16, 16, &master, &wedge_codebook_16_heqw, 0x7bfb); - dav1d_wedge_masks[BS_16x16 as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 16 * 8 }, { (16 / 2) * 8 }, { (16 / 2) * (8 / 2) }> = - WedgeMasks::fill2d_16x2(16, 8, &master, &wedge_codebook_16_hltw, 0x7beb); - dav1d_wedge_masks[BS_16x8 as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 8 * 32 }, { (8 / 2) * 32 }, { (8 / 2) * (32 / 2) }> = - WedgeMasks::fill2d_16x2(8, 32, &master, &wedge_codebook_16_hgtw, 0x7aeb); - dav1d_wedge_masks[BS_8x32 
as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 8 * 16 }, { (8 / 2) * 16 }, { (8 / 2) * (16 / 2) }> = - WedgeMasks::fill2d_16x2(8, 16, &master, &wedge_codebook_16_hgtw, 0x7beb); - dav1d_wedge_masks[BS_8x16 as usize] = wedge_masks.slice(); - }; - { - static wedge_masks: WedgeMasks<{ 8 * 8 }, { (8 / 2) * 8 }, { (8 / 2) * (8 / 2) }> = - WedgeMasks::fill2d_16x2(8, 8, &master, &wedge_codebook_16_heqw, 0x7bfb); - dav1d_wedge_masks[BS_8x8 as usize] = wedge_masks.slice(); - }; + macro_rules! fill { + ($w:literal x $h:literal, $cb:expr, $signs:expr) => {{ + static wedge_masks: WedgeMasks< + { $w * $h }, + { ($w / 2) * $h }, + { ($w / 2) * ($h / 2) }, + > = WedgeMasks::fill2d_16x2($w, $h, &master, $cb, $signs); + paste! { + dav1d_wedge_masks[[<BS_ $w x $h>] as usize] = wedge_masks.slice(); + } + }}; + } + + fill!(32 x 32, &wedge_codebook_16_heqw, 0x7bfb); + fill!(32 x 16, &wedge_codebook_16_hltw, 0x7beb); + fill!(32 x 8, &wedge_codebook_16_hltw, 0x6beb); + fill!(16 x 32, &wedge_codebook_16_hgtw, 0x7beb); + fill!(16 x 16, &wedge_codebook_16_heqw, 0x7bfb); + fill!(16 x 8, &wedge_codebook_16_hltw, 0x7beb); + fill!( 8 x 32, &wedge_codebook_16_hgtw, 0x7aeb); + fill!( 8 x 16, &wedge_codebook_16_hgtw, 0x7beb); + fill!( 8 x 8, &wedge_codebook_16_heqw, 0x7bfb); } static ii_dc_mask: Align64<[u8; 32 * 32]> = Align64([32; 32 * 32]);