diff --git a/src/decode.rs b/src/decode.rs index 1a11c8ff8..fb8e62597 100644 --- a/src/decode.rs +++ b/src/decode.rs @@ -78,10 +78,9 @@ use crate::src::internal::Rav1dTaskContext; use crate::src::internal::Rav1dTaskContext_scratch_pal; use crate::src::internal::Rav1dTileState; use crate::src::internal::ScalableMotionParams; -use crate::src::intra_edge::EdgeBranch; use crate::src::intra_edge::EdgeFlags; -use crate::src::intra_edge::EdgeNode; -use crate::src::intra_edge::EdgeTip; +use crate::src::intra_edge::EdgeIndex; +use crate::src::intra_edge::IntraEdges; use crate::src::intra_edge::EDGE_I444_TOP_HAS_RIGHT; use crate::src::ipred::rav1d_intra_pred_dsp_init; use crate::src::levels::mv; @@ -3492,16 +3491,25 @@ unsafe fn decode_sb( t: &mut Rav1dTaskContext, f: &mut Rav1dFrameData, bl: BlockLevel, - node: *const EdgeNode, + edge_index: EdgeIndex, ) -> Result<(), ()> { let ts = &mut *t.ts; let hsz = 16 >> bl; let have_h_split = f.bw > t.bx + hsz; let have_v_split = f.bh > t.by + hsz; + let sb128 = f.seq_hdr().sb128 != 0; + let intra_edge = &IntraEdges::DEFAULT; + if !have_h_split && !have_v_split { assert!(bl < BL_8X8); - return decode_sb(c, t, f, bl + 1, (*(node as *const EdgeBranch)).split[0]); + return decode_sb( + c, + t, + f, + bl + 1, + intra_edge.branch(sb128, edge_index).split[0], + ); } let frame_hdr = &***f.frame_hdr.as_ref().unwrap(); @@ -3554,18 +3562,18 @@ unsafe fn decode_sb( match bp { PARTITION_NONE => { - let node = &*node; + let node = intra_edge.node(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, node.o)?; } PARTITION_H => { - let node = &*node; + let node = intra_edge.node(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, node.h[0])?; t.by += hsz; decode_b(c, t, f, bl, b[0], bp, node.h[1])?; t.by -= hsz; } PARTITION_V => { - let node = &*node; + let node = intra_edge.node(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, node.v[0])?; t.bx += hsz; decode_b(c, t, f, bl, b[0], bp, node.v[1])?; @@ -3573,7 +3581,7 @@ unsafe fn decode_sb( } PARTITION_SPLIT => { if bl == BL_8X8 { - let tip = &*(node as *const EdgeTip); + let tip = intra_edge.tip(sb128, edge_index); assert!(hsz == 1); decode_b(c, t, f, bl, BS_4x4, bp, tip.split[0])?; let tl_filter = t.tl_4x4_filter; @@ -3596,7 +3604,7 @@ unsafe fn decode_sb( (((ts.frame_thread[p].cf as uintptr_t) + 63) & !63) as *mut DynCoef; } } else { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); decode_sb(c, t, f, bl + 1, branch.split[0])?; t.bx += hsz; decode_sb(c, t, f, bl + 1, branch.split[1])?; @@ -3610,7 +3618,7 @@ unsafe fn decode_sb( } } PARTITION_T_TOP_SPLIT => { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, branch.tts[0])?; t.bx += hsz; decode_b(c, t, f, bl, b[0], bp, branch.tts[1])?; @@ -3620,7 +3628,7 @@ unsafe fn decode_sb( t.by -= hsz; } PARTITION_T_BOTTOM_SPLIT => { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, branch.tbs[0])?; t.by += hsz; decode_b(c, t, f, bl, b[1], bp, branch.tbs[1])?; @@ -3630,7 +3638,7 @@ unsafe fn decode_sb( t.by -= hsz; } PARTITION_T_LEFT_SPLIT => { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, branch.tls[0])?; t.by += hsz; decode_b(c, t, f, bl, b[0], bp, branch.tls[1])?; @@ -3640,7 +3648,7 @@ unsafe fn decode_sb( t.bx -= hsz; } PARTITION_T_RIGHT_SPLIT => { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, branch.trs[0])?; t.bx += hsz; decode_b(c, t, f, bl, b[1], bp, branch.trs[1])?; @@ -3650,7 +3658,7 @@ unsafe fn decode_sb( t.bx -= hsz; } PARTITION_H4 => { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, branch.h4[0])?; t.by += hsz >> 1; decode_b(c, t, f, bl, b[0], bp, branch.h4[1])?; @@ -3663,7 +3671,7 @@ unsafe fn decode_sb( t.by -= hsz * 3 >> 1; } PARTITION_V4 => { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); decode_b(c, t, f, bl, b[0], bp, branch.v4[0])?; t.bx += hsz >> 1; decode_b(c, t, f, bl, b[0], bp, branch.v4[1])?; @@ -3704,13 +3712,14 @@ unsafe fn decode_sb( assert!(bl < BL_8X8); if is_split { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); bp = PARTITION_SPLIT; decode_sb(c, t, f, bl + 1, branch.split[0])?; t.bx += hsz; decode_sb(c, t, f, bl + 1, branch.split[1])?; t.bx -= hsz; } else { + let node = intra_edge.node(sb128, edge_index); bp = PARTITION_H; decode_b( c, @@ -3719,7 +3728,7 @@ unsafe fn decode_sb( bl, dav1d_block_sizes[bl as usize][bp as usize][0], bp, - (*node).h[0], + node.h[0], )?; } } else { @@ -3753,13 +3762,14 @@ unsafe fn decode_sb( assert!(bl < BL_8X8); if is_split { - let branch = &*(node as *const EdgeBranch); + let branch = intra_edge.branch(sb128, edge_index); bp = PARTITION_SPLIT; decode_sb(c, t, f, bl + 1, branch.split[0])?; t.by += hsz; decode_sb(c, t, f, bl + 1, branch.split[2])?; t.by -= hsz; } else { + let node = intra_edge.node(sb128, edge_index); bp = PARTITION_V; decode_b( c, @@ -3768,7 +3778,7 @@ unsafe fn decode_sb( bl, dav1d_block_sizes[bl as usize][bp as usize][0], bp, - (*node).v[0], + node.v[0], )?; } } @@ -4101,7 +4111,7 @@ pub(crate) unsafe fn rav1d_decode_tile_sbrow( if c.flush.load(Ordering::Acquire) != 0 { return Err(()); } - decode_sb(c, t, f, root_bl, c.intra_edge.root(root_bl))?; + decode_sb(c, t, f, root_bl, EdgeIndex::root())?; if t.bx & 16 != 0 || f.seq_hdr().sb128 != 0 { t.a = (t.a).offset(1); } @@ -4208,7 +4218,7 @@ pub(crate) unsafe fn rav1d_decode_tile_sbrow( read_restoration_info(t, f, lr, p, frame_type); } } - decode_sb(c, t, f, root_bl, c.intra_edge.root(root_bl))?; + decode_sb(c, t, f, root_bl, EdgeIndex::root())?; if t.bx & 16 != 0 || f.seq_hdr().sb128 != 0 { t.a = (t.a).offset(1); t.lf_mask = (t.lf_mask).offset(1); diff --git a/src/internal.rs b/src/internal.rs index c3e5ec475..710bd5a60 100644 --- a/src/internal.rs +++ b/src/internal.rs @@ -33,18 +33,12 @@ use crate::src::error::Rav1dResult; use crate::src::filmgrain::Rav1dFilmGrainDSPContext; use crate::src::filmgrain::GRAIN_HEIGHT; use crate::src::filmgrain::GRAIN_WIDTH; -use crate::src::intra_edge::EdgeBranch; use crate::src::intra_edge::EdgeFlags; -use crate::src::intra_edge::EdgeNode; -use crate::src::intra_edge::EdgeTip; use crate::src::ipred::Rav1dIntraPredDSPContext; use crate::src::itx::Rav1dInvTxfmDSPContext; use crate::src::levels::Av1Block; -use crate::src::levels::BlockLevel; use crate::src::levels::BlockSize; use crate::src::levels::Filter2d; -use crate::src::levels::BL_128X128; -use crate::src::levels::BL_64X64; use crate::src::lf_mask::Av1Filter; use crate::src::lf_mask::Av1FilterLUT; use crate::src::lf_mask::Av1Restoration; @@ -205,24 +199,6 @@ pub(crate) struct Rav1dContext_refs { pub refpoc: [c_uint; 7], } -#[repr(C)] -pub struct Rav1dContext_intra_edge { - pub branch_sb128: [EdgeBranch; 85], - pub branch_sb64: [EdgeBranch; 21], - pub tip_sb128: [EdgeTip; 256], - pub tip_sb64: [EdgeTip; 64], -} - -impl Rav1dContext_intra_edge { - pub fn root(&self, bl: BlockLevel) -> &EdgeNode { - match bl { - BL_128X128 => &self.branch_sb128[0].node, - BL_64X64 => &self.branch_sb64[0].node, - _ => unreachable!(), - } - } -} - pub(crate) enum Rav1dContextTaskType { /// Worker thread in a multi-threaded context. Worker(JoinHandle<()>), @@ -289,9 +265,6 @@ pub struct Rav1dContext { pub(crate) dsp: [Rav1dDSPContext; 3], /* 8, 10, 12 bits/component */ pub(crate) refmvs_dsp: Rav1dRefmvsDSPContext, - // tree to keep track of which edges are available - pub(crate) intra_edge: Rav1dContext_intra_edge, - pub(crate) allocator: Rav1dPicAllocator, pub(crate) apply_grain: bool, pub(crate) operating_point: c_int, diff --git a/src/intra_edge.rs b/src/intra_edge.rs index 06cd55b9d..3b4131c98 100644 --- a/src/intra_edge.rs +++ b/src/intra_edge.rs @@ -1,12 +1,9 @@ +use crate::src::enum_map::DefaultValue; use crate::src::levels::BlockLevel; use crate::src::levels::BL_128X128; use crate::src::levels::BL_16X16; use crate::src::levels::BL_32X32; use crate::src::levels::BL_64X64; -use crate::src::levels::BL_8X8; -use std::iter; -use std::ptr; -use std::slice; pub type EdgeFlags = u8; pub const EDGE_I420_LEFT_HAS_BOTTOM: EdgeFlags = 32; @@ -16,6 +13,41 @@ pub const EDGE_I420_TOP_HAS_RIGHT: EdgeFlags = 4; pub const EDGE_I422_TOP_HAS_RIGHT: EdgeFlags = 2; pub const EDGE_I444_TOP_HAS_RIGHT: EdgeFlags = 1; +pub const EDGE_LEFT_HAS_BOTTOM: EdgeFlags = + EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM; +pub const EDGE_TOP_HAS_RIGHT: EdgeFlags = + EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT; + +const B: usize = 4; + +#[derive(Clone, Copy, PartialEq, Eq)] +pub enum EdgeKind { + Tip, + Branch, +} + +#[derive(Clone, Copy)] +pub struct EdgeIndex { + index: u8, + kind: EdgeKind, +} + +impl EdgeIndex { + pub const fn root() -> Self { + Self { + index: 0, + kind: EdgeKind::Branch, + } + } + + #[must_use] + pub const fn pop_front(mut self) -> (Self, Self) { + let front = self; + self.index = self.index.wrapping_add(1); + (front, self) + } +} + #[repr(C)] pub struct EdgeNode { pub o: EdgeFlags, @@ -26,7 +58,7 @@ pub struct EdgeNode { #[repr(C)] pub struct EdgeTip { pub node: EdgeNode, - pub split: [EdgeFlags; 4], + pub split: [EdgeFlags; B], } #[repr(C)] @@ -38,182 +70,334 @@ pub struct EdgeBranch { pub trs: [EdgeFlags; 3], pub h4: [EdgeFlags; 4], pub v4: [EdgeFlags; 4], - pub split: [*mut EdgeNode; 4], -} - -struct ModeSelMem { - pub nwc: [*mut EdgeBranch; 3], - pub nt: *mut EdgeTip, -} - -unsafe fn init_edges(node: *mut EdgeNode, bl: BlockLevel, edge_flags: EdgeFlags) { - (*node).o = edge_flags; - - if bl == BL_8X8 { - let nt = &mut *(node as *mut EdgeTip); - let node = &mut nt.node; - - node.h[0] = edge_flags - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - node.h[1] = edge_flags - & ((EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM) - | EDGE_I420_TOP_HAS_RIGHT); - - node.v[0] = edge_flags - | (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - node.v[1] = edge_flags - & ((EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT) - | EDGE_I420_LEFT_HAS_BOTTOM - | EDGE_I422_LEFT_HAS_BOTTOM); - - nt.split[0] = (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT) - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - nt.split[1] = (edge_flags - & (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT)) - | EDGE_I422_LEFT_HAS_BOTTOM; - nt.split[2] = edge_flags | EDGE_I444_TOP_HAS_RIGHT; - nt.split[3] = edge_flags - & (EDGE_I420_TOP_HAS_RIGHT | EDGE_I420_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM); - } else { - let nwc = &mut *(node as *mut EdgeBranch); - let node = &mut nwc.node; - - node.h[0] = edge_flags - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - node.h[1] = edge_flags - & (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - - node.v[0] = edge_flags - | (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - node.v[1] = edge_flags - & (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - - nwc.h4[0] = edge_flags - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - nwc.h4[1] = - EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM; - nwc.h4[2] = nwc.h4[1]; - nwc.h4[3] = edge_flags - & (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - if bl == BL_16X16 { - nwc.h4[1] |= edge_flags & EDGE_I420_TOP_HAS_RIGHT; - } + pub split: [EdgeIndex; B], +} - nwc.v4[0] = edge_flags - | (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - nwc.v4[1] = EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT; - nwc.v4[2] = nwc.v4[1]; - nwc.v4[3] = edge_flags - & (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - if bl == BL_16X16 { - nwc.v4[1] |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM); +impl EdgeTip { + const fn new(edge_flags: EdgeFlags) -> Self { + let o = edge_flags; + let h = [ + edge_flags | EDGE_LEFT_HAS_BOTTOM, + edge_flags & (EDGE_LEFT_HAS_BOTTOM | EDGE_I420_TOP_HAS_RIGHT), + ]; + let v = [ + edge_flags | EDGE_TOP_HAS_RIGHT, + edge_flags + & (EDGE_TOP_HAS_RIGHT | EDGE_I420_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM), + ]; + let node = EdgeNode { o, h, v }; + + let split = [ + EDGE_TOP_HAS_RIGHT | EDGE_LEFT_HAS_BOTTOM, + (edge_flags & EDGE_TOP_HAS_RIGHT) | EDGE_I422_LEFT_HAS_BOTTOM, + edge_flags | EDGE_I444_TOP_HAS_RIGHT, + edge_flags + & (EDGE_I420_TOP_HAS_RIGHT | EDGE_I420_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM), + ]; + + Self { node, split } + } +} + +impl EdgeBranch { + const fn new(edge_flags: EdgeFlags, bl: BlockLevel) -> Self { + let o = edge_flags; + let h = [ + edge_flags | EDGE_LEFT_HAS_BOTTOM, + edge_flags & EDGE_LEFT_HAS_BOTTOM, + ]; + let v = [ + edge_flags | EDGE_TOP_HAS_RIGHT, + edge_flags & EDGE_TOP_HAS_RIGHT, + ]; + let node = EdgeNode { o, h, v }; + + let h4 = [ + edge_flags | EDGE_LEFT_HAS_BOTTOM, + EDGE_LEFT_HAS_BOTTOM + | (if bl == BL_16X16 { + edge_flags & EDGE_I420_TOP_HAS_RIGHT + } else { + 0 as EdgeFlags + }), + EDGE_LEFT_HAS_BOTTOM, + edge_flags & EDGE_LEFT_HAS_BOTTOM, + ]; + + let v4 = [ + edge_flags | EDGE_TOP_HAS_RIGHT, + EDGE_TOP_HAS_RIGHT + | (if bl == BL_16X16 { + edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM) + } else { + 0 as EdgeFlags + }), + EDGE_TOP_HAS_RIGHT, + edge_flags & EDGE_TOP_HAS_RIGHT, + ]; + + let tls = [ + EDGE_TOP_HAS_RIGHT | EDGE_LEFT_HAS_BOTTOM, + edge_flags & EDGE_LEFT_HAS_BOTTOM, + edge_flags & EDGE_TOP_HAS_RIGHT, + ]; + let trs = [ + edge_flags | EDGE_TOP_HAS_RIGHT, + edge_flags | EDGE_LEFT_HAS_BOTTOM, + 0 as EdgeFlags, + ]; + let tts = [ + EDGE_TOP_HAS_RIGHT | EDGE_LEFT_HAS_BOTTOM, + edge_flags & EDGE_TOP_HAS_RIGHT, + edge_flags & EDGE_LEFT_HAS_BOTTOM, + ]; + let tbs = [ + edge_flags | EDGE_LEFT_HAS_BOTTOM, + edge_flags | EDGE_TOP_HAS_RIGHT, + 0 as EdgeFlags, + ]; + + let split = [EdgeIndex::root(); 4]; + + Self { + node, + h4, + v4, + tls, + trs, + tts, + tbs, + split, } + } +} - nwc.tls[0] = (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT) - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - nwc.tls[1] = edge_flags - & (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - nwc.tls[2] = edge_flags - & (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - - nwc.trs[0] = edge_flags - | (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - nwc.trs[1] = edge_flags - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - nwc.trs[2] = 0 as EdgeFlags; - - nwc.tts[0] = (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT) - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - nwc.tts[1] = edge_flags - & (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - nwc.tts[2] = edge_flags - & (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - - nwc.tbs[0] = edge_flags - | (EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM); - nwc.tbs[1] = edge_flags - | (EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT); - nwc.tbs[2] = 0 as EdgeFlags; - }; -} - -unsafe fn init_mode_node( - nwc: &mut EdgeBranch, - bl: BlockLevel, - mem: &mut ModeSelMem, - top_has_right: bool, - left_has_bottom: bool, -) { - init_edges( - &mut nwc.node, - bl, - (if top_has_right { - EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT - } else { - 0 as EdgeFlags - }) | (if left_has_bottom { - EDGE_I444_LEFT_HAS_BOTTOM | EDGE_I422_LEFT_HAS_BOTTOM | EDGE_I420_LEFT_HAS_BOTTOM - } else { - 0 as EdgeFlags - }), - ); - if bl == BL_16X16 { - let nt = slice::from_raw_parts_mut(mem.nt, nwc.split.len()); - mem.nt = mem.nt.offset(nt.len() as isize); - for (n, (split, nt)) in iter::zip(&mut nwc.split, nt).enumerate() { - *split = &mut nt.node; - init_edges( - &mut nt.node, - bl + 1, - ((if n == 3 || (n == 1 && !top_has_right) { +impl DefaultValue for EdgeTip { + const DEFAULT: Self = Self::new(0 as EdgeFlags); +} + +impl DefaultValue for EdgeBranch { + const DEFAULT: Self = Self::new(0 as EdgeFlags, 0 as BlockLevel); +} + +struct EdgeIndices { + branch: [EdgeIndex; 3], + tip: EdgeIndex, +} + +#[repr(C)] +struct IntraEdge { + branch: [EdgeBranch; N_BRANCH], + tip: [EdgeTip; N_TIP], +} + +const fn level_index(mut level: u8) -> u8 { + let mut level_size = 1; + let mut index = 0; + while level > 0 { + index += level_size; + level_size *= B; + level -= 1; + } + index as u8 +} + +impl + IntraEdge +{ + #[must_use] + const fn init_mode_node( + mut self, + branch_index: EdgeIndex, + bl: BlockLevel, + mut indices: EdgeIndices, + top_has_right: bool, + left_has_bottom: bool, + ) -> (Self, EdgeIndices) { + let mut branch = EdgeBranch::new( + (if top_has_right { + EDGE_TOP_HAS_RIGHT + } else { + 0 as EdgeFlags + }) | (if left_has_bottom { + EDGE_LEFT_HAS_BOTTOM + } else { + 0 as EdgeFlags + }), + bl, + ); + if bl == BL_16X16 { + let mut n = 0; + while n < B as u8 { + let (tip, next) = indices.tip.pop_front(); + indices.tip = next; + branch.split[n as usize] = tip; + let edge_flags = (if n == 3 || (n == 1 && !top_has_right) { 0 as EdgeFlags } else { - EDGE_I444_TOP_HAS_RIGHT | EDGE_I422_TOP_HAS_RIGHT | EDGE_I420_TOP_HAS_RIGHT + EDGE_TOP_HAS_RIGHT }) | (if !(n == 0 || (n == 2 && left_has_bottom)) { 0 as EdgeFlags } else { - EDGE_I444_LEFT_HAS_BOTTOM - | EDGE_I422_LEFT_HAS_BOTTOM - | EDGE_I420_LEFT_HAS_BOTTOM - })) as EdgeFlags, - ); + EDGE_LEFT_HAS_BOTTOM + }); + self.tip[tip.index as usize] = EdgeTip::new(edge_flags); + n += 1; + } + } else { + let mut n = 0; + while n < B as u8 { + let (child_branch, next) = indices.branch[bl as usize].pop_front(); + indices.branch[bl as usize] = next; + branch.split[n as usize] = child_branch; + (self, indices) = self.init_mode_node( + child_branch, + bl + 1, + indices, + !(n == 3 || (n == 1 && !top_has_right)), + n == 0 || (n == 2 && left_has_bottom), + ); + n += 1; + } + }; + self.branch[branch_index.index as usize] = branch; + (self, indices) + } + + const fn init(mut self) -> Self { + let mut indices = EdgeIndices { + branch: [EdgeIndex { + index: 0, + kind: EdgeKind::Branch, + }; 3], + tip: EdgeIndex { + index: 0, + kind: EdgeKind::Tip, + }, + }; + + let sb128 = SB128 as u8; + + let mut bl = BL_128X128; + while bl <= BL_32X32 { + indices.branch[bl as usize].index = level_index(bl + sb128); + bl += 1; + } + + let bl = if SB128 { BL_128X128 } else { BL_64X64 }; + (self, indices) = self.init_mode_node(EdgeIndex::root(), bl, indices, true, false); + + let mut bl = BL_128X128; + while bl <= BL_32X32 { + let index = indices.branch[bl as usize].index; + if index != 0 { + assert!(index == level_index(1 + bl + sb128)); + } + bl += 1; } - } else { - let nwc_children = slice::from_raw_parts_mut(mem.nwc[bl as usize], nwc.split.len()); - mem.nwc[bl as usize] = mem.nwc[bl as usize].offset(nwc_children.len() as isize); - for (n, (split, nwc_child)) in iter::zip(&mut nwc.split, nwc_children).enumerate() { - *split = &mut nwc_child.node; - init_mode_node( - nwc_child, - bl + 1, - mem, - !(n == 3 || (n == 1 && !top_has_right)), - n == 0 || (n == 2 && left_has_bottom), - ); + assert!(indices.tip.index == self.tip.len() as u8); + + self + } + + /// Check that all indices are in bound so that bounds checks are not needed at runtime. + const fn check_indices(self) -> Self { + let mut i = 0; + while i < self.branch.len() { + let mut j = 0; + while j < B { + let edge = self.branch[i].split[j]; + let index = edge.index as usize; + match edge.kind { + EdgeKind::Branch => assert!(index < self.branch.len()), + EdgeKind::Tip => assert!(index < self.tip.len()), + } + j += 1; + } + i += 1; + } + + self + } + + const fn new() -> Self { + Self { + branch: [EdgeBranch::DEFAULT; N_BRANCH], + tip: [EdgeTip::DEFAULT; N_TIP], } - }; -} - -pub unsafe fn rav1d_init_mode_tree(root: *mut EdgeBranch, nt: &mut [EdgeTip], allow_sb128: bool) { - let mut mem = ModeSelMem { - nwc: [ptr::null_mut(); 3], - nt: nt.as_mut_ptr(), - }; - if allow_sb128 { - mem.nwc[BL_128X128 as usize] = root.offset(1); - mem.nwc[BL_64X64 as usize] = root.offset(1 + 4); - mem.nwc[BL_32X32 as usize] = root.offset(1 + 4 + 16); - init_mode_node(&mut *root, BL_128X128, &mut mem, true, false); - assert_eq!(mem.nwc[BL_128X128 as usize], root.offset(1 + 4)); - assert_eq!(mem.nwc[BL_64X64 as usize], root.offset(1 + 4 + 16)); - assert_eq!(mem.nwc[BL_32X32 as usize], root.offset(1 + 4 + 16 + 64)); - } else { - mem.nwc[BL_128X128 as usize] = ptr::null_mut(); - mem.nwc[BL_64X64 as usize] = root.offset(1); - mem.nwc[BL_32X32 as usize] = root.offset(1 + 4); - init_mode_node(&mut *root, BL_64X64, &mut mem, true, false); - assert_eq!(mem.nwc[BL_64X64 as usize], root.offset(1 + 4)); - assert_eq!(mem.nwc[BL_32X32 as usize], root.offset(1 + 4 + 16)); - }; - assert_eq!(mem.nt, nt.as_mut_ptr_range().end); + .init() + .check_indices() + } + + fn edge(edges: &[E; N], edge: EdgeIndex, kind: EdgeKind) -> &E { + assert!(edge.kind == kind); + if cfg!(debug_assertions) { + &edges[edge.index as usize] + } else { + // Safety: Already checked in `Self::check_indices`, and `EdgeIndex`'s fields are private. + unsafe { edges.get_unchecked(edge.index as usize) } + } + } + + pub fn branch(&self, branch: EdgeIndex) -> &EdgeBranch { + Self::edge(&self.branch, branch, EdgeKind::Branch) + } + + pub fn tip(&self, tip: EdgeIndex) -> &EdgeTip { + Self::edge(&self.tip, tip, EdgeKind::Tip) + } + + pub fn node(&self, node: EdgeIndex) -> &EdgeNode { + match node.kind { + EdgeKind::Branch => &self.branch(node).node, + EdgeKind::Tip => &self.tip(node).node, + } + } +} + +/// A tree to keep track of which edges are available. +#[repr(C)] +pub struct IntraEdges { + sb128: IntraEdge, + sb64: IntraEdge, +} + +impl IntraEdges { + #[inline(always)] + const fn new() -> Self { + Self { + sb128: IntraEdge::new(), + sb64: IntraEdge::new(), + } + } + + pub fn branch(&self, sb128: bool, branch: EdgeIndex) -> &EdgeBranch { + assert!(branch.kind == EdgeKind::Branch); // Optimizes better before the `if`. + if sb128 { + self.sb128.branch(branch) + } else { + self.sb64.branch(branch) + } + } + + pub fn tip(&self, sb128: bool, tip: EdgeIndex) -> &EdgeTip { + assert!(tip.kind == EdgeKind::Tip); // Optimizes better before the `if`. + if sb128 { + self.sb128.tip(tip) + } else { + self.sb64.tip(tip) + } + } + + pub fn node(&self, sb128: bool, node: EdgeIndex) -> &EdgeNode { + if sb128 { + self.sb128.node(node) + } else { + self.sb64.node(node) + } + } +} + +impl DefaultValue for IntraEdges { + const DEFAULT: Self = Self::new(); } diff --git a/src/lib.rs b/src/lib.rs index b0efb8d26..fafd59736 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -39,7 +39,6 @@ use crate::src::internal::Rav1dTask; use crate::src::internal::Rav1dTaskContext; use crate::src::internal::Rav1dTaskContext_task_thread; use crate::src::internal::TaskThreadData; -use crate::src::intra_edge::rav1d_init_mode_tree; use crate::src::log::Rav1dLog as _; use crate::src::mem::freep; use crate::src::mem::rav1d_alloc_aligned; @@ -362,16 +361,6 @@ pub(crate) unsafe fn rav1d_open(c_out: &mut *mut Rav1dContext, s: &Rav1dSettings }) .collect(); rav1d_refmvs_dsp_init(&mut (*c).refmvs_dsp); - rav1d_init_mode_tree( - &mut (*c).intra_edge.branch_sb128[0], - &mut (*c).intra_edge.tip_sb128, - true, - ); - rav1d_init_mode_tree( - &mut (*c).intra_edge.branch_sb64[0], - &mut (*c).intra_edge.tip_sb64, - false, - ); pthread_attr_destroy(&mut thread_attr); Ok(()) }