Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

mod intra_edge: Backport memory reduction from dav1d 1.2.1 #830

Merged
Merged 4 commits on Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 24 additions & 28 deletions src/decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2390,19 +2390,19 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,
if (bl == BL_8X8) {
const EdgeTip *const tip = (const EdgeTip *) node;
assert(hsz == 1);
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, EDGE_ALL_TR_AND_BL))
return -1;
const enum Filter2d tl_filter = t->tl_4x4_filter;
t->bx++;
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[0]))
return -1;
t->bx--;
t->by++;
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[1]))
return -1;
t->bx++;
t->tl_4x4_filter = tl_filter;
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[3]))
if (decode_b(t, bl, BS_4x4, PARTITION_SPLIT, tip->split[2]))
return -1;
t->bx--;
t->by--;
Expand Down Expand Up @@ -2435,91 +2435,87 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,
}
break;
case PARTITION_T_TOP_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[0]))
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, EDGE_ALL_TR_AND_BL))
return -1;
t->bx += hsz;
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, branch->tts[1]))
if (decode_b(t, bl, b[0], PARTITION_T_TOP_SPLIT, node->v[1]))
return -1;
t->bx -= hsz;
t->by += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, branch->tts[2]))
if (decode_b(t, bl, b[1], PARTITION_T_TOP_SPLIT, node->h[1]))
return -1;
t->by -= hsz;
break;
}
case PARTITION_T_BOTTOM_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, branch->tbs[0]))
if (decode_b(t, bl, b[0], PARTITION_T_BOTTOM_SPLIT, node->h[0]))
return -1;
t->by += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[1]))
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, node->v[0]))
return -1;
t->bx += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, branch->tbs[2]))
if (decode_b(t, bl, b[1], PARTITION_T_BOTTOM_SPLIT, 0))
return -1;
t->bx -= hsz;
t->by -= hsz;
break;
}
case PARTITION_T_LEFT_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[0]))
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, EDGE_ALL_TR_AND_BL))
return -1;
t->by += hsz;
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, branch->tls[1]))
if (decode_b(t, bl, b[0], PARTITION_T_LEFT_SPLIT, node->h[1]))
return -1;
t->by -= hsz;
t->bx += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, branch->tls[2]))
if (decode_b(t, bl, b[1], PARTITION_T_LEFT_SPLIT, node->v[1]))
return -1;
t->bx -= hsz;
break;
}
case PARTITION_T_RIGHT_SPLIT: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, branch->trs[0]))
if (decode_b(t, bl, b[0], PARTITION_T_RIGHT_SPLIT, node->v[0]))
return -1;
t->bx += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[1]))
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, node->h[0]))
return -1;
t->by += hsz;
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, branch->trs[2]))
if (decode_b(t, bl, b[1], PARTITION_T_RIGHT_SPLIT, 0))
return -1;
t->by -= hsz;
t->bx -= hsz;
break;
}
case PARTITION_H4: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[0]))
if (decode_b(t, bl, b[0], PARTITION_H4, node->h[0]))
return -1;
t->by += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[1]))
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4))
return -1;
t->by += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[2]))
if (decode_b(t, bl, b[0], PARTITION_H4, EDGE_ALL_LEFT_HAS_BOTTOM))
return -1;
t->by += hsz >> 1;
if (t->by < f->bh)
if (decode_b(t, bl, b[0], PARTITION_H4, branch->h4[3]))
if (decode_b(t, bl, b[0], PARTITION_H4, node->h[1]))
return -1;
t->by -= hsz * 3 >> 1;
break;
}
case PARTITION_V4: {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[0]))
if (decode_b(t, bl, b[0], PARTITION_V4, node->v[0]))
return -1;
t->bx += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[1]))
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4))
return -1;
t->bx += hsz >> 1;
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[2]))
if (decode_b(t, bl, b[0], PARTITION_V4, EDGE_ALL_TOP_HAS_RIGHT))
return -1;
t->bx += hsz >> 1;
if (t->bx < f->bw)
if (decode_b(t, bl, b[0], PARTITION_V4, branch->v4[3]))
if (decode_b(t, bl, b[0], PARTITION_V4, node->v[1]))
return -1;
t->bx -= hsz * 3 >> 1;
break;
Expand Down
58 changes: 30 additions & 28 deletions src/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3575,16 +3575,16 @@ unsafe fn decode_sb(
None => {
let tip = intra_edge.tip(sb128, edge_index);
assert!(hsz == 1);
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[0])?;
decode_b(c, t, f, bl, BS_4x4, bp, EdgeFlags::ALL_TR_AND_BL)?;
let tl_filter = t.tl_4x4_filter;
t.bx += 1;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[1])?;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[0])?;
t.bx -= 1;
t.by += 1;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[2])?;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[1])?;
t.bx += 1;
t.tl_4x4_filter = tl_filter;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[3])?;
decode_b(c, t, f, bl, BS_4x4, bp, tip.split[2])?;
t.bx -= 1;
t.by -= 1;
if cfg!(target_arch = "x86_64") && t.frame_thread.pass != 0 {
Expand Down Expand Up @@ -3612,68 +3612,70 @@ unsafe fn decode_sb(
}
}
PARTITION_T_TOP_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.tts[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_TR_AND_BL)?;
t.bx += hsz;
decode_b(c, t, f, bl, b[0], bp, branch.tts[1])?;
decode_b(c, t, f, bl, b[0], bp, node.v[1])?;
t.bx -= hsz;
t.by += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tts[2])?;
decode_b(c, t, f, bl, b[1], bp, node.h[1])?;
t.by -= hsz;
}
PARTITION_T_BOTTOM_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.tbs[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, node.h[0])?;
t.by += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tbs[1])?;
decode_b(c, t, f, bl, b[1], bp, node.v[0])?;
t.bx += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tbs[2])?;
decode_b(c, t, f, bl, b[1], bp, EdgeFlags::empty())?;
t.bx -= hsz;
t.by -= hsz;
}
PARTITION_T_LEFT_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.tls[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_TR_AND_BL)?;
t.by += hsz;
decode_b(c, t, f, bl, b[0], bp, branch.tls[1])?;
decode_b(c, t, f, bl, b[0], bp, node.h[1])?;
t.by -= hsz;
t.bx += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.tls[2])?;
decode_b(c, t, f, bl, b[1], bp, node.v[1])?;
t.bx -= hsz;
}
PARTITION_T_RIGHT_SPLIT => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.trs[0])?;
let node = intra_edge.node(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, node.v[0])?;
t.bx += hsz;
decode_b(c, t, f, bl, b[1], bp, branch.trs[1])?;
decode_b(c, t, f, bl, b[1], bp, node.h[0])?;
t.by += hsz;
decode_b(c, t, f, bl, b[1], bp, (*branch).trs[2])?;
decode_b(c, t, f, bl, b[1], bp, EdgeFlags::empty())?;
t.by -= hsz;
t.bx -= hsz;
}
PARTITION_H4 => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.h4[0])?;
let node = &branch.node;
decode_b(c, t, f, bl, b[0], bp, node.h[0])?;
t.by += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.h4[1])?;
decode_b(c, t, f, bl, b[0], bp, branch.h4)?;
t.by += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.h4[2])?;
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_LEFT_HAS_BOTTOM)?;
t.by += hsz >> 1;
if t.by < f.bh {
decode_b(c, t, f, bl, b[0], bp, branch.h4[3])?;
decode_b(c, t, f, bl, b[0], bp, node.h[1])?;
}
t.by -= hsz * 3 >> 1;
}
PARTITION_V4 => {
let branch = intra_edge.branch(sb128, edge_index);
decode_b(c, t, f, bl, b[0], bp, branch.v4[0])?;
let node = &branch.node;
decode_b(c, t, f, bl, b[0], bp, node.v[0])?;
t.bx += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.v4[1])?;
decode_b(c, t, f, bl, b[0], bp, branch.v4)?;
t.bx += hsz >> 1;
decode_b(c, t, f, bl, b[0], bp, branch.v4[2])?;
decode_b(c, t, f, bl, b[0], bp, EdgeFlags::ALL_TOP_HAS_RIGHT)?;
t.bx += hsz >> 1;
if t.bx < f.bw {
decode_b(c, t, f, bl, b[0], bp, branch.v4[3])?;
decode_b(c, t, f, bl, b[0], bp, node.v[1])?;
}
t.bx -= hsz * 3 >> 1;
}
Expand Down
69 changes: 20 additions & 49 deletions src/intra_edge.c
Original file line number Diff line number Diff line change
Expand Up @@ -44,66 +44,37 @@ static void init_edges(EdgeNode *const node,
const enum EdgeFlags edge_flags)
{
node->o = edge_flags;
node->h[0] = edge_flags | EDGE_ALL_LEFT_HAS_BOTTOM;
node->v[0] = edge_flags | EDGE_ALL_TOP_HAS_RIGHT;

#define ALL_FL(t) (EDGE_I444_##t | EDGE_I422_##t | EDGE_I420_##t)
if (bl == BL_8X8) {
EdgeTip *const nt = (EdgeTip *) node;

node->h[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
node->h[1] = edge_flags & (ALL_FL(LEFT_HAS_BOTTOM) |
node->h[1] = edge_flags & (EDGE_ALL_LEFT_HAS_BOTTOM |
EDGE_I420_TOP_HAS_RIGHT);

node->v[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
node->v[1] = edge_flags & (ALL_FL(TOP_HAS_RIGHT) |
node->v[1] = edge_flags & (EDGE_ALL_TOP_HAS_RIGHT |
EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);

nt->split[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
nt->split[1] = (edge_flags & ALL_FL(TOP_HAS_RIGHT)) |
nt->split[0] = (edge_flags & EDGE_ALL_TOP_HAS_RIGHT) |
EDGE_I422_LEFT_HAS_BOTTOM;
nt->split[2] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
nt->split[3] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
nt->split[1] = edge_flags | EDGE_I444_TOP_HAS_RIGHT;
nt->split[2] = edge_flags & (EDGE_I420_TOP_HAS_RIGHT |
EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);
} else {
EdgeBranch *const nwc = (EdgeBranch *) node;

node->h[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
node->h[1] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);

node->v[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
node->v[1] = edge_flags & ALL_FL(TOP_HAS_RIGHT);

nwc->h4[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
nwc->h4[1] =
nwc->h4[2] = ALL_FL(LEFT_HAS_BOTTOM);
nwc->h4[3] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
if (bl == BL_16X16)
nwc->h4[1] |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;

nwc->v4[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
nwc->v4[1] =
nwc->v4[2] = ALL_FL(TOP_HAS_RIGHT);
nwc->v4[3] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
if (bl == BL_16X16)
nwc->v4[1] |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);

nwc->tls[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
nwc->tls[1] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);
nwc->tls[2] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
node->h[1] = edge_flags & EDGE_ALL_LEFT_HAS_BOTTOM;
node->v[1] = edge_flags & EDGE_ALL_TOP_HAS_RIGHT;

nwc->trs[0] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
nwc->trs[1] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
nwc->trs[2] = 0;

nwc->tts[0] = ALL_FL(TOP_HAS_RIGHT) | ALL_FL(LEFT_HAS_BOTTOM);
nwc->tts[1] = edge_flags & ALL_FL(TOP_HAS_RIGHT);
nwc->tts[2] = edge_flags & ALL_FL(LEFT_HAS_BOTTOM);

nwc->tbs[0] = edge_flags | ALL_FL(LEFT_HAS_BOTTOM);
nwc->tbs[1] = edge_flags | ALL_FL(TOP_HAS_RIGHT);
nwc->tbs[2] = 0;
nwc->h4 = EDGE_ALL_LEFT_HAS_BOTTOM;
nwc->v4 = EDGE_ALL_TOP_HAS_RIGHT;
if (bl == BL_16X16) {
nwc->h4 |= edge_flags & EDGE_I420_TOP_HAS_RIGHT;
nwc->v4 |= edge_flags & (EDGE_I420_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM);
}
}
}

Expand All @@ -114,17 +85,17 @@ static void init_mode_node(EdgeBranch *const nwc,
const int left_has_bottom)
{
init_edges(&nwc->node, bl,
(top_has_right ? ALL_FL(TOP_HAS_RIGHT) : 0) |
(left_has_bottom ? ALL_FL(LEFT_HAS_BOTTOM) : 0));
(top_has_right ? EDGE_ALL_TOP_HAS_RIGHT : 0) |
(left_has_bottom ? EDGE_ALL_LEFT_HAS_BOTTOM : 0));
if (bl == BL_16X16) {
for (int n = 0; n < 4; n++) {
EdgeTip *const nt = mem->nt++;
nwc->split[n] = &nt->node;
init_edges(&nt->node, bl + 1,
((n == 3 || (n == 1 && !top_has_right)) ? 0 :
ALL_FL(TOP_HAS_RIGHT)) |
EDGE_ALL_TOP_HAS_RIGHT) |
(!(n == 0 || (n == 2 && left_has_bottom)) ? 0 :
ALL_FL(LEFT_HAS_BOTTOM)));
EDGE_ALL_LEFT_HAS_BOTTOM));
}
} else {
for (int n = 0; n < 4; n++) {
Expand Down
20 changes: 14 additions & 6 deletions src/intra_edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,20 @@
#include <stdint.h>

enum EdgeFlags {
EDGE_I444_TOP_HAS_RIGHT = 1 << 0,
EDGE_I422_TOP_HAS_RIGHT = 1 << 1,
EDGE_I420_TOP_HAS_RIGHT = 1 << 2,
EDGE_I444_TOP_HAS_RIGHT = 1 << 0,
EDGE_I422_TOP_HAS_RIGHT = 1 << 1,
EDGE_I420_TOP_HAS_RIGHT = 1 << 2,
EDGE_I444_LEFT_HAS_BOTTOM = 1 << 3,
EDGE_I422_LEFT_HAS_BOTTOM = 1 << 4,
EDGE_I420_LEFT_HAS_BOTTOM = 1 << 5,
EDGE_ALL_TOP_HAS_RIGHT = EDGE_I444_TOP_HAS_RIGHT |
EDGE_I422_TOP_HAS_RIGHT |
EDGE_I420_TOP_HAS_RIGHT,
EDGE_ALL_LEFT_HAS_BOTTOM = EDGE_I444_LEFT_HAS_BOTTOM |
EDGE_I422_LEFT_HAS_BOTTOM |
EDGE_I420_LEFT_HAS_BOTTOM,
EDGE_ALL_TR_AND_BL = EDGE_ALL_TOP_HAS_RIGHT |
EDGE_ALL_LEFT_HAS_BOTTOM,
};

typedef struct EdgeNode {
Expand All @@ -45,13 +53,13 @@ typedef struct EdgeNode {

typedef struct EdgeTip {
EdgeNode node;
uint8_t /* enum EdgeFlags */ split[4];
uint8_t /* enum EdgeFlags */ split[3];
} EdgeTip;

typedef struct EdgeBranch {
EdgeNode node;
uint8_t /* enum EdgeFlags */ tts[3], tbs[3], tls[3], trs[3], h4[4], v4[4];
EdgeNode *split[4];
uint8_t /* enum EdgeFlags */ h4, v4;
const EdgeNode *split[4];
} EdgeBranch;

void dav1d_init_mode_tree(EdgeNode *const root, EdgeTip *const nt,
Expand Down
Loading
Loading