Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport intra_edge optimizations from dav1d 1.2.1 #874

Merged
merged 2 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 11 additions & 14 deletions src/decode.c
Original file line number Diff line number Diff line change
Expand Up @@ -2329,7 +2329,7 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,

if (!have_h_split && !have_v_split) {
assert(bl < BL_8X8);
return decode_sb(t, bl + 1, ((const EdgeBranch *) node)->split[0]);
return decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0));
}

uint16_t *pc;
Expand Down Expand Up @@ -2417,18 +2417,17 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,
}
#endif
} else {
const EdgeBranch *const branch = (const EdgeBranch *) node;
if (decode_sb(t, bl + 1, branch->split[0]))
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0)))
return 1;
t->bx += hsz;
if (decode_sb(t, bl + 1, branch->split[1]))
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1)))
return 1;
t->bx -= hsz;
t->by += hsz;
if (decode_sb(t, bl + 1, branch->split[2]))
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2)))
return 1;
t->bx += hsz;
if (decode_sb(t, bl + 1, branch->split[3]))
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 3)))
return 1;
t->bx -= hsz;
t->by -= hsz;
Expand Down Expand Up @@ -2538,11 +2537,10 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,

assert(bl < BL_8X8);
if (is_split) {
const EdgeBranch *const branch = (const EdgeBranch *) node;
bp = PARTITION_SPLIT;
if (decode_sb(t, bl + 1, branch->split[0])) return 1;
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
t->bx += hsz;
if (decode_sb(t, bl + 1, branch->split[1])) return 1;
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 1))) return 1;
t->bx -= hsz;
} else {
bp = PARTITION_H;
Expand All @@ -2569,11 +2567,10 @@ static int decode_sb(Dav1dTaskContext *const t, const enum BlockLevel bl,

assert(bl < BL_8X8);
if (is_split) {
const EdgeBranch *const branch = (const EdgeBranch *) node;
bp = PARTITION_SPLIT;
if (decode_sb(t, bl + 1, branch->split[0])) return 1;
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 0))) return 1;
t->by += hsz;
if (decode_sb(t, bl + 1, branch->split[2])) return 1;
if (decode_sb(t, bl + 1, INTRA_EDGE_SPLIT(node, 2))) return 1;
t->by -= hsz;
} else {
bp = PARTITION_V;
Expand Down Expand Up @@ -2812,7 +2809,7 @@ int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) {
{
if (atomic_load_explicit(c->flush, memory_order_acquire))
return 1;
if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
return 1;
if (t->bx & 16 || f->seq_hdr->sb128)
t->a++;
Expand Down Expand Up @@ -2901,7 +2898,7 @@ int dav1d_decode_tile_sbrow(Dav1dTaskContext *const t) {
read_restoration_info(t, lr, p, frame_type);
}
}
if (decode_sb(t, root_bl, c->intra_edge.root[root_bl]))
if (decode_sb(t, root_bl, dav1d_intra_edge_tree[root_bl]))
return 1;
if (t->bx & 16 || f->seq_hdr->sb128) {
t->a++;
Expand Down
9 changes: 0 additions & 9 deletions src/internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,15 +175,6 @@ struct Dav1dContext {
Dav1dDSPContext dsp[3 /* 8, 10, 12 bits/component */];
Dav1dRefmvsDSPContext refmvs_dsp;

// tree to keep track of which edges are available
struct Dav1dContext_intra_edge {
EdgeNode *root[2 /* BL_128X128 vs. BL_64X64 */];
EdgeBranch branch_sb128[1 + 4 + 16 + 64];
EdgeBranch branch_sb64[1 + 4 + 16];
EdgeTip tip_sb128[256];
EdgeTip tip_sb64[64];
} intra_edge;

Dav1dPicAllocator allocator;
int apply_grain;
int operating_point;
Expand Down
84 changes: 48 additions & 36 deletions src/intra_edge.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2018, Two Orioles, LLC
* Copyright © 2018-2023, VideoLAN and dav1d authors
* Copyright © 2018-2023, Two Orioles, LLC
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -39,9 +39,22 @@ struct ModeSelMem {
EdgeTip *nt;
};

static void init_edges(EdgeNode *const node,
const enum BlockLevel bl,
const enum EdgeFlags edge_flags)
/* Backing storage for the two intra edge trees (one rooted at a 128x128
* superblock, one rooted at a 64x64 superblock).
*
* Because we're using 16-bit offsets to refer to other nodes those arrays
* are placed in a struct to ensure they're consecutive in memory.
*
* The arrays are filled in by dav1d_init_intra_edge_tree(); each branch
* stores its four children as byte offsets relative to its own address.
* NOTE(review): ALIGN() is a project macro; presumably it declares `nodes`
* with 16-byte alignment - confirm against its definition. */
static struct {
EdgeBranch branch_sb128[1 + 4 + 16 + 64]; /* root, then 4, 16 and 64 branches of the deeper levels */
EdgeTip tip_sb128[256]; /* leaves of the 128x128 tree */
EdgeBranch branch_sb64[1 + 4 + 16]; /* root, then 4 and 16 branches of the deeper levels */
EdgeTip tip_sb64[64]; /* leaves of the 64x64 tree */
} ALIGN(nodes, 16);

const EdgeNode *dav1d_intra_edge_tree[2] = {
(EdgeNode*)nodes.branch_sb128, (EdgeNode*)nodes.branch_sb64
};

static COLD void init_edges(EdgeNode *const node,
const enum BlockLevel bl,
const enum EdgeFlags edge_flags)
{
node->o = edge_flags;
node->h[0] = edge_flags | EDGE_ALL_LEFT_HAS_BOTTOM;
Expand Down Expand Up @@ -78,19 +91,21 @@ static void init_edges(EdgeNode *const node,
}
}

static void init_mode_node(EdgeBranch *const nwc,
const enum BlockLevel bl,
struct ModeSelMem *const mem,
const int top_has_right,
const int left_has_bottom)
/* Byte distance from address `a` to address `b`, narrowed to the 16-bit
 * representation used for child links in the edge tree.
 *
 * The target must lie at or after the base and within 65535 bytes of it;
 * otherwise the narrowed value would silently describe a different node.
 * The assert makes such a layout problem visible in checked builds instead
 * of producing a corrupt tree. */
static inline uint16_t intra_edge_ptr_offset(const void *const a,
                                             const void *const b)
{
    /* Unsigned subtraction: a target located before the base wraps to a
     * huge value and is therefore rejected by the range check as well. */
    const uintptr_t delta = (uintptr_t)b - (uintptr_t)a;
    assert(delta <= UINT16_MAX);
    return (uint16_t)delta;
}
#define PTR_OFFSET(a, b) intra_edge_ptr_offset(a, b)

static COLD void init_mode_node(EdgeBranch *const nwc,
const enum BlockLevel bl,
struct ModeSelMem *const mem,
const int top_has_right,
const int left_has_bottom)
{
init_edges(&nwc->node, bl,
(top_has_right ? EDGE_ALL_TOP_HAS_RIGHT : 0) |
(left_has_bottom ? EDGE_ALL_LEFT_HAS_BOTTOM : 0));
if (bl == BL_16X16) {
for (int n = 0; n < 4; n++) {
EdgeTip *const nt = mem->nt++;
nwc->split[n] = &nt->node;
nwc->split_offset[n] = PTR_OFFSET(nwc, nt);
init_edges(&nt->node, bl + 1,
((n == 3 || (n == 1 && !top_has_right)) ? 0 :
EDGE_ALL_TOP_HAS_RIGHT) |
Expand All @@ -100,37 +115,34 @@ static void init_mode_node(EdgeBranch *const nwc,
} else {
for (int n = 0; n < 4; n++) {
EdgeBranch *const nwc_child = mem->nwc[bl]++;
nwc->split[n] = &nwc_child->node;
nwc->split_offset[n] = PTR_OFFSET(nwc, nwc_child);
init_mode_node(nwc_child, bl + 1, mem,
!(n == 3 || (n == 1 && !top_has_right)),
n == 0 || (n == 2 && left_has_bottom));
}
}
}

void dav1d_init_mode_tree(EdgeNode *const root_node, EdgeTip *const nt,
const int allow_sb128)
{
EdgeBranch *const root = (EdgeBranch *) root_node;
COLD void dav1d_init_intra_edge_tree(void) {
// This function is guaranteed to be called only once
struct ModeSelMem mem;
mem.nt = nt;

if (allow_sb128) {
mem.nwc[BL_128X128] = &root[1];
mem.nwc[BL_64X64] = &root[1 + 4];
mem.nwc[BL_32X32] = &root[1 + 4 + 16];
init_mode_node(root, BL_128X128, &mem, 1, 0);
assert(mem.nwc[BL_128X128] == &root[1 + 4]);
assert(mem.nwc[BL_64X64] == &root[1 + 4 + 16]);
assert(mem.nwc[BL_32X32] == &root[1 + 4 + 16 + 64]);
assert(mem.nt == &nt[256]);
} else {
mem.nwc[BL_128X128] = NULL;
mem.nwc[BL_64X64] = &root[1];
mem.nwc[BL_32X32] = &root[1 + 4];
init_mode_node(root, BL_64X64, &mem, 1, 0);
assert(mem.nwc[BL_64X64] == &root[1 + 4]);
assert(mem.nwc[BL_32X32] == &root[1 + 4 + 16]);
assert(mem.nt == &nt[64]);
}

mem.nwc[BL_128X128] = &nodes.branch_sb128[1];
mem.nwc[BL_64X64] = &nodes.branch_sb128[1 + 4];
mem.nwc[BL_32X32] = &nodes.branch_sb128[1 + 4 + 16];
mem.nt = nodes.tip_sb128;
init_mode_node(nodes.branch_sb128, BL_128X128, &mem, 1, 0);
assert(mem.nwc[BL_128X128] == &nodes.branch_sb128[1 + 4]);
assert(mem.nwc[BL_64X64] == &nodes.branch_sb128[1 + 4 + 16]);
assert(mem.nwc[BL_32X32] == &nodes.branch_sb128[1 + 4 + 16 + 64]);
assert(mem.nt == &nodes.tip_sb128[256]);

mem.nwc[BL_128X128] = NULL;
mem.nwc[BL_64X64] = &nodes.branch_sb64[1];
mem.nwc[BL_32X32] = &nodes.branch_sb64[1 + 4];
mem.nt = nodes.tip_sb64;
init_mode_node(nodes.branch_sb64, BL_64X64, &mem, 1, 0);
assert(mem.nwc[BL_64X64] == &nodes.branch_sb64[1 + 4]);
assert(mem.nwc[BL_32X32] == &nodes.branch_sb64[1 + 4 + 16]);
assert(mem.nt == &nodes.tip_sb64[64]);
}
11 changes: 8 additions & 3 deletions src/intra_edge.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ enum EdgeFlags {
EDGE_ALL_LEFT_HAS_BOTTOM,
};

/* Resolves child `i` of the branch node `n`. Children are stored as byte
 * offsets relative to the address of the branch itself, so the child is
 * found by stepping that many bytes forward from `n`.
 * Note: `n` is evaluated twice; pass an expression without side effects. */
#define INTRA_EDGE_SPLIT(n, i) \
    ((const EdgeNode*)((const uint8_t*)(n) + \
                       ((const EdgeBranch*)(n))->split_offset[i]))

/* Edge-availability flags for one node of the intra edge tree. Every field
 * holds an `enum EdgeFlags` value stored in a single byte.
 * `o` receives the flags passed to init_edges() unchanged.
 * NOTE(review): `h` and `v` presumably hold the flags for the two halves of
 * a horizontal / vertical split - confirm against init_edges(). */
typedef struct EdgeNode {
uint8_t /* enum EdgeFlags */ o, h[2], v[2];
} EdgeNode;
Expand All @@ -59,10 +62,12 @@ typedef struct EdgeTip {
/* Interior node of the intra edge tree: the node's own flags, two extra
 * flag bytes (`h4`, `v4`), and links to its four children.
 * NOTE(review): `h4` / `v4` presumably cover the 4-way horizontal / vertical
 * partitions - confirm against init_edges(). */
typedef struct EdgeBranch {
EdgeNode node;
uint8_t /* enum EdgeFlags */ h4, v4;
const EdgeNode *split[4];
uint16_t split_offset[4]; /* relative to the address of this node */
} EdgeBranch;

void dav1d_init_mode_tree(EdgeNode *const root, EdgeTip *const nt,
const int allow_sb128);
/* Tree to keep track of which edges are available.
 * One root per superblock size; the table is indexed with the root block
 * level, and children of a branch are reached with INTRA_EDGE_SPLIT(). */
EXTERN const EdgeNode *dav1d_intra_edge_tree[2 /* BL_128X128, BL_64X64 */];

/* Fills in the nodes behind dav1d_intra_edge_tree. Called from the
 * library's one-time internal initialization (init_internal() in lib.c);
 * the implementation relies on being called only once. */
void dav1d_init_intra_edge_tree(void);

#endif /* DAV1D_SRC_INTRA_EDGE_H */
7 changes: 1 addition & 6 deletions src/lib.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
static COLD void init_internal(void) {
dav1d_init_cpu();
dav1d_init_interintra_masks();
dav1d_init_intra_edge_tree();
dav1d_init_qm_tables();
dav1d_init_thread();
dav1d_init_wedge_masks();
Expand Down Expand Up @@ -291,12 +292,6 @@ COLD int dav1d_open(Dav1dContext **const c_out, const Dav1dSettings *const s) {
}
dav1d_refmvs_dsp_init(&c->refmvs_dsp);

// intra edge tree
c->intra_edge.root[BL_128X128] = &c->intra_edge.branch_sb128[0].node;
dav1d_init_mode_tree(c->intra_edge.root[BL_128X128], c->intra_edge.tip_sb128, 1);
c->intra_edge.root[BL_64X64] = &c->intra_edge.branch_sb64[0].node;
dav1d_init_mode_tree(c->intra_edge.root[BL_64X64], c->intra_edge.tip_sb64, 0);

pthread_attr_destroy(&thread_attr);

return 0;
Expand Down
Loading