Skip to content

Commit

Permalink
Merge pull request #369 from linebender/contig_tiling
Browse files Browse the repository at this point in the history
Contiguous storage for path segments
  • Loading branch information
raphlinus committed Oct 7, 2023
2 parents 34483eb + b46609d commit 1ef6724
Show file tree
Hide file tree
Showing 21 changed files with 786 additions and 755 deletions.
25 changes: 25 additions & 0 deletions crates/encoding/src/clip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,18 @@
use bytemuck::{Pod, Zeroable};

/// Clip stack element.
///
/// This is an element of the bicyclic semigroup, a monoid useful for
/// representing stack depth: `a` counts pops and `b` counts pushes. The
/// default (all-zero) value is the identity for [`ClipBic::combine`].
/// There is considerably more detail in the draft paper
/// [Fast GPU bounding boxes on tree-structured scenes].
///
/// [Fast GPU bounding boxes on tree-structured scenes]: https://arxiv.org/abs/2205.11659
#[derive(Copy, Clone, Pod, Zeroable, Debug, Default)]
#[repr(C)]
pub struct ClipBic {
/// When interpreted as a stack operation, the number of pop operations.
pub a: u32,
/// When interpreted as a stack operation, the number of push operations.
pub b: u32,
}

Expand Down Expand Up @@ -41,3 +49,20 @@ pub struct Clip {
pub struct ClipBbox {
pub bbox: [f32; 4],
}

impl ClipBic {
    /// Creates a clip stack element representing `a` pops followed by
    /// `b` pushes.
    pub fn new(a: u32, b: u32) -> Self {
        ClipBic { a, b }
    }

    /// The bicyclic semigroup operation.
    ///
    /// This operation is associative. When interpreted as a stack
    /// operation, it represents doing the pops of `self`, the pushes of
    /// `self`, the pops of `other`, and the pushes of `other`. The middle
    /// two can cancel each other out.
    ///
    /// # Panics
    ///
    /// In builds with overflow checks enabled, panics if a resulting pop
    /// or push count does not fit in a `u32`.
    pub fn combine(self, other: ClipBic) -> Self {
        // Number of pushes of `self` that are undone by pops of `other`.
        let m = self.b.min(other.a);
        // Subtract the cancelled amount before adding. `m` never exceeds
        // `other.a` or `self.b`, so the subtractions cannot underflow, and
        // the sums only overflow when the final result itself does not fit
        // (rather than on an intermediate value).
        ClipBic::new(self.a + (other.a - m), (self.b - m) + other.b)
    }
}
41 changes: 34 additions & 7 deletions crates/encoding/src/config.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
// Copyright 2023 The Vello authors
// SPDX-License-Identifier: Apache-2.0 OR MIT

use crate::SegmentCount;

use super::{
BinHeader, Clip, ClipBbox, ClipBic, ClipElement, Cubic, DrawBbox, DrawMonoid, Layout, Path,
PathBbox, PathMonoid, PathSegment, Tile,
BinHeader, Clip, ClipBbox, ClipBic, ClipElement, Cubic, DrawBbox, DrawMonoid, Layout, LineSoup,
Path, PathBbox, PathMonoid, PathSegment, Tile,
};
use bytemuck::{Pod, Zeroable};
use std::mem;
Expand All @@ -14,7 +16,7 @@ const TILE_HEIGHT: u32 = 16;
// TODO: Obtain these from the vello_shaders crate
pub(crate) const PATH_REDUCE_WG: u32 = 256;
const PATH_BBOX_WG: u32 = 256;
const PATH_COARSE_WG: u32 = 256;
const FLATTEN_WG: u32 = 256;
const CLIP_REDUCE_WG: u32 = 256;

/// Counters for tracking dynamic allocation on the GPU.
Expand All @@ -29,8 +31,24 @@ pub struct BumpAllocators {
pub binning: u32,
pub ptcl: u32,
pub tile: u32,
pub seg_counts: u32,
pub segments: u32,
pub blend: u32,
pub lines: u32,
}

/// Storage of indirect dispatch size values.
///
/// Holds three `u32` counts (x, y, z) followed by one `u32` of padding,
/// laid out in `#[repr(C)]` order.
///
/// The original plan was to reuse `BumpAllocators` for this, but the
/// WebGPU-compatible usage rules forbid a buffer from being used for
/// indirect counts while it is also bound as writable.
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
#[repr(C)]
pub struct IndirectCount {
/// Dispatch size along the x dimension.
pub count_x: u32,
/// Dispatch size along the y dimension.
pub count_y: u32,
/// Dispatch size along the z dimension.
pub count_z: u32,
/// Padding. NOTE(review): presumably keeps the struct at 16 bytes to
/// match the shader-side declaration — confirm.
pub pad0: u32,
}

/// Uniform render configuration data used by all GPU stages.
Expand Down Expand Up @@ -114,7 +132,7 @@ pub struct WorkgroupCounts {
pub path_scan1: WorkgroupSize,
pub path_scan: WorkgroupSize,
pub bbox_clear: WorkgroupSize,
pub path_seg: WorkgroupSize,
pub flatten: WorkgroupSize,
pub draw_reduce: WorkgroupSize,
pub draw_leaf: WorkgroupSize,
pub clip_reduce: WorkgroupSize,
Expand Down Expand Up @@ -146,7 +164,7 @@ impl WorkgroupCounts {
path_tag_wgs
};
let draw_object_wgs = (n_draw_objects + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
let path_coarse_wgs = (n_path_tags + PATH_COARSE_WG - 1) / PATH_COARSE_WG;
let flatten_wgs = (n_path_tags + FLATTEN_WG - 1) / FLATTEN_WG;
let clip_reduce_wgs = n_clips.saturating_sub(1) / CLIP_REDUCE_WG;
let clip_wgs = (n_clips + CLIP_REDUCE_WG - 1) / CLIP_REDUCE_WG;
let path_wgs = (n_paths + PATH_BBOX_WG - 1) / PATH_BBOX_WG;
Expand All @@ -159,14 +177,14 @@ impl WorkgroupCounts {
path_scan1: (reduced_size / PATH_REDUCE_WG, 1, 1),
path_scan: (path_tag_wgs, 1, 1),
bbox_clear: (draw_object_wgs, 1, 1),
path_seg: (path_coarse_wgs, 1, 1),
flatten: (flatten_wgs, 1, 1),
draw_reduce: (draw_object_wgs, 1, 1),
draw_leaf: (draw_object_wgs, 1, 1),
clip_reduce: (clip_reduce_wgs, 1, 1),
clip_leaf: (clip_wgs, 1, 1),
binning: (draw_object_wgs, 1, 1),
tile_alloc: (path_wgs, 1, 1),
path_coarse: (path_coarse_wgs, 1, 1),
path_coarse: (flatten_wgs, 1, 1),
backdrop: (path_wgs, 1, 1),
coarse: (width_in_bins, height_in_bins, 1),
fine: (width_in_tiles, height_in_tiles, 1),
Expand Down Expand Up @@ -248,11 +266,14 @@ pub struct BufferSizes {
pub clip_bboxes: BufferSize<ClipBbox>,
pub draw_bboxes: BufferSize<DrawBbox>,
pub bump_alloc: BufferSize<BumpAllocators>,
pub indirect_count: BufferSize<IndirectCount>,
pub bin_headers: BufferSize<BinHeader>,
pub paths: BufferSize<Path>,
// Bump allocated buffers
pub lines: BufferSize<LineSoup>,
pub bin_data: BufferSize<u32>,
pub tiles: BufferSize<Tile>,
pub seg_counts: BufferSize<SegmentCount>,
pub segments: BufferSize<PathSegment>,
pub ptcl: BufferSize<u32>,
}
Expand Down Expand Up @@ -284,6 +305,7 @@ impl BufferSizes {
let clip_bboxes = BufferSize::new(n_clips);
let draw_bboxes = BufferSize::new(n_paths);
let bump_alloc = BufferSize::new(1);
let indirect_count = BufferSize::new(1);
let bin_headers = BufferSize::new(draw_object_wgs * 256);
let n_paths_aligned = align_up(n_paths, 256);
let paths = BufferSize::new(n_paths_aligned);
Expand All @@ -293,6 +315,8 @@ impl BufferSizes {
// reasonable heuristics.
let bin_data = BufferSize::new(1 << 18);
let tiles = BufferSize::new(1 << 21);
let lines = BufferSize::new(1 << 21);
let seg_counts = BufferSize::new(1 << 21);
let segments = BufferSize::new(1 << 21);
let ptcl = BufferSize::new(1 << 23);
Self {
Expand All @@ -311,10 +335,13 @@ impl BufferSizes {
clip_bboxes,
draw_bboxes,
bump_alloc,
indirect_count,
lines,
bin_headers,
paths,
bin_data,
tiles,
seg_counts,
segments,
ptcl,
}
Expand Down
7 changes: 4 additions & 3 deletions crates/encoding/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ mod resolve;
pub use binning::BinHeader;
pub use clip::{Clip, ClipBbox, ClipBic, ClipElement};
pub use config::{
BufferSize, BufferSizes, BumpAllocators, ConfigUniform, RenderConfig, WorkgroupCounts,
WorkgroupSize,
BufferSize, BufferSizes, BumpAllocators, ConfigUniform, IndirectCount, RenderConfig,
WorkgroupCounts, WorkgroupSize,
};
pub use draw::{
DrawBbox, DrawBeginClip, DrawColor, DrawImage, DrawLinearGradient, DrawMonoid,
Expand All @@ -35,7 +35,8 @@ pub use encoding::{Encoding, StreamOffsets};
pub use math::Transform;
pub use monoid::Monoid;
pub use path::{
Cubic, Path, PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType, PathTag, Tile,
Cubic, LineSoup, Path, PathBbox, PathEncoder, PathMonoid, PathSegment, PathSegmentType,
PathTag, SegmentCount, Tile,
};
pub use resolve::{resolve_solid_paths_only, Layout};

Expand Down
26 changes: 24 additions & 2 deletions crates/encoding/src/path.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,36 @@ use peniko::kurbo::Shape;

use super::Monoid;

/// Line segment (after flattening, before tiling).
///
/// Plain-old-data record with a `#[repr(C)]` layout: a path index, one
/// word of padding, and two 2D points.
#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)]
#[repr(C)]
pub struct LineSoup {
/// Index of the path this line belongs to.
// NOTE(review): presumably indexes the per-path buffers — confirm.
pub path_ix: u32,
/// Unused padding word.
// NOTE(review): presumably aligns `p0` to 8 bytes to match the WGSL
// layout — confirm.
pub _padding: u32,
/// First endpoint of the line, as `[x, y]`.
pub p0: [f32; 2],
/// Second endpoint of the line, as `[x, y]`.
pub p1: [f32; 2],
}

/// Per-line segment count record.
///
/// Pairs the index of a line with a packed `u32` of counts.
// NOTE(review): presumably `line_ix` indexes the `LineSoup` buffer —
// confirm against the shaders.
#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)]
#[repr(C)]
pub struct SegmentCount {
/// Index of the line this record refers to.
pub line_ix: u32,
/// Two 16-bit counts packed into a single `u32`.
// This could more accurately be modeled as:
// segment_within_line: u16,
// segment_within_slice: u16,
// However, here we mirror the way it's written in WGSL
// NOTE(review): which half of the word holds which count is not visible
// here — check the WGSL definition.
pub counts: u32,
}

/// Path segment.
#[derive(Clone, Copy, Debug, Zeroable, Pod, Default)]
#[repr(C)]
pub struct PathSegment {
pub origin: [f32; 2],
pub delta: [f32; 2],
pub y_edge: f32,
pub next: u32,
pub _padding: u32,
}

/// Path segment type.
Expand Down Expand Up @@ -193,7 +215,7 @@ pub struct PathBbox {
#[repr(C)]
pub struct Path {
/// Bounding box in tiles.
pub bbox: [f32; 4],
pub bbox: [u32; 4],
/// Offset (in u32s) to tile rectangle.
pub tiles: u32,
_padding: [u32; 3],
Expand Down
63 changes: 31 additions & 32 deletions shader/coarse.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ var<storage> info_bin_data: array<u32>;
var<storage> paths: array<Path>;

@group(0) @binding(6)
var<storage> tiles: array<Tile>;
var<storage, read_write> tiles: array<Tile>;

@group(0) @binding(7)
var<storage, read_write> bump: BumpAllocators;
Expand Down Expand Up @@ -82,31 +82,30 @@ fn alloc_cmd(size: u32) {
}
}

fn write_path(tile: Tile, linewidth: f32) -> bool {
// TODO: take flags
alloc_cmd(3u);
if linewidth < 0.0 {
let even_odd = linewidth < -1.0;
if tile.segments != 0u {
let fill = CmdFill(tile.segments, tile.backdrop);
ptcl[cmd_offset] = CMD_FILL;
let segments_and_rule = select(fill.tile << 1u, (fill.tile << 1u) | 1u, even_odd);
ptcl[cmd_offset + 1u] = segments_and_rule;
ptcl[cmd_offset + 2u] = u32(fill.backdrop);
cmd_offset += 3u;
} else {
if even_odd && (abs(tile.backdrop) & 1) == 0 {
return false;
}
ptcl[cmd_offset] = CMD_SOLID;
cmd_offset += 1u;
}
fn write_path(tile: Tile, tile_ix: u32, linewidth: f32) -> bool {
let even_odd = linewidth < -1.0;
// We overload the "segments" field to store both count (written by
// path_count stage) and segment allocation (used by path_tiling and
// fine).
let n_segs = tile.segment_count_or_ix;
if n_segs != 0u {
var seg_ix = atomicAdd(&bump.segments, n_segs);
tiles[tile_ix].segment_count_or_ix = ~seg_ix;
alloc_cmd(4u);
ptcl[cmd_offset] = CMD_FILL;
let size_and_rule = (n_segs << 1u) | u32(even_odd);
let fill = CmdFill(size_and_rule, seg_ix, tile.backdrop);
ptcl[cmd_offset + 1u] = fill.size_and_rule;
ptcl[cmd_offset + 2u] = fill.seg_data;
ptcl[cmd_offset + 3u] = u32(fill.backdrop);
cmd_offset += 4u;
} else {
let stroke = CmdStroke(tile.segments, 0.5 * linewidth);
ptcl[cmd_offset] = CMD_STROKE;
ptcl[cmd_offset + 1u] = stroke.tile;
ptcl[cmd_offset + 2u] = bitcast<u32>(stroke.half_width);
cmd_offset += 3u;
if even_odd && (abs(tile.backdrop) & 1) == 0 {
return false;
}
alloc_cmd(1u);
ptcl[cmd_offset] = CMD_SOLID;
cmd_offset += 1u;
}
return true;
}
Expand Down Expand Up @@ -311,7 +310,7 @@ fn main(
let blend = scene[dd];
is_blend = blend != BLEND_CLIP;
}
let include_tile = tile.segments != 0u || (tile.backdrop == 0) == is_clip || is_blend;
let include_tile = tile.segment_count_or_ix != 0u || (tile.backdrop == 0) == is_clip || is_blend;
if include_tile {
let el_slice = el_ix / 32u;
let el_mask = 1u << (el_ix & 31u);
Expand Down Expand Up @@ -352,15 +351,15 @@ fn main(
// DRAWTAG_FILL_COLOR
case 0x44u: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
let rgba_color = scene[dd];
write_color(CmdColor(rgba_color));
}
}
// DRAWTAG_FILL_LIN_GRADIENT
case 0x114u: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
let index = scene[dd];
let info_offset = di + 1u;
write_grad(CMD_LIN_GRAD, index, info_offset);
Expand All @@ -369,7 +368,7 @@ fn main(
// DRAWTAG_FILL_RAD_GRADIENT
case 0x29cu: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
let index = scene[dd];
let info_offset = di + 1u;
write_grad(CMD_RAD_GRAD, index, info_offset);
Expand All @@ -378,13 +377,13 @@ fn main(
// DRAWTAG_FILL_IMAGE
case 0x248u: {
let linewidth = bitcast<f32>(info_bin_data[di]);
if write_path(tile, linewidth) {
if write_path(tile, tile_ix, linewidth) {
write_image(di + 1u);
}
}
// DRAWTAG_BEGIN_CLIP
case 0x9u: {
if tile.segments == 0u && tile.backdrop == 0 {
if tile.segment_count_or_ix == 0u && tile.backdrop == 0 {
clip_zero_depth = clip_depth + 1u;
} else {
write_begin_clip();
Expand All @@ -396,7 +395,7 @@ fn main(
// DRAWTAG_END_CLIP
case 0x21u: {
clip_depth -= 1u;
write_path(tile, -1.0);
write_path(tile, tile_ix, -1.0);
let blend = scene[dd];
let alpha = bitcast<f32>(scene[dd + 1u]);
write_end_clip(CmdEndClip(blend, alpha));
Expand Down
Loading

0 comments on commit 1ef6724

Please sign in to comment.