Skip to content

Commit

Permalink
Spherical instances in a vacuum
Browse files Browse the repository at this point in the history
- Write out instance bounding spheres in pass one
- Read them back in pass two so we don't have to read in the entire
  instance twice
- Cull pass 2 no longer needs to be parameterized by instance type, so
  fewer program binds are needed
- Fix page indexing logic
- Fix visibility sizing logic
  • Loading branch information
Jozufozu committed Nov 4, 2024
1 parent 1823a9f commit a6c5f93
Show file tree
Hide file tree
Showing 8 changed files with 82 additions and 85 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import dev.engine_room.flywheel.backend.util.AtomicReferenceCounted;
import dev.engine_room.flywheel.lib.util.ResourceUtil;
import net.minecraft.resources.ResourceLocation;
import net.minecraft.util.Unit;

public class IndirectPrograms extends AtomicReferenceCounted {
private static final ResourceLocation CULL_SHADER_API_IMPL = Flywheel.rl("internal/indirect/cull_api_impl.glsl");
Expand All @@ -37,6 +38,7 @@ public class IndirectPrograms extends AtomicReferenceCounted {

private static final Compile<InstanceType<?>> CULL = new Compile<>();
private static final Compile<ResourceLocation> UTIL = new Compile<>();
private static final Compile<Unit> UNIT = new Compile<>();

private static final List<String> EXTENSIONS = getExtensions(GlCompat.MAX_GLSL_VERSION);
private static final List<String> COMPUTE_EXTENSIONS = getComputeExtensions(GlCompat.MAX_GLSL_VERSION);
Expand All @@ -46,10 +48,10 @@ public class IndirectPrograms extends AtomicReferenceCounted {

private final PipelineCompiler pipeline;
private final CompilationHarness<InstanceType<?>> culling;
private final CompilationHarness<InstanceType<?>> cullPassTwo;
private final CompilationHarness<Unit> cullPassTwo;
private final CompilationHarness<ResourceLocation> utils;

private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness<InstanceType<?>> culling, CompilationHarness<InstanceType<?>> cullPassTwo, CompilationHarness<ResourceLocation> utils) {
private IndirectPrograms(PipelineCompiler pipeline, CompilationHarness<InstanceType<?>> culling, CompilationHarness<Unit> cullPassTwo, CompilationHarness<ResourceLocation> utils) {
this.pipeline = pipeline;
this.culling = culling;
this.cullPassTwo = cullPassTwo;
Expand Down Expand Up @@ -91,7 +93,7 @@ static void reload(ShaderSources sources, List<SourceComponent> vertexComponents

var pipelineCompiler = PipelineCompiler.create(sources, Pipelines.INDIRECT, vertexComponents, fragmentComponents, EXTENSIONS);
var pass1Compiler = createCullingCompiler(sources, CULL_SHADER_MAIN, "early_cull");
var pass2Compiler = createCullingCompiler(sources, PASS2_SHADER_MAIN, "late_cull");
var pass2Compiler = createPassTwoCompiler(sources, PASS2_SHADER_MAIN, "late_cull");
var utilCompiler = createUtilCompiler(sources);

IndirectPrograms newInstance = new IndirectPrograms(pipelineCompiler, pass1Compiler, pass2Compiler, utilCompiler);
Expand Down Expand Up @@ -119,6 +121,19 @@ private static CompilationHarness<InstanceType<?>> createCullingCompiler(ShaderS
.harness(name, sources);
}

/**
 * Creates the compilation harness for the second culling pass.
 *
 * <p>The harness is keyed by {@link Unit}, so it is not parameterized by instance type;
 * the name mapper ignores the key and always reports {@code name}.
 *
 * @param sources the shader sources handed to the harness
 * @param main    the resource location of the compute shader attached to the program
 * @param name    the name used both for the compiled shader and for the harness
 * @return a harness that compiles and links the pass two compute program
 */
private static CompilationHarness<Unit> createPassTwoCompiler(ShaderSources sources, ResourceLocation main, String name) {
    // Start the program builder first so the builders are created in the same order as before.
    var programBuilder = UNIT.program();

    // The compute stage: needs the compute extensions plus basic/ballot subgroup support,
    // and is told the subgroup size through a define.
    var computeStage = UNIT.shader(GlCompat.MAX_GLSL_VERSION, ShaderType.COMPUTE)
            .nameMapper(unit -> name)
            .requireExtensions(COMPUTE_EXTENSIONS)
            .define("_FLW_SUBGROUP_SIZE", GlCompat.SUBGROUP_SIZE)
            .enableExtension("GL_KHR_shader_subgroup_basic")
            .enableExtension("GL_KHR_shader_subgroup_ballot")
            .withResource(main);

    // Once linked, hook up the uniform block bindings on the resulting program.
    return programBuilder.link(computeStage)
            .postLink((unit, linked) -> Uniforms.setUniformBlockBindings(linked))
            .harness(name, sources);
}

/**
* A compiler for utility shaders. It directly compiles the shader at the resource location specified by the parameter.
*/
Expand Down Expand Up @@ -163,8 +178,8 @@ public GlProgram getCullingProgram(InstanceType<?> instanceType) {
return culling.get(instanceType);
}

public GlProgram getCullPassTwoProgram(InstanceType<?> instanceType) {
return cullPassTwo.get(instanceType);
public GlProgram getCullPassTwoProgram() {
return cullPassTwo.get(Unit.INSTANCE);
}

public GlProgram getApplyProgram() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
package dev.engine_room.flywheel.backend.engine.indirect;

public final class BufferBindings {
public static final int LAST_FRAME_VISIBILITY = 0;
public static final int PAGE_FRAME_DESCRIPTOR = 1;
public static final int INSTANCE = 2;
public static final int DRAW_INSTANCE_INDEX = 3;
public static final int MODEL = 4;
public static final int DRAW = 5;
public static final int BOUNDING_SPHERES = 0;
public static final int LAST_FRAME_VISIBILITY = 1;
public static final int PAGE_FRAME_DESCRIPTOR = 2;
public static final int INSTANCE = 3;
public static final int DRAW_INSTANCE_INDEX = 4;
public static final int MODEL = 5;
public static final int DRAW = 6;

public static final int LIGHT_LUT = 6;
public static final int LIGHT_SECTION = 7;
public static final int MATRICES = 8;
public static final int LIGHT_LUT = 7;
public static final int LIGHT_SECTION = 8;
public static final int MATRICES = 9;

private BufferBindings() {
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,11 @@
import org.lwjgl.system.Pointer;

import dev.engine_room.flywheel.backend.gl.buffer.GlBufferType;
import dev.engine_room.flywheel.lib.math.MoreMath;
import dev.engine_room.flywheel.lib.memory.MemoryBlock;

public class IndirectBuffers {
// Number of vbos created.
public static final int BUFFER_COUNT = 6;
public static final int BUFFER_COUNT = 7;

public static final long INT_SIZE = Integer.BYTES;
public static final long PTR_SIZE = Pointer.POINTER_SIZE;
Expand All @@ -31,6 +30,7 @@ public class IndirectBuffers {
private static final long BUFFERS_SIZE_BYTES = SIZE_OFFSET + BUFFER_COUNT * PTR_SIZE;

// Offsets to the vbos
private static final long BOUNDING_SPHERES_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.BOUNDING_SPHERES * INT_SIZE;
private static final long LAST_FRAME_VISIBILITY_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.LAST_FRAME_VISIBILITY * INT_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * INT_SIZE;
private static final long INSTANCE_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.INSTANCE * INT_SIZE;
Expand All @@ -39,6 +39,7 @@ public class IndirectBuffers {
private static final long DRAW_HANDLE_OFFSET = HANDLE_OFFSET + BufferBindings.DRAW * INT_SIZE;

// Offsets to the sizes
private static final long BOUNDING_SPHERES_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.BOUNDING_SPHERES * PTR_SIZE;
private static final long LAST_FRAME_VISIBILITY_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.LAST_FRAME_VISIBILITY * PTR_SIZE;
private static final long PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.PAGE_FRAME_DESCRIPTOR * PTR_SIZE;
private static final long INSTANCE_SIZE_OFFSET = SIZE_OFFSET + BufferBindings.INSTANCE * PTR_SIZE;
Expand All @@ -65,6 +66,7 @@ public class IndirectBuffers {
*/
private final MemoryBlock multiBindBlock;

public final ResizableStorageArray boundingSpheres;
public final ResizableStorageArray lastFrameVisibility;
public final ObjectStorage objectStorage;
public final ResizableStorageArray drawInstanceIndex;
Expand All @@ -74,6 +76,7 @@ public class IndirectBuffers {
IndirectBuffers(long instanceStride) {
this.multiBindBlock = MemoryBlock.calloc(BUFFERS_SIZE_BYTES, 1);

boundingSpheres = new ResizableStorageArray(16);
lastFrameVisibility = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
objectStorage = new ObjectStorage(instanceStride);
drawInstanceIndex = new ResizableStorageArray(INT_SIZE, INSTANCE_GROWTH_FACTOR);
Expand All @@ -83,20 +86,23 @@ public class IndirectBuffers {

void updateCounts(int instanceCount, int modelCount, int drawCount) {
drawInstanceIndex.ensureCapacity(instanceCount);
lastFrameVisibility.ensureCapacity(MoreMath.ceilingDiv(instanceCount, 32));
lastFrameVisibility.ensureCapacity(objectStorage.capacity());
boundingSpheres.ensureCapacity(objectStorage.capacity() * 32L);
model.ensureCapacity(modelCount);
draw.ensureCapacity(drawCount);

final long ptr = multiBindBlock.ptr();

MemoryUtil.memPutInt(ptr + BOUNDING_SPHERES_HANDLE_OFFSET, boundingSpheres.handle());
MemoryUtil.memPutInt(ptr + LAST_FRAME_VISIBILITY_HANDLE_OFFSET, lastFrameVisibility.handle());
MemoryUtil.memPutInt(ptr + PAGE_FRAME_DESCRIPTOR_HANDLE_OFFSET, objectStorage.frameDescriptorBuffer.handle());
MemoryUtil.memPutInt(ptr + INSTANCE_HANDLE_OFFSET, objectStorage.objectBuffer.handle());
MemoryUtil.memPutInt(ptr + DRAW_INSTANCE_INDEX_HANDLE_OFFSET, drawInstanceIndex.handle());
MemoryUtil.memPutInt(ptr + MODEL_HANDLE_OFFSET, model.handle());
MemoryUtil.memPutInt(ptr + DRAW_HANDLE_OFFSET, draw.handle());

MemoryUtil.memPutAddress(ptr + LAST_FRAME_VISIBILITY_SIZE_OFFSET, INT_SIZE * MoreMath.ceilingDiv(instanceCount, 32));
MemoryUtil.memPutAddress(ptr + BOUNDING_SPHERES_SIZE_OFFSET, 16L * objectStorage.capacity() * 32);
MemoryUtil.memPutAddress(ptr + LAST_FRAME_VISIBILITY_SIZE_OFFSET, INT_SIZE * objectStorage.capacity());
MemoryUtil.memPutAddress(ptr + PAGE_FRAME_DESCRIPTOR_SIZE_OFFSET, objectStorage.frameDescriptorBuffer.capacity());
MemoryUtil.memPutAddress(ptr + INSTANCE_SIZE_OFFSET, objectStorage.objectBuffer.capacity());
MemoryUtil.memPutAddress(ptr + DRAW_INSTANCE_INDEX_SIZE_OFFSET, INT_SIZE * instanceCount);
Expand All @@ -105,31 +111,31 @@ void updateCounts(int instanceCount, int modelCount, int drawCount) {
}

public void bindForCullPassOne() {
multiBind(0, 5);
multiBind(0, 6);
}

public void bindForCullPassTwo() {
multiBind(0, 5);
multiBind(0, 6);
}

public void bindForApply() {
multiBind(4, 2);
multiBind(5, 2);
}

public void bindForModelReset() {
multiBind(4, 1);
multiBind(5, 1);
}

public void bindForDraw() {
multiBind(2, 4);
multiBind(3, 4);
GlBufferType.DRAW_INDIRECT_BUFFER.bind(draw.handle());
}

/**
* Bind all buffers except the draw command buffer.
*/
public void bindForCrumbling() {
multiBind(3, 3);
multiBind(4, 3);
}

private void multiBind(int base, int count) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@
import dev.engine_room.flywheel.backend.engine.InstancerKey;
import dev.engine_room.flywheel.backend.engine.MaterialRenderState;
import dev.engine_room.flywheel.backend.engine.MeshPool;
import dev.engine_room.flywheel.backend.engine.uniform.Uniforms;
import dev.engine_room.flywheel.backend.gl.GlCompat;
import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
import dev.engine_room.flywheel.lib.math.MoreMath;
Expand All @@ -44,7 +43,6 @@ public class IndirectCullingGroup<I extends Instance> {

private final IndirectPrograms programs;
private final GlProgram earlyCull;
private final GlProgram lateCull;

private boolean needsDrawBarrier;
private boolean needsDrawSort;
Expand All @@ -58,7 +56,6 @@ public class IndirectCullingGroup<I extends Instance> {

this.programs = programs;
earlyCull = programs.getCullingProgram(instanceType);
lateCull = programs.getCullPassTwoProgram(instanceType);
}

public void flushInstancers() {
Expand Down Expand Up @@ -113,7 +110,6 @@ public void dispatchCull() {
return;
}

Uniforms.bindAll();
earlyCull.bind();

buffers.bindForCullPassOne();
Expand All @@ -125,9 +121,6 @@ public void dispatchCullPassTwo() {
return;
}

Uniforms.bindAll();
lateCull.bind();

buffers.bindForCullPassTwo();
glDispatchCompute(buffers.objectStorage.capacity(), 1, 1);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ public void render(VisualType visualType) {
GlTextureUnit.T0.makeActive();
GlStateManager._bindTexture(depthPyramid.pyramidTextureId);

programs.getCullPassTwoProgram()
.bind();

for (var group1 : cullingGroups.values()) {
group1.dispatchCullPassTwo();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,17 @@
// A few of these could be combined.

// Per culling group
#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 0// cull1, cull2
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 1// cull1, cull2
#define _FLW_INSTANCE_BUFFER_BINDING 2// cull1, cull2, draw
#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 3// cull1, cull2, draw
#define _FLW_MODEL_BUFFER_BINDING 4// cull1, cull2, apply
#define _FLW_DRAW_BUFFER_BINDING 5// apply, draw
#define _FLW_BOUNDING_SPHERE_BINDING 0// cull1, cull2
#define _FLW_LAST_FRAME_VISIBILITY_BUFFER_BINDING 1// cull1, cull2
#define _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING 2// cull1, cull2
#define _FLW_INSTANCE_BUFFER_BINDING 3// cull1, cull2, draw
#define _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING 4// cull1, cull2, draw
#define _FLW_MODEL_BUFFER_BINDING 5// cull1, cull2, apply
#define _FLW_DRAW_BUFFER_BINDING 6// apply, draw


// Global to the engine
#define _FLW_LIGHT_LUT_BUFFER_BINDING 6
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 7
#define _FLW_LIGHT_LUT_BUFFER_BINDING 7
#define _FLW_LIGHT_SECTIONS_BUFFER_BINDING 8

#define _FLW_MATRIX_BUFFER_BINDING 8
#define _FLW_MATRIX_BUFFER_BINDING 9
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,14 @@

layout(local_size_x = 32) in;

layout(std430, binding = _FLW_BOUNDING_SPHERE_BINDING) restrict writeonly buffer BoundingSphereBuffer {
vec4 _flw_boundingSpheres[];
};

layout(std430, binding = _FLW_DRAW_INSTANCE_INDEX_BUFFER_BINDING) restrict writeonly buffer DrawIndexBuffer {
uint _flw_drawIndices[];
};

// High 6 bits for the number of instances in the page.
const uint _FLW_PAGE_COUNT_OFFSET = 26u;
// Bottom 26 bits for the model index.
const uint _FLW_MODEL_INDEX_MASK = 0x3FFFFFF;

layout(std430, binding = _FLW_PAGE_FRAME_DESCRIPTOR_BUFFER_BINDING) restrict readonly buffer PageFrameDescriptorBuffer {
uint _flw_pageFrameDescriptors[];
};
Expand Down Expand Up @@ -61,39 +60,36 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
}

_flw_boundingSpheres[instanceIndex] = vec4(center, radius);

return _flw_testSphere(center, radius);
}

// TODO: There's an opportunity here to write out the transformed bounding spheres to a buffer and use them in pass 2,
// instead of pulling the entire instance again. It would save a lot of memory bandwidth and matrix multiplications in
// pass 2, but it would also be a good bit of writes in pass 1. It's worth investigating, but it would be nice to have
// nsight trace working to be more sure.
void main() {
uint pageIndex = gl_WorkGroupID.x;
uint pageIndex = gl_WorkGroupID.x << 1u;

if (pageIndex >= _flw_pageFrameDescriptors.length()) {
return;
}

uint packedModelIndexAndCount = _flw_pageFrameDescriptors[pageIndex];
uint modelIndex = _flw_pageFrameDescriptors[pageIndex];

uint pageInstanceCount = packedModelIndexAndCount >> _FLW_PAGE_COUNT_OFFSET;
uint pageValidity = _flw_pageFrameDescriptors[pageIndex + 1];

if (gl_LocalInvocationID.x >= pageInstanceCount) {
if (((1u << gl_LocalInvocationID.x) & pageValidity) == 0) {
return;
}

uint instanceIndex = gl_GlobalInvocationID.x;

uint modelIndex = packedModelIndexAndCount & _FLW_MODEL_INDEX_MASK;

if (!_flw_isVisible(instanceIndex, modelIndex)) {
return;
}

uint pageVisibility = _flw_visibility[pageIndex];
uint pageVisibility = _flw_visibility[gl_WorkGroupID.x];
bool visibleLastFrame = (_flw_visibility[gl_WorkGroupID.x] & (1u << gl_LocalInvocationID.x)) != 0u;

if ((pageVisibility & (1u << gl_LocalInvocationID.x)) != 0u) {
if (visibleLastFrame) {
// This instance was visible last frame, so it should be rendered early.
uint localIndex = atomicAdd(_flw_models[modelIndex].instanceCount, 1);
uint targetIndex = _flw_models[modelIndex].baseInstance + localIndex;
Expand Down
Loading

0 comments on commit a6c5f93

Please sign in to comment.