Skip to content

Commit

Permalink
gl_stream_buffer: optimize OpenGL buffer handling
Browse files Browse the repository at this point in the history
This seems to give a huge performance boost for some Mali GPU devices.
  • Loading branch information
weihuoya authored and Gamer64ytb committed Sep 16, 2024
1 parent 7031479 commit 107837f
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 73 deletions.
2 changes: 1 addition & 1 deletion src/video_core/renderer_opengl/gl_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ void Driver::FindBugs() {

// TODO: Check if these have been fixed in the newer driver
if (vendor == Vendor::AMD) {
bugs |= DriverBug::ShaderStageChangeFreeze | DriverBug::VertexArrayOutOfBound;
bugs |= DriverBug::ShaderStageChangeFreeze;
}

if (vendor == Vendor::AMD || (vendor == Vendor::Intel && !is_linux)) {
Expand Down
11 changes: 3 additions & 8 deletions src/video_core/renderer_opengl/gl_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,13 @@ enum class Vendor {
enum class DriverBug {
// AMD drivers sometimes freeze when one shader stage is changed but not the others.
ShaderStageChangeFreeze = 1 << 0,
// On AMD drivers there is a strange crash in indexed drawing. The crash happens when the buffer
// read position is near the end and is an out-of-bound access to the vertex buffer. This is
// probably a bug in the driver and is related to the usage of vec3<byte> attributes in the
// vertex array. Doubling the allocation size for the vertex buffer seems to avoid the crash.
VertexArrayOutOfBound = 1 << 1,
// On AMD and Intel drivers on Windows glTextureView produces incorrect results
BrokenTextureView = 1 << 2,
BrokenTextureView = 1 << 1,
// On Haswell and Broadwell Intel drivers glClearTexSubImage produces a black screen
BrokenClearTexture = 1 << 3,
BrokenClearTexture = 1 << 2,
// On some Mali GPUs, the texture buffer size is small and has reduced performance
// if the buffer is close to the maximum texture size
SlowTextureBufferWithBigSize = 1 << 4,
SlowTextureBufferWithBigSize = 1 << 3,
};

/**
Expand Down
75 changes: 20 additions & 55 deletions src/video_core/renderer_opengl/gl_stream_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,35 +10,15 @@

namespace OpenGL {

OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
bool prefer_coherent)
OGLStreamBuffer::OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size)
: gl_target(target), buffer_size(size) {
gl_buffer.Create();
glBindBuffer(gl_target, gl_buffer.handle);

GLsizeiptr allocate_size = size;
if (driver.HasBug(DriverBug::VertexArrayOutOfBound) && target == GL_ARRAY_BUFFER) {
allocate_size *= 2;
}

if (GLAD_GL_ARB_buffer_storage) {
persistent = true;
coherent = prefer_coherent;
GLbitfield flags =
GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | (coherent ? GL_MAP_COHERENT_BIT : 0);
glBufferStorage(gl_target, allocate_size, nullptr, flags);
mapped_ptr = static_cast<u8*>(glMapBufferRange(
gl_target, 0, buffer_size, flags | (coherent ? 0 : GL_MAP_FLUSH_EXPLICIT_BIT)));
} else {
glBufferData(gl_target, allocate_size, nullptr, GL_STREAM_DRAW);
}
// prefer `glBufferData` over `glBufferStorage` on mobile devices
glBufferData(gl_target, buffer_size, nullptr, GL_STREAM_DRAW);
}

OGLStreamBuffer::~OGLStreamBuffer() {
if (persistent) {
glBindBuffer(gl_target, gl_buffer.handle);
glUnmapBuffer(gl_target);
}
gl_buffer.Release();
}

Expand All @@ -51,48 +31,33 @@ GLsizeiptr OGLStreamBuffer::GetSize() const {
}

std::tuple<u8*, GLintptr, bool> OGLStreamBuffer::Map(GLsizeiptr size, GLintptr alignment) {
ASSERT_MSG(size <= buffer_size, "Requested size {} exceeds buffer size {}", size, buffer_size);
ASSERT(alignment <= buffer_size);
mapped_size = size;

if (alignment > 0) {
buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
}

bool invalidate = false;

buffer_pos = Common::AlignUp<std::size_t>(buffer_pos, alignment);
if (buffer_pos + size > buffer_size) {
buffer_pos = 0;
invalidate = true;

if (persistent) {
glUnmapBuffer(gl_target);
}
}

if (invalidate || !persistent) {
MANDARINE_PROFILE("OpenGL", "Stream Buffer Orphaning");
GLbitfield flags = GL_MAP_WRITE_BIT | (persistent ? GL_MAP_PERSISTENT_BIT : 0) |
(coherent ? GL_MAP_COHERENT_BIT : GL_MAP_FLUSH_EXPLICIT_BIT) |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
mapped_ptr = static_cast<u8*>(
glMapBufferRange(gl_target, buffer_pos, buffer_size - buffer_pos, flags));
mapped_offset = buffer_pos;
}

return std::make_tuple(mapped_ptr + buffer_pos - mapped_offset, buffer_pos, invalidate);
GLbitfield flags = GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT |
(invalidate ? GL_MAP_INVALIDATE_BUFFER_BIT : GL_MAP_UNSYNCHRONIZED_BIT);
u8* mapped_ptr = static_cast<u8*>(glMapBufferRange(gl_target, buffer_pos, size, flags));
return std::make_tuple(mapped_ptr, buffer_pos, invalidate);
}

void OGLStreamBuffer::Unmap(GLsizeiptr size) {
ASSERT(size <= mapped_size);

if (!coherent && size > 0) {
glFlushMappedBufferRange(gl_target, buffer_pos - mapped_offset, size);
}

if (!persistent) {
glUnmapBuffer(gl_target);
if (size > 0) {
// the flush offset is relative to the start of the currently mapped range of the buffer
glFlushMappedBufferRange(gl_target, 0, size);
GLenum error = glGetError();
if (error != GL_NO_ERROR) {
LOG_DEBUG(Render_OpenGL,
"flush mapped buffer range error: {:04X}, target: {:04X}, offset: {}, size: "
"{}, total: {}",
error, gl_target, buffer_pos, size, buffer_size);
}
}

glUnmapBuffer(gl_target);
buffer_pos += size;
}

Expand Down
11 changes: 2 additions & 9 deletions src/video_core/renderer_opengl/gl_stream_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@ class Driver;

class OGLStreamBuffer : private NonCopyable {
public:
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size,
bool prefer_coherent = false);
explicit OGLStreamBuffer(Driver& driver, GLenum target, GLsizeiptr size);
~OGLStreamBuffer();

GLuint GetHandle() const;
Expand All @@ -28,22 +27,16 @@ class OGLStreamBuffer : private NonCopyable {
* and the invalidation flag for previous chunks.
* The actual used size must be specified on unmapping the chunk.
*/
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment = 0);
std::tuple<u8*, GLintptr, bool> Map(GLsizeiptr size, GLintptr alignment);

void Unmap(GLsizeiptr size);

private:
OGLBuffer gl_buffer;
GLenum gl_target;

bool coherent = false;
bool persistent = false;

GLintptr buffer_pos = 0;
GLsizeiptr buffer_size = 0;
GLintptr mapped_offset = 0;
GLsizeiptr mapped_size = 0;
u8* mapped_ptr = nullptr;
};

} // namespace OpenGL

1 comment on commit 107837f

@daeklo4
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Excellent, thanks for adding this upgrade 👍👍👍👍👍👍

Please sign in to comment.