Skip to content

Commit

Permalink
Revert "[common] add AVX/AVX2 memory copy implementations"
Browse files Browse the repository at this point in the history
This reverts commit e61678e.
GCC only supports multi-versioning in C++
  • Loading branch information
gnif committed Nov 18, 2023
1 parent e61678e commit 750cab8
Show file tree
Hide file tree
Showing 3 changed files with 13 additions and 124 deletions.
14 changes: 12 additions & 2 deletions common/include/common/rects.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,18 @@
#include "common/framebuffer.h"
#include "common/types.h"

void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width);
inline static void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width)
{
src += ystart * srcPitch + dx;
dst += ystart * dstPitch + dx;
for (int i = ystart; i < yend; ++i)
{
memcpy(dst, src, width);
src += srcPitch;
dst += dstPitch;
}
}

void rectsBufferToFramebuffer(FrameDamageRect * rects, int count, int bpp,
FrameBuffer * frame, int dstPitch, int height,
Expand Down
84 changes: 1 addition & 83 deletions common/src/framebuffer.c
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@
#include <string.h>
#include <emmintrin.h>
#include <smmintrin.h>
#include <immintrin.h>
#include <unistd.h>

bool framebuffer_wait(const FrameBuffer * frame, size_t size)
Expand Down Expand Up @@ -166,9 +165,7 @@ void framebuffer_prepare(FrameBuffer * frame)
atomic_store_explicit(&frame->wp, 0, memory_order_release);
}

__attribute__((target("default")))
bool framebuffer_write(FrameBuffer * frame, const void * restrict src,
size_t size)
bool framebuffer_write(FrameBuffer * frame, const void * restrict src, size_t size)
{
#ifdef FB_PROFILE
static RunningAvg ra = NULL;
Expand Down Expand Up @@ -225,85 +222,6 @@ bool framebuffer_write(FrameBuffer * frame, const void * restrict src,
return true;
}

#if 1
__attribute__((target("avx2")))
bool framebuffer_write(FrameBuffer *frame, const void *restrict src, size_t size)
{
#ifdef FB_PROFILE
static RunningAvg ra = NULL;
static int raCount = 0;
const uint64_t ts = microtime();
if (!ra)
ra = runningavg_new(100);
#endif

__m256i *restrict s = (__m256i *)src;
__m256i *restrict d = (__m256i *)frame->data;
size_t wp = 0;

_mm_mfence();

/* copy in chunks */
while (size > 127)
{
__m256i *_d = (__m256i *)d;
__m256i *_s = (__m256i *)s;
__m256i v1 = _mm256_stream_load_si256(_s + 0);
__m256i v2 = _mm256_stream_load_si256(_s + 1);
__m256i v3 = _mm256_stream_load_si256(_s + 2);
__m256i v4 = _mm256_stream_load_si256(_s + 3);

_mm256_stream_si256(_d + 0, v1);
_mm256_stream_si256(_d + 1, v2);
_mm256_stream_si256(_d + 2, v3);
_mm256_stream_si256(_d + 3, v4);

s += 4;
d += 4;
size -= 128;
wp += 128;

if (wp % FB_CHUNK_SIZE == 0)
atomic_store_explicit(&frame->wp, wp, memory_order_release);
}

if (size > 63)
{
__m256i *_d = (__m256i *)d;
__m256i *_s = (__m256i *)s;
__m256i v1 = _mm256_stream_load_si256(_s);
__m256i v2 = _mm256_stream_load_si256(_s + 1);

_mm256_stream_si256(_d, v1);
_mm256_stream_si256(_d + 1, v2);

s += 2;
d += 2;
size -= 64;
wp += 64;

if (wp % FB_CHUNK_SIZE == 0)
atomic_store_explicit(&frame->wp, wp, memory_order_release);
}

if (size)
{
memcpy(frame->data + wp, s, size);
wp += size;
}

atomic_store_explicit(&frame->wp, wp, memory_order_release);

#ifdef FB_PROFILE
runningavg_push(ra, microtime() - ts);
if (++raCount % 100 == 0)
DEBUG_INFO("Average Copy Time: %.2fμs", runningavg_calc(ra));
#endif

return true;
}
#endif

const uint8_t * framebuffer_get_buffer(const FrameBuffer * frame)
{
return frame->data;
Expand Down
39 changes: 0 additions & 39 deletions common/src/rects.c
Original file line number Diff line number Diff line change
Expand Up @@ -22,45 +22,6 @@
#include "common/util.h"

#include <stdlib.h>
#include <immintrin.h>

__attribute__((target("default")))
void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width)
{
src += ystart * srcPitch + dx;
dst += ystart * dstPitch + dx;
for (int i = ystart; i < yend; ++i)
{
memcpy(dst, src, width);
src += srcPitch;
dst += dstPitch;
}
}

__attribute__((target("avx")))
void rectCopyUnaligned(uint8_t * dst, const uint8_t * src,
int ystart, int yend, int dx, int dstPitch, int srcPitch, int width)
{
src += ystart * srcPitch + dx;
dst += ystart * dstPitch + dx;
for (int i = ystart; i < yend; ++i)
{
int col;
for(col = 0; col <= width - 32; col += 32)
{
_mm_prefetch(src + col + 256, _MM_HINT_T0);
__m256i srcData = _mm256_loadu_si256((__m256i*)(src + col));
_mm256_storeu_si256((__m256i*)(dst + col), srcData);
}

for(; col < width; ++col)
dst[col] = src[col];

src += srcPitch;
dst += dstPitch;
}
}

struct Corner
{
Expand Down

0 comments on commit 750cab8

Please sign in to comment.