Skip to content

Commit

Permalink
cleanup...?
Browse files Browse the repository at this point in the history
  • Loading branch information
RyuaNerin committed Aug 18, 2023
1 parent ed0bcda commit 721557a
Show file tree
Hide file tree
Showing 7 changed files with 138 additions and 42 deletions.
9 changes: 9 additions & 0 deletions avoutil/alloc.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ func Alloc8(name string, values ...uint32) Mem {
return mem
}

// Alloc16 declares a global data section called name with the NOPTR|RODATA
// flags and stores values into it as consecutive 16-bit words, so element i
// is written at byte offset 2*i. It returns the memory operand yielded by
// GLOBL for that section.
func Alloc16(name string, values ...uint16) Mem {
	const wordBytes = 2

	section := GLOBL(name, NOPTR|RODATA)

	offset := 0
	for i := range values {
		DATA(offset, U16(values[i]))
		offset += wordBytes
	}

	return section
}

func Alloc32(name string, values ...uint32) Mem {
mem := GLOBL(name, NOPTR|RODATA)

Expand Down
20 changes: 13 additions & 7 deletions avoutil/memcpy.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ var memcpyN = 0
// size: uint64
func Memcpy(dst, src Op, size Register, avx2 bool) {
if size.Size() != 8 {
panic("wong arguments")
panic("size must be gp64")
}

Comment("Memcpy")
Expand All @@ -41,6 +41,9 @@ func Memcpy(dst, src Op, size Register, avx2 bool) {

//////////////////////////////

////_mm_prefetch(src, C_MM_HINT_NTA)
//PREFETCHNTA(src)

cpy := func(sz int, tmp Op, mov func(a, b Op)) {
labelStart := fmt.Sprintf("memcpy_%d_sz%d_start", memcpyN, sz)
labelEnd := fmt.Sprintf("memcpy_%d_sz%d_end", memcpyN, sz)
Expand All @@ -63,9 +66,9 @@ func Memcpy(dst, src Op, size Register, avx2 bool) {

if enableXYZ {
if avx2 {
cpy(32, op256, VMOVDQ_autoAU)
cpy(32, op256, VMOVDQa)
}
cpy(16, op128, MOVO_autoAU)
cpy(16, op128, MOVOa)

}
cpy(8, op64, MOVQ)
Expand All @@ -86,17 +89,20 @@ func MemcpyStatic(dst, src Mem, size int, avx2 bool) {

//////////////////////////////

////_mm_prefetch(src, C_MM_HINT_NTA)
//PREFETCHNTA(src)

idx := 0
for size > 0 {
sz := 1

if enableXYZ && avx2 && size >= 32 {
VMOVDQ_autoAU(src.Offset(idx), op256)
VMOVDQ_autoAU(op256, dst.Offset(idx))
VMOVDQa(src.Offset(idx), op256)
VMOVDQa(op256, dst.Offset(idx))
sz = 32
} else if enableXYZ && size >= 16 {
MOVO_autoAU(src.Offset(idx), op128)
MOVO_autoAU(op128, dst.Offset(idx))
MOVOa(src.Offset(idx), op128)
MOVOa(op128, dst.Offset(idx))
sz = 16
} else if size >= 8 {
MOVQ(src.Offset(idx), op64)
Expand Down
6 changes: 3 additions & 3 deletions avoutil/memset.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ var (
// size : gp64 (8-byte register; anything else panics)
func Memset(dst Op, val uint8, size Register, useAVX2 bool) {
if size.Size() != 8 {
panic("wong arguments")
panic("size must be gp64")
}

Comment("memset")
Expand Down Expand Up @@ -76,9 +76,9 @@ func Memset(dst Op, val uint8, size Register, useAVX2 bool) {
if enableXYZ {
reg := YMM()
if useAVX2 {
step(32, reg, MOVO, VMOVDQ_autoAU)
step(32, reg, MOVO, VMOVDQa)
}
step(16, reg.AsX(), MOVO, MOVO_autoAU)
step(16, reg.AsX(), MOVO, MOVOa)
}
step(8, tmp.As64(), MOVQ, MOVQ)
step(4, tmp.As32(), MOVL, MOVL)
Expand Down
18 changes: 12 additions & 6 deletions avoutil/mov.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,14 @@ func isAligned(alignedByte int, args ...Op) bool {
return true
}

func VMOVDQ_autoAU2(dst, src Op) Op {
VMOVDQ_autoAU(src, dst)
// VMOVDQad is VMOVDQa (VMOVDQA vs VMOVDQU selection) with its operands given in (dst, src) order; it returns dst.
func VMOVDQad(dst, src Op) Op {
VMOVDQa(src, dst)
return dst
}
func VMOVDQ_autoAU(mxy, mxy1 Op) {

// VMOVDQa chooses between VMOVDQA and VMOVDQU based on whether both operands pass isAligned(YmmSize, ...).
func VMOVDQa(mxy, mxy1 Op) {
if isAligned(YmmSize, mxy, mxy1) {
CheckType(
`
Expand Down Expand Up @@ -69,11 +72,14 @@ func VMOVDQ_autoAU(mxy, mxy1 Op) {
}
}

func MOVO_autoAU2(dst, src Op) Op {
MOVO_autoAU(src, dst)
// MOVOad is MOVOa (MOVOA vs MOVOU selection) with its operands given in (dst, src) order; it returns dst.
func MOVOad(dst, src Op) Op {
MOVOa(src, dst)
return dst
}
func MOVO_autoAU(mx, mx1 Op) {

// MOVOa chooses between MOVOA and MOVOU based on whether both operands pass isAligned(XmmSize, ...).
func MOVOa(mx, mx1 Op) {
if isAligned(XmmSize, mx, mx1) {
CheckType(
`
Expand Down
8 changes: 4 additions & 4 deletions avoutil/simd/simd_avx2.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package simd

import (
. "kryptosimd/avoutil"

. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"

. "kryptosimd/avoutil"
)

/*
Expand All @@ -26,7 +26,7 @@ Operation
dst[MAX:256] := 0
*/
func F_mm256_loadu_si256(dst VecVirtual, src Op) VecVirtual {
VMOVDQ_autoAU2(dst, src)
VMOVDQad(dst, src)
return dst
}

Expand All @@ -47,7 +47,7 @@ Operation
MEM[mem_addr+255:mem_addr] := a[255:0]
*/
func F_mm256_storeu_si256(dst, src Op) {
VMOVDQ_autoAU2(dst, src)
VMOVDQad(dst, src)
}

/*
Expand Down
Loading

0 comments on commit 721557a

Please sign in to comment.