Skip to content

Commit

Permalink
func(dst, arg...)
Browse files Browse the repository at this point in the history
  • Loading branch information
RyuaNerin committed Aug 18, 2023
1 parent 11e3ee1 commit ed0bcda
Show file tree
Hide file tree
Showing 16 changed files with 5,515 additions and 6,002 deletions.
6 changes: 4 additions & 2 deletions aria/avo/x86/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,13 @@ func processFinSSE2() {
dst[j] = rk[j] ^ t[j]
}
*/
tmp := XMM()
F_mm_storeu_si128(
dst,
F_mm_xor_si128(
A_mm_loadu_si128(rk),
A_mm_loadu_si128(t),
tmp,
F_mm_loadu_si128(tmp, rk),
F_mm_loadu_si128(XMM(), t),
),
)

Expand Down
6 changes: 4 additions & 2 deletions avoutil/mov.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,9 @@ func isAligned(alignedByte int, args ...Op) bool {
return true
}

// VMOVDQ_autoAU2 is a destination-first wrapper around VMOVDQ_autoAU: it takes
// (dst, src) and forwards them to VMOVDQ_autoAU in (src, dst) order.
// The second signature line below (the one with an Op result) pairs with the
// "return dst" statement.
func VMOVDQ_autoAU2(dst, src Op) {
func VMOVDQ_autoAU2(dst, src Op) Op {
VMOVDQ_autoAU(src, dst)
// Hand the destination operand back, presumably so the call can be nested as
// an argument of another helper (confirm against callers).
return dst
}
func VMOVDQ_autoAU(mxy, mxy1 Op) {
if isAligned(YmmSize, mxy, mxy1) {
Expand Down Expand Up @@ -68,8 +69,9 @@ func VMOVDQ_autoAU(mxy, mxy1 Op) {
}
}

// MOVO_autoAU2 is a destination-first wrapper around MOVO_autoAU: it takes
// (dst, src) and forwards them to MOVO_autoAU in (src, dst) order.
// The second signature line below (the one with an Op result) pairs with the
// "return dst" statement.
func MOVO_autoAU2(dst, src Op) {
func MOVO_autoAU2(dst, src Op) Op {
MOVO_autoAU(src, dst)
// Hand the destination operand back, presumably so the call can be nested as
// an argument of another helper (confirm against callers).
return dst
}
func MOVO_autoAU(mx, mx1 Op) {
if isAligned(XmmSize, mx, mx1) {
Expand Down
36 changes: 18 additions & 18 deletions avoutil/simd/simd_avx2.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package simd
import (
. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
. "github.com/mmcloughlin/avo/reg"

. "kryptosimd/avoutil"
)
Expand All @@ -24,8 +25,8 @@ Operation
dst[255:0] := MEM[mem_addr+255:mem_addr]
dst[MAX:256] := 0
*/
// F_mm256_loadu_si256 forwards dst and src to the VMOVDQ_autoAU helpers and
// returns dst. The first signature/body pair takes plain Op operands and calls
// VMOVDQ_autoAU(src, dst); the second narrows dst (and the result) to
// VecVirtual and calls VMOVDQ_autoAU2(dst, src) instead.
func F_mm256_loadu_si256(dst, src Op) Op {
VMOVDQ_autoAU(src, dst)
func F_mm256_loadu_si256(dst VecVirtual, src Op) VecVirtual {
VMOVDQ_autoAU2(dst, src)
// Returning dst lets the caller use the loaded register as a value.
return dst
}

Expand All @@ -45,9 +46,8 @@ Operation
MEM[mem_addr+255:mem_addr] := a[255:0]
*/
// F_mm256_storeu_si256 forwards src and dst to the VMOVDQ_autoAU helpers.
// The first signature/body pair calls VMOVDQ_autoAU(src, dst) and returns dst;
// the second declares no result and calls VMOVDQ_autoAU2(dst, src).
func F_mm256_storeu_si256(dst, src Op) Op {
VMOVDQ_autoAU(src, dst)
return dst
func F_mm256_storeu_si256(dst, src Op) {
VMOVDQ_autoAU2(dst, src)
}

/*
Expand All @@ -67,7 +67,7 @@ Operation
dst[255:0] := (a[255:0] XOR b[255:0])
dst[MAX:256] := 0
*/
func F_mm256_xor_si256(dst, a, b Op) Op {
func F_mm256_xor_si256(dst VecVirtual, a, b Op) VecVirtual {
CheckType(
`
// VPXOR m256 ymm ymm
Expand Down Expand Up @@ -99,7 +99,7 @@ Operation
dst[255:0] := (a[255:0] OR b[255:0])
dst[MAX:256] := 0
*/
func F_mm256_or_si256(dst, a, b Op) Op {
func F_mm256_or_si256(dst VecVirtual, a, b Op) VecVirtual {
CheckType(
`
// VPOR m256 ymm ymm
Expand Down Expand Up @@ -131,7 +131,7 @@ Operation
dst[255:0] := (a[255:0] AND b[255:0])
dst[MAX:256] := 0
*/
func F_mm256_and_si256(dst, a, b Op) Op {
func F_mm256_and_si256(dst VecVirtual, a, b Op) VecVirtual {
CheckType(
`
// VPAND m256 ymm ymm
Expand Down Expand Up @@ -177,7 +177,7 @@ Operation
ENDFOR
dst[MAX:256] := 0
*/
func F_mm256_shuffle_epi8(dst, x, y Op) Op {
func F_mm256_shuffle_epi8(dst VecVirtual, x, y Op) VecVirtual {
CheckType(
`
// VPSHUFB m256 ymm ymm
Expand Down Expand Up @@ -220,7 +220,7 @@ Operation
ENDFOR
dst[MAX:256] := 0
*/
func F_mm256_add_epi32(dst, a, b Op) Op {
func F_mm256_add_epi32(dst VecVirtual, a, b Op) VecVirtual {
CheckType(
`
// VPADDD m256 ymm ymm
Expand Down Expand Up @@ -267,7 +267,7 @@ Operation
ENDFOR
dst[MAX:256] := 0
*/
func F_mm256_slli_epi32(dst, a, imm8 Op) Op {
func F_mm256_slli_epi32(dst VecVirtual, a, imm8 Op) VecVirtual {
CheckType(
`
// VPSLLD imm8 ymm ymm
Expand Down Expand Up @@ -326,7 +326,7 @@ Operation
ENDFOR
dst[MAX:256] := 0
*/
func F_mm256_srli_epi32(dst, a, imm8 Op) Op {
func F_mm256_srli_epi32(dst VecVirtual, a, imm8 Op) VecVirtual {
CheckType(
`
// VPSRLD imm8 ymm ymm
Expand Down Expand Up @@ -394,7 +394,7 @@ Operation
dst[255:224] := SELECT4(a[255:128], imm8[7:6])
dst[MAX:256] := 0
*/
func F_mm256_shuffle_epi32(dst, a, imm8 Op) Op {
func F_mm256_shuffle_epi32(dst VecVirtual, a, imm8 Op) VecVirtual {
CheckType(
`
// VPSHUFD imm8 m256 ymm
Expand Down Expand Up @@ -447,7 +447,7 @@ Operation
dst[255:128] := SELECT4(a[255:0], b[255:0], imm8[7:4])
dst[MAX:256] := 0
*/
func F_mm256_permute2x128_si256(dst, a, b, imm8 Op) Op {
func F_mm256_permute2x128_si256(dst VecVirtual, a, b, imm8 Op) VecVirtual {
CheckType(
`
// VPERM2I128 imm8 m256 ymm ymm
Expand Down Expand Up @@ -481,7 +481,7 @@ Operation
ENDFOR
dst[MAX:256] := 0
*/
func F_mm256_add_epi64(dst, x, y Op) Op {
func F_mm256_add_epi64(dst VecVirtual, x, y Op) VecVirtual {
CheckType(
`
// VPADDQ m256 ymm ymm
Expand Down Expand Up @@ -529,7 +529,7 @@ Operation
ENDFOR
dst[MAX:256] := 0
*/
func F_mm256_slli_epi64(dst, x, r Op) Op {
func F_mm256_slli_epi64(dst VecVirtual, x, r Op) VecVirtual {
CheckType(
`
// VPSLLQ imm8 ymm ymm
Expand Down Expand Up @@ -589,7 +589,7 @@ Operation
ENDFOR
dst[MAX:256] := 0
*/
func F_mm256_srli_epi64(dst, x, r Op) Op {
func F_mm256_srli_epi64(dst VecVirtual, x, r Op) VecVirtual {
CheckType(
`
// VPSRLQ imm8 ymm ymm
Expand Down Expand Up @@ -654,7 +654,7 @@ Operation
dst[255:192] := SELECT4(a[255:0], imm8[7:6])
dst[MAX:256] := 0
*/
func F_mm256_permute4x64_epi64(dst, a, imm8 Op) Op {
func F_mm256_permute4x64_epi64(dst VecVirtual, a, imm8 Op) VecVirtual {
CheckType(
`
// VPERMQ imm8 m256 ymm
Expand Down
Loading

0 comments on commit ed0bcda

Please sign in to comment.