Skip to content

Commit

Permalink
aria xor simd
Browse files Browse the repository at this point in the history
  • Loading branch information
RyuaNerin committed Aug 18, 2023
1 parent 6582196 commit 11e3ee1
Show file tree
Hide file tree
Showing 15 changed files with 5,687 additions and 46 deletions.
13 changes: 13 additions & 0 deletions .vscode/launch.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,19 @@
"mode": "debug",
"program": "${file}"
},
{
"name": "Build ARIA (AMD64)",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "${workspaceFolder}/aria/avo/x86",
"env": {},
"args": [
"-out", "../../aria_amd64.s",
"-stubs", "../../aria_amd64_stubs.go",
"-pkg", "aria",
]
},
{
"name": "Build LEA (AMD64)",
"type": "go",
Expand Down
242 changes: 242 additions & 0 deletions aria/aria.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,242 @@
// Package aria implements ARIA encryption, as defined in KS X 1213-1
package aria

import (
"crypto/cipher"
"fmt"
)

const (
// BlockSize is the ARIA block size in bytes.
BlockSize = 16
)

// KeySizeError is the error returned by NewCipher when the key length is not
// supported. Its value is the rejected key length in bytes.
type KeySizeError int

// Error implements the error interface and reports the offending key length.
func (k KeySizeError) Error() string {
	size := int(k)
	return fmt.Sprintf("krypto/aria: invalid key size %d", size)
}

// aria holds the expanded key material for a single key. NewCipher fills it
// in and returns it as a cipher.Block.
type aria struct {
// rounds is the number of rounds, derived from the key length in NewCipher.
rounds int
// ek holds the encryption round keys, 16 bytes per round key.
// NOTE(review): rkSize is declared elsewhere; it must be at least
// 16*(rounds+1) for the largest supported key — confirm.
ek [rkSize]byte
// dk holds the decryption round keys, derived from ek in NewCipher.
dk [rkSize]byte
}

var (
// processFin is the final step of process: it writes rk XOR t (16 bytes)
// into dst. It defaults to the portable processFinGo; platform-specific
// init functions may replace it with an assembly implementation.
processFin func(dst []byte, rk []byte, t []byte) = processFinGo
)

// NewCipher creates and returns a new cipher.Block.
//
// The key argument should be the ARIA key, either 16, 24, or 32 bytes, to
// select ARIA-128, ARIA-192, or ARIA-256. Any other length yields a
// KeySizeError. The key is only read while the round keys are derived.
func NewCipher(key []byte) (cipher.Block, error) {
	l := len(key)
	if l != 16 && l != 24 && l != 32 {
		return nil, KeySizeError(l)
	}

	block := new(aria)
	// 12, 14, or 16 rounds for 16-, 24-, or 32-byte keys.
	block.rounds = (l*8 + 256) / 32

	////////////////////////////////////////
	// Derive the intermediate words w1, w2, w3 from the key.

	var t, w1, w2, w3 [16]byte

	// q selects the row of krk in use; it advances cyclically through 0, 1, 2.
	q := (l*8 - 128) / 64

	// substitute fills t with the S-box lookup of (krk[q] XOR in). first is
	// the S-box index used for byte 0; the index then cycles modulo 4.
	substitute := func(first int, in []byte) {
		for i := 0; i < 16; i++ {
			t[i] = s[(first+i)%4][krk[q][i]^in[i]]
		}
	}

	substitute(0, key)
	dl(t[:], w1[:])
	// Fold in the key bytes past the first 16: none for a 16-byte key,
	// 8 for a 24-byte key, 16 for a 32-byte key.
	for i := 16; i < l; i++ {
		w1[i-16] ^= key[i]
	}

	q = (q + 1) % 3
	substitute(2, w1[:])
	dl(t[:], w2[:])
	for i := range w2 {
		w2[i] ^= key[i]
	}

	q = (q + 1) % 3
	substitute(0, w2[:])
	dl(t[:], w3[:])
	for i := range w3 {
		w3[i] ^= w1[i]
	}

	////////////////////////////////////////
	// Encryption round keys.

	ek := block.ek[:]
	used := ek[:16*(block.rounds+1)]
	for i := range used {
		used[i] = 0
	}

	// Round key k (k = 0..rounds) is words[k%4] XOR words[(k+1)%4] rotated
	// right by rotations[k/4] bits; rotXOR accumulates both terms into ek.
	words := [4][]byte{key, w1[:], w2[:], w3[:]}
	rotations := [5]int{19, 31, 67, 97, 109}
	for k := 0; k <= block.rounds; k++ {
		rotXOR(words[k%4], 0, ek, 16*k)
		rotXOR(words[(k+1)%4], rotations[k/4], ek, 16*k)
	}

	////////////////////////////////////////
	// Decryption round keys: the encryption round keys in reverse order,
	// with dl applied to every key except the first and the last.

	copy(block.dk[:], block.ek[:])

	dk := block.dk[:]
	last := 16 * block.rounds
	for j := 0; j < 16; j++ {
		dk[j], dk[last+j] = dk[last+j], dk[j]
	}
	for i := 1; i <= block.rounds/2; i++ {
		front := dk[i*16:]
		back := dk[(block.rounds-i)*16:]
		// t keeps dl(front) while front is overwritten with dl(back).
		dl(front, t[:])
		dl(back, front)
		copy(back[:16], t[:])
	}

	return block, nil
}

// BlockSize returns the cipher's block size in bytes, which is always the
// package constant BlockSize.
func (s *aria) BlockSize() int {
return BlockSize
}

// Encrypt encrypts the first BlockSize bytes of src into dst using the
// encryption round keys. It panics if src or dst is shorter than BlockSize.
func (s *aria) Encrypt(dst, src []byte) {
	switch {
	case len(src) < BlockSize:
		panic(fmt.Sprintf("krypto/aria: invalid block size %d (src)", len(src)))
	case len(dst) < BlockSize:
		panic(fmt.Sprintf("krypto/aria: invalid block size %d (dst)", len(dst)))
	}

	// process transforms its buffer in place, so stage the input block in
	// dst first.
	copy(dst, src[:BlockSize])
	process(dst, s.rounds, s.ek[:])
}

// Decrypt decrypts the first BlockSize bytes of src into dst using the
// decryption round keys. It panics if src or dst is shorter than BlockSize.
func (s *aria) Decrypt(dst, src []byte) {
	switch {
	case len(src) < BlockSize:
		panic(fmt.Sprintf("krypto/aria: invalid block size %d (src)", len(src)))
	case len(dst) < BlockSize:
		panic(fmt.Sprintf("krypto/aria: invalid block size %d (dst)", len(dst)))
	}

	// process transforms its buffer in place, so stage the input block in
	// dst first.
	copy(dst, src[:BlockSize])
	process(dst, s.rounds, s.dk[:])
}

// process transforms the 16-byte block at the start of dst in place.
//
// rounds is the round count and rk the matching round-key schedule (the
// encryption or decryption keys built by NewCipher), consumed 16 bytes at a
// time. Rounds are handled in pairs: the first of each pair starts its S-box
// cycle at index 0, the second at index 2. Each round is followed by dl.
func process(dst []byte, rounds int, rk []byte) {
	var t [16]byte

	off := 0 // byte offset of the current round key within rk
	for pair := 0; pair < rounds/2; pair++ {
		for half := 0; half < 2; half++ {
			first := 2 * half
			for j := 0; j < 16; j++ {
				t[j] = s[(first+j)%4][rk[off+j]^dst[j]]
			}
			dl(t[:], dst)
			off += 16
		}
	}

	// Run dl once more from dst into t, then combine t with the final round
	// key to produce the output block.
	dl(dst, t[:])
	processFin(dst, rk[off:], t[:])
}

// processFinGo is the portable implementation of processFin: it stores the
// byte-wise XOR of the first 16 bytes of rk and t into the first 16 bytes of
// dst. All three slices must hold at least 16 bytes.
func processFinGo(dst []byte, rk []byte, t []byte) {
	const blockLen = 16
	for i := 0; i != blockLen; i++ {
		dst[i] = rk[i] ^ t[i]
	}
}

// dl applies a fixed byte-wise XOR transform to the 16-byte block i and
// writes the 16-byte result to o (presumably ARIA's diffusion layer, going
// by the name — confirm against the specification).
//
// Every output byte is the XOR of seven input bytes: four shared ones that
// are gathered in the temporary T, plus three picked individually.
//
// Both slices must hold at least 16 bytes. Output bytes are stored while
// input bytes are still being read, so i and o must not overlap when the
// result is needed.
func dl(i, o []byte) {
var T byte

// Group 1: shared term for o[0], o[5], o[11], o[14].
T = i[3] ^ i[4] ^ i[9] ^ i[14]
o[0] = i[6] ^ i[8] ^ i[13] ^ T
o[5] = i[1] ^ i[10] ^ i[15] ^ T
o[11] = i[2] ^ i[7] ^ i[12] ^ T
o[14] = i[0] ^ i[5] ^ i[11] ^ T
// Group 2: shared term for o[1], o[4], o[10], o[15].
T = i[2] ^ i[5] ^ i[8] ^ i[15]
o[1] = i[7] ^ i[9] ^ i[12] ^ T
o[4] = i[0] ^ i[11] ^ i[14] ^ T
o[10] = i[3] ^ i[6] ^ i[13] ^ T
o[15] = i[1] ^ i[4] ^ i[10] ^ T
// Group 3: shared term for o[2], o[7], o[9], o[12].
T = i[1] ^ i[6] ^ i[11] ^ i[12]
o[2] = i[4] ^ i[10] ^ i[15] ^ T
o[7] = i[3] ^ i[8] ^ i[13] ^ T
o[9] = i[0] ^ i[5] ^ i[14] ^ T
o[12] = i[2] ^ i[7] ^ i[9] ^ T
// Group 4: shared term for o[3], o[6], o[8], o[13].
T = i[0] ^ i[7] ^ i[10] ^ i[13]
o[3] = i[5] ^ i[11] ^ i[14] ^ T
o[6] = i[2] ^ i[9] ^ i[12] ^ T
o[8] = i[1] ^ i[4] ^ i[15] ^ T
o[13] = i[3] ^ i[6] ^ i[8] ^ T
}

// rotXOR right-rotates the 128-bit string held in s[0:16] by n bits and XORs
// the result into the 16 bytes of t that start at index ti.
//
// The rotation is split into a whole-byte part (n/8) and a bit part (n%8).
// Each source byte contributes its high bits to one target byte and, when
// the bit part is non-zero, its shifted-out low bits to the following target
// byte, wrapping around within the 16-byte window.
func rotXOR(s []byte, n int, t []byte, ti int) {
	byteShift, bitShift := n/8, n%8
	for i := 0; i < 16; i++ {
		t[ti+(byteShift+i)%16] ^= s[i] >> bitShift
		if bitShift == 0 {
			// Byte-aligned rotation: nothing spills into the next byte.
			continue
		}
		t[ti+(byteShift+i+1)%16] ^= s[i] << (8 - bitShift)
	}
}
7 changes: 7 additions & 0 deletions aria/aria_amd64.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
//go:build amd64 && gc && !purego

package aria

// init replaces the portable final-XOR routine with the SSE2 assembly
// implementation. This file is only built for amd64 with the gc toolchain
// and without the purego tag; no runtime CPU feature check is made here
// (SSE2 is part of the amd64 baseline).
func init() {
processFin = processFinSSE2
}
17 changes: 17 additions & 0 deletions aria/aria_amd64.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
// Code generated by command: go run main.go -out ../../aria_amd64.s -stubs ../../aria_amd64_stubs.go -pkg aria. DO NOT EDIT.

//go:build amd64 && gc && !purego

#include "textflag.h"

// func processFinSSE2(dst []byte, rk []byte, t []byte)
// Requires: SSE2
//
// processFinSSE2 stores the XOR of the first 16 bytes of rk and t into the
// first 16 bytes of dst. No length checks are performed, so callers must
// pass slices of at least 16 bytes.
//
// Frame $0-72: no local stack frame; 72 bytes of arguments, i.e. three
// 24-byte slice headers at offsets 0, 24 and 48.
TEXT ·processFinSSE2(SB), NOSPLIT, $0-72
// Load the data pointers of dst, rk and t.
MOVQ dst_base+0(FP), AX
MOVQ rk_base+24(FP), CX
MOVQ t_base+48(FP), DX
// Unaligned 16-byte loads of rk and t.
MOVOU (CX), X0
MOVOU (DX), X1
// X0 = rk XOR t.
PXOR X1, X0
// Unaligned 16-byte store of the result to dst.
MOVOU X0, (AX)
RET
7 changes: 7 additions & 0 deletions aria/aria_amd64_stubs.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 0 additions & 40 deletions aria/avo/context.go

This file was deleted.

30 changes: 29 additions & 1 deletion aria/avo/x86/main.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,41 @@
package main

import (
. "kryptosimd/avoutil/simd"

. "github.com/mmcloughlin/avo/build"
. "github.com/mmcloughlin/avo/operand"
)

// main drives the avo code generator for the amd64 ARIA routines. It
// registers the generator's package and build constraint, emits each
// assembly function, and lets avo write the outputs selected by the
// -out/-stubs/-pkg command-line flags.
func main() {
	// Only the generator's current import path is registered. The former
	// "kryptosimd/aria/x86/avo" path no longer matches this directory
	// (aria/avo/x86), so the stale call that preceded this one is dropped.
	Package("kryptosimd/aria/avo/x86")
	ConstraintExpr("amd64,gc,!purego")

	processFinSSE2()

	Generate()
	print("done")
}

// processFinSSE2 emits the assembly function processFinSSE2, which XORs 16
// bytes of rk with 16 bytes of t and stores the result in dst using one
// 128-bit operation instead of a byte loop.
//
// The order of the calls below determines the order of the emitted
// instructions, so statements must not be reordered casually.
func processFinSSE2() {
TEXT("processFinSSE2", NOSPLIT, "func(dst []byte, rk []byte, t []byte)")

// Load the base pointer of each slice argument into a general-purpose
// register and wrap it as a memory operand.
dst := Mem{Base: Load(Param("dst").Base(), GP64())}
rk := Mem{Base: Load(Param("rk").Base(), GP64())}
t := Mem{Base: Load(Param("t").Base(), GP64())}

/**
Go equivalent of the emitted code:

for j := 0; j < 16; j++ {
dst[j] = rk[j] ^ t[j]
}
*/
// Unaligned load of rk and t, XOR, then unaligned store to dst. In the
// generated assembly this becomes MOVOU, MOVOU, PXOR, MOVOU.
F_mm_storeu_si128(
dst,
F_mm_xor_si128(
A_mm_loadu_si128(rk),
A_mm_loadu_si128(t),
),
)

RET()
}
Loading

0 comments on commit 11e3ee1

Please sign in to comment.