Skip to content

Commit

Permalink
Merge pull request #1303 from rivosinc/zvk-vector-crypto
Browse files Browse the repository at this point in the history
Zvk vector crypto support (v5)
  • Loading branch information
aswaterman committed Jun 20, 2023
2 parents 8b10de6 + a55f96a commit 5731a47
Show file tree
Hide file tree
Showing 56 changed files with 3,171 additions and 19 deletions.
21 changes: 21 additions & 0 deletions riscv/arith.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstdint>
#include <climits>
#include <cstddef>
#include <type_traits>

inline uint64_t mulhu(uint64_t a, uint64_t b)
{
Expand Down Expand Up @@ -221,4 +222,24 @@ static inline uint64_t xperm(uint64_t rs1, uint64_t rs2, size_t sz_log2, size_t
return r;
}

// Circularly shifts the bits of an unsigned integer towards the least
// significant end.
//
// T must be an unsigned integer type. The requested amount is reduced modulo
// the bit width of T, so 0 and amounts at or above the width are accepted.
template <typename T>
static inline T rotate_right(T x, std::size_t shiftamt) {
  static_assert(std::is_unsigned<T>::value);
  static constexpr T width_mask = (sizeof(T) * 8) - 1;
  const std::size_t down = shiftamt & width_mask;
  // Complementary shift for the bits that wrap around. Masking keeps it at 0
  // (instead of the full width) when `down` is 0.
  const std::size_t up = (0 - down) & width_mask;
  return (x >> down) | (x << up);
}

// Rotates left an unsigned integer by the given number of bits.
//
// T must be an unsigned integer type. `shiftamt` is reduced modulo the bit
// width of T, so a shift of 0, or of the full width or more, is well defined.
template <typename T>
static inline T rotate_left(T x, std::size_t shiftamt) {
  static_assert(std::is_unsigned<T>::value);
  static constexpr std::size_t mask = (8 * sizeof(T)) - 1;
  const std::size_t lshift = shiftamt & mask;
  // Complementary shift for the bits that wrap around to the low end. The
  // mask makes it 0 (not the full width) when lshift is 0.
  const std::size_t rshift = (-lshift) & mask;
  // For types narrower than int the operands are promoted, so the result is
  // explicitly converted back to T to drop the bits shifted out of range.
  return static_cast<T>((x << lshift) | (x >> rshift));
}

#endif
1 change: 1 addition & 0 deletions riscv/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ class insn_t
// Vector "vta" field, read as x(26, 1).
// NOTE(review): presumably x(lo, len) extracts `len` bits starting at bit
// `lo` of the instruction - confirm against insn_t::x.
uint64_t v_vta()
{
  return x(26, 1);
}
// Vector "vma" field, read as x(27, 1).
// NOTE(review): presumably x(lo, len) extracts `len` bits starting at bit
// `lo` of the instruction - confirm against insn_t::x.
uint64_t v_vma()
{
  return x(27, 1);
}
// Vector "mew" field, read as x(28, 1).
// NOTE(review): presumably x(lo, len) extracts `len` bits starting at bit
// `lo` of the instruction - confirm against insn_t::x.
uint64_t v_mew()
{
  return x(28, 1);
}
// Vector "zimm6" immediate: x(15, 5) plus x(26, 1) shifted left by 5.
// NOTE(review): presumably x(lo, len) extracts `len` bits starting at bit
// `lo`, which would make this a 6-bit value whose top bit comes from
// bit 26 - confirm against insn_t::x.
uint64_t v_zimm6()
{
  const auto low_part = x(15, 5);
  const auto high_part = x(26, 1) << 5;
  return low_part + high_part;
}

// "imm2" field, read as x(20, 2).
// NOTE(review): presumably x(lo, len) extracts `len` bits starting at bit
// `lo` of the instruction - confirm against insn_t::x.
uint64_t p_imm2()
{
  return x(20, 2);
}
// "imm3" field, read as x(20, 3).
// NOTE(review): presumably x(lo, len) extracts `len` bits starting at bit
// `lo` of the instruction - confirm against insn_t::x.
uint64_t p_imm3()
{
  return x(20, 3);
}
Expand Down
1 change: 0 additions & 1 deletion riscv/insns/sm4_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@ static const uint8_t sm4_sbox[256] = {
0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
0xD7, 0xCB, 0x39, 0x48
};

43 changes: 43 additions & 0 deletions riscv/insns/vaesdf_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// vaesdf.vs vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesdf.vs: final-round AES decryption step.
// Each element group of vd visited by the loop macro below is transformed
// with one shared round key, copied from element group 0 of vs2.
// require_vaes_vs_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", element group 0 of vs2, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on a local copy of the current element group of vd.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, scalar_key);
// InvMixColumns is not performed in the final round.

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
37 changes: 37 additions & 0 deletions riscv/insns/vaesdf_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// vaesdf.vv vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesdf.vv: final-round AES decryption step.
// Each element group of vd visited by the loop macro below is transformed
// with the round key held in the same-index element group of vs2.
// require_vaes_vv_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd in contains the input state,
// - vs2 contains the input round key,
// - vd out receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on local copies of the current element groups of vd and vs2.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, round_key);
// InvMixColumns is not performed in the final round.

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
44 changes: 44 additions & 0 deletions riscv/insns/vaesdm_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// vaesdm.vs vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesdm.vs: AES decryption round step that includes InvMixColumns.
// Each element group of vd visited by the loop macro below is transformed
// with one shared round key, copied from element group 0 of vs2.
// require_vaes_vs_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", element group 0 of vs2, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd in contains the input state,
// - vs2 contains the input round key,
// - vd out receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on a local copy of the current element group of vd.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, scalar_key);
// InvMixColumns - applied after the key has been added.
VAES_INV_MIX_COLUMNS(aes_state);

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
38 changes: 38 additions & 0 deletions riscv/insns/vaesdm_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// vaesdm.vv vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesdm.vv: AES decryption round step that includes InvMixColumns.
// Each element group of vd visited by the loop macro below is transformed
// with the round key held in the same-index element group of vs2.
// require_vaes_vv_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on local copies of the current element groups of vd and vs2.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, round_key);
// InvMixColumns - applied after the key has been added.
VAES_INV_MIX_COLUMNS(aes_state);

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
43 changes: 43 additions & 0 deletions riscv/insns/vaesef_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// vaesef.vs vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesef.vs: final-round AES encryption step.
// Each element group of vd visited by the loop macro below is transformed
// with one shared round key, copied from element group 0 of vs2.
// require_vaes_vs_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", element group 0 of vs2, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on a local copy of the current element group of vd.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns is not performed for the final round.
// AddRoundKey
EGU8x16_XOREQ(aes_state, scalar_key);

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
37 changes: 37 additions & 0 deletions riscv/insns/vaesef_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// vaesef.vv vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesef.vv: final-round AES encryption step.
// Each element group of vd visited by the loop macro below is transformed
// with the round key held in the same-index element group of vs2.
// require_vaes_vv_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on local copies of the current element groups of vd and vs2.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns is not performed for the final round.
// AddRoundKey
EGU8x16_XOREQ(aes_state, round_key);

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
44 changes: 44 additions & 0 deletions riscv/insns/vaesem_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// vaesem.vs vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesem.vs: AES encryption round step that includes MixColumns.
// Each element group of vd visited by the loop macro below is transformed
// with one shared round key, copied from element group 0 of vs2.
// require_vaes_vs_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", element group 0 of vs2, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on a local copy of the current element group of vd.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns
VAES_MIX_COLUMNS(aes_state);
// AddRoundKey
EGU8x16_XOREQ(aes_state, scalar_key);

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
38 changes: 38 additions & 0 deletions riscv/insns/vaesem_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// vaesem.vv vd, vs2

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

// vaesem.vv: AES encryption round step that includes MixColumns.
// Each element group of vd visited by the loop macro below is transformed
// with the round key held in the same-index element group of vs2.
// require_vaes_vv_constraints is defined outside this file (presumably it
// rejects illegal operand/configuration combinations - confirm).
require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
// First macro argument: intentionally left empty for this instruction.
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
// Work on local copies of the current element groups of vd and vs2.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns
VAES_MIX_COLUMNS(aes_state);
// AddRoundKey
EGU8x16_XOREQ(aes_state, round_key);

// Update the destination register.
// NOTE(review): the trailing `true` presumably flags this access as a
// write - confirm against the declaration of elt_group.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
Loading

0 comments on commit 5731a47

Please sign in to comment.