Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Zvk vector crypto support (v5) #1303

Merged
merged 10 commits into from
Jun 20, 2023
21 changes: 21 additions & 0 deletions riscv/arith.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include <cstdint>
#include <climits>
#include <cstddef>
#include <type_traits>

inline uint64_t mulhu(uint64_t a, uint64_t b)
{
Expand Down Expand Up @@ -221,4 +222,24 @@ static inline uint64_t xperm(uint64_t rs1, uint64_t rs2, size_t sz_log2, size_t
return r;
}

// Rotates an unsigned integer to the right by 'shiftamt' bit positions.
//
// The rotation count is taken modulo the bit width of T, so every value of
// 'shiftamt' is valid; a multiple of the width yields 'x' unchanged.
template <typename T>
static inline T rotate_right(T x, std::size_t shiftamt) {
  static_assert(std::is_unsigned<T>::value);
  constexpr std::size_t bit_width = 8 * sizeof(T);
  constexpr std::size_t width_mask = bit_width - 1;
  const std::size_t down = shiftamt & width_mask;
  // Complementary shift, kept strictly below the bit width so that a zero
  // rotation never shifts by the full width.
  const std::size_t up = (bit_width - down) & width_mask;
  return (x >> down) | (x << up);
}

// Rotates left an unsigned integer by the given number of bits.
//
// The shift amount is reduced modulo the bit width of T, so any value of
// 'shiftamt' is accepted; a multiple of the width returns 'x' unchanged.
template <typename T>
static inline T rotate_left(T x, std::size_t shiftamt) {
  static_assert(std::is_unsigned<T>::value,
                "rotate_left requires an unsigned integer type");
  static constexpr T mask = (8 * sizeof(T)) - 1;
  const std::size_t lshift = shiftamt & mask;
  // Masking the negated amount keeps the complementary shift strictly below
  // the bit width, so a zero rotation never shifts by the full width.
  const std::size_t rshift = (-lshift) & mask;
  return (x << lshift) | (x >> rshift);
}

#endif
1 change: 1 addition & 0 deletions riscv/decode.h
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ class insn_t
// Single-field accessors: each returns x(<pos>, 1) for pos = 26, 27, 28.
// NOTE(review): x(lo, len) is presumably the instruction bit-field extractor
// declared elsewhere in this class -- confirm before relying on bit positions.
uint64_t v_vta() { return x(26, 1); }
uint64_t v_vma() { return x(27, 1); }
uint64_t v_mew() { return x(28, 1); }
// Builds a 6-bit value from two fields: x(15, 5) supplies the low five bits
// and x(26, 1) supplies bit 5.
// NOTE(review): x(lo, len) is presumably the instruction bit-field extractor
// declared elsewhere in this class -- confirm.
uint64_t v_zimm6() {
  const uint64_t low_bits = x(15, 5);
  const uint64_t top_bit = x(26, 1);
  return low_bits + (top_bit << 5);
}

// Accessors returning x(20, 2) and x(20, 3) respectively.
// NOTE(review): presumably 2- and 3-bit immediates starting at bit 20 --
// x() is declared outside this view, confirm its (lo, len) semantics.
uint64_t p_imm2() { return x(20, 2); }
uint64_t p_imm3() { return x(20, 3); }
Expand Down
1 change: 0 additions & 1 deletion riscv/insns/sm4_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,3 @@ static const uint8_t sm4_sbox[256] = {
0x18, 0xF0, 0x7D, 0xEC, 0x3A, 0xDC, 0x4D, 0x20, 0x79, 0xEE, 0x5F, 0x3E,
0xD7, 0xCB, 0x39, 0x48
};

43 changes: 43 additions & 0 deletions riscv/insns/vaesdf_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// vaesdf.vs vd, vs2
//
// AES final-round decryption step (InvMixColumns is omitted).
// The round key is read once from element group 0 of vs2 and XORed into
// each element group of vd that the loop visits.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
// First macro argument (above) is left empty.
//
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", vs2's first element group, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, scalar_key);
// InvMixColumns is not performed in the final round.

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
37 changes: 37 additions & 0 deletions riscv/insns/vaesdf_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// vaesdf.vv vd, vs2
//
// AES final-round decryption step (InvMixColumns is omitted).
// Each element group of vd is combined with the round key held in the
// element group of vs2 at the same index.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd in contains the input state,
// - vs2 contains the input round key,
// - vd out receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, round_key);
// InvMixColumns is not performed in the final round.

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
44 changes: 44 additions & 0 deletions riscv/insns/vaesdm_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// vaesdm.vs vd, vs2
//
// AES decryption round that includes InvMixColumns.
// The round key is read once from element group 0 of vs2 and XORed into
// each element group of vd that the loop visits.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
// First macro argument (above) is left empty.
//
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", vs2's first element group, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd in contains the input state,
// - vs2 contains the input round key,
// - vd out receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, scalar_key);
// InvMixColumns - applied after the round key in this instruction.
VAES_INV_MIX_COLUMNS(aes_state);

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
38 changes: 38 additions & 0 deletions riscv/insns/vaesdm_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// vaesdm.vv vd, vs2
//
// AES decryption round that includes InvMixColumns.
// Each element group of vd is combined with the round key held in the
// element group of vs2 at the same index.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// InvShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_INV_SHIFT_ROWS(aes_state);
// InvSubBytes - Apply the inverse S-box to every byte in the state.
VAES_INV_SUB_BYTES(aes_state);
// AddRoundKey (which is also InvAddRoundKey as it's xor)
EGU8x16_XOREQ(aes_state, round_key);
// InvMixColumns - applied after the round key in this instruction.
VAES_INV_MIX_COLUMNS(aes_state);

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
43 changes: 43 additions & 0 deletions riscv/insns/vaesef_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// vaesef.vs vd, vs2
//
// AES final-round encryption step (MixColumns is omitted).
// The round key is read once from element group 0 of vs2 and XORed into
// each element group of vd that the loop visits.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
// First macro argument (above) is left empty.
//
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", vs2's first element group, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns is not performed for the final round.
// AddRoundKey
EGU8x16_XOREQ(aes_state, scalar_key);

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
37 changes: 37 additions & 0 deletions riscv/insns/vaesef_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
// vaesef.vv vd, vs2
//
// AES final-round encryption step (MixColumns is omitted).
// Each element group of vd is combined with the round key held in the
// element group of vs2 at the same index.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns is not performed for the final round.
// AddRoundKey
EGU8x16_XOREQ(aes_state, round_key);

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
44 changes: 44 additions & 0 deletions riscv/insns/vaesem_vs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
// vaesem.vs vd, vs2
//
// AES encryption round that includes MixColumns.
// The round key is read once from element group 0 of vs2 and XORed into
// each element group of vd that the loop visits.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vs_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
// First macro argument (above) is left empty.
//
// This statement will be executed before the first execution
// of the loop, and only if the loop is going to be entered.
// We cannot use a block ( { ... } ) since we want the variables declared
// here to be visible in the loop block.
// We capture the "scalar", vs2's first element group, by copy, even though
// the "no overlap" constraint means that vs2 should remain constant
// during the loop.
const EGU8x16_t scalar_key = P.VU.elt_group<EGU8x16_t>(vs2_num, 0);,
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns
VAES_MIX_COLUMNS(aes_state);
// AddRoundKey
EGU8x16_XOREQ(aes_state, scalar_key);

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
38 changes: 38 additions & 0 deletions riscv/insns/vaesem_vv.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// vaesem.vv vd, vs2
//
// AES encryption round that includes MixColumns.
// Each element group of vd is combined with the round key held in the
// element group of vs2 at the same index.

#include "zvkned_ext_macros.h"
#include "zvk_ext_macros.h"

require_vaes_vv_constraints;

VI_ZVK_VD_VS2_NOOPERANDS_PRELOOP_EGU32x4_NOVM_LOOP(
{},
{}, // No PRELOOP.
{
// For AES128, AES192, or AES256, state and key are 128b/16B values:
// - vd contains the input state,
// - vs2 contains the round key,
// - vd receives the output state.
//
// While the spec calls for handling the vector as made of EGU32x4
// element groups (i.e., 4 uint32_t), it is convenient to treat
// AES state and key as EGU8x16 (i.e., 16 uint8_t). This is why
// we extract the operands here instead of using the existing LOOP
// macro that defines/extracts the operand variables as EGU32x4.
EGU8x16_t aes_state = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg);
const EGU8x16_t round_key = P.VU.elt_group<EGU8x16_t>(vs2_num, idx_eg);

// SubBytes - Apply S-box to every byte in the state
VAES_SUB_BYTES(aes_state);
// ShiftRows - Rotate the bytes of each row by 0, 1, 2, 3 positions.
VAES_SHIFT_ROWS(aes_state);
// MixColumns
VAES_MIX_COLUMNS(aes_state);
// AddRoundKey
EGU8x16_XOREQ(aes_state, round_key);

// Update the destination register.
// NOTE(review): the trailing 'true' presumably marks this access as a
// write to vd -- confirm against elt_group()'s declaration.
EGU8x16_t &vd = P.VU.elt_group<EGU8x16_t>(vd_num, idx_eg, true);
EGU8x16_COPY(vd, aes_state);
}
);
Loading
Loading