Skip to content

Commit

Permalink
keccak: enable asm backend for p1600 (#68)
Browse files Browse the repository at this point in the history
  • Loading branch information
codahale authored Jan 9, 2024
1 parent a3a4e01 commit 2dc13bf
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 11 deletions.
14 changes: 13 additions & 1 deletion keccak/benches/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
extern crate keccak;
extern crate test;

use keccak::{f1600, f200, f400, f800};
use keccak::{f1600, f200, f400, f800, p1600};

macro_rules! impl_bench {
($name:ident, $fn:ident, $type:expr) => {
Expand All @@ -21,6 +21,18 @@ impl_bench!(b_f400, f400, 0u16);
impl_bench!(b_f800, f800, 0u32);
impl_bench!(b_f1600, f1600, 0u64);

/// Benchmarks `p1600` with a round count of 24, starting from an all-zero state.
#[bench]
fn b_p1600_24(b: &mut test::Bencher) {
    const ROUNDS: usize = 24;
    // The state lives outside the closure, so every iteration permutes the
    // output of the previous one.
    let mut state = [0u64; 25];
    b.iter(|| p1600(&mut state, ROUNDS));
}

/// Benchmarks `p1600` with a reduced round count of 16, starting from an all-zero state.
#[bench]
fn b_p1600_16(b: &mut test::Bencher) {
    const ROUNDS: usize = 16;
    // The state lives outside the closure, so every iteration permutes the
    // output of the previous one.
    let mut state = [0u64; 25];
    b.iter(|| p1600(&mut state, ROUNDS));
}

#[cfg(feature = "simd")]
mod simd {
use keccak::simd::{f1600x2, f1600x4, f1600x8, u64x2, u64x4, u64x8};
Expand Down
13 changes: 6 additions & 7 deletions keccak/src/armv8.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
/// Keccak-f1600 on ARMv8.4-A with FEAT_SHA3.
/// Keccak-p1600 on ARMv8.4-A with FEAT_SHA3.
///
/// See section K12.2.2 (p. 11,749) of the ARM Reference manual.
/// Adapted from the Keccak-f1600 implementation in the XKCP/K12.
/// see <https://github.com/XKCP/K12/blob/df6a21e6d1f34c1aa36e8d702540899c97dba5a0/lib/ARMv8Asha3/KeccakP-1600-ARMv8Asha3.S#L69>
#[target_feature(enable = "sha3")]
pub unsafe fn f1600_armv8_sha3_asm(state: &mut [u64; 25]) {
pub unsafe fn p1600_armv8_sha3_asm(state: &mut [u64; 25], round_count: usize) {
core::arch::asm!("
// Read state
ld1.1d {{ v0- v3}}, [x0], #32
Expand All @@ -16,11 +16,9 @@ pub unsafe fn f1600_armv8_sha3_asm(state: &mut [u64; 25]) {
ld1.1d {{v24}}, [x0]
sub x0, x0, #192
// Loop 24 rounds
// NOTE: This loop actually computes two f1600 functions in
// parallel, in both the lower and the upper 64-bit of the
// 128-bit registers v0-v24.
mov x8, #24
0: sub x8, x8, #1
// Theta Calculations
Expand Down Expand Up @@ -115,7 +113,8 @@ pub unsafe fn f1600_armv8_sha3_asm(state: &mut [u64; 25]) {
st1.1d {{v24}}, [x0]
",
in("x0") state.as_mut_ptr(),
in("x1") crate::RC.as_ptr(),
in("x1") crate::RC[24-round_count..].as_ptr(),
in("x8") round_count,
clobber_abi("C"),
options(nostack)
);
Expand Down Expand Up @@ -185,9 +184,9 @@ mod tests {
];

let mut state = [0u64; 25];
unsafe { f1600_armv8_sha3_asm(&mut state) };
unsafe { p1600_armv8_sha3_asm(&mut state, 24) };
assert_eq!(state, state_first);
unsafe { f1600_armv8_sha3_asm(&mut state) };
unsafe { p1600_armv8_sha3_asm(&mut state, 24) };
assert_eq!(state, state_second);
}
}
9 changes: 6 additions & 3 deletions keccak/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,6 @@ impl_lanesize!(u64, 24, |rc: u64| { rc });

macro_rules! impl_keccak {
($pname:ident, $fname:ident, $type:ty) => {

/// Keccak-p sponge function
pub fn $pname(state: &mut [$type; PLEN], round_count: usize) {
keccak_p(state, round_count);
Expand All @@ -184,14 +183,18 @@ impl_keccak!(p1600, f1600, u64);
/// Keccak-p[1600, rc] permutation.
///
/// Permutes `state` in place using `round_count` rounds. This definition is
/// only compiled for `aarch64` targets with the `asm` feature enabled.
///
/// When `armv8_sha3_intrinsics::get()` returns `true` the work is done by the
/// `armv8::p1600_armv8_sha3_asm` routine; otherwise the portable `keccak_p`
/// implementation is used.
#[cfg(all(target_arch = "aarch64", feature = "asm"))]
pub fn p1600(state: &mut [u64; PLEN], round_count: usize) {
    // The permutation must be applied exactly once, so there is no
    // unconditional `keccak_p` call ahead of this dispatch.
    if armv8_sha3_intrinsics::get() {
        // SAFETY: the asm routine is declared with
        // `#[target_feature(enable = "sha3")]`; it is only reached when
        // `armv8_sha3_intrinsics::get()` is true, which presumably reports
        // runtime availability of that feature — confirm against the
        // detection module.
        // NOTE(review): the asm loop decrements its round counter at the top
        // of the loop, so `round_count == 0` may need a guard on this path —
        // verify against the loop tail in `armv8.rs`.
        unsafe { armv8::p1600_armv8_sha3_asm(state, round_count) }
    } else {
        keccak_p(state, round_count);
    }
}

/// Keccak-f[1600] permutation.
#[cfg(all(target_arch = "aarch64", feature = "asm"))]
pub fn f1600(state: &mut [u64; PLEN]) {
if armv8_sha3_intrinsics::get() {
unsafe { armv8::f1600_armv8_sha3_asm(state) }
unsafe { armv8::p1600_armv8_sha3_asm(state, 24) }
} else {
keccak_p(state, u64::KECCAK_F_ROUND_COUNT);
}
Expand Down

0 comments on commit 2dc13bf

Please sign in to comment.