From 2dc13bf92e6a20b7342aab54f5124495415d697d Mon Sep 17 00:00:00 2001 From: Coda Hale Date: Tue, 9 Jan 2024 12:41:20 -0700 Subject: [PATCH] keccak: enable asm backend for p1600 (#68) --- keccak/benches/mod.rs | 14 +++++++++++++- keccak/src/armv8.rs | 13 ++++++------- keccak/src/lib.rs | 9 ++++++--- 3 files changed, 25 insertions(+), 11 deletions(-) diff --git a/keccak/benches/mod.rs b/keccak/benches/mod.rs index c080857..960c40a 100644 --- a/keccak/benches/mod.rs +++ b/keccak/benches/mod.rs @@ -4,7 +4,7 @@ extern crate keccak; extern crate test; -use keccak::{f1600, f200, f400, f800}; +use keccak::{f1600, f200, f400, f800, p1600}; macro_rules! impl_bench { ($name:ident, $fn:ident, $type:expr) => { @@ -21,6 +21,18 @@ impl_bench!(b_f400, f400, 0u16); impl_bench!(b_f800, f800, 0u32); impl_bench!(b_f1600, f1600, 0u64); +#[bench] +fn b_p1600_24(b: &mut test::Bencher) { + let mut data = [0u64; 25]; + b.iter(|| p1600(&mut data, 24)); +} + +#[bench] +fn b_p1600_16(b: &mut test::Bencher) { + let mut data = [0u64; 25]; + b.iter(|| p1600(&mut data, 16)); +} + #[cfg(feature = "simd")] mod simd { use keccak::simd::{f1600x2, f1600x4, f1600x8, u64x2, u64x4, u64x8}; diff --git a/keccak/src/armv8.rs b/keccak/src/armv8.rs index 9661954..698c8a1 100644 --- a/keccak/src/armv8.rs +++ b/keccak/src/armv8.rs @@ -1,10 +1,10 @@ -/// Keccak-f1600 on ARMv8.4-A with FEAT_SHA3. +/// Keccak-p1600 on ARMv8.4-A with FEAT_SHA3. /// /// See p. K12.2.2 p. 11,749 of the ARM Reference manual. /// Adapted from the Keccak-f1600 implementation in the XKCP/K12. /// see #[target_feature(enable = "sha3")] -pub unsafe fn f1600_armv8_sha3_asm(state: &mut [u64; 25]) { +pub unsafe fn p1600_armv8_sha3_asm(state: &mut [u64; 25], round_count: usize) { core::arch::asm!(" // Read state ld1.1d {{ v0- v3}}, [x0], #32 @@ -16,11 +16,9 @@ pub unsafe fn f1600_armv8_sha3_asm(state: &mut [u64; 25]) { ld1.1d {{v24}}, [x0] sub x0, x0, #192 - // Loop 24 rounds // NOTE: This loop actually computes two f1600 functions in // parallel, in both the lower and the upper 64-bit of the // 128-bit registers v0-v24. - mov x8, #24 0: sub x8, x8, #1 // Theta Calculations @@ -115,7 +113,8 @@ pub unsafe fn f1600_armv8_sha3_asm(state: &mut [u64; 25]) { st1.1d {{v24}}, [x0] ", in("x0") state.as_mut_ptr(), - in("x1") crate::RC.as_ptr(), + in("x1") crate::RC[24-round_count..].as_ptr(), + in("x8") round_count, clobber_abi("C"), options(nostack) ); @@ -185,9 +184,9 @@ mod tests { ]; let mut state = [0u64; 25]; - unsafe { f1600_armv8_sha3_asm(&mut state) }; + unsafe { p1600_armv8_sha3_asm(&mut state, 24) }; assert_eq!(state, state_first); - unsafe { f1600_armv8_sha3_asm(&mut state) }; + unsafe { p1600_armv8_sha3_asm(&mut state, 24) }; assert_eq!(state, state_second); } } diff --git a/keccak/src/lib.rs b/keccak/src/lib.rs index 0e35a3b..dc18ea3 100644 --- a/keccak/src/lib.rs +++ b/keccak/src/lib.rs @@ -161,7 +161,6 @@ impl_lanesize!(u64, 24, |rc: u64| { rc }); macro_rules! impl_keccak { ($pname:ident, $fname:ident, $type:ty) => { - /// Keccak-p sponge function pub fn $pname(state: &mut [$type; PLEN], round_count: usize) { keccak_p(state, round_count); @@ -184,14 +183,18 @@ impl_keccak!(p1600, f1600, u64); /// Keccak-p[1600, rc] permutation. #[cfg(all(target_arch = "aarch64", feature = "asm"))] pub fn p1600(state: &mut [u64; PLEN], round_count: usize) { - keccak_p(state, round_count); + if armv8_sha3_intrinsics::get() { + unsafe { armv8::p1600_armv8_sha3_asm(state, round_count) } + } else { + keccak_p(state, round_count); + } } /// Keccak-f[1600] permutation. #[cfg(all(target_arch = "aarch64", feature = "asm"))] pub fn f1600(state: &mut [u64; PLEN]) { if armv8_sha3_intrinsics::get() { - unsafe { armv8::f1600_armv8_sha3_asm(state) } + unsafe { armv8::p1600_armv8_sha3_asm(state, 24) } } else { keccak_p(state, u64::KECCAK_F_ROUND_COUNT); }