diff --git a/src/lib.rs b/src/lib.rs index 0394f58..53b7393 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -82,6 +82,8 @@ use u256_impl::u256; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use pulp::cast; use pulp::u64x4; +#[allow(unused_imports)] +use pulp::u64x8; #[doc(hidden)] pub mod prime; diff --git a/src/native32.rs b/src/native32.rs index 715e717..85650d3 100644 --- a/src/native32.rs +++ b/src/native32.rs @@ -1,6 +1,8 @@ use aligned_vec::avec; use pulp::u32x8; +#[allow(unused_imports)] +use pulp::{u32x16, u64x8}; /// Negacyclic NTT plan for multiplying two 32bit polynomials. #[derive(Clone, Debug)] diff --git a/src/native64.rs b/src/native64.rs index 034a3e8..a45b36c 100644 --- a/src/native64.rs +++ b/src/native64.rs @@ -1,5 +1,7 @@ use aligned_vec::avec; +#[allow(unused_imports)] +use pulp::{b8, u32x16, u64x8}; use pulp::{i32x4, m64x4, u32x4, u32x8, u64x4}; pub(crate) use crate::native32::mul_mod32; @@ -438,8 +440,8 @@ fn reconstruct_32bit_01234_avx2( let sign = simd.cmp_gt_u32x8(v4, half_p4); let sign: [i32x4; 2] = pulp::cast(sign); // sign extend so that -1i32 becomes -1i64 - let sign0: m64x4 = pulp::cast(simd.convert_i32x4_to_i64x4(sign[0])); - let sign1: m64x4 = pulp::cast(simd.convert_i32x4_to_i64x4(sign[1])); + let sign0: m64x4 = unsafe { core::mem::transmute(simd.convert_i32x4_to_i64x4(sign[0])) }; + let sign1: m64x4 = unsafe { core::mem::transmute(simd.convert_i32x4_to_i64x4(sign[1])) }; let v0: [u32x4; 2] = pulp::cast(v0); let v1: [u32x4; 2] = pulp::cast(v1); diff --git a/src/native_binary32.rs b/src/native_binary32.rs index 18fa72c..8047cb7 100644 --- a/src/native_binary32.rs +++ b/src/native_binary32.rs @@ -1,6 +1,8 @@ use aligned_vec::avec; use pulp::u32x8; +#[allow(unused_imports)] +use pulp::{u32x16, u64x8}; use crate::native32::mul_mod32; diff --git a/src/native_binary64.rs b/src/native_binary64.rs index eaa09d0..f8427dc 100644 --- a/src/native_binary64.rs +++ b/src/native_binary64.rs @@ -1,5 +1,7 @@ use aligned_vec::avec; +#[allow(unused_imports)] +use pulp::{b8, u32x16, u64x8}; use pulp::{i32x4, m64x4, u32x4, u32x8, u64x4}; pub(crate) use crate::native32::mul_mod32; @@ -109,8 +111,8 @@ fn reconstruct_32bit_012_avx2( let sign = simd.cmp_gt_u32x8(v2, half_p2); let sign: [i32x4; 2] = pulp::cast(sign); // sign extend so that -1i32 becomes -1i64 - let sign0: m64x4 = pulp::cast(simd.convert_i32x4_to_i64x4(sign[0])); - let sign1: m64x4 = pulp::cast(simd.convert_i32x4_to_i64x4(sign[1])); + let sign0: m64x4 = unsafe { core::mem::transmute(simd.convert_i32x4_to_i64x4(sign[0])) }; + let sign1: m64x4 = unsafe { core::mem::transmute(simd.convert_i32x4_to_i64x4(sign[1])) }; let v0: [u32x4; 2] = pulp::cast(v0); let v1: [u32x4; 2] = pulp::cast(v1); diff --git a/src/prime32.rs b/src/prime32.rs index 9cd0ccc..a4a2486 100644 --- a/src/prime32.rs +++ b/src/prime32.rs @@ -8,6 +8,8 @@ use aligned_vec::{avec, ABox}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use pulp::cast; +#[allow(unused_imports)] +use pulp::u32x16; use pulp::{u32x4, u32x8}; const RECURSION_THRESHOLD: usize = 2048; diff --git a/src/prime32/generic.rs b/src/prime32/generic.rs index b28322c..a45b1b7 100644 --- a/src/prime32/generic.rs +++ b/src/prime32/generic.rs @@ -5,6 +5,8 @@ use core::iter::zip; use pulp::u32x8; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use pulp::{as_arrays, as_arrays_mut, cast}; +#[allow(unused_imports)] +use pulp::{b16, u32x16}; #[inline(always)] pub(crate) fn add(p: u32, a: u32, b: u32) -> u32 { diff --git a/src/prime32/less_than_30bit.rs b/src/prime32/less_than_30bit.rs index 670633b..8ef083d 100644 --- a/src/prime32/less_than_30bit.rs +++ b/src/prime32/less_than_30bit.rs @@ -1,3 +1,5 @@ +#[allow(unused_imports)] +use pulp::u32x16; use pulp::u32x8; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/src/prime32/less_than_31bit.rs b/src/prime32/less_than_31bit.rs index ef68039..a1eb796 100644 --- a/src/prime32/less_than_31bit.rs +++ b/src/prime32/less_than_31bit.rs @@ -1,3 +1,5 @@ +#[allow(unused_imports)] +use pulp::u32x16; use pulp::u32x8; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] diff --git a/src/prime32/shoup.rs b/src/prime32/shoup.rs index 7bbb99b..f3265bb 100644 --- a/src/prime32/shoup.rs +++ b/src/prime32/shoup.rs @@ -3,6 +3,8 @@ use crate::Butterfly; use core::iter::zip; use pulp::cast; +#[allow(unused_imports)] +use pulp::u32x16; use pulp::u32x8; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use pulp::{as_arrays, as_arrays_mut}; diff --git a/src/prime64.rs b/src/prime64.rs index d1447c9..3d03f44 100644 --- a/src/prime64.rs +++ b/src/prime64.rs @@ -4,6 +4,8 @@ use aligned_vec::{avec, ABox}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use pulp::cast; use pulp::u64x4; +#[allow(unused_imports)] +use pulp::u64x8; const RECURSION_THRESHOLD: usize = 1024; diff --git a/src/prime64/generic_solinas.rs b/src/prime64/generic_solinas.rs index 1f9ab7e..f696492 100644 --- a/src/prime64/generic_solinas.rs +++ b/src/prime64/generic_solinas.rs @@ -5,6 +5,8 @@ use core::{fmt::Debug, iter::zip}; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] use pulp::cast; use pulp::u64x4; +#[allow(unused_imports)] +use pulp::{b8, u64x8}; pub(crate) trait PrimeModulus: Debug + Copy { type Div: Debug + Copy; diff --git a/src/prime64/less_than_50bit.rs b/src/prime64/less_than_50bit.rs index 39edbe9..b6c7c75 100644 --- a/src/prime64/less_than_50bit.rs +++ b/src/prime64/less_than_50bit.rs @@ -1,3 +1,5 @@ +use pulp::u64x8; + #[inline(always)] pub(crate) fn fwd_butterfly_avx512( simd: crate::V4IFma, diff --git a/src/prime64/less_than_51bit.rs b/src/prime64/less_than_51bit.rs index 26c22ab..607ad8b 100644 --- a/src/prime64/less_than_51bit.rs +++ b/src/prime64/less_than_51bit.rs @@ -1,3 +1,5 @@ +use pulp::u64x8; + #[inline(always)] pub(crate) fn fwd_butterfly_avx512( simd: crate::V4IFma, diff --git a/src/prime64/less_than_62bit.rs b/src/prime64/less_than_62bit.rs index d6837c8..a6258a8 100644 --- a/src/prime64/less_than_62bit.rs +++ b/src/prime64/less_than_62bit.rs @@ -1,4 +1,6 @@ use pulp::u64x4; +#[allow(unused_imports)] +use pulp::u64x8; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(feature = "nightly")] diff --git a/src/prime64/less_than_63bit.rs b/src/prime64/less_than_63bit.rs index aebeda9..c52b43f 100644 --- a/src/prime64/less_than_63bit.rs +++ b/src/prime64/less_than_63bit.rs @@ -1,4 +1,6 @@ use pulp::u64x4; +#[allow(unused_imports)] +use pulp::u64x8; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(feature = "nightly")] diff --git a/src/prime64/shoup.rs b/src/prime64/shoup.rs index e106a9c..6f06191 100644 --- a/src/prime64/shoup.rs +++ b/src/prime64/shoup.rs @@ -5,6 +5,8 @@ use core::iter::zip; use pulp::cast; use pulp::u64x4; +#[allow(unused_imports)] +use pulp::u64x8; #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] #[cfg(feature = "nightly")]