From 5a1db8a41fe5cf3d880da7d8cf8580cd73728bb3 Mon Sep 17 00:00:00 2001 From: Giacomo Fenzi Date: Mon, 26 Aug 2024 21:53:34 +0200 Subject: [PATCH] Add impl of FftField for Fp2 and Fp3 (#848) * Add impl of FftField for Fp2 and Fp3 * Move to extensions * Testing out * Expand range of benchmarks and better parallelization handling for small FFTs * Format --------- Co-authored-by: Pratyush Mishra --- ff/src/fields/models/cubic_extension.rs | 28 +++++++++++++- ff/src/fields/models/fp3.rs | 1 - ff/src/fields/models/quadratic_extension.rs | 20 +++++++++- poly/benches/fft.rs | 2 +- poly/src/domain/radix2/fft.rs | 42 ++++++++++++++------- 5 files changed, 75 insertions(+), 18 deletions(-) diff --git a/ff/src/fields/models/cubic_extension.rs b/ff/src/fields/models/cubic_extension.rs index 8908a5afa..1049908bf 100644 --- a/ff/src/fields/models/cubic_extension.rs +++ b/ff/src/fields/models/cubic_extension.rs @@ -1,6 +1,7 @@ use crate::{ fields::{Field, PrimeField}, - AdditiveGroup, LegendreSymbol, One, SqrtPrecomputation, ToConstraintField, UniformRand, Zero, + AdditiveGroup, FftField, LegendreSymbol, One, SqrtPrecomputation, ToConstraintField, + UniformRand, Zero, }; use ark_serialize::{ CanonicalDeserialize, CanonicalDeserializeWithFlags, CanonicalSerialize, @@ -770,3 +771,28 @@ mod cube_ext_tests { } } } + +impl<P: CubicExtConfig> FftField for CubicExtField<P>

+where + P::BaseField: FftField, +{ + const GENERATOR: Self = Self::new( + P::BaseField::GENERATOR, + P::BaseField::ZERO, + P::BaseField::ZERO, + ); + const TWO_ADICITY: u32 = P::BaseField::TWO_ADICITY; + const TWO_ADIC_ROOT_OF_UNITY: Self = Self::new( + P::BaseField::TWO_ADIC_ROOT_OF_UNITY, + P::BaseField::ZERO, + P::BaseField::ZERO, + ); + const SMALL_SUBGROUP_BASE: Option<u32> = P::BaseField::SMALL_SUBGROUP_BASE; + const SMALL_SUBGROUP_BASE_ADICITY: Option<u32> = P::BaseField::SMALL_SUBGROUP_BASE_ADICITY; + const LARGE_SUBGROUP_ROOT_OF_UNITY: Option<Self> = + if let Some(x) = P::BaseField::LARGE_SUBGROUP_ROOT_OF_UNITY { + Some(Self::new(x, P::BaseField::ZERO, P::BaseField::ZERO)) + } else { + None + }; +} diff --git a/ff/src/fields/models/fp3.rs b/ff/src/fields/models/fp3.rs index 3d44f8fe3..96007f3fa 100644 --- a/ff/src/fields/models/fp3.rs +++ b/ff/src/fields/models/fp3.rs @@ -39,7 +39,6 @@ impl<P: Fp3Config> CubicExtConfig for Fp3ConfigWrapper<P>

{ type FrobCoeff = P::Fp; const DEGREE_OVER_BASE_PRIME_FIELD: usize = 3; - const NONRESIDUE: Self::BaseField = P::NONRESIDUE; const SQRT_PRECOMP: Option<SqrtPrecomputation<CubicExtField<Self>>> = diff --git a/ff/src/fields/models/quadratic_extension.rs b/ff/src/fields/models/quadratic_extension.rs index d89feddae..4ebbb5a62 100644 --- a/ff/src/fields/models/quadratic_extension.rs +++ b/ff/src/fields/models/quadratic_extension.rs @@ -1,7 +1,7 @@ use crate::{ biginteger::BigInteger, fields::{Field, LegendreSymbol, PrimeField}, - AdditiveGroup, One, SqrtPrecomputation, ToConstraintField, UniformRand, Zero, + AdditiveGroup, FftField, One, SqrtPrecomputation, ToConstraintField, UniformRand, Zero, }; use ark_serialize::{ CanonicalDeserialize, CanonicalDeserializeWithFlags, CanonicalSerialize, @@ -825,3 +825,21 @@ mod quad_ext_tests { } } } + +impl<P: QuadExtConfig> FftField for QuadExtField<P>

+where + P::BaseField: FftField, +{ + const GENERATOR: Self = Self::new(P::BaseField::GENERATOR, P::BaseField::ZERO); + const TWO_ADICITY: u32 = P::BaseField::TWO_ADICITY; + const TWO_ADIC_ROOT_OF_UNITY: Self = + Self::new(P::BaseField::TWO_ADIC_ROOT_OF_UNITY, P::BaseField::ZERO); + const SMALL_SUBGROUP_BASE: Option<u32> = P::BaseField::SMALL_SUBGROUP_BASE; + const SMALL_SUBGROUP_BASE_ADICITY: Option<u32> = P::BaseField::SMALL_SUBGROUP_BASE_ADICITY; + const LARGE_SUBGROUP_ROOT_OF_UNITY: Option<Self> = + if let Some(x) = P::BaseField::LARGE_SUBGROUP_ROOT_OF_UNITY { + Some(Self::new(x, P::BaseField::ZERO)) + } else { + None + }; +} diff --git a/poly/benches/fft.rs b/poly/benches/fft.rs index 0697f768e..47ccb0dbc 100644 --- a/poly/benches/fft.rs +++ b/poly/benches/fft.rs @@ -12,7 +12,7 @@ use criterion::{criterion_group, criterion_main, Bencher, BenchmarkId, Criterion // degree bounds to benchmark on // e.g. degree bound of 2^{15}, means we do an FFT for a degree (2^{15} - 1) polynomial -const BENCHMARK_MIN_DEGREE: usize = 1 << 15; +const BENCHMARK_MIN_DEGREE: usize = 1 << 4; const BENCHMARK_MAX_DEGREE_BLS12_381: usize = 1 << 22; const BENCHMARK_MAX_DEGREE_MNT6_753: usize = 1 << 17; const BENCHMARK_LOG_INTERVAL_DEGREE: usize = 1; diff --git a/poly/src/domain/radix2/fft.rs b/poly/src/domain/radix2/fft.rs index 15d5451a6..875a12414 100644 --- a/poly/src/domain/radix2/fft.rs +++ b/poly/src/domain/radix2/fft.rs @@ -224,23 +224,33 @@ impl<F: FftField> Radix2EvaluationDomain<F> { max_threads: usize, gap: usize, ) { - cfg_chunks_mut!(xi, chunk_size).for_each(|cxi| { - let (lo, hi) = cxi.split_at_mut(gap); - // If the chunk is sufficiently big that parallelism helps, - // we parallelize the butterfly operation within the chunk.
- - if gap > MIN_GAP_SIZE_FOR_PARALLELISATION && num_chunks < max_threads { - cfg_iter_mut!(lo) - .zip(hi) - .zip(cfg_iter!(roots).step_by(step)) - .for_each(g); - } else { + if xi.len() <= MIN_INPUT_SIZE_FOR_PARALLELIZATION { + xi.chunks_mut(chunk_size).for_each(|cxi| { + let (lo, hi) = cxi.split_at_mut(gap); lo.iter_mut() .zip(hi) .zip(roots.iter().step_by(step)) .for_each(g); - } - }); + }); + } else { + cfg_chunks_mut!(xi, chunk_size).for_each(|cxi| { + let (lo, hi) = cxi.split_at_mut(gap); + // If the chunk is sufficiently big that parallelism helps, + // we parallelize the butterfly operation within the chunk. + + if gap > MIN_GAP_SIZE_FOR_PARALLELIZATION && num_chunks < max_threads { + cfg_iter_mut!(lo) + .zip(hi) + .zip(cfg_iter!(roots).step_by(step)) + .for_each(g); + } else { + lo.iter_mut() + .zip(hi) + .zip(roots.iter().step_by(step)) + .for_each(g); + } + }); + } } fn io_helper<T: DomainCoeff<F>>(&self, xi: &mut [T], root: F) { @@ -349,7 +359,11 @@ const MIN_NUM_CHUNKS_FOR_COMPACTION: usize = 1 << 7; /// The minimum size of a chunk at which parallelization of `butterfly`s is /// beneficial. This value was chosen empirically. -const MIN_GAP_SIZE_FOR_PARALLELISATION: usize = 1 << 10; +const MIN_GAP_SIZE_FOR_PARALLELIZATION: usize = 1 << 10; + +/// The largest input length for which all `butterfly`s are applied serially; +/// longer inputs may be parallelized. This value was chosen empirically. +const MIN_INPUT_SIZE_FOR_PARALLELIZATION: usize = 1 << 10; // minimum size at which to parallelize. #[cfg(feature = "parallel")]