Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: fflonk gpu implementation #26

Merged
merged 36 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
d473769
fflonk gpu implementation prototype
saitima Aug 30, 2024
69be711
feat(fflonk): resolve conflicts
saitima Sep 10, 2024
28527b8
feat(fflonk): initial naive impl of full prover
saitima Sep 14, 2024
e37fd71
fix(boojum-cuda): "un-swap" coarse and fine count for powers_data_g_i…
mcarilli Sep 10, 2024
5c23661
chore(main): release 0.150.8 (#25)
zksync-era-bot Sep 11, 2024
ef38d39
feat(proof-compression): convenient functions
saitima Sep 25, 2024
89d9757
fix(proof-compression): deps
saitima Sep 25, 2024
c95a5f6
fix(proof-compression): deps
saitima Sep 25, 2024
c3fae93
merge latest changes
robik75 Sep 26, 2024
c7e78a7
chore(proof-compression) merge upstream fixes
saitima Sep 26, 2024
d5d4bf4
fix(fflonk): valid proof on A100
saitima Sep 26, 2024
dc55459
feature(fflonk): mem pool experiment
saitima Sep 29, 2024
be8c3e3
feat(fflonk): static allocator
saitima Sep 30, 2024
298b4ff
feat(fflonk): device based precomputation
saitima Oct 3, 2024
59a0401
chore(fflonk) better context handling
saitima Oct 10, 2024
5f9939c
make convenience members public
itegulov Oct 7, 2024
cac3095
make CompressionSchedule name public
itegulov Oct 9, 2024
4c9ea39
debug(fflonk): 3 vs 4 col permutations
saitima Oct 11, 2024
514e7d3
simple reproducible test
saitima Oct 16, 2024
041abd6
feat(fflont): device based trace and permutations construction
saitima Oct 22, 2024
7deafa9
chore(fflonk): fmt
saitima Oct 24, 2024
a4d765f
fix: resolve conflicts
saitima Oct 24, 2024
5ca9ec2
feat!: fflonk
robik75 Aug 16, 2024
e5fc7f1
chore(proof-compression): restructure deps
saitima Oct 30, 2024
f4bde55
chore(proof-compression): resolve conflicts
saitima Oct 30, 2024
958eb0d
chore(proof-compression): restructure
saitima Oct 30, 2024
d6f668f
chore(main): update metadata of fflonk and proof-compression
saitima Oct 31, 2024
9ff85b2
chore: resolve conflicts
saitima Oct 31, 2024
ac91509
chore(shivini): test_data update (#41)
robik75 Oct 30, 2024
be8357b
chore: resolve conflicts
saitima Oct 31, 2024
1e5db6b
chore: resolve conflicts
saitima Oct 31, 2024
414156e
chore: resolve conflicts
saitima Oct 31, 2024
cec30fd
chore: resolve conflicts
saitima Oct 31, 2024
8e81ab7
chore(fflonk): bump dep versions
saitima Oct 31, 2024
f1b1bd5
chore(proof-compression): resolve conflicts
saitima Oct 31, 2024
d361864
fix: resolve conflicts
saitima Oct 31, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
[workspace]
members = [
"crates/*"
]
members = ["crates/*"]
exclude = ["crates/proof-compression"]
resolver = "2"

[workspace.package]
Expand All @@ -27,13 +26,17 @@ gpu-ffi-bindings-generator = { version = "=0.152.0", path = "crates/gpu-ffi", pa
gpu-prover = { version = "=0.152.0", path = "crates/gpu-prover", package = "zksync-gpu-prover" }
shivini = { version = "=0.152.0", path = "crates/shivini" }
wrapper-prover = { version = "=0.152.0", path = "crates/wrapper-prover", package = "zksync-wrapper-prover" }
# fflonk = { version = "=0.152.0", path = "crates/fflonk", package = "fflonk-gpu" }
# proof-compression = { version = "=0.150.1", path = "crates/proof-compression", package = "proof-compression" }

# These dependencies should be shared by all the crates.
boojum = "=0.30.5"
circuit_definitions = "=0.150.10"
franklin-crypto = "=0.30.5"
snark_wrapper = "=0.30.5"
zkevm_test_harness = "=0.150.10"
circuit_definitions = { version = "=0.150.11" }
zkevm_test_harness = { version = "=0.150.11" }
boojum = "=0.30.6"
franklin-crypto = "=0.30.6"
rescue_poseidon = "=0.30.6"
snark_wrapper = "=0.30.6"
fflonk-cpu = {package = "fflonk", version = "=0.30.6"}

[profile.release]
debug = "line-tables-only"
1 change: 1 addition & 0 deletions crates/fflonk/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
data/
28 changes: 28 additions & 0 deletions crates/fflonk/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
[package]
edition.workspace = true
authors.workspace = true
homepage.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
repository.workspace = true
version.workspace = true
name = "fflonk-cuda"
description = "CUDA implementation of the fflonk prover"
exclude = ["/data"]

[dependencies]
fflonk-cpu = {workspace = true}
circuit_definitions.workspace = true
gpu-ffi.workspace = true
rand = "0.4"
derivative = "2.2"
byteorder = "1"
bincode = "1.3"
serde = { version = "1", features = ["derive", "rc"] }
serde_json = "1"
serde_derive = "1"

[features]
default = ["sanity"]
sanity = []
9 changes: 9 additions & 0 deletions crates/fflonk/src/allocator/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
use super::*;
mod pinned;
pub use pinned::*;

mod pool;
pub use pool::*;

mod static_device;
pub use static_device::*;
44 changes: 44 additions & 0 deletions crates/fflonk/src/allocator/pinned.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
use super::*;

// Both the assembly and the device setup are able to store data in pinned memory:
// - the assembly uses it for the variables (7487741), state and setup columns
// - the device setup uses it for variable indexes and gate selectors
// NOTE(review): nothing in the visible part of this file reads or writes this
// static — confirm whether it is still needed.
static mut _STATIC_HOST_ALLOC: Option<GlobalHost> = None;

/// Zero-sized handle for the host-side allocator.
///
/// The type carries no state; every value of it compares equal to every other.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct GlobalHost;

impl GlobalHost {
    /// Placeholder constructor: works out how many bytes the index and variable
    /// storage would need for `domain_size`, then stops at `todo!()`.
    ///
    /// # Panics
    /// Always panics, because the body is not implemented yet.
    pub fn init(domain_size: usize) -> CudaResult<Self> {
        let column_count = 3;
        let variable_count = 0;

        // 8 bytes per index entry, one entry per column per domain row.
        let index_bytes = 8 * column_count * domain_size;
        // 32 bytes per variable.
        let variable_bytes = 32 * variable_count;

        let _total_bytes = index_bytes + variable_bytes;

        todo!()
    }
}

/// Marker trait for allocators that can back host-side buffers.
///
/// It adds no methods of its own; it only bundles the bounds an allocator must
/// satisfy to be used as a host allocator.
pub trait HostAllocator:
    Allocator + Default + Clone + Send + Sync + 'static
{
}

// SAFETY: NOTE(review): the `Allocator` contract requires every returned block to
// satisfy `layout.align()`. Only `layout.size()` is forwarded to `host_allocate`,
// so this impl presumably relies on that call's own alignment guarantee — confirm.
unsafe impl Allocator for GlobalHost {
    /// Allocates `layout.size()` bytes through `host_allocate`.
    ///
    /// # Errors
    /// Returns `AllocError` when `host_allocate` fails, or when it reports
    /// success but hands back a null pointer.
    fn allocate(
        &self,
        layout: std::alloc::Layout,
    ) -> Result<std::ptr::NonNull<[u8]>, std::alloc::AllocError> {
        let size = layout.size();
        let raw = host_allocate(size).map_err(|_| std::alloc::AllocError)?;
        // A checked conversion: wrapping a null pointer with `new_unchecked`
        // would be undefined behaviour, so report it as an allocation failure.
        let ptr = std::ptr::NonNull::new(raw as *mut u8).ok_or(std::alloc::AllocError)?;
        Ok(std::ptr::NonNull::slice_from_raw_parts(ptr, size))
    }

    /// Releases a block through `host_dealloc`.
    ///
    /// # Safety
    /// `ptr` must denote a block previously returned by [`Self::allocate`].
    ///
    /// # Panics
    /// Panics if `host_dealloc` reports an error; this signature has no way to
    /// return one.
    unsafe fn deallocate(&self, ptr: std::ptr::NonNull<u8>, _layout: std::alloc::Layout) {
        host_dealloc(ptr.as_ptr().cast()).expect("deallocate static buffer")
    }
}

// Marker impls: both `GlobalHost` (backed by `host_allocate`) and the standard
// global allocator may be used wherever a `HostAllocator` is required.
impl HostAllocator for GlobalHost {}
impl HostAllocator for std::alloc::Global {}
76 changes: 76 additions & 0 deletions crates/fflonk/src/allocator/pool.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
use super::*;

/// Allocation interface with blocking and stream-parameterised (`*_async`) entry
/// points.
///
/// Every allocating method returns a pointer to a byte slice on success and a
/// CUDA error otherwise. The deallocating methods return nothing.
pub trait DeviceAllocator: Default {
    // --- blocking entry points -------------------------------------------

    /// Allocates a block described by `layout`.
    fn allocate(&self, layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>>;

    /// Allocates a block described by `layout` that is expected to be zero-filled.
    fn allocate_zeroed(&self, layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>>;

    /// Releases a block previously obtained from this allocator.
    fn deallocate(&self, ptr: std::ptr::NonNull<u8>, layout: std::alloc::Layout);

    // --- entry points taking a memory pool and/or stream ------------------

    /// Allocates a block described by `layout` using the given `pool` and `stream`.
    fn allocate_async(
        &self,
        layout: std::alloc::Layout,
        pool: bc_mem_pool,
        stream: bc_stream,
    ) -> CudaResult<std::ptr::NonNull<[u8]>>;

    /// Like [`Self::allocate_async`], but the block is expected to be zero-filled.
    fn allocate_zeroed_async(
        &self,
        layout: std::alloc::Layout,
        pool: bc_mem_pool,
        stream: bc_stream,
    ) -> CudaResult<std::ptr::NonNull<[u8]>>;

    /// Releases a block on the given `stream`.
    fn deallocate_async(
        &self,
        ptr: std::ptr::NonNull<u8>,
        layout: std::alloc::Layout,
        stream: bc_stream,
    );
}

/// Zero-sized handle for the memory-pool based allocator.
///
/// The type carries no state; every value of it compares equal to every other.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct PoolAllocator;

/// `DeviceAllocator` implementation that only supports the pool/stream based
/// (`*_async`) entry points; the blocking ones are deliberately unimplemented.
impl DeviceAllocator for PoolAllocator {
    /// Not supported by the pool allocator.
    ///
    /// # Panics
    /// Always.
    fn allocate(&self, _layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>> {
        unimplemented!("Pool allocator can't do static allocation/deallocation")
    }

    /// Not supported by the pool allocator.
    ///
    /// # Panics
    /// Always.
    fn allocate_zeroed(&self, _layout: std::alloc::Layout) -> CudaResult<std::ptr::NonNull<[u8]>> {
        // Fail loudly here rather than delegating to `allocate`: a delegating
        // version would hand out memory that was never zeroed if `allocate`
        // ever became functional.
        unimplemented!("Pool allocator can't do static allocation/deallocation")
    }

    /// Not supported by the pool allocator.
    ///
    /// # Panics
    /// Always.
    fn deallocate(&self, _ptr: std::ptr::NonNull<u8>, _layout: std::alloc::Layout) {
        unimplemented!("Pool allocator can't do static allocation/deallocation")
    }

    /// Allocates `layout.size()` bytes via `allocate_async_on` with the given
    /// `pool` and `stream`. Only the size of `layout` is used.
    ///
    /// # Errors
    /// Propagates the error returned by `allocate_async_on`.
    fn allocate_async(
        &self,
        layout: std::alloc::Layout,
        pool: bc_mem_pool,
        stream: bc_stream,
    ) -> CudaResult<std::ptr::NonNull<[u8]>> {
        let size = layout.size();
        allocate_async_on(size, pool, stream)
            // SAFETY: NOTE(review): assumes `allocate_async_on` never yields a
            // null pointer on success — confirm against its implementation.
            .map(|raw| unsafe { std::ptr::NonNull::new_unchecked(raw as _) })
            .map(|ptr| std::ptr::NonNull::slice_from_raw_parts(ptr, size))
    }

    /// Allocates `layout.size()` bytes via `allocate_zeroed_async_on` with the
    /// given `pool` and `stream`. Only the size of `layout` is used.
    ///
    /// # Errors
    /// Propagates the error returned by `allocate_zeroed_async_on`.
    fn allocate_zeroed_async(
        &self,
        layout: std::alloc::Layout,
        pool: bc_mem_pool,
        stream: bc_stream,
    ) -> CudaResult<std::ptr::NonNull<[u8]>> {
        let size = layout.size();
        allocate_zeroed_async_on(size, pool, stream)
            // SAFETY: NOTE(review): assumes `allocate_zeroed_async_on` never
            // yields a null pointer on success — confirm against its implementation.
            .map(|raw| unsafe { std::ptr::NonNull::new_unchecked(raw as _) })
            .map(|ptr| std::ptr::NonNull::slice_from_raw_parts(ptr, size))
    }

    /// Releases `ptr` via `dealloc_async` on `stream`.
    ///
    /// # Panics
    /// Panics if `dealloc_async` reports an error.
    fn deallocate_async(
        &self,
        ptr: std::ptr::NonNull<u8>,
        _layout: std::alloc::Layout,
        stream: bc_stream,
    ) {
        dealloc_async(ptr.as_ptr().cast(), stream).expect("deallocate")
    }
}

// SAFETY: `PoolAllocator` is a field-less unit struct, so it holds no state that
// could be raced on when the handle is moved to or shared with another thread.
unsafe impl Send for PoolAllocator {}
unsafe impl Sync for PoolAllocator {}
Loading
Loading