From 9fecf2c737e7d2054f5aa8518a5c098577bf93ff Mon Sep 17 00:00:00 2001 From: Dmitry Zolotukhin Date: Sat, 3 Feb 2024 17:23:53 +0100 Subject: [PATCH] Refactored code to support multiple GPU backends. Moved GPU code into correlation/gpu. --- .github/workflows/cargo-build.yml | 6 +- src/correlation/{ => gpu}/correlation.metal | 0 src/correlation/{ => gpu}/correlation.wgsl | 0 src/correlation/{ => gpu}/gpu.rs | 0 src/correlation/{ => gpu}/metal.rs | 0 src/correlation/gpu/mod.rs | 419 +++++++++++ .../{ => gpu}/shaders/compile-spirv.sh | 0 .../shaders/cross_check_filter.comp.glsl | 0 .../{ => gpu}/shaders/cross_check_filter.spv | Bin .../shaders/cross_correlate.comp.glsl | 0 .../{ => gpu}/shaders/cross_correlate.spv | Bin .../{ => gpu}/shaders/init_out_data.comp.glsl | 0 .../{ => gpu}/shaders/init_out_data.spv | Bin .../prepare_initialdata_correlation.comp.glsl | 0 .../prepare_initialdata_correlation.spv | Bin .../prepare_initialdata_searchdata.comp.glsl | 0 .../prepare_initialdata_searchdata.spv | Bin .../shaders/prepare_searchdata.comp.glsl | 0 .../{ => gpu}/shaders/prepare_searchdata.spv | Bin src/correlation/{ => gpu}/vk.rs | 682 +++++------------- src/{correlation.rs => correlation/mod.rs} | 23 +- 21 files changed, 603 insertions(+), 527 deletions(-) rename src/correlation/{ => gpu}/correlation.metal (100%) rename src/correlation/{ => gpu}/correlation.wgsl (100%) rename src/correlation/{ => gpu}/gpu.rs (100%) rename src/correlation/{ => gpu}/metal.rs (100%) create mode 100644 src/correlation/gpu/mod.rs rename src/correlation/{ => gpu}/shaders/compile-spirv.sh (100%) rename src/correlation/{ => gpu}/shaders/cross_check_filter.comp.glsl (100%) rename src/correlation/{ => gpu}/shaders/cross_check_filter.spv (100%) rename src/correlation/{ => gpu}/shaders/cross_correlate.comp.glsl (100%) rename src/correlation/{ => gpu}/shaders/cross_correlate.spv (100%) rename src/correlation/{ => gpu}/shaders/init_out_data.comp.glsl (100%) rename src/correlation/{ => gpu}/shaders/init_out_data.spv (100%) rename src/correlation/{ => gpu}/shaders/prepare_initialdata_correlation.comp.glsl (100%) rename src/correlation/{ => gpu}/shaders/prepare_initialdata_correlation.spv (100%) rename src/correlation/{ => gpu}/shaders/prepare_initialdata_searchdata.comp.glsl (100%) rename src/correlation/{ => gpu}/shaders/prepare_initialdata_searchdata.spv (100%) rename src/correlation/{ => gpu}/shaders/prepare_searchdata.comp.glsl (100%) rename src/correlation/{ => gpu}/shaders/prepare_searchdata.spv (100%) rename src/correlation/{ => gpu}/vk.rs (76%) rename src/{correlation.rs => correlation/mod.rs} (97%) diff --git a/.github/workflows/cargo-build.yml b/.github/workflows/cargo-build.yml index fdbacc2..91f71fb 100644 --- a/.github/workflows/cargo-build.yml +++ b/.github/workflows/cargo-build.yml @@ -166,8 +166,8 @@ jobs: - name: Compile shaders run: | - xcrun -sdk macosx metal -c src/correlation/correlation.metal -o src/correlation/correlation.air - xcrun -sdk macosx metallib src/correlation/correlation.air -o src/correlation/correlation.metallib + xcrun -sdk macosx metal -c src/correlation/gpu/shaders/correlation.metal -o src/correlation/gpu/shaders/correlation.air + xcrun -sdk macosx metallib src/correlation/gpu/shaders/correlation.air -o src/correlation/gpu/shaders/correlation.metallib - name: Build run: cargo build --target=${{ matrix.arch }}-apple-darwin --release @@ -180,7 +180,7 @@ jobs: uses: actions/upload-artifact@v4 with: name: shaders-metallib - path: src/correlation/correlation.metallib + path: src/correlation/gpu/shaders/correlation.metallib - name: Upload application uses: actions/upload-artifact@v4 diff --git a/src/correlation/correlation.metal b/src/correlation/gpu/correlation.metal similarity index 100% rename from src/correlation/correlation.metal rename to src/correlation/gpu/correlation.metal diff --git a/src/correlation/correlation.wgsl b/src/correlation/gpu/correlation.wgsl similarity index 100% rename from src/correlation/correlation.wgsl rename to src/correlation/gpu/correlation.wgsl diff --git a/src/correlation/gpu.rs b/src/correlation/gpu/gpu.rs similarity index 100% rename from src/correlation/gpu.rs rename to src/correlation/gpu/gpu.rs diff --git a/src/correlation/metal.rs b/src/correlation/gpu/metal.rs similarity index 100% rename from src/correlation/metal.rs rename to src/correlation/gpu/metal.rs diff --git a/src/correlation/gpu/mod.rs b/src/correlation/gpu/mod.rs new file mode 100644 index 0000000..8f158f3 --- /dev/null +++ b/src/correlation/gpu/mod.rs @@ -0,0 +1,419 @@ +#[cfg(target_os = "macos")] +mod metal; +#[cfg(not(target_os = "macos"))] +mod vk; + +use std::{error, fmt}; +#[cfg(not(target_os = "macos"))] +use vk::ShaderModuleType; + +#[cfg(not(target_os = "macos"))] +pub type DefaultDeviceContext = vk::DeviceContext; + +use crate::data::Grid; +use nalgebra::Matrix3; + +use crate::correlation::{ + CorrelationDirection, CorrelationParameters, HardwareMode, ProjectionMode, CORRIDOR_MIN_RANGE, + CROSS_CHECK_SEARCH_AREA, KERNEL_SIZE, NEIGHBOR_DISTANCE, +}; + +use super::Match; + +// Decrease when using a low-powered GPU +const CORRIDOR_SEGMENT_LENGTH_HIGHPERFORMANCE: usize = 512; +const SEARCH_AREA_SEGMENT_LENGTH_HIGHPERFORMANCE: usize = 1024; +const CORRIDOR_SEGMENT_LENGTH_LOWPOWER: usize = 8; +const SEARCH_AREA_SEGMENT_LENGTH_LOWPOWER: usize = 128; + +trait Device { + unsafe fn run_shader( + &mut self, + dimensions: (usize, usize), + shader_type: ShaderModuleType, + shader_params: ShaderParams, + ) -> Result<(), Box>; + + unsafe fn transfer_in_images( + &self, + img1: &Grid, + img2: &Grid, + ) -> Result<(), Box>; + + unsafe fn save_corr( + &self, + correlation_values: &mut Grid>, + correlation_threshold: f32, + ) -> Result<(), Box>; + + unsafe fn save_result( + &self, + out_image: &mut Grid>, + correlation_values: &Grid>, + ) -> Result<(), Box>; + + unsafe fn destroy_buffers(&mut self); +} + +trait DeviceContext +where + D: Device, +{ + fn is_low_power(&self) -> bool; + + fn get_device_name(&self) -> Option; + + fn prepare_device( + &mut self, + img1_dimensions: (usize, usize), + img2_dimensions: (usize, usize), + ) -> Result<(), Box>; + + fn device(&self) -> Result<&D, GpuError>; + + fn device_mut(&mut self) -> Result<&mut D, GpuError>; +} + +#[repr(C)] +#[derive(Copy, Clone)] +struct ShaderParams { + img1_width: u32, + img1_height: u32, + img2_width: u32, + img2_height: u32, + out_width: u32, + out_height: u32, + scale: f32, + iteration_pass: u32, + fundamental_matrix: [f32; 3 * 4], + corridor_offset: i32, + corridor_start: u32, + corridor_end: u32, + kernel_size: u32, + threshold: f32, + min_stdev: f32, + neighbor_distance: u32, + extend_range: f32, + min_range: f32, +} + +pub struct GpuContext<'a> { + min_stdev: f32, + correlation_threshold: f32, + fundamental_matrix: Matrix3, + img1_dimensions: (usize, usize), + img2_dimensions: (usize, usize), + + correlation_values: Grid>, + + corridor_segment_length: usize, + search_area_segment_length: usize, + corridor_size: usize, + corridor_extend_range: f64, + + device_context: &'a mut DefaultDeviceContext, +} + +impl GpuContext<'_> { + pub fn new( + device_context: &mut DefaultDeviceContext, + img1_dimensions: (usize, usize), + img2_dimensions: (usize, usize), + projection_mode: ProjectionMode, + fundamental_matrix: Matrix3, + ) -> Result> { + let (search_area_segment_length, corridor_segment_length) = if device_context.is_low_power() + { + ( + SEARCH_AREA_SEGMENT_LENGTH_LOWPOWER, + CORRIDOR_SEGMENT_LENGTH_LOWPOWER, + ) + } else { + ( + SEARCH_AREA_SEGMENT_LENGTH_HIGHPERFORMANCE, + CORRIDOR_SEGMENT_LENGTH_HIGHPERFORMANCE, + ) + }; + + device_context.prepare_device(img1_dimensions, img2_dimensions)?; + let correlation_values = Grid::new(img1_dimensions.0, img1_dimensions.1, None); + + let params = CorrelationParameters::for_projection(&projection_mode); + Ok(GpuContext { + min_stdev: params.min_stdev, + correlation_threshold: params.correlation_threshold, + corridor_size: params.corridor_size, + corridor_extend_range: params.corridor_extend_range, + fundamental_matrix, + img1_dimensions, + img2_dimensions, + correlation_values, + corridor_segment_length, + search_area_segment_length, + device_context, + }) + } + + pub fn get_device_name(&self) -> String { + self.device_context.get_device_name().map_or( + String::from("Error: device not initialized"), + |device_name| device_name, + ) + } + + pub fn cross_check_filter( + &mut self, + scale: f32, + dir: CorrelationDirection, + ) -> Result<(), Box> { + let device = self.device_context.device_mut()?; + device.set_buffer_direction(&dir)?; + let (out_dimensions, out_dimensions_reverse) = match dir { + CorrelationDirection::Forward => (self.img1_dimensions, self.img2_dimensions), + CorrelationDirection::Reverse => (self.img2_dimensions, self.img1_dimensions), + }; + + let search_area = CROSS_CHECK_SEARCH_AREA * (1.0 / scale).round() as usize; + + // Reuse/repurpose ShaderParams. + let params = ShaderParams { + img1_width: out_dimensions.0 as u32, + img1_height: out_dimensions.1 as u32, + img2_width: out_dimensions_reverse.0 as u32, + img2_height: out_dimensions_reverse.1 as u32, + out_width: 0, + out_height: 0, + fundamental_matrix: [0.0; 3 * 4], + scale: 0.0, + iteration_pass: 0, + corridor_offset: 0, + corridor_start: 0, + corridor_end: 0, + kernel_size: 0, + threshold: 0.0, + min_stdev: 0.0, + neighbor_distance: search_area as u32, + extend_range: 0.0, + min_range: 0.0, + }; + unsafe { + device.run_shader(out_dimensions, ShaderModuleType::CrossCheckFilter, params)?; + } + Ok(()) + } + + pub fn complete_process( + &mut self, + ) -> Result>, Box> { + let device = self.device_context.device_mut()?; + let mut out_image = Grid::new(self.img1_dimensions.0, self.img1_dimensions.1, None); + unsafe { + device.save_result(&mut out_image, &self.correlation_values)?; + device.destroy_buffers(); + } + Ok(out_image) + } + + pub fn correlate_images( + &mut self, + img1: &Grid, + img2: &Grid, + scale: f32, + first_pass: bool, + progress_listener: Option<&PL>, + dir: CorrelationDirection, + ) -> Result<(), Box> { + { + let device = self.device_context.device()?; + device.set_buffer_direction(&dir)?; + } + let max_width = img1.width().max(img2.width()); + let max_height = img1.height().max(img2.height()); + let max_dimensions = (max_width, max_height); + let img1_dimensions = (img1.width(), img1.height()); + let out_dimensions = match dir { + CorrelationDirection::Forward => self.img1_dimensions, + CorrelationDirection::Reverse => self.img2_dimensions, + }; + + let mut progressbar_completed_percentage = 0.02; + let send_progress = |value| { + let value = match dir { + CorrelationDirection::Forward => value * 0.98 / 2.0, + CorrelationDirection::Reverse => 0.51 + value * 0.98 / 2.0, + }; + if let Some(pl) = progress_listener { + pl.report_status(value); + } + }; + + let mut params = ShaderParams { + img1_width: img1.width() as u32, + img1_height: img1.height() as u32, + img2_width: img2.width() as u32, + img2_height: img2.height() as u32, + out_width: out_dimensions.0 as u32, + out_height: out_dimensions.1 as u32, + fundamental_matrix: self.convert_fundamental_matrix(&dir), + scale, + iteration_pass: 0, + corridor_offset: 0, + corridor_start: 0, + corridor_end: 0, + kernel_size: KERNEL_SIZE as u32, + threshold: self.correlation_threshold, + min_stdev: self.min_stdev, + neighbor_distance: NEIGHBOR_DISTANCE as u32, + extend_range: self.corridor_extend_range as f32, + min_range: CORRIDOR_MIN_RANGE as f32, + }; + + let device = self.device_context.device_mut()?; + + unsafe { device.transfer_in_images(img1, img2)? }; + + if first_pass { + unsafe { + device.run_shader(out_dimensions, ShaderModuleType::InitOutData, params)?; + } + } else { + unsafe { + device.run_shader( + out_dimensions, + ShaderModuleType::PrepareInitialdataSearchdata, + params, + )?; + } + progressbar_completed_percentage = 0.02; + send_progress(progressbar_completed_percentage); + + let segment_length = self.search_area_segment_length; + let neighbor_width = (NEIGHBOR_DISTANCE as f32 / scale).ceil() as usize * 2 + 1; + let neighbor_pixels = neighbor_width * neighbor_width; + let neighbor_segments = neighbor_pixels / segment_length + 1; + + params.iteration_pass = 0; + for l in 0u32..neighbor_segments as u32 { + params.corridor_start = l * segment_length as u32; + params.corridor_end = (l + 1) * segment_length as u32; + if params.corridor_end > neighbor_pixels as u32 { + params.corridor_end = neighbor_pixels as u32; + } + unsafe { + device.run_shader( + img1_dimensions, + ShaderModuleType::PrepareSearchdata, + params, + )?; + } + + let percent_complete = + progressbar_completed_percentage + 0.09 * (l as f32 / neighbor_segments as f32); + send_progress(percent_complete); + } + progressbar_completed_percentage = 0.11; + send_progress(progressbar_completed_percentage); + + params.iteration_pass = 1; + for l in 0u32..neighbor_segments as u32 { + params.corridor_start = l * segment_length as u32; + params.corridor_end = (l + 1) * segment_length as u32; + if params.corridor_end > neighbor_pixels as u32 { + params.corridor_end = neighbor_pixels as u32; + } + unsafe { + device.run_shader( + img1_dimensions, + ShaderModuleType::PrepareSearchdata, + params, + )?; + } + + let percent_complete = + progressbar_completed_percentage + 0.09 * (l as f32 / neighbor_segments as f32); + send_progress(percent_complete); + } + + progressbar_completed_percentage = 0.20; + } + send_progress(progressbar_completed_percentage); + params.iteration_pass = if first_pass { 0 } else { 1 }; + + unsafe { + device.run_shader( + max_dimensions, + ShaderModuleType::PrepareInitialdataCorrelation, + params, + )?; + } + + let corridor_size = self.corridor_size; + let corridor_stripes = 2 * corridor_size + 1; + let max_length = img2.width().max(img2.height()); + let segment_length = self.corridor_segment_length; + let corridor_length = max_length - (KERNEL_SIZE * 2); + let corridor_segments = corridor_length / segment_length + 1; + for corridor_offset in -(corridor_size as i32)..=corridor_size as i32 { + for l in 0u32..corridor_segments as u32 { + params.corridor_offset = corridor_offset; + params.corridor_start = l * segment_length as u32; + params.corridor_end = (l + 1) * segment_length as u32; + if params.corridor_end > corridor_length as u32 { + params.corridor_end = corridor_length as u32; + } + unsafe { + device.run_shader(img1_dimensions, ShaderModuleType::CrossCorrelate, params)?; + } + + let corridor_complete = params.corridor_end as f32 / corridor_length as f32; + let percent_complete = progressbar_completed_percentage + + (1.0 - progressbar_completed_percentage) + * (corridor_offset as f32 + corridor_size as f32 + corridor_complete) + / corridor_stripes as f32; + send_progress(percent_complete); + } + } + + if matches!(dir, CorrelationDirection::Forward) { + unsafe { device.save_corr(&mut self.correlation_values, self.correlation_threshold)? }; + } + Ok(()) + } + + fn convert_fundamental_matrix(&self, dir: &CorrelationDirection) -> [f32; 3 * 4] { + let fundamental_matrix = match dir { + CorrelationDirection::Forward => self.fundamental_matrix, + CorrelationDirection::Reverse => self.fundamental_matrix.transpose(), + }; + let mut f = [0f32; 3 * 4]; + // Matrix layout in GLSL (OpenGL) is pure madness: https://www.opengl.org/archives/resources/faq/technical/transformations.htm. + // "Column major" means that vectors are vertical and a matrix multiplies a vector. + // "Row major" means a horizontal vector multiplies a matrix. + // This says nothing about how the matrix is stored in memory. + for row in 0..3 { + for col in 0..3 { + f[col * 4 + row] = fundamental_matrix[(row, col)] as f32; + } + } + f + } +} + +#[derive(Debug)] +pub struct GpuError { + msg: &'static str, +} + +impl GpuError { + fn new(msg: &'static str) -> GpuError { + GpuError { msg } + } +} + +impl std::error::Error for GpuError {} + +impl fmt::Display for GpuError { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.msg) + } +} diff --git a/src/correlation/shaders/compile-spirv.sh b/src/correlation/gpu/shaders/compile-spirv.sh similarity index 100% rename from src/correlation/shaders/compile-spirv.sh rename to src/correlation/gpu/shaders/compile-spirv.sh diff --git a/src/correlation/shaders/cross_check_filter.comp.glsl b/src/correlation/gpu/shaders/cross_check_filter.comp.glsl similarity index 100% rename from src/correlation/shaders/cross_check_filter.comp.glsl rename to src/correlation/gpu/shaders/cross_check_filter.comp.glsl diff --git a/src/correlation/shaders/cross_check_filter.spv b/src/correlation/gpu/shaders/cross_check_filter.spv similarity index 100% rename from src/correlation/shaders/cross_check_filter.spv rename to src/correlation/gpu/shaders/cross_check_filter.spv diff --git a/src/correlation/shaders/cross_correlate.comp.glsl b/src/correlation/gpu/shaders/cross_correlate.comp.glsl similarity index 100% rename from src/correlation/shaders/cross_correlate.comp.glsl rename to src/correlation/gpu/shaders/cross_correlate.comp.glsl diff --git a/src/correlation/shaders/cross_correlate.spv b/src/correlation/gpu/shaders/cross_correlate.spv similarity index 100% rename from src/correlation/shaders/cross_correlate.spv rename to src/correlation/gpu/shaders/cross_correlate.spv diff --git a/src/correlation/shaders/init_out_data.comp.glsl b/src/correlation/gpu/shaders/init_out_data.comp.glsl similarity index 100% rename from src/correlation/shaders/init_out_data.comp.glsl rename to src/correlation/gpu/shaders/init_out_data.comp.glsl diff --git a/src/correlation/shaders/init_out_data.spv b/src/correlation/gpu/shaders/init_out_data.spv similarity index 100% rename from src/correlation/shaders/init_out_data.spv rename to src/correlation/gpu/shaders/init_out_data.spv diff --git a/src/correlation/shaders/prepare_initialdata_correlation.comp.glsl b/src/correlation/gpu/shaders/prepare_initialdata_correlation.comp.glsl similarity index 100% rename from src/correlation/shaders/prepare_initialdata_correlation.comp.glsl rename to src/correlation/gpu/shaders/prepare_initialdata_correlation.comp.glsl diff --git a/src/correlation/shaders/prepare_initialdata_correlation.spv b/src/correlation/gpu/shaders/prepare_initialdata_correlation.spv similarity index 100% rename from src/correlation/shaders/prepare_initialdata_correlation.spv rename to src/correlation/gpu/shaders/prepare_initialdata_correlation.spv diff --git a/src/correlation/shaders/prepare_initialdata_searchdata.comp.glsl b/src/correlation/gpu/shaders/prepare_initialdata_searchdata.comp.glsl similarity index 100% rename from src/correlation/shaders/prepare_initialdata_searchdata.comp.glsl rename to src/correlation/gpu/shaders/prepare_initialdata_searchdata.comp.glsl diff --git a/src/correlation/shaders/prepare_initialdata_searchdata.spv b/src/correlation/gpu/shaders/prepare_initialdata_searchdata.spv similarity index 100% rename from src/correlation/shaders/prepare_initialdata_searchdata.spv rename to src/correlation/gpu/shaders/prepare_initialdata_searchdata.spv diff --git a/src/correlation/shaders/prepare_searchdata.comp.glsl b/src/correlation/gpu/shaders/prepare_searchdata.comp.glsl similarity index 100% rename from src/correlation/shaders/prepare_searchdata.comp.glsl rename to src/correlation/gpu/shaders/prepare_searchdata.comp.glsl diff --git a/src/correlation/shaders/prepare_searchdata.spv b/src/correlation/gpu/shaders/prepare_searchdata.spv similarity index 100% rename from src/correlation/shaders/prepare_searchdata.spv rename to src/correlation/gpu/shaders/prepare_searchdata.spv diff --git a/src/correlation/vk.rs b/src/correlation/gpu/vk.rs similarity index 76% rename from src/correlation/vk.rs rename to src/correlation/gpu/vk.rs index 0658392..c6d32b6 100644 --- a/src/correlation/vk.rs +++ b/src/correlation/gpu/vk.rs @@ -1,49 +1,20 @@ -use std::{cmp::Ordering, collections::HashMap, error, ffi::CStr, fmt, slice, time::SystemTime}; +use std::{cmp::Ordering, collections::HashMap, error, ffi::CStr, slice}; use ash::{prelude::VkResult, vk}; -use nalgebra::Matrix3; use rayon::iter::ParallelIterator; -use crate::data::{Grid, Point2D}; - -use super::{ - CorrelationDirection, CorrelationParameters, HardwareMode, ProjectionMode, CORRIDOR_MIN_RANGE, - CROSS_CHECK_SEARCH_AREA, KERNEL_SIZE, NEIGHBOR_DISTANCE, +use crate::{ + correlation::{gpu::ShaderParams, Match}, + data::{Grid, Point2D}, }; +use super::{CorrelationDirection, GpuError, HardwareMode}; + // This should be supported by most modern GPUs, even old/budget ones like Celeron N3350. // Based on https://www.vulkan.gpuinfo.org, only old integrated GPUs like 4th gen Core i5 have a // limit of 4. // But storing everything in the same buffer will likely have other issues like memory limits. const MAX_BINDINGS: u32 = 6; -// Decrease when using a low-powered GPU -const CORRIDOR_SEGMENT_LENGTH_HIGHPERFORMANCE: usize = 512; -const SEARCH_AREA_SEGMENT_LENGTH_HIGHPERFORMANCE: usize = 1024; -const CORRIDOR_SEGMENT_LENGTH_LOWPOWER: usize = 8; -const SEARCH_AREA_SEGMENT_LENGTH_LOWPOWER: usize = 128; - -#[repr(C)] -#[derive(Copy, Clone)] -struct ShaderParams { - img1_width: u32, - img1_height: u32, - img2_width: u32, - img2_height: u32, - out_width: u32, - out_height: u32, - scale: f32, - iteration_pass: u32, - fundamental_matrix: [f32; 3 * 4], - corridor_offset: i32, - corridor_start: u32, - corridor_end: u32, - kernel_size: u32, - threshold: f32, - min_stdev: f32, - neighbor_distance: u32, - extend_range: f32, - min_range: f32, -} const _: () = { // Validate that ShaderParams fits into the minimum guaranteed push constants size. @@ -52,24 +23,7 @@ const _: () = { assert!(std::mem::size_of::() < 128); }; -pub struct GpuContext<'a> { - min_stdev: f32, - correlation_threshold: f32, - fundamental_matrix: Matrix3, - img1_dimensions: (usize, usize), - img2_dimensions: (usize, usize), - - correlation_values: Grid>, - - corridor_segment_length: usize, - search_area_segment_length: usize, - corridor_size: usize, - corridor_extend_range: f64, - - device_context: &'a mut DeviceContext, -} - -struct Device { +pub struct Device { instance: ash::Instance, name: String, device: ash::Device, @@ -116,7 +70,7 @@ struct DescriptorSets { } #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -enum ShaderModuleType { +pub enum ShaderModuleType { InitOutData, PrepareInitialdataSearchdata, PrepareInitialdataCorrelation, @@ -137,296 +91,6 @@ struct Control { command_buffer: vk::CommandBuffer, } -impl GpuContext<'_> { - pub fn new<'a>( - device_context: &'a mut DeviceContext, - img1_dimensions: (usize, usize), - img2_dimensions: (usize, usize), - projection_mode: ProjectionMode, - fundamental_matrix: Matrix3, - ) -> Result, Box> { - let (search_area_segment_length, corridor_segment_length) = if device_context.low_power { - ( - SEARCH_AREA_SEGMENT_LENGTH_LOWPOWER, - CORRIDOR_SEGMENT_LENGTH_LOWPOWER, - ) - } else { - ( - SEARCH_AREA_SEGMENT_LENGTH_HIGHPERFORMANCE, - CORRIDOR_SEGMENT_LENGTH_HIGHPERFORMANCE, - ) - }; - - device_context.prepare_device(img1_dimensions, img2_dimensions)?; - let correlation_values = Grid::new(img1_dimensions.0, img1_dimensions.1, None); - - let params = CorrelationParameters::for_projection(&projection_mode); - Ok(GpuContext { - min_stdev: params.min_stdev, - correlation_threshold: params.correlation_threshold, - corridor_size: params.corridor_size, - corridor_extend_range: params.corridor_extend_range, - fundamental_matrix, - img1_dimensions, - img2_dimensions, - correlation_values, - corridor_segment_length, - search_area_segment_length, - device_context, - }) - } - - pub fn get_device_name(&self) -> String { - self.device_context - .device() - .ok() - .map_or(String::from("Error: device not initialized"), |device| { - device.name.to_owned() - }) - } - - pub fn cross_check_filter( - &mut self, - scale: f32, - dir: CorrelationDirection, - ) -> Result<(), Box> { - let device = self.device_context.device_mut()?; - device.set_buffer_direction(&dir)?; - let (out_dimensions, out_dimensions_reverse) = match dir { - CorrelationDirection::Forward => (self.img1_dimensions, self.img2_dimensions), - CorrelationDirection::Reverse => (self.img2_dimensions, self.img1_dimensions), - }; - - let search_area = CROSS_CHECK_SEARCH_AREA * (1.0 / scale).round() as usize; - - // Reuse/repurpose ShaderParams. - let params = ShaderParams { - img1_width: out_dimensions.0 as u32, - img1_height: out_dimensions.1 as u32, - img2_width: out_dimensions_reverse.0 as u32, - img2_height: out_dimensions_reverse.1 as u32, - out_width: 0, - out_height: 0, - fundamental_matrix: [0.0; 3 * 4], - scale: 0.0, - iteration_pass: 0, - corridor_offset: 0, - corridor_start: 0, - corridor_end: 0, - kernel_size: 0, - threshold: 0.0, - min_stdev: 0.0, - neighbor_distance: search_area as u32, - extend_range: 0.0, - min_range: 0.0, - }; - unsafe { - device.run_shader(out_dimensions, ShaderModuleType::CrossCheckFilter, params)?; - } - Ok(()) - } - - pub fn complete_process( - &mut self, - ) -> Result>, Box> { - let device = self.device_context.device_mut()?; - let mut out_image = Grid::new(self.img1_dimensions.0, self.img1_dimensions.1, None); - unsafe { - device.save_result(&mut out_image, &self.correlation_values)?; - device - .buffers - .as_ref() - .map(|buffers| buffers.destroy(&device.device)); - device.buffers = None; - } - Ok(out_image) - } - - pub fn correlate_images( - &mut self, - img1: &Grid, - img2: &Grid, - scale: f32, - first_pass: bool, - progress_listener: Option<&PL>, - dir: CorrelationDirection, - ) -> Result<(), Box> { - { - let device = self.device_context.device()?; - device.set_buffer_direction(&dir)?; - } - let max_width = img1.width().max(img2.width()); - let max_height = img1.height().max(img2.height()); - let max_dimensions = (max_width, max_height); - let img1_dimensions = (img1.width(), img1.height()); - let out_dimensions = match dir { - CorrelationDirection::Forward => self.img1_dimensions, - CorrelationDirection::Reverse => self.img2_dimensions, - }; - - let mut progressbar_completed_percentage = 0.02; - let send_progress = |value| { - let value = match dir { - CorrelationDirection::Forward => value * 0.98 / 2.0, - CorrelationDirection::Reverse => 0.51 + value * 0.98 / 2.0, - }; - if let Some(pl) = progress_listener { - pl.report_status(value); - } - }; - - let mut params = ShaderParams { - img1_width: img1.width() as u32, - img1_height: img1.height() as u32, - img2_width: img2.width() as u32, - img2_height: img2.height() as u32, - out_width: out_dimensions.0 as u32, - out_height: out_dimensions.1 as u32, - fundamental_matrix: self.convert_fundamental_matrix(&dir), - scale, - iteration_pass: 0, - corridor_offset: 0, - corridor_start: 0, - corridor_end: 0, - kernel_size: KERNEL_SIZE as u32, - threshold: self.correlation_threshold, - min_stdev: self.min_stdev, - neighbor_distance: NEIGHBOR_DISTANCE as u32, - extend_range: self.corridor_extend_range as f32, - min_range: CORRIDOR_MIN_RANGE as f32, - }; - - let device = self.device_context.device_mut()?; - - unsafe { device.transfer_in_images(img1, img2)? }; - - if first_pass { - unsafe { - device.run_shader(out_dimensions, ShaderModuleType::InitOutData, params)?; - } - } else { - unsafe { - device.run_shader( - out_dimensions, - ShaderModuleType::PrepareInitialdataSearchdata, - params, - )?; - } - progressbar_completed_percentage = 0.02; - send_progress(progressbar_completed_percentage); - - let segment_length = self.search_area_segment_length; - let neighbor_width = (NEIGHBOR_DISTANCE as f32 / scale).ceil() as usize * 2 + 1; - let neighbor_pixels = neighbor_width * neighbor_width; - let neighbor_segments = neighbor_pixels / segment_length + 1; - - params.iteration_pass = 0; - for l in 0u32..neighbor_segments as u32 { - params.corridor_start = l * segment_length as u32; - params.corridor_end = (l + 1) * segment_length as u32; - if params.corridor_end > neighbor_pixels as u32 { - params.corridor_end = neighbor_pixels as u32; - } - unsafe { - device.run_shader( - img1_dimensions, - ShaderModuleType::PrepareSearchdata, - params, - )?; - } - - let percent_complete = - progressbar_completed_percentage + 0.09 * (l as f32 / neighbor_segments as f32); - send_progress(percent_complete); - } - progressbar_completed_percentage = 0.11; - send_progress(progressbar_completed_percentage); - - params.iteration_pass = 1; - for l in 0u32..neighbor_segments as u32 { - params.corridor_start = l * segment_length as u32; - params.corridor_end = (l + 1) * segment_length as u32; - if params.corridor_end > neighbor_pixels as u32 { - params.corridor_end = neighbor_pixels as u32; - } - unsafe { - device.run_shader( - img1_dimensions, - ShaderModuleType::PrepareSearchdata, - params, - )?; - } - - let percent_complete = - progressbar_completed_percentage + 0.09 * (l as f32 / neighbor_segments as f32); - send_progress(percent_complete); - } - - progressbar_completed_percentage = 0.20; - } - send_progress(progressbar_completed_percentage); - params.iteration_pass = if first_pass { 0 } else { 1 }; - - unsafe { - device.run_shader( - max_dimensions, - ShaderModuleType::PrepareInitialdataCorrelation, - params, - )?; - } - - let corridor_size = self.corridor_size; - let corridor_stripes = 2 * corridor_size + 1; - let max_length = img2.width().max(img2.height()); - let segment_length = self.corridor_segment_length; - let corridor_length = max_length - (KERNEL_SIZE * 2); - let corridor_segments = corridor_length / segment_length + 1; - for corridor_offset in -(corridor_size as i32)..=corridor_size as i32 { - for l in 0u32..corridor_segments as u32 { - params.corridor_offset = corridor_offset; - params.corridor_start = l * segment_length as u32; - params.corridor_end = (l + 1) * segment_length as u32; - if params.corridor_end > corridor_length as u32 { - params.corridor_end = corridor_length as u32; - } - unsafe { - device.run_shader(img1_dimensions, ShaderModuleType::CrossCorrelate, params)?; - } - - let corridor_complete = params.corridor_end as f32 / corridor_length as f32; - let percent_complete = progressbar_completed_percentage - + (1.0 - progressbar_completed_percentage) - * (corridor_offset as f32 + corridor_size as f32 + corridor_complete) - / corridor_stripes as f32; - send_progress(percent_complete); - } - } - - if matches!(dir, CorrelationDirection::Forward) { - unsafe { device.save_corr(&mut self.correlation_values, self.correlation_threshold)? }; - } - Ok(()) - } - - fn convert_fundamental_matrix(&self, dir: &CorrelationDirection) -> [f32; 3 * 4] { - let fundamental_matrix = match dir { - CorrelationDirection::Forward => self.fundamental_matrix, - CorrelationDirection::Reverse => self.fundamental_matrix.transpose(), - }; - let mut f = [0f32; 3 * 4]; - // Matrix layout in GLSL (OpenGL) is pure madness: https://www.opengl.org/archives/resources/faq/technical/transformations.htm. - // "Column major" means that vectors are vertical and a matrix multiplies a vector. - // "Row major" means a horizontal vector multiplies a matrix. - // This says nothing about how the matrix is stored in memory. - for row in 0..3 { - for col in 0..3 { - f[col * 4 + row] = fundamental_matrix[(row, col)] as f32; - } - } - f - } -} - pub struct DeviceContext { entry: ash::Entry, low_power: bool, @@ -446,6 +110,16 @@ impl DeviceContext { device: None, }) } +} + +impl super::DeviceContext for DeviceContext { + fn is_low_power(&self) -> bool { + self.low_power + } + + fn get_device_name(&self) -> Option { + self.device().ok().map(|device| device.name.to_owned()) + } fn prepare_device( &mut self, @@ -571,72 +245,6 @@ impl Device { Ok(result) } - unsafe fn run_shader( - &mut self, - dimensions: (usize, usize), - shader_type: ShaderModuleType, - shader_params: ShaderParams, - ) -> VkResult<()> { - let pipeline_config = self.pipelines.get(&shader_type).unwrap(); - let command_buffer = self.control.command_buffer; - - let workgroup_size = ((dimensions.0 + 15) / 16, ((dimensions.1 + 15) / 16)); - self.device.reset_fences(&[self.control.fence])?; - self.device - .reset_command_buffer(command_buffer, vk::CommandBufferResetFlags::empty())?; - let info = vk::CommandBufferBeginInfo::builder() - .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT); - self.device.begin_command_buffer(command_buffer, &info)?; - - self.device.cmd_bind_pipeline( - self.control.command_buffer, - vk::PipelineBindPoint::COMPUTE, - pipeline_config.pipeline, - ); - // It's way easier to map all descriptor sets identically, instead of ensuring that every - // kernel gets to use set = 0. - // The cross correlation kernel will need to switch to descriptor set = 1. - self.device.cmd_bind_descriptor_sets( - command_buffer, - vk::PipelineBindPoint::COMPUTE, - self.descriptor_sets.pipeline_layout, - 0, - &self.descriptor_sets.descriptor_sets, - &[], - ); - - let push_constants_data = slice::from_raw_parts( - &shader_params as *const ShaderParams as *const u8, - std::mem::size_of::(), - ); - self.device.cmd_push_constants( - command_buffer, - self.descriptor_sets.pipeline_layout, - vk::ShaderStageFlags::COMPUTE, - 0, - push_constants_data, - ); - self.device.cmd_dispatch( - command_buffer, - workgroup_size.0 as u32, - workgroup_size.1 as u32, - 1, - ); - - self.device.end_command_buffer(command_buffer)?; - - let command_buffers = [command_buffer]; - let submit_info = vk::SubmitInfo::builder().command_buffers(&command_buffers); - self.device.queue_submit( - self.control.queue, - &[submit_info.build()], - self.control.fence, - )?; - - self.device - .wait_for_fences(&[self.control.fence], true, u64::MAX) - } - unsafe fn map_buffer_write( &self, dst_buffer: &Buffer, @@ -793,81 +401,6 @@ impl Device { Ok(()) } - unsafe fn transfer_in_images( - &self, - img1: &Grid, - img2: &Grid, - ) -> Result<(), Box> { - let buffers = self.buffers()?; - let img2_offset = img1.width() * img1.height(); - let size = img1.width() * img1.height() + img2.width() * img2.height(); - let copy_images = |img_slice: &mut [f32]| { - img1.iter() - .for_each(|(x, y, val)| img_slice[y * img1.width() + x] = *val as f32); - img2.iter().for_each(|(x, y, val)| { - img_slice[img2_offset + y * img2.width() + x] = *val as f32 - }); - }; - - self.map_buffer_write(&buffers.buffer_img, size, copy_images)?; - - Ok(()) - } - - unsafe fn save_corr( - &self, - correlation_values: &mut Grid>, - correlation_threshold: f32, - ) -> Result<(), Box> { - let buffers = self.buffers()?; - let size = correlation_values.width() * correlation_values.height(); - let width = correlation_values.width(); - let copy_corr_data = |out_corr: &[f32]| { - correlation_values - .par_iter_mut() - .for_each(|(x, y, out_point)| { - let corr = out_corr[y * width + x]; - if corr > correlation_threshold { - *out_point = Some(corr); - } - }); - }; - - self.map_buffer_read(&buffers.buffer_out_corr, size, copy_corr_data)?; - - Ok(()) - } - - unsafe fn save_result( - &self, - out_image: &mut Grid>, - correlation_values: &Grid>, - ) -> Result<(), Box> { - let buffers = self.buffers()?; - let size = out_image.width() * out_image.height() * 2; - let width = out_image.width(); - let copy_out_image = |out_data: &[i32]| { - out_image.par_iter_mut().for_each(|(x, y, out_point)| { - let pos = 2 * (y * width + x); - let (match_x, match_y) = (out_data[pos], out_data[pos + 1]); - if let Some(corr) = correlation_values.val(x, y) { - *out_point = if match_x > 0 && match_y > 0 { - let point_match = Point2D::new(match_x as u32, match_y as u32); - Some((point_match, *corr)) - } else { - None - }; - } else { - *out_point = None; - }; - }); - }; - - self.map_buffer_read(&buffers.buffer_out, size, copy_out_image)?; - - Ok(()) - } - unsafe fn init_vk(entry: &ash::Entry) -> VkResult { let app_name = CStr::from_bytes_with_nul_unchecked(b"Cybervision\0"); let engine_name = CStr::from_bytes_with_nul_unchecked(b"cybervision\0"); @@ -1232,7 +765,7 @@ impl Device { }) } - fn set_buffer_direction(&self, direction: &CorrelationDirection) -> Result<(), GpuError> { + pub fn set_buffer_direction(&self, direction: &CorrelationDirection) -> Result<(), GpuError> { let descriptor_sets = &self.descriptor_sets; let buffers = &self.buffers()?; let create_buffer_infos = |buffers: &[Buffer]| { @@ -1399,6 +932,158 @@ impl Device { } } +impl super::Device for Device { + unsafe fn run_shader( + &mut self, + dimensions: (usize, usize), + shader_type: ShaderModuleType, + shader_params: ShaderParams, + ) -> Result<(), Box> { + let pipeline_config = self.pipelines.get(&shader_type).unwrap(); + let command_buffer = self.control.command_buffer; + + let workgroup_size = ((dimensions.0 + 15) / 16, ((dimensions.1 + 15) / 16)); + self.device.reset_fences(&[self.control.fence])?; + self.device + .reset_command_buffer(command_buffer, vk::CommandBufferResetFlags::empty())?; + let info = vk::CommandBufferBeginInfo::builder() + .flags(vk::CommandBufferUsageFlags::ONE_TIME_SUBMIT); + self.device.begin_command_buffer(command_buffer, &info)?; + + self.device.cmd_bind_pipeline( + self.control.command_buffer, + vk::PipelineBindPoint::COMPUTE, + pipeline_config.pipeline, + ); + // It's way easier to map all descriptor sets identically, instead of ensuring that every + // kernel gets to use set = 0. + // The cross correlation kernel will need to switch to descriptor set = 1. + self.device.cmd_bind_descriptor_sets( + command_buffer, + vk::PipelineBindPoint::COMPUTE, + self.descriptor_sets.pipeline_layout, + 0, + &self.descriptor_sets.descriptor_sets, + &[], + ); + + let push_constants_data = slice::from_raw_parts( + &shader_params as *const ShaderParams as *const u8, + std::mem::size_of::(), + ); + self.device.cmd_push_constants( + command_buffer, + self.descriptor_sets.pipeline_layout, + vk::ShaderStageFlags::COMPUTE, + 0, + push_constants_data, + ); + self.device.cmd_dispatch( + command_buffer, + workgroup_size.0 as u32, + workgroup_size.1 as u32, + 1, + ); + + self.device.end_command_buffer(command_buffer)?; + + let command_buffers = [command_buffer]; + let submit_info = vk::SubmitInfo::builder().command_buffers(&command_buffers); + self.device.queue_submit( + self.control.queue, + &[submit_info.build()], + self.control.fence, + )?; + + self.device + .wait_for_fences(&[self.control.fence], true, u64::MAX)?; + + Ok(()) + } + + unsafe fn transfer_in_images( + &self, + img1: &Grid, + img2: &Grid, + ) -> Result<(), Box> { + let buffers = self.buffers()?; + let img2_offset = img1.width() * img1.height(); + let size = img1.width() * img1.height() + img2.width() * img2.height(); + let copy_images = |img_slice: &mut [f32]| { + img1.iter() + .for_each(|(x, y, val)| img_slice[y * img1.width() + x] = *val as f32); + img2.iter().for_each(|(x, y, val)| { + img_slice[img2_offset + y * img2.width() + x] = *val as f32 + }); + }; + + self.map_buffer_write(&buffers.buffer_img, size, copy_images)?; + + Ok(()) + } + + unsafe fn save_corr( + &self, + correlation_values: &mut Grid>, + correlation_threshold: f32, + ) -> Result<(), Box> { + let buffers = self.buffers()?; + let size = correlation_values.width() * correlation_values.height(); + let width = correlation_values.width(); + let copy_corr_data = |out_corr: &[f32]| { + correlation_values + .par_iter_mut() + .for_each(|(x, y, out_point)| { + let corr = out_corr[y * width + x]; + if corr > correlation_threshold { + *out_point = Some(corr); + } + }); + }; + + self.map_buffer_read(&buffers.buffer_out_corr, size, copy_corr_data)?; + + Ok(()) + } + + unsafe fn save_result( + &self, + out_image: &mut Grid>, + correlation_values: &Grid>, + ) -> Result<(), Box> { + let buffers = self.buffers()?; + let size = out_image.width() * out_image.height() * 2; + let width = out_image.width(); + let copy_out_image = |out_data: &[i32]| { + out_image.par_iter_mut().for_each(|(x, y, out_point)| { + let pos = 2 * (y * width + x); + let (match_x, match_y) = (out_data[pos], out_data[pos + 1]); + if let Some(corr) = correlation_values.val(x, y) { + *out_point = if match_x > 0 && match_y > 0 { + let point_match = Point2D::new(match_x as u32, match_y as u32); + Some((point_match, *corr)) + } else { + None + }; + } else { + *out_point = None; + }; + }); + }; + + self.map_buffer_read(&buffers.buffer_out, size, copy_out_image)?; + + Ok(()) + } + + unsafe fn destroy_buffers(&mut self) { + if let Some(buffers) = self.buffers.as_ref() { + buffers.destroy(&self.device) + } + self.buffers = None; + } +} + impl Buffer { unsafe fn destroy(&self, device: &ash::Device) { device.free_memory(self.buffer_memory, None); @@ -1500,30 +1185,11 @@ impl Drop for Device { self.control.destroy(&self.device); destroy_pipelines(&self.device, &self.pipelines); self.descriptor_sets.destroy(&self.device); - self.buffers - .as_ref() - .map(|buffers| buffers.destroy(&self.device)); + if let Some(buffers) = self.buffers.as_ref() { + buffers.destroy(&self.device) + } self.device.destroy_device(None); self.instance.destroy_instance(None); } } } - -#[derive(Debug)] -pub struct GpuError { - msg: &'static str, -} - -impl GpuError { - fn new(msg: &'static str) -> GpuError { - GpuError { msg } - } -} - -impl std::error::Error for GpuError {} - -impl fmt::Display for GpuError { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.msg) - } -} diff --git a/src/correlation.rs b/src/correlation/mod.rs similarity index 97% rename from src/correlation.rs rename to src/correlation/mod.rs index f90eece..c7e6db5 100644 --- a/src/correlation.rs +++ b/src/correlation/mod.rs @@ -1,14 +1,7 @@ -#[cfg(target_os = "macos")] -mod metal; -#[cfg(not(target_os = "macos"))] -mod vk; +mod gpu; -#[cfg(target_os = "macos")] -type GpuContext = metal::GpuContext; -#[cfg(not(target_os = "macos"))] -type GpuContext<'a> = vk::GpuContext<'a>; +use self::gpu::{DefaultDeviceContext, GpuContext}; -use self::vk::DeviceContext; use crate::data::{Grid, Point2D}; use nalgebra::{Matrix3, Vector3}; use rayon::iter::ParallelIterator; @@ -33,6 +26,8 @@ const CROSS_CHECK_SEARCH_AREA: usize = 2; type Match = (Point2D, f32); +pub type GpuDevice = DefaultDeviceContext; + #[derive(Debug)] pub struct PointData { pub delta: [f32; KPC], @@ -131,17 +126,13 @@ impl CorrelationParameters { } } -pub type GpuDevice = DeviceContext; - -pub fn create_gpu_context( - hardware_mode: HardwareMode, -) -> Result> { - vk::DeviceContext::new(hardware_mode) +pub fn create_gpu_context(hardware_mode: HardwareMode) -> Result> { + GpuDevice::new(hardware_mode) } impl PointCorrelations<'_> { pub fn new( - device_context: Option<&mut DeviceContext>, + device_context: Option<&mut GpuDevice>, img1_dimensions: (u32, u32), img2_dimensions: (u32, u32), fundamental_matrix: Matrix3,