From d9a7ef40f360b9cfb91471855862f92f09a97c3f Mon Sep 17 00:00:00 2001 From: Dmitry Zolotukhin Date: Fri, 18 Aug 2023 18:26:31 +0200 Subject: [PATCH] Use correlation value for pose estimation. Only points with the best correlation should be used to estimate the pose. --- src/correlation.rs | 104 +++++++++++++++++++++++++++++++++--------- src/correlation.wgsl | 27 ++++++----- src/reconstruction.rs | 6 +-- src/triangulation.rs | 53 ++++++++++++++------- 4 files changed, 136 insertions(+), 54 deletions(-) diff --git a/src/correlation.rs b/src/correlation.rs index b9d6c824..6c7fb240 100644 --- a/src/correlation.rs +++ b/src/correlation.rs @@ -8,7 +8,7 @@ const KERNEL_WIDTH: usize = KERNEL_SIZE * 2 + 1; const KERNEL_POINT_COUNT: usize = KERNEL_WIDTH * KERNEL_WIDTH; const THRESHOLD_AFFINE: f32 = 0.6; -const THRESHOLD_PERSPECTIVE: f32 = 0.7; +const THRESHOLD_PERSPECTIVE: f32 = 0.6; const MIN_STDEV_AFFINE: f32 = 1.0; const MIN_STDEV_PERSPECTIVE: f32 = 3.0; const CORRIDOR_SIZE_AFFINE: usize = 2; @@ -24,7 +24,7 @@ const CORRIDOR_EXTEND_RANGE_PERSPECTIVE: f64 = 1.0; const CORRIDOR_MIN_RANGE: f64 = 2.5; const CROSS_CHECK_SEARCH_AREA: usize = 2; -type Match = (u32, u32); +type Match = (u32, u32, f32); #[derive(Debug)] pub struct PointData { @@ -77,7 +77,7 @@ struct EpipolarLine { } struct BestMatch { - pos: Option, + pos: Option<(u32, u32)>, corr: Option, } @@ -188,26 +188,29 @@ impl PointCorrelations { img2: DMatrix, scale: f32, progress_listener: Option<&PL>, - ) { + ) -> Result<(), Box> { + // Start with reverse direction - so that the last correlation values will be from the forward direction. self.correlate_images_step( - &img1, &img2, + &img1, scale, progress_listener, - CorrelationDirection::Forward, - ); + CorrelationDirection::Reverse, + )?; self.correlate_images_step( - &img2, &img1, + &img2, scale, progress_listener, - CorrelationDirection::Reverse, - ); + CorrelationDirection::Forward, + )?; self.cross_check_filter(scale, CorrelationDirection::Forward); self.cross_check_filter(scale, CorrelationDirection::Reverse); self.first_pass = false; + + Ok(()) } fn correlate_images_step( @@ -217,13 +220,13 @@ impl PointCorrelations { scale: f32, progress_listener: Option<&PL>, dir: CorrelationDirection, - ) { + ) -> Result<(), Box> { if let Some(gpu_context) = &mut self.gpu_context { let dir = match dir { CorrelationDirection::Forward => gpu::CorrelationDirection::Forward, CorrelationDirection::Reverse => gpu::CorrelationDirection::Reverse, }; - gpu_context.correlate_images( + return gpu_context.correlate_images( img1, img2, scale, @@ -231,7 +234,6 @@ impl PointCorrelations { progress_listener, dir, ); - return; }; let img2_data = compute_image_point_data(img2); let mut out_data: DMatrix> = @@ -299,6 +301,8 @@ impl PointCorrelations { correlated_points[(out_row, out_col)] = point; } } + + Ok(()) } fn correlate_point( @@ -364,7 +368,9 @@ impl PointCorrelations { corridor_range.clone(), ); } - *out_point = best_match.pos + *out_point = best_match + .pos + .and_then(|m| best_match.corr.map(|corr| (m.0, m.1, corr))) } fn get_epipolar_line( @@ -575,7 +581,7 @@ impl PointCorrelations { search_area: usize, row: usize, col: usize, - m: (u32, u32), + m: Match, ) -> bool { let min_row = (m.0 as usize) .saturating_sub(search_area) @@ -726,7 +732,7 @@ pub fn compute_point_data( } mod gpu { - const MAX_BINDINGS: u32 = 5; + const MAX_BINDINGS: u32 = 6; use std::{borrow::Cow, collections::HashMap, error, fmt}; @@ -785,6 +791,7 @@ mod gpu { buffer_internal_img2: wgpu::Buffer, buffer_internal_int: wgpu::Buffer, 
buffer_out: wgpu::Buffer, + buffer_out_corr: wgpu::Buffer, buffer_out_reverse: wgpu::Buffer, pipeline_configs: HashMap, @@ -884,7 +891,7 @@ mod gpu { ); let buffer_internal_img1 = init_buffer( &device, - (img1_pixels * 4) * std::mem::size_of::(), + (img1_pixels * 2) * std::mem::size_of::(), true, false, ); @@ -906,6 +913,12 @@ mod gpu { false, false, ); + let buffer_out_corr = init_buffer( + &device, + img1_pixels * 1 * std::mem::size_of::(), + false, + false, + ); let buffer_out_reverse = init_buffer( &device, img2_pixels * 2 * std::mem::size_of::(), @@ -932,6 +945,7 @@ mod gpu { buffer_internal_img2, buffer_internal_int, buffer_out, + buffer_out_corr, buffer_out_reverse, pipeline_configs: HashMap::new(), }; @@ -950,7 +964,7 @@ mod gpu { first_pass: bool, progress_listener: Option<&PL>, dir: CorrelationDirection, - ) { + ) -> Result<(), Box> { let max_width = img1.ncols().max(img2.ncols()); let max_height = img1.nrows().max(img2.nrows()); let max_shape = (max_height, max_width); @@ -1067,6 +1081,8 @@ mod gpu { send_progress(percent_complete); } } + + Ok(()) } pub fn cross_check_filter(&mut self, scale: f32, dir: CorrelationDirection) { @@ -1180,6 +1196,7 @@ mod gpu { self.buffer_internal_img2.destroy(); self.buffer_internal_int.destroy(); self.buffer_out_reverse.destroy(); + self.buffer_out_corr.destroy(); let mut out_image = DMatrix::from_element(self.img1_shape.0, self.img1_shape.1, None); @@ -1189,7 +1206,14 @@ mod gpu { usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, mapped_at_creation: false, }); + let out_corr_buffer = self.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: self.buffer_out_corr.size(), + usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST, + mapped_at_creation: false, + }); let out_buffer_slice = out_buffer.slice(..); + let out_corr_buffer_slice = out_corr_buffer.slice(..); { let mut encoder = self .device @@ -1201,31 +1225,51 @@ mod gpu { 0, self.buffer_out.size(), ); + encoder.copy_buffer_to_buffer( + &self.buffer_out_corr, + 0, + &out_corr_buffer, + 0, + self.buffer_out_corr.size(), + ); self.queue.submit(Some(encoder.finish())); - let (sender, receiver) = mpsc::channel(); - out_buffer_slice.map_async(wgpu::MapMode::Read, move |v| sender.send(v).unwrap()); + let (sender_out, receiver_out) = mpsc::channel(); + out_buffer_slice + .map_async(wgpu::MapMode::Read, move |v| sender_out.send(v).unwrap()); + let (sender_out_corr, receiver_out_corr) = mpsc::channel(); + out_corr_buffer_slice.map_async(wgpu::MapMode::Read, move |v| { + sender_out_corr.send(v).unwrap() + }); self.device.poll(wgpu::Maintain::Wait); - if let Err(err) = receiver.recv() { + if let Err(err) = receiver_out.recv() { + return Err(err.into()); + } + if let Err(err) = receiver_out_corr.recv() { return Err(err.into()); } } let out_buffer_slice_mapped = out_buffer_slice.get_mapped_range(); + let out_corr_buffer_slice_mapped = out_corr_buffer_slice.get_mapped_range(); let out_data: &[i32] = bytemuck::cast_slice(&out_buffer_slice_mapped); + let out_corr_data: &[f32] = bytemuck::cast_slice(&out_buffer_slice_mapped); for col in 0..out_image.ncols() { for row in 0..out_image.nrows() { let pos = 2 * (row * out_image.ncols() + col); let point_match = (out_data[pos], out_data[pos + 1]); + let corr = out_corr_data[row * out_image.ncols() + col]; out_image[(row, col)] = if point_match.0 > 0 && point_match.1 > 0 { - Some((point_match.1 as u32, point_match.0 as u32)) + Some((point_match.1 as u32, point_match.0 as u32, corr)) } else { None }; } } 
drop(out_buffer_slice_mapped); + drop(out_corr_buffer_slice_mapped); out_buffer.unmap(); + out_corr_buffer.unmap(); Ok(out_image) } @@ -1300,6 +1344,18 @@ mod gpu { }, count: None, }, + wgpu::BindGroupLayoutEntry { + binding: 5, + visibility: wgpu::ShaderStages::COMPUTE, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: wgpu::BufferSize::new( + self.buffer_out_corr.size(), + ), + }, + count: None, + }, ], }); @@ -1377,6 +1433,10 @@ mod gpu { binding: 4, resource: buffer_out.as_entire_binding(), }, + wgpu::BindGroupEntry { + binding: 5, + resource: self.buffer_out_corr.as_entire_binding(), + }, ], }); diff --git a/src/correlation.wgsl b/src/correlation.wgsl index bd5347ba..9277a39f 100644 --- a/src/correlation.wgsl +++ b/src/correlation.wgsl @@ -21,15 +21,17 @@ struct Parameters }; var parameters: Parameters; +// Array of image data: [img1, img2] @group(0) @binding(0) var images: array; -// For searchdata: contains [min_corridor, stdev, _] for image1 -// For cross_correlate: contains [avg, stdev, corr] for image1 -@group(0) @binding(1) var internals_img1: array>; +// For searchdata: contains [min_corridor, stdev] for image1 +// For cross_correlate: contains [avg, stdev] for image1 +@group(0) @binding(1) var internals_img1: array>; // Contains [avg, stdev] for image 2 @group(0) @binding(2) var internals_img2: array>; // Contains [min, max, neighbor_count] for the corridor range @group(0) @binding(3) var internals_int: array>; -@group(0) @binding(4) var result: array>; +@group(0) @binding(4) var result_matches: array>; +@group(0) @binding(5) var result_corr: array; @compute @workgroup_size(16, 16, 1) fn init_out_data(@builtin(global_invocation_id) global_id: vec3) { @@ -40,7 +42,8 @@ fn init_out_data(@builtin(global_invocation_id) global_id: vec3) { let out_height = parameters.out_height; if x < out_width && y < out_height { - result[out_width*y+x] = vec2(-1, -1); + result_matches[out_width*y+x] = vec2(-1, -1); + result_corr[out_width*y+x] = -1.0; } } @@ -53,7 +56,7 @@ fn prepare_initialdata_searchdata(@builtin(global_invocation_id) global_id: vec3 let img1_height = parameters.img1_height; if x < img1_width && y < img1_height { - internals_img1[img1_width*y+x] = vec3(0.0, 0.0, 0.0); + internals_img1[img1_width*y+x] = vec2(0.0, 0.0); internals_int[img1_width*y+x] = vec3(-1, -1, 0); } } @@ -94,9 +97,9 @@ fn prepare_initialdata_correlation(@builtin(global_invocation_id) global_id: vec } } stdev = sqrt(stdev/kernel_point_count); - internals_img1[img1_width*y+x] = vec3(avg, stdev, -1.0); + internals_img1[img1_width*y+x] = vec2(avg, stdev); } else if x < img1_width && y < img1_height { - internals_img1[img1_width*y+x] = vec3(0.0, -1.0, -1.0); + internals_img1[img1_width*y+x] = vec2(0.0, -1.0); } if x >= kernel_size && x < img2_width-kernel_size && y >= kernel_size && y < img2_height-kernel_size { @@ -194,7 +197,7 @@ fn prepare_searchdata(@builtin(global_invocation_id) global_id: vec3) { continue; } - let coord2 = vec2(result[u32(y_out)*out_width + u32(x_out)]) * scale; + let coord2 = vec2(result_corr[u32(y_out)*out_width + u32(x_out)]) * scale; if coord2.x<0.0 || coord2.y<0.0 { continue; } @@ -269,7 +272,7 @@ fn cross_correlate(@builtin(global_invocation_id) global_id: vec3) { var data_img1 = internals_img1[img1_width*y1+x1]; let avg1 = data_img1[0]; let stdev1 = data_img1[1]; - let current_corr = data_img1[2]; + let current_corr = result_corr[img1_width*y1+x1]; if stdev1 < min_stdev { return; } @@ -343,9 
+346,9 @@ fn cross_correlate(@builtin(global_invocation_id) global_id: vec3) { if (best_corr >= threshold && best_corr > current_corr) { let out_pos = out_width*u32((f32(y1)/scale)) + u32(f32(x1)/scale); - data_img1[2] = best_corr; internals_img1[img1_width*y1+x1] = data_img1; - result[out_pos] = best_match; + result_matches[out_pos] = best_match; + result_corr[img1_width*y1+x1] = best_corr; } } diff --git a/src/reconstruction.rs b/src/reconstruction.rs index cc9a74a8..277666bf 100644 --- a/src/reconstruction.rs +++ b/src/reconstruction.rs @@ -257,7 +257,7 @@ pub fn reconstruct(args: &Cli) -> Result<(), Box> { Ok(()) } -type CorrelatedPoints = DMatrix>; +type CorrelatedPoints = DMatrix>; impl ImageReconstruction { fn reconstruct( @@ -509,7 +509,7 @@ impl ImageReconstruction { scale, }; - point_correlations.correlate_images(img1, img2, scale, Some(&pb)); + point_correlations.correlate_images(img1, img2, scale, Some(&pb))?; total_percent_complete += scale * scale / total_percent; } pb.finish_and_clear(); @@ -528,7 +528,7 @@ impl ImageReconstruction { fn triangulate_surface( &mut self, f: Matrix3, - correlated_points: DMatrix>, + correlated_points: CorrelatedPoints, ) -> Result<(), triangulation::TriangulationError> { let start_time = SystemTime::now(); diff --git a/src/triangulation.rs b/src/triangulation.rs index 19001993..03456ec4 100644 --- a/src/triangulation.rs +++ b/src/triangulation.rs @@ -11,16 +11,17 @@ use rayon::prelude::*; const BUNDLE_ADJUSTMENT_MAX_ITERATIONS: usize = 1000; const OUTLIER_FILTER_STDEV_THRESHOLD: f64 = 2.0; -const EXTEND_TRACKS_SEARCH_RADIUS: usize = 1; +const EXTEND_TRACKS_SEARCH_RADIUS: usize = 3; const PERSPECTIVE_SCALE_THRESHOLD: f64 = 0.0001; +const POSE_ESTIMATION_MIN_CORRELATION: f32 = 0.9; const RANSAC_N: usize = 3; const RANSAC_K: usize = 100_000; // TODO: this should be proportional to image size -const RANSAC_INLIERS_T: f64 = 15.0; -const RANSAC_T: f64 = 50.0; +const RANSAC_INLIERS_T: f64 = 1.0; +const RANSAC_T: f64 = 5.0; const RANSAC_D: usize = 100; const RANSAC_D_EARLY_EXIT: usize = 100_000; -const RANSAC_CHECK_INTERVAL: usize = 100; +const RANSAC_CHECK_INTERVAL: usize = 1000; #[derive(Clone, Copy)] pub struct Point { @@ -42,6 +43,7 @@ impl Point { pub type Surface = Vec; type Match = (u32, u32); +type CorrelatedPoints = DMatrix>; pub trait ProgressListener where @@ -104,7 +106,7 @@ impl Triangulation { pub fn triangulate( &mut self, - correlated_points: &DMatrix>, + correlated_points: &CorrelatedPoints, fundamental_matrix: &Matrix3, progress_listener: Option<&PL>, ) -> Result<(), TriangulationError> { @@ -144,7 +146,7 @@ struct AffineTriangulation { impl AffineTriangulation { fn triangulate( &mut self, - correlated_points: &DMatrix>, + correlated_points: &CorrelatedPoints, ) -> Result<(), TriangulationError> { if !self.surface.is_empty() { return Err(TriangulationError::new( @@ -163,7 +165,8 @@ impl AffineTriangulation { .iter() .enumerate() .filter_map(|(row, matched_point)| { - AffineTriangulation::triangulate_point((row, col), matched_point, index) + let point2 = matched_point.map(|p| (p.0, p.1)); + AffineTriangulation::triangulate_point((row, col), &point2, index) }) .collect::>() }) @@ -358,7 +361,7 @@ struct PerspectiveTriangulation { impl PerspectiveTriangulation { fn triangulate( &mut self, - correlated_points: &DMatrix>, + correlated_points: &CorrelatedPoints, fundamental_matrix: &Matrix3, progress_listener: Option<&PL>, ) -> Result<(), TriangulationError> { @@ -405,7 +408,12 @@ impl PerspectiveTriangulation { )) } }; - let camera2 = match 
self.recover_relative_pose(&k2, &k2_inv, progress_listener) { + let camera2 = match self.recover_relative_pose( + correlated_points, + &k2, + &k2_inv, + progress_listener, + ) { Some(camera2) => camera2, None => return Err(TriangulationError::new("Unable to find projection matrix")), }; @@ -508,7 +516,7 @@ impl PerspectiveTriangulation { fundamental_matrix: &Matrix3, k1: &Matrix3, k2: &Matrix3, - correlated_points: &DMatrix>, + correlated_points: &CorrelatedPoints, ) -> Option> { // Create essential matrix and camera matrices. let essential_matrix = k2.tr_mul(fundamental_matrix) * k1; @@ -584,6 +592,7 @@ impl PerspectiveTriangulation { fn recover_relative_pose( &self, + correlated_points: &CorrelatedPoints, k: &Matrix3, k_inv: &Matrix3, progress_listener: Option<&PL>, @@ -595,7 +604,14 @@ impl PerspectiveTriangulation { let unlinked_tracks = self .tracks .iter() - .filter(|track| track.range().len() == 2 && track.range().end == track_len) + .filter(|track| { + track.range().len() == 2 + && track.range().end == track_len + && track + .last() + .and_then(|m| correlated_points[(m.0 as usize, m.1 as usize)]) + .is_some_and(|m| m.2 > POSE_ESTIMATION_MIN_CORRELATION) + }) .map(|track| track.to_owned()) .collect::>(); @@ -608,6 +624,10 @@ impl PerspectiveTriangulation { && track.range().end == track_len && track.get(view_i).is_some() && track.point3d.is_some() + && track + .last() + .and_then(|m| correlated_points[(m.0 as usize, m.1 as usize)]) + .is_some_and(|m| m.2 > POSE_ESTIMATION_MIN_CORRELATION) }) .map(|track| track.to_owned()) .collect::>(); @@ -878,7 +898,7 @@ impl PerspectiveTriangulation { .reduce(|acc, val| acc.max(val)) } - fn extend_tracks(&mut self, correlated_points: &DMatrix>, index: usize) { + fn extend_tracks(&mut self, correlated_points: &CorrelatedPoints, index: usize) { let mut remaining_points = correlated_points.clone(); self.tracks.iter_mut().for_each(|track| { let last_pos = track.last().and_then(|last| { @@ -910,7 +930,8 @@ impl PerspectiveTriangulation { }); if let Some(last_pos) = last_pos { - track.add(index + 1, last_pos); + let track_point = (last_pos.0, last_pos.1); + track.add(index + 1, track_point); remaining_points[(last_pos.0 as usize, last_pos.1 as usize)] = None; }; }); @@ -923,11 +944,9 @@ impl PerspectiveTriangulation { .iter() .enumerate() .filter_map(|(row, m)| { - if m.is_none() { - return None; - } + let track_point2 = m.map(|m| (m.0, m.1))?; let mut track = Track::new(index, (row as u32, col as u32)); - track.add(index + 1, (*m)?); + track.add(index + 1, track_point2); Some(track) })
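
The patch above threads the correlation score through the whole pipeline: in src/correlation.rs a match becomes (u32, u32, f32), the GPU path writes correlation values to a dedicated f32 buffer (buffer_out_corr, bound at binding 5 as result_corr in correlation.wgsl), and in src/triangulation.rs recover_relative_pose only keeps candidate tracks whose latest match scores above POSE_ESTIMATION_MIN_CORRELATION.

Below is a minimal sketch of that filtering idea, assuming a simplified Track type with a single last_point field and a hypothetical filter_tracks_for_pose helper; only the (row, col, correlation) match layout, the 0.9 threshold, and the DMatrix<Option<Match>> storage are taken from the patch itself. The real Track type carries more state (a range of views and an optional 3D point).

    use nalgebra::DMatrix;

    // Mirrors the patch: a correlated point now carries (row, col, correlation).
    type Match = (u32, u32, f32);

    // Same threshold as POSE_ESTIMATION_MIN_CORRELATION in src/triangulation.rs.
    const POSE_ESTIMATION_MIN_CORRELATION: f32 = 0.9;

    // Hypothetical, simplified track: only the most recent observation matters here.
    struct Track {
        last_point: (u32, u32),
    }

    // Keep only tracks whose latest match has a high enough correlation,
    // mirroring the filter that recover_relative_pose now applies to its candidates.
    fn filter_tracks_for_pose<'a>(
        tracks: &'a [Track],
        correlated_points: &DMatrix<Option<Match>>,
    ) -> Vec<&'a Track> {
        tracks
            .iter()
            .filter(|track| {
                let (row, col) = track.last_point;
                correlated_points[(row as usize, col as usize)]
                    .is_some_and(|m| m.2 > POSE_ESTIMATION_MIN_CORRELATION)
            })
            .collect()
    }

On the GPU side the same information is produced by two separate readbacks: the i32 match coordinates from buffer_out and the f32 correlations from buffer_out_corr are copied into their own staging buffers, mapped, and then merged into the CorrelatedPoints matrix. Low-correlation matches therefore remain available for dense surface reconstruction, while pose estimation sees only the strongest ones.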