diff --git a/CHANGELOG.md b/CHANGELOG.md index db7c559..3bf6dd8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.7.1] - unreleased +## [0.7.1] - 2024-01-19 ### Fixed +- A (seemingly rare) race condition in FEE GPU code. - examples/analytic_cuda_device.cu wasn't complete. ## [0.7.0] - 2023-10-31 diff --git a/Cargo.lock b/Cargo.lock index f770f4f..2d1880d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -786,7 +786,7 @@ dependencies = [ [[package]] name = "mwa_hyperbeam" -version = "0.7.0" +version = "0.7.1" dependencies = [ "approx", "cbindgen", diff --git a/Cargo.toml b/Cargo.toml index c672ee0..eca58fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "mwa_hyperbeam" -version = "0.7.0" +version = "0.7.1" authors = [ "Christopher H. Jordan ", "Jack L. B. Line ", diff --git a/src/fee/ffi/mod.rs b/src/fee/ffi/mod.rs index b787e64..3f2d1fa 100644 --- a/src/fee/ffi/mod.rs +++ b/src/fee/ffi/mod.rs @@ -599,15 +599,17 @@ pub unsafe extern "C" fn fee_calc_jones_gpu_device( let za = slice::from_raw_parts(za_rad, num_azza as usize); let d_az = ffi_error!(DevicePointer::copy_to_device(az)); let d_za = ffi_error!(DevicePointer::copy_to_device(za)); - let d_latitude_rad = ffi_error!(latitude_rad - .as_ref() - .map(|f| DevicePointer::copy_to_device(&[*f as GpuFloat])) - .transpose()); + let d_latitude_rad = match latitude_rad.as_ref() { + Some(f) => ffi_error!(DevicePointer::copy_to_device(&[*f as GpuFloat])), + + // This won't allocate and accessing the pointer will give a null ptr. + None => DevicePointer::default(), + }; ffi_error!(beam.calc_jones_device_pair_inner( d_az.get(), d_za.get(), num_azza, - d_latitude_rad.map(|p| p.get()).unwrap_or(std::ptr::null()), + d_latitude_rad.get(), iau_bool, d_jones.cast() )); diff --git a/src/fee/gpu/mod.rs b/src/fee/gpu/mod.rs index e8a6740..b420eda 100644 --- a/src/fee/gpu/mod.rs +++ b/src/fee/gpu/mod.rs @@ -394,15 +394,19 @@ impl FEEBeamGpu { let d_zas = DevicePointer::copy_to_device(&zas)?; // Allocate the latitude if we have to. - let d_latitude_rad = latitude_rad - .map(|f| DevicePointer::copy_to_device(&[f as GpuFloat])) - .transpose()?; + let d_latitude_rad = match latitude_rad { + Some(f) => DevicePointer::copy_to_device(&[f as GpuFloat])?, + + // This won't allocate and accessing the pointer will give a + // null ptr. + None => DevicePointer::default(), + }; self.calc_jones_device_pair_inner( d_azs.get(), d_zas.get(), azels.len().try_into().expect("much fewer than i32::MAX"), - d_latitude_rad.map(|p| p.get()).unwrap_or(std::ptr::null()), + d_latitude_rad.get(), iau_reorder, d_results.get_mut() as *mut std::ffi::c_void, )?; @@ -433,15 +437,19 @@ impl FEEBeamGpu { let d_zas = DevicePointer::copy_to_device(za_rad)?; // Allocate the latitude if we have to. - let d_latitude_rad = latitude_rad - .map(|f| DevicePointer::copy_to_device(&[f as GpuFloat])) - .transpose()?; + let d_latitude_rad = match latitude_rad { + Some(f) => DevicePointer::copy_to_device(&[f as GpuFloat])?, + + // This won't allocate and accessing the pointer will give a + // null ptr. + None => DevicePointer::default(), + }; self.calc_jones_device_pair_inner( d_azs.get(), d_zas.get(), az_rad.len().try_into().expect("much fewer than i32::MAX"), - d_latitude_rad.map(|p| p.get()).unwrap_or(std::ptr::null()), + d_latitude_rad.get(), iau_reorder, d_results.get_mut() as *mut std::ffi::c_void, )?; diff --git a/src/gpu.rs b/src/gpu.rs index 7143ea1..9f7d3c4 100644 --- a/src/gpu.rs +++ b/src/gpu.rs @@ -61,13 +61,17 @@ impl DevicePointer { /// attempt to catch problems but there are no guarantees. #[track_caller] pub unsafe fn malloc(size: usize) -> Result, GpuError> { - let mut d_ptr = std::ptr::null_mut(); - gpuMalloc(&mut d_ptr, size); - check_for_errors(GpuCall::Malloc)?; - Ok(Self { - ptr: d_ptr.cast(), - num_elements: size / std::mem::size_of::(), - }) + if size == 0 { + Ok(Self::default()) + } else { + let mut d_ptr = std::ptr::null_mut(); + gpuMalloc(&mut d_ptr, size); + check_for_errors(GpuCall::Malloc)?; + Ok(Self { + ptr: d_ptr.cast(), + num_elements: size / std::mem::size_of::(), + }) + } } /// Get the number of elements of `T` that have been allocated on the @@ -160,6 +164,15 @@ impl Drop for DevicePointer { } } +impl Default for DevicePointer { + fn default() -> Self { + Self { + ptr: std::ptr::null_mut(), + num_elements: 0, + } + } +} + #[derive(Error, Debug)] pub enum GpuError { #[error("When overwriting, the new amount of memory did not equal the old amount")]