Skip to content

Commit

Permalink
Fix a nasty race condition.
Browse files Browse the repository at this point in the history
This commit reflects Dev's discovery of the problem -- thanks Dev.

In these few instances of `Option<DevicePointer<GpuFloat>>`, the pointer within
a `DevicePointer` was being taken, but then the `DevicePointer` itself was
consumed, so if the relevant memory was altered, the code did not behave as
intended. Curiously, this never led to a segfault despite being a use after
free, and witnessing unintended behaviour seemed to require running many GPU
tests concurrently.

The code is now simpler and less prone to error; don't have an
`Option<DevicePointer>`, just make a `DevicePointer` and if it has no contents,
it will give out a null pointer anyway.
  • Loading branch information
cjordan authored and d3v-null committed Jan 22, 2024
1 parent 6e12d39 commit 18e9f5f
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 23 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic
Versioning](https://semver.org/spec/v2.0.0.html).

## [0.7.1] - unreleased
## [0.7.1] - 2024-01-19
### Fixed
- A (seemingly rare) race condition in FEE GPU code.
- examples/analytic_cuda_device.cu wasn't complete.

## [0.7.0] - 2023-10-31
Expand Down
2 changes: 1 addition & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "mwa_hyperbeam"
version = "0.7.0"
version = "0.7.1"
authors = [
"Christopher H. Jordan <[email protected]>",
"Jack L. B. Line <[email protected]>",
Expand Down
12 changes: 7 additions & 5 deletions src/fee/ffi/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -599,15 +599,17 @@ pub unsafe extern "C" fn fee_calc_jones_gpu_device(
let za = slice::from_raw_parts(za_rad, num_azza as usize);
let d_az = ffi_error!(DevicePointer::copy_to_device(az));
let d_za = ffi_error!(DevicePointer::copy_to_device(za));
let d_latitude_rad = ffi_error!(latitude_rad
.as_ref()
.map(|f| DevicePointer::copy_to_device(&[*f as GpuFloat]))
.transpose());
let d_latitude_rad = match latitude_rad.as_ref() {
Some(f) => ffi_error!(DevicePointer::copy_to_device(&[*f as GpuFloat])),

// This won't allocate and accessing the pointer will give a null ptr.
None => DevicePointer::default(),
};
ffi_error!(beam.calc_jones_device_pair_inner(
d_az.get(),
d_za.get(),
num_azza,
d_latitude_rad.map(|p| p.get()).unwrap_or(std::ptr::null()),
d_latitude_rad.get(),
iau_bool,
d_jones.cast()
));
Expand Down
24 changes: 16 additions & 8 deletions src/fee/gpu/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -394,15 +394,19 @@ impl FEEBeamGpu {
let d_zas = DevicePointer::copy_to_device(&zas)?;

// Allocate the latitude if we have to.
let d_latitude_rad = latitude_rad
.map(|f| DevicePointer::copy_to_device(&[f as GpuFloat]))
.transpose()?;
let d_latitude_rad = match latitude_rad {
Some(f) => DevicePointer::copy_to_device(&[f as GpuFloat])?,

// This won't allocate and accessing the pointer will give a
// null ptr.
None => DevicePointer::default(),
};

self.calc_jones_device_pair_inner(
d_azs.get(),
d_zas.get(),
azels.len().try_into().expect("much fewer than i32::MAX"),
d_latitude_rad.map(|p| p.get()).unwrap_or(std::ptr::null()),
d_latitude_rad.get(),
iau_reorder,
d_results.get_mut() as *mut std::ffi::c_void,
)?;
Expand Down Expand Up @@ -433,15 +437,19 @@ impl FEEBeamGpu {
let d_zas = DevicePointer::copy_to_device(za_rad)?;

// Allocate the latitude if we have to.
let d_latitude_rad = latitude_rad
.map(|f| DevicePointer::copy_to_device(&[f as GpuFloat]))
.transpose()?;
let d_latitude_rad = match latitude_rad {
Some(f) => DevicePointer::copy_to_device(&[f as GpuFloat])?,

// This won't allocate and accessing the pointer will give a
// null ptr.
None => DevicePointer::default(),
};

self.calc_jones_device_pair_inner(
d_azs.get(),
d_zas.get(),
az_rad.len().try_into().expect("much fewer than i32::MAX"),
d_latitude_rad.map(|p| p.get()).unwrap_or(std::ptr::null()),
d_latitude_rad.get(),
iau_reorder,
d_results.get_mut() as *mut std::ffi::c_void,
)?;
Expand Down
27 changes: 20 additions & 7 deletions src/gpu.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,13 +61,17 @@ impl<T> DevicePointer<T> {
/// attempt to catch problems but there are no guarantees.
#[track_caller]
pub unsafe fn malloc(size: usize) -> Result<DevicePointer<T>, GpuError> {
let mut d_ptr = std::ptr::null_mut();
gpuMalloc(&mut d_ptr, size);
check_for_errors(GpuCall::Malloc)?;
Ok(Self {
ptr: d_ptr.cast(),
num_elements: size / std::mem::size_of::<T>(),
})
if size == 0 {
Ok(Self::default())
} else {
let mut d_ptr = std::ptr::null_mut();
gpuMalloc(&mut d_ptr, size);
check_for_errors(GpuCall::Malloc)?;
Ok(Self {
ptr: d_ptr.cast(),
num_elements: size / std::mem::size_of::<T>(),
})
}
}

/// Get the number of elements of `T` that have been allocated on the
Expand Down Expand Up @@ -160,6 +164,15 @@ impl<T> Drop for DevicePointer<T> {
}
}

impl<T> Default for DevicePointer<T> {
fn default() -> Self {
Self {
ptr: std::ptr::null_mut(),
num_elements: 0,
}
}
}

#[derive(Error, Debug)]
pub enum GpuError {
#[error("When overwriting, the new amount of memory did not equal the old amount")]
Expand Down

0 comments on commit 18e9f5f

Please sign in to comment.