Skip to content

Commit

Permalink
Improve device scoring.
Browse files Browse the repository at this point in the history
Improve searching for the right buffer.
Keep trying all heaps when attempting to allocate memory.
Fixed incorrect type in shader.
  • Loading branch information
zlogic committed Jan 28, 2024
1 parent 9bed704 commit 4aff1cf
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 75 deletions.
2 changes: 1 addition & 1 deletion src/correlation/shaders/init_out_data.comp.glsl
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ layout(std430, set = 0, binding = 3) buffer Internals_Int
{
// Layout:
// Contains [min, max, neighbor_count] for the corridor range
int internals_int[];
ivec3 internals_int[];
};
layout(std430, set = 0, binding = 4) buffer Result_Matches
{
Expand Down
Binary file modified src/correlation/shaders/init_out_data.spv
Binary file not shown.
130 changes: 56 additions & 74 deletions src/correlation/vk.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{collections::HashMap, error, ffi::CStr, fmt, slice, time::SystemTime};
use std::{cmp::Ordering, collections::HashMap, error, ffi::CStr, fmt, slice, time::SystemTime};

use ash::{prelude::VkResult, vk};
use nalgebra::Matrix3;
Expand Down Expand Up @@ -477,11 +477,13 @@ impl Device {
instance.destroy_instance(None);
err
};
println!("Created all buffers");
// Init pipelines and shaders.
let descriptor_sets = unsafe {
Device::create_descriptor_sets(&device, &buffers, CorrelationDirection::Forward)
.map_err(cleanup_err)?
};
println!("Created descriptor sets");
let cleanup_err = |err| unsafe {
descriptor_sets.destroy(&device);
buffers.destroy(&device);
Expand All @@ -491,6 +493,7 @@ impl Device {
};
let pipelines =
unsafe { Device::create_pipelines(&device, &descriptor_sets).map_err(cleanup_err)? };
println!("Created pipelines");
let cleanup_err = |err| unsafe {
destroy_pipelines(&device, &pipelines);
descriptor_sets.destroy(&device);
Expand All @@ -502,6 +505,7 @@ impl Device {
// Init control struct - queues, fences, command buffer.
let control =
unsafe { Device::create_control(&device, compute_queue_index).map_err(cleanup_err)? };
println!("Created control");
let result = Device {
_entry: entry,
instance,
Expand Down Expand Up @@ -906,7 +910,7 @@ impl Device {
unsafe fn find_device(
instance: &ash::Instance,
max_buffer_size: usize,
) -> Result<(vk::PhysicalDevice, &'static str, u32), Box<dyn error::Error>> {
) -> Result<(vk::PhysicalDevice, String, u32), Box<dyn error::Error>> {
let devices = instance.enumerate_physical_devices()?;
let device = devices
.iter()
Expand All @@ -923,16 +927,7 @@ impl Device {
let queue_index = Device::find_compute_queue(instance, device)?;

let device_name = CStr::from_ptr(props.device_name.as_ptr());
let device_name = device_name.to_str().unwrap();
println!(
"Device {} type {} {}-{}-{}-{}",
device_name,
props.device_type.as_raw(),
props.limits.max_push_constants_size,
props.limits.max_bound_descriptor_sets,
props.limits.max_storage_buffer_range,
max_buffer_size
);
let device_name = String::from_utf8_lossy(device_name.to_bytes()).to_string();
// TODO: allow specifying a device name filter/regex?
let score = match props.device_type {
vk::PhysicalDeviceType::DISCRETE_GPU => 3,
Expand All @@ -941,24 +936,25 @@ impl Device {
_ => 0,
};
// Prefer real devices instead of dzn emulation.
let dzn_multiplier = if device_name
let is_dzn = device_name
.to_lowercase()
.starts_with("microsoft direct3d12")
{
1
} else {
10
};
Some((device, device_name, queue_index, score * dzn_multiplier))
.starts_with("microsoft direct3d12");
let score = (score, is_dzn);
Some((device, device_name, queue_index, score))
})
.max_by_key(|(_device, _name, _queue_index, score)| *score);
let (device, name, queue_index) = if let Some((device, name, queue_index, _score)) = device
{
.max_by(|(_, _, _, a), (_, _, _, b)| {
if a.1 && !b.1 {
return Ordering::Less;
} else if !a.1 && b.1 {
return Ordering::Greater;
}
return a.0.cmp(&b.0);
});
let (device, name, queue_index) = if let Some((device, name, queue_index, score)) = device {
(device, name, queue_index)
} else {
return Err(GpuError::new("Device not found").into());
};
println!("selected device {}", name);
Ok((device, name, queue_index))
}

Expand Down Expand Up @@ -1009,6 +1005,7 @@ impl Device {
let max_pixels = img1_pixels.max(img2_pixels);
let mut buffers: Vec<Buffer> = vec![];
let cleanup_err = |buffers: &[Buffer], err| {
println!("buffers count is {}", buffers.len());
buffers.iter().for_each(|buffer| {
device.free_memory(buffer.buffer_memory, None);
device.destroy_buffer(buffer.buffer, None)
Expand Down Expand Up @@ -1095,13 +1092,13 @@ impl Device {
buffer_type: BufferType,
) -> Result<Buffer, Box<dyn error::Error>> {
let size = size as u64;
let gpu_local = match buffer_type {
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => true,
BufferType::HostSource | BufferType::HostDestination => false,
};
let host_visible = match buffer_type {
BufferType::HostSource | BufferType::HostDestination => true,
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => false,
let required_memory_properties = match buffer_type {
BufferType::GpuOnly | BufferType::GpuDestination | BufferType::GpuSource => {
vk::MemoryPropertyFlags::DEVICE_LOCAL
}
BufferType::HostSource | BufferType::HostDestination => {
vk::MemoryPropertyFlags::HOST_VISIBLE
}
};
let extra_usage_flags = match buffer_type {
BufferType::HostSource => vk::BufferUsageFlags::TRANSFER_SRC,
Expand All @@ -1122,58 +1119,41 @@ impl Device {
};
let buffer = device.create_buffer(&buffer_create_info, None)?;
let memory_requirements = device.get_buffer_memory_requirements(buffer);
let memory_type_index = memory_properties.memory_types
[..memory_properties.memory_type_count as usize]
.iter()
.enumerate()
.find(|(memory_type_index, memory_type)| {
let buffer_memory = (0..memory_properties.memory_type_count as usize)
.flat_map(|i| {
let memory_type = memory_properties.memory_types[i];
if memory_properties.memory_heaps[memory_type.heap_index as usize].size
< memory_requirements.size
{
return false;
};
if (1 << memory_type_index) & memory_requirements.memory_type_bits == 0 {
return false;
return None;
}

if gpu_local
&& memory_type
.property_flags
.contains(vk::MemoryPropertyFlags::DEVICE_LOCAL)
{
return true;
if ((1 << i) & memory_requirements.memory_type_bits) == 0 {
return None;
}
if host_visible
&& memory_type
.property_flags
.contains(vk::MemoryPropertyFlags::HOST_VISIBLE)
{
return true;
let property_flags = memory_type.property_flags;
if !property_flags.contains(required_memory_properties) {
return None;
}
false
});
let memory_type_index = if let Some((index, _)) = memory_type_index {
index as u32
let host_visible = property_flags.contains(vk::MemoryPropertyFlags::HOST_VISIBLE);
let host_coherent = property_flags.contains(vk::MemoryPropertyFlags::HOST_COHERENT);
let allocate_info = vk::MemoryAllocateInfo {
allocation_size: memory_requirements.size,
memory_type_index: i as u32,
..Default::default()
};
// Some heaps may fill up, in which case allocating memory can fail.
let mem = device.allocate_memory(&allocate_info, None).ok()?;

Some((mem, host_visible, host_coherent))
})
.next();

let (buffer_memory, host_visible, host_coherent) = if let Some(mem) = buffer_memory {
mem
} else {
device.destroy_buffer(buffer, None);
return Err(GpuError::new("Cannot find suitable memory").into());
};
let property_flags =
memory_properties.memory_types[memory_type_index as usize].property_flags;
let host_visible = property_flags.contains(vk::MemoryPropertyFlags::HOST_VISIBLE);
let host_coherent = property_flags.contains(vk::MemoryPropertyFlags::HOST_COHERENT);
let allocate_info = vk::MemoryAllocateInfo {
allocation_size: memory_requirements.size,
memory_type_index,
..Default::default()
};
let buffer_memory = device.allocate_memory(&allocate_info, None);
let buffer_memory = match buffer_memory {
Ok(mem) => mem,
Err(err) => {
device.destroy_buffer(buffer, None);
return Err(err.into());
}
};
let result = Buffer {
buffer,
buffer_memory,
Expand Down Expand Up @@ -1225,6 +1205,7 @@ impl Device {
};
let cross_check_layout = create_layout_bindings(2).map_err(cleanup_err)?;
let cleanup_err = |err| {
println!("failed to allocate descriptor set");
device.destroy_descriptor_set_layout(cross_check_layout, None);
device.destroy_descriptor_set_layout(regular_layout, None);
device.destroy_descriptor_pool(descriptor_pool, None);
Expand Down Expand Up @@ -1398,6 +1379,7 @@ impl Device {
.create_fence(&fence_create_info, None)
.map_err(cleanup_err)?;
let cleanup_err = |err| {
println!("Failed to alloc command buffer");
device.destroy_command_pool(command_pool, None);
device.destroy_fence(fence, None);
err
Expand Down

0 comments on commit 4aff1cf

Please sign in to comment.