From 2585ef9a06e905c6c0eee0242101b1347609ae55 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Mon, 15 Apr 2024 17:09:50 +0200 Subject: [PATCH 01/12] add dispatchId in spvasm --- common/common.hpp | 1 + extractor/extractor.cpp | 2 ++ extractor/spirv.cpp | 3 ++- runner/spirv.hpp | 3 ++- tests/example-counter.spvasm | 4 ++-- tests/example-expectation.spvasm | 4 ++-- tests/example-vksp_s0-expectation.spvasm | 4 ++-- tests/example.spvasm | 4 ++-- 8 files changed, 15 insertions(+), 10 deletions(-) diff --git a/common/common.hpp b/common/common.hpp index 5d4f2c2..2d42c80 100644 --- a/common/common.hpp +++ b/common/common.hpp @@ -121,6 +121,7 @@ struct vksp_configuration { uint32_t groupCountX; uint32_t groupCountY; uint32_t groupCountZ; + uint32_t dispatchId; }; struct vksp_specialization_map_entry { diff --git a/extractor/extractor.cpp b/extractor/extractor.cpp index 0307dde..483acf3 100644 --- a/extractor/extractor.cpp +++ b/extractor/extractor.cpp @@ -141,6 +141,8 @@ bool get_dispatch_compute_and_commandBuffer_from_dispatchId(TraceProcessor *tp, GET_STR_VALUE(tp, compute, "debug.shader_name", config.entryPoint); + config.dispatchId = dispatchId; + assert(!it.Next()); return true; } diff --git a/extractor/spirv.cpp b/extractor/spirv.cpp index 47febf5..20678f6 100644 --- a/extractor/spirv.cpp +++ b/extractor/spirv.cpp @@ -41,7 +41,7 @@ class InsertVkspReflectInfoPass : public spvtools::opt::Pass { { auto module = context()->module(); - std::vector ext_words = spvtools::utils::MakeVector("NonSemantic.VkspReflection.1"); + std::vector ext_words = spvtools::utils::MakeVector("NonSemantic.VkspReflection.2"); auto ExtInstId = context()->TakeNextId(); auto ExtInst = new spvtools::opt::Instruction( context(), spv::Op::OpExtInstImport, 0u, ExtInstId, { { SPV_OPERAND_TYPE_LITERAL_STRING, ext_words } }); @@ -66,6 +66,7 @@ class InsertVkspReflectInfoPass : public spvtools::opt::Pass { { SPV_OPERAND_TYPE_LITERAL_INTEGER, { config_->groupCountX } }, { 
SPV_OPERAND_TYPE_LITERAL_INTEGER, { config_->groupCountY } }, { SPV_OPERAND_TYPE_LITERAL_INTEGER, { config_->groupCountZ } }, + { SPV_OPERAND_TYPE_LITERAL_INTEGER, { config_->dispatchId } }, }); module->AddExtInstDebugInfo(std::unique_ptr(ConfigInst)); diff --git a/runner/spirv.hpp b/runner/spirv.hpp index c51e522..ab5fc8b 100644 --- a/runner/spirv.hpp +++ b/runner/spirv.hpp @@ -39,7 +39,7 @@ class ExtractVkspReflectInfoPass : public spvtools::opt::Pass { Status Process() override { auto module = context()->module(); - uint32_t ext_inst_id = module->GetExtInstImportId("NonSemantic.VkspReflection.1"); + uint32_t ext_inst_id = module->GetExtInstImportId("NonSemantic.VkspReflection.2"); int32_t descriptor_set_0_max_binding = -1; std::map id_to_descriptor_set; std::map id_to_binding; @@ -205,6 +205,7 @@ class ExtractVkspReflectInfoPass : public spvtools::opt::Pass { config_->groupCountX = inst->GetOperand(op_id++).words[0]; config_->groupCountY = inst->GetOperand(op_id++).words[0]; config_->groupCountZ = inst->GetOperand(op_id++).words[0]; + config_->dispatchId = inst->GetOperand(op_id++).words[0]; break; case NonSemanticVkspReflectionDescriptorSetBuffer: { vksp_descriptor_set ds; diff --git a/tests/example-counter.spvasm b/tests/example-counter.spvasm index 5de02f3..585598e 100644 --- a/tests/example-counter.spvasm +++ b/tests/example-counter.spvasm @@ -5,7 +5,7 @@ ; Schema: 0 OpCapability Shader %1 = OpExtInstImport "NonSemantic.ClspvReflection.5" - %49 = OpExtInstImport "NonSemantic.VkspReflection.1" + %49 = OpExtInstImport "NonSemantic.VkspReflection.2" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %2 "test_simple" %gl_GlobalInvocationID %4 %5 %6 @@ -61,7 +61,7 @@ %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input %4 = OpVariable %_ptr_Private_v3uint Private %gl_WorkGroupSize %5 = OpVariable %_ptr_StorageBuffer__struct_14 StorageBuffer - %50 = OpExtInst %void %49 Configuration 
".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_EXT_descriptor_indexing" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 + %50 = OpExtInst %void %49 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_EXT_descriptor_indexing" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 0 %51 = OpExtInst %void %49 PushConstants 16 12 "000000000000000000000000" 32 %52 = OpExtInst %void %49 PushConstants 0 12 "000000000000000000000000" 32 %53 = OpExtInst %void %49 DescriptorSetBuffer 0 0 7 0 0 0 1024 63 1024 0 1024 0 0 diff --git a/tests/example-expectation.spvasm b/tests/example-expectation.spvasm index 1203011..b9e3124 100644 --- a/tests/example-expectation.spvasm +++ b/tests/example-expectation.spvasm @@ -9,7 +9,7 @@ OpCapability Int64Atomics OpExtension "SPV_KHR_shader_clock" OpExtension "SPV_KHR_storage_buffer_storage_class" - %58 = OpExtInstImport "NonSemantic.VkspReflection.1" + %58 = OpExtInstImport "NonSemantic.VkspReflection.2" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %1 "test_simple" %gl_GlobalInvocationID %3 %4 %5 %6 @@ -79,7 +79,7 @@ %uint_8 = OpConstant %uint 8 %6 = OpVariable %_ptr_StorageBuffer__struct_19 StorageBuffer %ulong_0 = OpConstant %ulong 0 - %59 = OpExtInst %void %58 Configuration 
".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 + %59 = OpExtInst %void %58 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 0 %60 = OpExtInst %void %58 PushConstants 0 28 "00000000000000000000000000000000000000000000000000000000" 32 %61 = OpExtInst %void %58 DescriptorSetBuffer 0 0 7 0 0 0 1024 63 1024 0 1024 0 0 %62 = OpExtInst %void %58 DescriptorSetBuffer 0 1 7 0 0 0 16 34 16 0 16 0 0 diff --git a/tests/example-vksp_s0-expectation.spvasm b/tests/example-vksp_s0-expectation.spvasm index b0bf380..33563fa 100644 --- a/tests/example-vksp_s0-expectation.spvasm +++ b/tests/example-vksp_s0-expectation.spvasm @@ -9,7 +9,7 @@ OpCapability Int64Atomics OpExtension "SPV_KHR_shader_clock" OpExtension "SPV_KHR_storage_buffer_storage_class" - %73 = OpExtInstImport "NonSemantic.VkspReflection.1" + %73 = OpExtInstImport "NonSemantic.VkspReflection.2" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %3 "test_simple" %gl_GlobalInvocationID %5 %6 %7 %64 @@ -79,7 +79,7 @@ %uint_8 = OpConstant %uint 8 %64 = OpVariable %_ptr_StorageBuffer__struct_59 StorageBuffer %ulong_0 = OpConstant %ulong 0 - %74 = OpExtInst %void %73 Configuration 
".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 + %74 = OpExtInst %void %73 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 0 %75 = OpExtInst %void %73 PushConstants 0 28 "00000000000000000000000000000000000000000000000000000000" 32 %76 = OpExtInst %void %73 DescriptorSetBuffer 0 0 7 0 0 0 1024 63 1024 0 1024 0 0 %77 = OpExtInst %void %73 DescriptorSetBuffer 0 1 7 0 0 0 16 34 16 0 16 0 0 diff --git a/tests/example.spvasm b/tests/example.spvasm index 31ad6e5..e374e9b 100644 --- a/tests/example.spvasm +++ b/tests/example.spvasm @@ -5,7 +5,7 @@ ; Schema: 0 OpCapability Shader %1 = OpExtInstImport "NonSemantic.ClspvReflection.5" - %49 = OpExtInstImport "NonSemantic.VkspReflection.1" + %49 = OpExtInstImport "NonSemantic.VkspReflection.2" OpMemoryModel Logical GLSL450 OpEntryPoint GLCompute %2 "test_simple" %gl_GlobalInvocationID %4 %5 %6 @@ -61,7 +61,7 @@ %gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input %4 = OpVariable %_ptr_Private_v3uint Private %gl_WorkGroupSize %5 = OpVariable %_ptr_StorageBuffer__struct_14 StorageBuffer - %50 = OpExtInst %void %49 Configuration 
".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_EXT_descriptor_indexing" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 + %50 = OpExtInst %void %49 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_EXT_descriptor_indexing" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 0 %51 = OpExtInst %void %49 PushConstants 16 12 "000000000000000000000000" 32 %52 = OpExtInst %void %49 PushConstants 0 12 "000000000000000000000000" 32 %53 = OpExtInst %void %49 DescriptorSetBuffer 0 0 7 0 0 0 1024 63 1024 0 1024 0 0 From 3b1c996cbcd97022f2bf5dd967bfd17312078e44 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Mon, 15 Apr 2024 17:34:32 +0200 Subject: [PATCH 02/12] move runner/spirv.hpp to make it available to layer --- runner/spirv.hpp => common/spirv-extract.hpp | 0 runner/runner.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename runner/spirv.hpp => common/spirv-extract.hpp (100%) diff --git a/runner/spirv.hpp b/common/spirv-extract.hpp similarity index 100% rename from runner/spirv.hpp rename to common/spirv-extract.hpp diff --git a/runner/runner.cpp b/runner/runner.cpp index 7c82192..c167c9b 100644 --- a/runner/runner.cpp +++ b/runner/runner.cpp @@ -16,7 +16,7 @@ #include #include -#include "spirv.hpp" +#include "common/spirv-extract.hpp" #include #include From e2246cba966a9a26e475c32d68b9b6294fa12d25 Mon Sep 17 00:00:00 2001 From: Romaric 
Jodin Date: Wed, 17 Apr 2024 14:47:26 +0200 Subject: [PATCH 03/12] runner: add vkCmdPipelineBarrier after each vkCmdDispatch --- runner/runner.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/runner/runner.cpp b/runner/runner.cpp index c167c9b..b20b78d 100644 --- a/runner/runner.cpp +++ b/runner/runner.cpp @@ -652,8 +652,14 @@ static uint32_t execute(VkDevice device, VkCommandBuffer cmdBuffer, VkQueue queu vkCmdResetQueryPool(cmdBuffer, queryPool, 0, gNbGpuTimestamps); vkCmdWriteTimestamp(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, queryPool, 0); + VkMemoryBarrier memoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT }; + for (unsigned i = 0; i < gColdRun; i++) { vkCmdDispatch(cmdBuffer, config.groupCountX, config.groupCountY, config.groupCountZ); + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, + &memoryBarrier, 0, nullptr, 0, nullptr); } if (gCounterBuffer != VK_NULL_HANDLE) { @@ -666,6 +672,9 @@ static uint32_t execute(VkDevice device, VkCommandBuffer cmdBuffer, VkQueue queu vkCmdWriteTimestamp(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, queryPool, 1); for (unsigned i = 0; i < gHotRun; i++) { vkCmdDispatch(cmdBuffer, config.groupCountX, config.groupCountY, config.groupCountZ); + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, + &memoryBarrier, 0, nullptr, 0, nullptr); } vkCmdWriteTimestamp(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, queryPool, 2); From 130f7d8a579d3e3b3b5e3a0f1a06089bbe8f9855 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 17 Apr 2024 14:51:47 +0200 Subject: [PATCH 04/12] move PRINT in common.h & extract_from_input in spirv-extract.hpp 
--- common/common.hpp | 14 +++++++ common/spirv-extract.hpp | 67 +++++++++++++++++++++++++++++++ extractor/extractor.cpp | 9 +---- extractor/spirv.cpp | 1 - extractor/utils.hpp | 25 ------------ layer/CMakeLists.txt | 13 +++++- layer/layer.cpp | 10 ++++- runner/runner.cpp | 86 +++------------------------------------- 8 files changed, 107 insertions(+), 118 deletions(-) delete mode 100644 extractor/utils.hpp diff --git a/common/common.hpp b/common/common.hpp index 2d42c80..ddbc323 100644 --- a/common/common.hpp +++ b/common/common.hpp @@ -17,6 +17,20 @@ #include #include +#define PRINT_IMPL(file, message, ...) \ + do { \ + fprintf(file, "[VKSP] %s: " message "\n", __func__, ##__VA_ARGS__); \ + } while (0) + +#define ERROR(message, ...) PRINT_IMPL(stderr, message, ##__VA_ARGS__) + +#define PRINT(message, ...) \ + do { \ + if (gVerbose) { \ + PRINT_IMPL(stdout, message, ##__VA_ARGS__); \ + } \ + } while (0) + namespace vksp { struct vksp_push_constant { diff --git a/common/spirv-extract.hpp b/common/spirv-extract.hpp index ab5fc8b..a0760e8 100644 --- a/common/spirv-extract.hpp +++ b/common/spirv-extract.hpp @@ -18,6 +18,8 @@ #include "source/opt/pass.h" #include "spirv/unified1/NonSemanticVkspReflection.h" +#include + #define UNDEFINED_ID (UINT32_MAX) namespace vksp { @@ -551,4 +553,69 @@ class ExtractVkspReflectInfoPass : public spvtools::opt::Pass { bool disableCounters_; }; +bool extract_from_input(const char *filename, spv_target_env &spv_target_env, bool disable_counters, bool verbose, + std::vector &shader, std::vector &ds, + std::vector &pc, std::vector &me, + std::vector &counters, vksp::vksp_configuration &config) +{ + FILE *input = fopen(filename, "r"); + fseek(input, 0, SEEK_END); + size_t input_size = ftell(input); + fseek(input, 0, SEEK_SET); + std::vector input_buffer(input_size); + size_t size_read = 0; + do { + size_read += fread(&input_buffer.data()[size_read], 1, input_size - size_read, input); + } while (size_read != input_size); + fclose(input); + 
+ const uint32_t spirv_magic = 0x07230203; + spv_context context = spvContextCreate(spv_target_env); + uint32_t *binary = (uint32_t *)input_buffer.data(); + size_t size = input_size / sizeof(uint32_t); + spv_binary tmp_binary; + if (*(uint32_t *)input_buffer.data() != spirv_magic) { + spv_diagnostic diagnostic; + auto status = spvTextToBinary(context, input_buffer.data(), input_size, &tmp_binary, &diagnostic); + if (status != SPV_SUCCESS) { + ERROR("Error while converting shader from text to binary: %s", diagnostic->error); + spvDiagnosticDestroy(diagnostic); + return false; + } + + binary = tmp_binary->code; + size = tmp_binary->wordCount; + } + + spvtools::Optimizer opt(SPV_ENV_VULKAN_1_3); + opt.RegisterPass(spvtools::Optimizer::PassToken( + std::make_unique(&pc, &ds, &me, &counters, &config, disable_counters))); + opt.RegisterPass(spvtools::CreateStripReflectInfoPass()); + spvtools::OptimizerOptions options; + options.set_run_validator(false); + if (!opt.Run(binary, size, &shader, options)) { + ERROR("Error while running 'CreateVkspReflectInfoPass' and 'CreateStripReflectInfoPass'"); + return false; + } + + if (verbose) { + spv_text text; + spv_diagnostic diag; + spv_result_t spv_result = spvBinaryToText(context, shader.data(), shader.size(), + SPV_BINARY_TO_TEXT_OPTION_INDENT | SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES + | SPV_BINARY_TO_TEXT_OPTION_COMMENT, + &text, &diag); + if (spv_result == SPV_SUCCESS) { + PRINT("Shader:\n%s", text->str); + spvTextDestroy(text); + } else { + ERROR("Could not convert shader from binary to text: %s", diag->error); + spvDiagnosticDestroy(diag); + } + } + + spvContextDestroy(context); + + return true; +} } diff --git a/extractor/extractor.cpp b/extractor/extractor.cpp index 483acf3..44ae68a 100644 --- a/extractor/extractor.cpp +++ b/extractor/extractor.cpp @@ -26,19 +26,12 @@ #include #include +#include "common/common.hpp" #include "spirv.hpp" -#include "utils.hpp" using namespace perfetto::trace_processor; using namespace 
spvtools; -#define PRINT(message, ...) \ - do { \ - if (gVerbose) { \ - PRINT_IMPL(stdout, message, ##__VA_ARGS__); \ - } \ - } while (0) - #define CHECK(statement, message, ...) \ do { \ if (!(statement)) { \ diff --git a/extractor/spirv.cpp b/extractor/spirv.cpp index 20678f6..221c67d 100644 --- a/extractor/spirv.cpp +++ b/extractor/spirv.cpp @@ -16,7 +16,6 @@ #include "spirv-tools/optimizer.hpp" #include "spirv.hpp" -#include "utils.hpp" #include "common/common.hpp" #include "source/opt/pass.h" diff --git a/extractor/utils.hpp b/extractor/utils.hpp deleted file mode 100644 index 47a6924..0000000 --- a/extractor/utils.hpp +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2023 The Vulkan Shader Profiler authors. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include - -#define PRINT_IMPL(file, message, ...) \ - do { \ - fprintf(file, "[VKSP] %s: " message "\n", __func__, ##__VA_ARGS__); \ - } while (0) - -#define ERROR(message, ...) 
PRINT_IMPL(stderr, message, ##__VA_ARGS__) - diff --git a/layer/CMakeLists.txt b/layer/CMakeLists.txt index 05167fd..54cb08d 100644 --- a/layer/CMakeLists.txt +++ b/layer/CMakeLists.txt @@ -19,7 +19,18 @@ if (NOT PERFETTO_LIBRARY) endif() add_library (vulkan-shader-profiler MODULE layer.cpp) -target_include_directories(vulkan-shader-profiler PUBLIC ${Vulkan_INCLUDE_DIRS} ${PERFETTO_SDK_PATH} ${SPIRV-Tools-opt_INCLUDE_DIRS}) +target_include_directories(vulkan-shader-profiler PUBLIC + ${Vulkan_INCLUDE_DIRS} + ${PERFETTO_SDK_PATH} + ${SPIRV-Tools-opt_INCLUDE_DIRS} + ${SPIRV_TOOLS_SOURCE_PATH} + ${SPIRV_TOOLS_BUILD_PATH} + ${CMAKE_SOURCE_DIR} +) +if (SPIRV_HEADERS_INCLUDE_PATH) + target_include_directories(vulkan-shader-profiler PUBLIC ${SPIRV_HEADERS_INCLUDE_PATH}) +endif() + target_link_libraries (vulkan-shader-profiler ${SPIRV-Tools-opt_LIBRARIES} ${PERFETTO_LIBRARY}) set(BACKEND "InProcess" diff --git a/layer/layer.cpp b/layer/layer.cpp index 6758d20..b66bf6d 100644 --- a/layer/layer.cpp +++ b/layer/layer.cpp @@ -17,6 +17,11 @@ #include "spirv-tools/libspirv.h" +static bool gVerbose = true; + +#include "common/common.hpp" +#include "common/spirv-extract.hpp" + #include #include #include @@ -62,9 +67,10 @@ static std::unique_ptr gTracingSession; #define DISPATCH_TABLE_ELEMENT(func) PFN_vk##func func; -#define PRINT(message, ...) \ +#undef PRINT_IMPL +#define PRINT_IMPL(file, message, ...) 
\ do { \ - fprintf(stderr, "[VKSP] %s: " message "\n", __func__, ##__VA_ARGS__); \ + fprintf(file, "[VKSP] %s: " message "\n", __func__, ##__VA_ARGS__); \ TRACE_EVENT_INSTANT(VKSP_PERFETTO_CATEGORY, "PRINT", "message", perfetto::DynamicString(message)); \ } while (0) diff --git a/runner/runner.cpp b/runner/runner.cpp index b20b78d..5a0fe76 100644 --- a/runner/runner.cpp +++ b/runner/runner.cpp @@ -16,6 +16,9 @@ #include #include +static bool gVerbose = false; + +#include "common/common.hpp" #include "common/spirv-extract.hpp" #include @@ -28,20 +31,6 @@ #include #include -#define PRINT_IMPL(file, message, ...) \ - do { \ - fprintf(file, "[VKSP] %s: " message "\n", __func__, ##__VA_ARGS__); \ - } while (0) - -#define ERROR(message, ...) PRINT_IMPL(stderr, message, ##__VA_ARGS__) - -#define PRINT(message, ...) \ - do { \ - if (gVerbose) { \ - PRINT_IMPL(stdout, message, ##__VA_ARGS__); \ - } \ - } while (0) - #define CHECK(statement, message, ...) \ do { \ if (!(statement)) { \ @@ -56,7 +45,6 @@ static std::map charToByte = { { '0', 0 }, { '1', 1 }, { '2', 2 }, { '3', 3 }, { '4', 4 }, { '5', 5 }, { '6', 6 }, { '7', 7 }, { '8', 8 }, { '9', 9 }, { 'a', 10 }, { 'b', 11 }, { 'c', 12 }, { 'd', 13 }, { 'e', 14 }, { 'f', 15 } }; -static bool gVerbose = false; static std::string gInput = ""; static uint32_t gColdRun = 0, gHotRun = 1; static VkBuffer gCounterBuffer = VK_NULL_HANDLE; @@ -204,71 +192,6 @@ static int get_device_queue_and_cmd_buffer(VkPhysicalDevice &pDevice, VkDevice & return 0; } -static bool extract_from_input(std::vector &shader, std::vector &ds, - std::vector &pc, std::vector &me, - std::vector &counters, vksp::vksp_configuration &config) -{ - FILE *input = fopen(gInput.c_str(), "r"); - fseek(input, 0, SEEK_END); - size_t input_size = ftell(input); - fseek(input, 0, SEEK_SET); - std::vector input_buffer(input_size); - size_t size_read = 0; - do { - size_read += fread(&input_buffer.data()[size_read], 1, input_size - size_read, input); - } while (size_read != 
input_size); - fclose(input); - - const uint32_t spirv_magic = 0x07230203; - spv_context context = spvContextCreate(gSpvTargetEnv); - uint32_t *binary = (uint32_t *)input_buffer.data(); - size_t size = input_size / sizeof(uint32_t); - spv_binary tmp_binary; - if (*(uint32_t *)input_buffer.data() != spirv_magic) { - spv_diagnostic diagnostic; - auto status = spvTextToBinary(context, input_buffer.data(), input_size, &tmp_binary, &diagnostic); - if (status != SPV_SUCCESS) { - ERROR("Error while converting shader from text to binary: %s", diagnostic->error); - spvDiagnosticDestroy(diagnostic); - return false; - } - - binary = tmp_binary->code; - size = tmp_binary->wordCount; - } - - spvtools::Optimizer opt(SPV_ENV_VULKAN_1_3); - opt.RegisterPass(spvtools::Optimizer::PassToken( - std::make_unique(&pc, &ds, &me, &counters, &config, gDisableCounters))); - opt.RegisterPass(spvtools::CreateStripReflectInfoPass()); - spvtools::OptimizerOptions options; - options.set_run_validator(false); - if (!opt.Run(binary, size, &shader, options)) { - ERROR("Error while running 'CreateVkspReflectInfoPass' and 'CreateStripReflectInfoPass'"); - return false; - } - - if (gVerbose) { - spv_text text; - spv_diagnostic diag; - spv_result_t spv_result = spvBinaryToText(context, shader.data(), shader.size(), - SPV_BINARY_TO_TEXT_OPTION_INDENT | SPV_BINARY_TO_TEXT_OPTION_FRIENDLY_NAMES - | SPV_BINARY_TO_TEXT_OPTION_COMMENT, - &text, &diag); - if (spv_result == SPV_SUCCESS) { - PRINT("Shader:\n%s", text->str); - spvTextDestroy(text); - } else { - ERROR("Could not convert shader from binary to text: %s", diag->error); - spvDiagnosticDestroy(diag); - } - } - - spvContextDestroy(context); - - return true; -} - static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDevice device, VkCommandBuffer cmdBuffer, VkPhysicalDeviceMemoryProperties &memProperties, std::vector &descSet) { @@ -907,7 +830,8 @@ int main(int argc, char **argv) std::vector meVector; std::vector counters; 
vksp::vksp_configuration config; - CHECK(extract_from_input(shader, dsVector, pcVector, meVector, counters, config), + CHECK(extract_from_input(gInput.c_str(), gSpvTargetEnv, gDisableCounters, gVerbose, shader, dsVector, pcVector, + meVector, counters, config), "Could not extract data from input"); PRINT("Shader name: '%s'", config.shaderName); PRINT("Entry point: '%s'", config.entryPoint); From 4267d8b9178aeb09311c173acd75d0dec922147d Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 17 Apr 2024 15:13:03 +0200 Subject: [PATCH 05/12] Generate buffers file from layer, and use it in runner --- README.md | 17 +- chromeos-utils/vulkan-shader-profiler.sh | 11 +- common/buffers_file.hpp | 132 +++++++ layer/functions.def | 13 + layer/layer.cpp | 417 ++++++++++++++++++++++- runner/runner.cpp | 44 ++- 6 files changed, 628 insertions(+), 6 deletions(-) create mode 100644 common/buffers_file.hpp diff --git a/README.md b/README.md index 65ddd58..ae4ffd6 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ It allows to visualize a vulkan application using perfetto with information about the compute shader to easily identify which shader is taking most of the application time, and what is its Vulkan SPIR-V source code. -Using the `vulkan-shader-profiler-extractor` and `vulkan-shader-profiler-runner`, it is also possible to extract a specific dispatch from the trace (using the `dispatchId` debug information from the trace), and replay it with profiled section with the runner. +Using the `vulkan-shader-profiler-extractor` and `vulkan-shader-profiler-runner`, it is also possible to extract a specific dispatch from the trace (using the `dispatchId` debug information from the trace), and replay it with the runner. # Legal @@ -79,6 +79,8 @@ To run an application with the `vulkan-kernel-profiler`, one need to ensure the * The `Vulkan-Loader` needs to be able to find the manifest in `/manifest/vulkan-shader-profiler.json`. 
This can be achieve by using the follow environment variable: `VK_ADD_LAYER_PATH=`. * The Layer needs to be enabled. Either directly from the application, or using the following environment variable: `VK_LOADER_LAYERS_ENABLE="VK_LAYER_SHADER_PROFILER"`. +It is also possible to extract the content of the memories of buffers and images used by a specific dispatch. This requires a first run, from which the targeted dispatch is extracted. A second run can then be done with `VKSP_EXTRACT_BUFFERS_FROM=` set. It will generate a `` file that can be used later on with the `vulkan-shader-profiler-runner` to initialize the memories of the images and buffers used. + ## On ChromeOS Make sure to have emerged and deployed the `vulkan-shader-profiler`. @@ -125,6 +127,18 @@ Functions used by `vulkan-shader-profiler` internally: * `CmdWriteTimestamp`: To store the timestamp during the command buffer execution. * `GetCalibratedTimestampsEXT`: To convert the device timestamp to the host timeline * `GetPhysicalDeviceProperties`: To convert the number of ticks returned by `CmdWriteTimestamp` to actual time information in nano-seconds. +* The following functions are used for the extracting buffers feature: + * `CmdPipelineBarrier` + * `CmdCopyBuffer` + * `CmdCopyImage` + * `MapMemory` + * `UnmapMemory` + * `GetImageMemoryRequirements` + * `GetBufferMemoryRequirements` + * `DestroyImage` + * `DestroyBuffer` + * `FreeMemory` + * `GetPhysicalDeviceMemoryProperties` # Extracting a dispatch from a trace @@ -158,6 +172,7 @@ Required options: Optional options: +* `-b`: path to a buffers file associated to the input (generated when tracing with `VKSP_EXTRACT_BUFFERS_FROM`). * `-c`: disable the counters. Allow to run with no overhead introduced by the counters. 
* `-e`: allow to choose the `spv_target_env` to use when using a non-binary input to convert it to binary (default: `vulkan1.3`) * `-n`: allow to run the program multiple times diff --git a/chromeos-utils/vulkan-shader-profiler.sh b/chromeos-utils/vulkan-shader-profiler.sh index 02880be..e4b6f75 100755 --- a/chromeos-utils/vulkan-shader-profiler.sh +++ b/chromeos-utils/vulkan-shader-profiler.sh @@ -16,5 +16,12 @@ set -x -VK_LOADER_LAYERS_ENABLE="VK_LAYER_SHADER_PROFILER" \ -"$@" +if [[ "$1" == "-e" ]] +then + shift + SPV_FILE="$1" + shift + VKSP_EXTRACT_BUFFERS_FROM="${SPV_FILE}" VK_LOADER_LAYERS_ENABLE="VK_LAYER_SHADER_PROFILER" "$@" +else + VK_LOADER_LAYERS_ENABLE="VK_LAYER_SHADER_PROFILER" "$@" +fi diff --git a/common/buffers_file.hpp b/common/buffers_file.hpp new file mode 100644 index 0000000..c9385a3 --- /dev/null +++ b/common/buffers_file.hpp @@ -0,0 +1,132 @@ +// Copyright 2024 The Vulkan Shader Profiler authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include +#include + +namespace vksp { + +using buffer_map_key = std::pair; +using buffer_map_val = std::pair; +using buffers_map = std::map; + +class BuffersFile { +public: + BuffersFile(uint32_t dispatchId) + : m_version(1) + , m_magic(0x766B7370) // VKSP in ASCII + , m_dispatchId(dispatchId) + { + } + + bool ReadFromFile(const char *filename) + { + FILE *fd = fopen(filename, "r"); + uint32_t file_header[3]; + if (fread(file_header, sizeof(file_header), 1, fd) != 1) { + return false; + } + if (file_header[0] != m_magic || file_header[1] != m_version || file_header[2] != m_dispatchId) { + return false; + } + while (true) { + uint32_t buffer_header[3]; + if (fread(buffer_header, sizeof(buffer_header), 1, fd) != 1) { + return false; + } + if (buffer_header[0] == UINT32_MAX || buffer_header[1] == UINT32_MAX || buffer_header[2] == UINT32_MAX) { + break; + } + buffer_map_key key = std::make_pair(buffer_header[0], buffer_header[1]); + + uint32_t size = buffer_header[2]; + void *data = malloc(size); + if (data == nullptr) { + return false; + } + size_t byte_read = 0; + while (byte_read != size) { + byte_read += fread(&(((char *)data)[byte_read]), sizeof(char), size - byte_read, fd); + } + buffer_map_val val = std::make_pair(size, data); + m_buffers[key] = val; + } + + fclose(fd); + return true; + } + + bool WriteToFile(const char *filename) + { + FILE *fd = fopen(filename, "w"); + if (fd == nullptr) { + return false; + } + if (fwrite(&m_magic, sizeof(m_magic), 1, fd) != 1) { + return false; + } + if (fwrite(&m_version, sizeof(m_version), 1, fd) != 1) { + return false; + } + if (fwrite(&m_dispatchId, sizeof(m_dispatchId), 1, fd) != 1) { + return false; + } + for (auto &buffer : m_buffers) { + uint32_t set = buffer.first.first; + uint32_t binding = buffer.first.second; + uint32_t size = buffer.second.first; + void *data = buffer.second.second; + if (fwrite(&set, sizeof(set), 1, fd) != 1) { + return false; + } + if (fwrite(&binding, 
sizeof(binding), 1, fd) != 1) { + return false; + } + if (fwrite(&size, sizeof(size), 1, fd) != 1) { + return false; + } + uint32_t byte_written = 0; + while (byte_written != size) { + byte_written += fwrite(&(((char *)data)[byte_written]), sizeof(char), size - byte_written, fd); + } + } + uint32_t eof[3] = { UINT32_MAX, UINT32_MAX, UINT32_MAX }; + if (fwrite(eof, sizeof(eof), 1, fd) != 1) { + return false; + } + fclose(fd); + + return true; + } + + void AddBuffer(uint32_t set, uint32_t binding, uint32_t size, void *data) + { + buffer_map_key key = std::make_pair(set, binding); + buffer_map_val val = std::make_pair(size, data); + m_buffers[key] = val; + } + + buffers_map *GetBuffers() { return &m_buffers; } + +private: + const uint32_t m_version; + const uint32_t m_magic; + const uint32_t m_dispatchId; + buffers_map m_buffers; +}; +} diff --git a/layer/functions.def b/layer/functions.def index 07b7b4b..283b6ca 100644 --- a/layer/functions.def +++ b/layer/functions.def @@ -55,6 +55,19 @@ FUNC_DEV_INT(CmdWriteTimestamp) FUNC_DEV_INT(GetCalibratedTimestampsEXT) FUNC_INS_INT(GetPhysicalDeviceProperties) +//Functions used internally for the extracting buffers feature +FUNC_DEV_INT(CmdCopyBuffer) +FUNC_DEV_INT(CmdCopyImage) +FUNC_DEV_INT(CmdPipelineBarrier) +FUNC_DEV_INT(MapMemory) +FUNC_DEV_INT(UnmapMemory) +FUNC_DEV_INT(GetImageMemoryRequirements) +FUNC_DEV_INT(GetBufferMemoryRequirements) +FUNC_DEV_INT(DestroyImage) +FUNC_DEV_INT(DestroyBuffer) +FUNC_DEV_INT(FreeMemory) +FUNC_INS_INT(GetPhysicalDeviceMemoryProperties) + #undef FUNC_INS #undef FUNC_INS_INT #undef FUNC_DEV diff --git a/layer/layer.cpp b/layer/layer.cpp index b66bf6d..1a3cb7f 100644 --- a/layer/layer.cpp +++ b/layer/layer.cpp @@ -19,6 +19,7 @@ static bool gVerbose = true; +#include "common/buffers_file.hpp" #include "common/common.hpp" #include "common/spirv-extract.hpp" @@ -137,6 +138,8 @@ static std::map CmdBufferToPipeline; static std::map PipelineToShaderModule; static std::map 
PipelineToShaderModuleName; static std::map ShaderModuleToString; +static std::map ImageViewToImage; +static std::map DstSetIndexToPtr; struct ThreadDispatch { VkQueryPool query_pool; @@ -230,6 +233,368 @@ static void update_block_size() } } +/*****************************************************************************/ +/* EXTRACT BUFFERS ***********************************************************/ +/*****************************************************************************/ + +typedef struct DescriptorSetUpdated_ { + VkDescriptorType descriptorType; + union { + struct { + VkBuffer buffer; + VkDeviceSize size; + VkDeviceSize offset; + }; + struct { + VkImage image; + VkImageLayout layout; + uint32_t width; + uint32_t height; + uint32_t depth; + }; + }; +} DescriptorSetUpdated; + +static std::map, DescriptorSetUpdated> DescriptorSetsUpdated; + +typedef struct DescriptorSetExtracted_ { + DescriptorSetUpdated object; + uint32_t dstSet; + uint32_t dstBinding; + VkDeviceMemory memory; + VkDeviceSize size; +} DescriptorSetExtracted; +std::vector DescriptorSetsExtracted; + +std::vector DescriptorSetsToExtract; +vksp::vksp_configuration vksp_config; +uint64_t DispatchIdToExtract = UINT64_MAX; +char *extract_buffers_from_filename = nullptr; +std::condition_variable DispatchIdCond; + +static bool allocate_buffer( + vksp::vksp_descriptor_set &ds, VkDevice device, VkPhysicalDeviceMemoryProperties &memProperties) +{ + VkBuffer buffer; + const VkBufferCreateInfo pCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, ds.buffer.size, + VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr }; + + VkResult res = gdispatch[device].CreateBuffer(device, &pCreateInfo, nullptr, &buffer); + if (res != VK_SUCCESS) { + PRINT("Could not create buffer (%u)", res); + return false; + } + + VkMemoryRequirements memreqs; + gdispatch[device].GetBufferMemoryRequirements(device, buffer, &memreqs); + bool memoryTypeFound = false; + for (uint32_t i = 0; i < 
memProperties.memoryTypeCount; i++) { + auto dev_properties = memProperties.memoryTypes[i].propertyFlags; + bool valid = (1ULL << i) & memreqs.memoryTypeBits; + auto required_properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + bool satisfactory = (dev_properties & required_properties) == required_properties; + if (satisfactory && valid) { + ds.buffer.memoryType = i; + memoryTypeFound = true; + break; + } + } + if (!memoryTypeFound) { + PRINT("Could not find a memoryType for buffer"); + return false; + } + + const VkMemoryAllocateInfo pAllocateInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + nullptr, + ds.buffer.memorySize, + ds.buffer.memoryType, + }; + VkDeviceMemory memory; + res = gdispatch[device].AllocateMemory(device, &pAllocateInfo, nullptr, &memory); + if (res != VK_SUCCESS) { + PRINT("Could not allocate memory (%u)", res); + return false; + } + + res = gdispatch[device].BindBufferMemory(device, buffer, memory, ds.buffer.bindOffset); + if (res != VK_SUCCESS) { + PRINT("Could not bind buffer memory (%u)", res); + return false; + } + + DescriptorSetExtracted dsExtracted = { + .object = { .descriptorType = (VkDescriptorType)ds.type, .buffer = buffer, }, + .dstSet = ds.ds, + .dstBinding = ds.binding, + .memory = memory, + .size = ds.buffer.memorySize, + }; + DescriptorSetsExtracted.push_back(dsExtracted); + + return true; +} + +static bool allocate_image( + vksp::vksp_descriptor_set &ds, VkDevice device, VkPhysicalDeviceMemoryProperties &memProperties) +{ + VkImage image; + VkExtent3D extent = { ds.image.width, ds.image.height, ds.image.depth }; + const VkImageCreateInfo pCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, nullptr, ds.image.imageFlags, + (VkImageType)ds.image.imageType, (VkFormat)ds.image.format, extent, ds.image.mipLevels, ds.image.arrayLayers, + (VkSampleCountFlagBits)ds.image.samples, (VkImageTiling)ds.image.tiling, VK_IMAGE_USAGE_TRANSFER_DST_BIT, + VK_SHARING_MODE_EXCLUSIVE, 0, nullptr, 
(VkImageLayout)ds.image.initialLayout }; + + VkResult res = gdispatch[device].CreateImage(device, &pCreateInfo, nullptr, &image); + if (res != VK_SUCCESS) { + PRINT("Could not create image (%u)", res); + return false; + } + + VkMemoryRequirements memreqs; + gdispatch[device].GetImageMemoryRequirements(device, image, &memreqs); + bool memoryTypeFound = false; + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + auto dev_properties = memProperties.memoryTypes[i].propertyFlags; + bool valid = (1ULL << i) & memreqs.memoryTypeBits; + auto required_properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + bool satisfactory = (dev_properties & required_properties) == required_properties; + if (satisfactory && valid) { + ds.image.memoryType = i; + memoryTypeFound = true; + break; + } + } + if (!memoryTypeFound) { + PRINT("Could not find a memoryType for image"); + return false; + } + + const VkMemoryAllocateInfo pAllocateInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + nullptr, + std::max(ds.image.memorySize, (uint32_t)memreqs.size), + ds.image.memoryType, + }; + + VkDeviceMemory memory; + res = gdispatch[device].AllocateMemory(device, &pAllocateInfo, nullptr, &memory); + if (res != VK_SUCCESS) { + PRINT("Could not allocate memory (%u)", res); + return false; + } + + res = gdispatch[device].BindImageMemory(device, image, memory, ds.image.bindOffset); + if (res != VK_SUCCESS) { + PRINT("Could not bind image memory (%u)", res); + return false; + } + + DescriptorSetExtracted dsExtracted = { + .object = { .descriptorType = (VkDescriptorType)ds.type, + .image = image, + .layout = (VkImageLayout)ds.image.initialLayout, + .width = ds.image.width, + .height = ds.image.height, + .depth = ds.image.depth, + }, + .dstSet = ds.ds, + .dstBinding = ds.binding, + .memory = memory, + .size = ds.image.memorySize, + }; + DescriptorSetsExtracted.push_back(dsExtracted); + + return true; +} + +static void extract_buffers_setup(VkDevice device, 
VkPhysicalDevice pDevice) +{ + if (DispatchIdToExtract != UINT64_MAX) { + PRINT("already been called, but is called a second time, ignoring"); + return; + } + extract_buffers_from_filename = getenv("VKSP_EXTRACT_BUFFERS_FROM"); + if (extract_buffers_from_filename == nullptr) { + return; + } + if (access(extract_buffers_from_filename, F_OK)) { + PRINT("Could not find file '%s' from which to extract buffers", extract_buffers_from_filename); + return; + } + VkPhysicalDeviceMemoryProperties memProperties; + gdispatch[PhysicalDeviceToInstance[pDevice]].GetPhysicalDeviceMemoryProperties(pDevice, &memProperties); + + std::vector shader; + std::vector pc; + std::vector me; + std::vector counters; + spv_target_env spv_env = SPV_ENV_VULKAN_1_3; + if (!vksp::extract_from_input(extract_buffers_from_filename, spv_env, true, false, shader, DescriptorSetsToExtract, + pc, me, counters, vksp_config)) { + PRINT("Could not extract information from input"); + return; + } + for (auto &ds : DescriptorSetsToExtract) { + switch (ds.type) { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + if (!allocate_buffer(ds, device, memProperties)) { + return; + } + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + if (!allocate_image(ds, device, memProperties)) { + return; + } + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + break; + default: + PRINT("unsupported descriptor set type (%u)", ds.type); + break; + } + } + DispatchIdToExtract = vksp_config.dispatchId; +} + +static void extract_buffers_copy(VkDevice device, VkCommandBuffer commandBuffer) +{ + VkMemoryBarrier memoryBarrier = { VK_STRUCTURE_TYPE_MEMORY_BARRIER, nullptr, VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT }; + gdispatch[device].CmdPipelineBarrier(commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_HOST_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT | 
VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, + &memoryBarrier, 0, nullptr, 0, nullptr); + + for (auto &ds : DescriptorSetsExtracted) { + auto dstSet = ds.dstSet; + auto dstSetFind = DstSetIndexToPtr.find(dstSet); + if (dstSetFind == DstSetIndexToPtr.end()) { + PRINT("Could not find dstSet pointer for '%u'", dstSet); + continue; + } + auto pair = std::make_pair(dstSetFind->second, ds.dstBinding); + auto dsUpdatedFind = DescriptorSetsUpdated.find(pair); + if (dsUpdatedFind == DescriptorSetsUpdated.end()) { + PRINT("Could not find pair (%u, %u)", dstSet, ds.dstBinding); + continue; + } + if (dsUpdatedFind->second.descriptorType != ds.object.descriptorType) { + PRINT("Found object does not match (expected %u, got %u)", ds.object.descriptorType, + dsUpdatedFind->second.descriptorType); + continue; + } + switch (ds.object.descriptorType) { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + auto offset = dsUpdatedFind->second.offset; + auto range = dsUpdatedFind->second.size; + if (ds.size < offset + range) { + PRINT("buffer size (%lu) is smaller than found buffer offset (%lu) + range (%lu)", ds.size, offset, + range); + continue; + } + const VkBufferCopy pRegion = { offset, offset, range }; + gdispatch[device].CmdCopyBuffer(commandBuffer, dsUpdatedFind->second.buffer, ds.object.buffer, 1, &pRegion); + } break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + VkImageSubresourceRange subresourceRange = { + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .baseMipLevel = 0, + .levelCount = VK_REMAINING_MIP_LEVELS, + .baseArrayLayer = 0, + .layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + VkImageMemoryBarrier imageBarrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + 0, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + ds.object.layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 0, + 0, + ds.object.image, + subresourceRange, + }; + gdispatch[device].CmdPipelineBarrier(commandBuffer, 
VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, + VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0, nullptr, 1, &imageBarrier); + const VkImageCopy pRegion = { + .srcSubresource + = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 }, + .srcOffset = { 0, 0, 0 }, + .dstSubresource + = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 }, + .dstOffset = { 0, 0, 0 }, + .extent = { ds.object.width, ds.object.height, ds.object.depth }, + }; + gdispatch[device].CmdCopyImage(commandBuffer, dsUpdatedFind->second.image, dsUpdatedFind->second.layout, + ds.object.image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &pRegion); + } break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + break; + default: + PRINT("unsupported descriptor set type (%u)", ds.object.descriptorType); + break; + } + } +} + +static void extract_buffers_create_file(VkDevice device) +{ + vksp::BuffersFile buffers_file(DispatchIdToExtract); + for (auto &ds : DescriptorSetsExtracted) { + void *data; + VkResult res = gdispatch[device].MapMemory(device, ds.memory, 0, ds.size, 0, &data); + if (res != VK_SUCCESS) { + PRINT("Could not map memory for (%u, %u) (%u)", ds.dstSet, ds.dstBinding, res); + continue; + } + buffers_file.AddBuffer(ds.dstSet, ds.dstBinding, (uint32_t)ds.size, data); + } + + std::string buffers_filename(extract_buffers_from_filename); + buffers_filename += ".buffers"; + if (!buffers_file.WriteToFile(buffers_filename.c_str())) { + PRINT("Could not write buffers to file"); + return; + } + + for (auto &ds : DescriptorSetsExtracted) { + gdispatch[device].UnmapMemory(device, ds.memory); + } + + std::lock_guard lock(glock); + DispatchIdToExtract = UINT64_MAX; + DispatchIdCond.notify_all(); +} + +static void extract_buffers_clean(VkDevice device) +{ + for (auto &ds : DescriptorSetsExtracted) { + switch (ds.object.descriptorType) { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + 
gdispatch[device].DestroyBuffer(device, ds.object.buffer, nullptr); + gdispatch[device].FreeMemory(device, ds.memory, nullptr); + } break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + gdispatch[device].DestroyImage(device, ds.object.image, nullptr); + gdispatch[device].FreeMemory(device, ds.memory, nullptr); + } break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + break; + default: + PRINT("unsupported descriptor set type (%u)", ds.object.descriptorType); + break; + } + } +} + /*****************************************************************************/ /* QUEUE THREAD **************************************************************/ /*****************************************************************************/ @@ -345,6 +710,10 @@ static void GenerateTrace(ThreadInfo *info, ThreadDispatch &cmd) "dispatchId", cmd.dispatchId, "shader", ShaderModuleToString[PipelineToShaderModule[cmd.pipeline]], "shader_name", PipelineToShaderModuleName[cmd.pipeline]); TRACE_EVENT_END(VKSP_PERFETTO_CATEGORY, perfetto::Track((uintptr_t)info->queue), (uint64_t)end); + + if (cmd.dispatchId == DispatchIdToExtract) { + extract_buffers_create_file(info->device); + } } static void QueueThreadFct(ThreadInfo *info) @@ -521,8 +890,30 @@ void VKAPI_CALL vksp_CmdDispatch( (void *)commandBuffer, "groupCountX", groupCountX, "groupCountY", groupCountY, "groupCountZ", groupCountZ, "dispatchId", dispatchId); + VkPipeline pipeline = CmdBufferToPipeline[commandBuffer]; + if (dispatchId == DispatchIdToExtract) { + auto vkspName = ShaderModuleToString[PipelineToShaderModule[pipeline]]; + auto moduleName = PipelineToShaderModuleName[pipeline]; + if (strcmp(moduleName.c_str(), vksp_config.entryPoint) != 0) { + PRINT("dispatchIdToExtract: entryPoint differs: '%s' != '%s'", moduleName.c_str(), vksp_config.entryPoint); + } + if (strcmp(vkspName.c_str(), vksp_config.shaderName) != 0) { + PRINT("dispatchIdToExtract: vkspName differs: '%s' != '%s'", vkspName.c_str(), 
vksp_config.shaderName); + } + if (groupCountX != vksp_config.groupCountX) { + PRINT("dispatchIdToExtract: groupCountX differs: %u != %u", groupCountX, vksp_config.groupCountX); + } + if (groupCountY != vksp_config.groupCountY) { + PRINT("dispatchIdToExtract: groupCountY differs: %u != %u", groupCountY, vksp_config.groupCountY); + } + if (groupCountZ != vksp_config.groupCountZ) { + PRINT("dispatchIdToExtract: groupCountZ differs: %u != %u", groupCountZ, vksp_config.groupCountZ); + } + extract_buffers_copy(device, commandBuffer); + } + ThreadDispatch dispatch = { - .pipeline = CmdBufferToPipeline[commandBuffer], + .pipeline = pipeline, .dispatchId = dispatchId++, .groupCountX = groupCountX, .groupCountY = groupCountY, @@ -686,6 +1077,12 @@ void VKAPI_CALL vksp_UpdateDescriptorSets(VkDevice device, uint32_t descriptorWr pDescriptorWrites[i].descriptorCount, "descriptorType", pDescriptorWrites[i].descriptorType, "buffer", (void *)pDescriptorWrites[i].pBufferInfo->buffer, "offset", pDescriptorWrites[i].pBufferInfo->offset, "range", pDescriptorWrites[i].pBufferInfo->range); + DescriptorSetsUpdated[std::make_pair(pDescriptorWrites[i].dstSet, pDescriptorWrites[i].dstBinding)] = { + .descriptorType = pDescriptorWrites[i].descriptorType, + .buffer = pDescriptorWrites[i].pBufferInfo->buffer, + .size = pDescriptorWrites[i].pBufferInfo->range, + .offset = pDescriptorWrites[i].pBufferInfo->offset, + }; break; case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: @@ -694,6 +1091,11 @@ void VKAPI_CALL vksp_UpdateDescriptorSets(VkDevice device, uint32_t descriptorWr pDescriptorWrites[i].descriptorCount, "descriptorType", (void *)pDescriptorWrites[i].descriptorType, "imageLayout", pDescriptorWrites[i].pImageInfo->imageLayout, "imageView", (void *)pDescriptorWrites[i].pImageInfo->imageView); + DescriptorSetsUpdated[std::make_pair(pDescriptorWrites[i].dstSet, pDescriptorWrites[i].dstBinding)] = { + .descriptorType = pDescriptorWrites[i].descriptorType, + 
.image = ImageViewToImage[pDescriptorWrites[i].pImageInfo->imageView], + .layout = pDescriptorWrites[i].pImageInfo->imageLayout, + }; break; case VK_DESCRIPTOR_TYPE_SAMPLER: TRACE_EVENT_INSTANT(VKSP_PERFETTO_CATEGORY, "vkUpdateDescriptorSets-write", "dstSet", @@ -728,6 +1130,7 @@ void VKAPI_CALL vksp_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipe TRACE_EVENT_INSTANT(VKSP_PERFETTO_CATEGORY, "vkCmdBindDescriptorSets-ds", "commandBuffer", (void *)commandBuffer, "pipelineBindPoint", pipelineBindPoint, "firstSet", firstSet, "dstSet", (void *)pDescriptorSets[i], "index", i); + DstSetIndexToPtr[firstSet + i] = pDescriptorSets[i]; } gdispatch[device].CmdBindDescriptorSets(commandBuffer, pipelineBindPoint, layout, firstSet, descriptorSetCount, @@ -797,6 +1200,8 @@ VkResult VKAPI_CALL vksp_CreateImageView(VkDevice device, const VkImageViewCreat auto result = gdispatch[device].CreateImageView(device, pCreateInfo, pAllocator, pView); + ImageViewToImage[*pView] = pCreateInfo->image; + TRACE_EVENT_INSTANT(VKSP_PERFETTO_CATEGORY, "vkCreateImageView-result", "pView", (void *)*pView, "image", (void *)pCreateInfo->image, "flags", pCreateInfo->flags, "format", pCreateInfo->format, "viewType", pCreateInfo->viewType, "components_a", pCreateInfo->components.a, "components_b", pCreateInfo->components.b, @@ -1018,12 +1423,20 @@ VkResult VKAPI_CALL vksp_CreateDevice(VkPhysicalDevice physicalDevice, const VkD gdispatch[*pDevice] = dispatchTable; DeviceToPhysicalDevice[*pDevice] = physicalDevice; + extract_buffers_setup(*pDevice, physicalDevice); + return VK_SUCCESS; } void VKAPI_CALL vksp_DestroyDevice(VkDevice device, const VkAllocationCallbacks *pAllocator) { - std::lock_guard lock(glock); + std::unique_lock lock(glock); + + while (DispatchIdToExtract != UINT64_MAX) { + DispatchIdCond.wait(lock); + } + extract_buffers_clean(device); + for (auto &[queue, thread] : QueueThreadPool[device]) { auto info = QueueToThreadInfo[queue]; { diff --git a/runner/runner.cpp 
b/runner/runner.cpp index 5a0fe76..a8cf6cc 100644 --- a/runner/runner.cpp +++ b/runner/runner.cpp @@ -18,6 +18,7 @@ static bool gVerbose = false; +#include "common/buffers_file.hpp" #include "common/common.hpp" #include "common/spirv-extract.hpp" @@ -46,6 +47,9 @@ static std::map charToByte { '9', 9 }, { 'a', 10 }, { 'b', 11 }, { 'c', 12 }, { 'd', 13 }, { 'e', 14 }, { 'f', 15 } }; static std::string gInput = ""; +static std::string gBuffersInput = ""; +static std::unique_ptr gBuffersContents; +static vksp::buffers_map *gBuffersMap = nullptr; static uint32_t gColdRun = 0, gHotRun = 1; static VkBuffer gCounterBuffer = VK_NULL_HANDLE; static VkDeviceMemory gCounterMemory; @@ -192,6 +196,27 @@ static int get_device_queue_and_cmd_buffer(VkPhysicalDevice &pDevice, VkDevice & return 0; } +static uint32_t initialize_memory( + VkDevice device, VkDeviceMemory memory, uint32_t ds, uint32_t binding, uint32_t memory_size) +{ + if (gBuffersMap != nullptr) { + vksp::buffer_map_key key = std::make_pair(ds, binding); + auto find = gBuffersMap->find(key); + if (find != gBuffersMap->end()) { + void *memory_data; + void *buffer_data = find->second.second; + uint32_t buffer_size = find->second.first; + CHECK(memory_size == buffer_size, "memory sizes does not match"); + + VkResult res = vkMapMemory(device, memory, 0, memory_size, 0, &memory_data); + CHECK_VK(res, "Could not map memory"); + memcpy(memory_data, buffer_data, memory_size); + vkUnmapMemory(device, memory); + } + } + return 0; +} + static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDevice device, VkCommandBuffer cmdBuffer, VkPhysicalDeviceMemoryProperties &memProperties, std::vector &descSet) { @@ -238,6 +263,9 @@ static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDe CHECK_VK(res, "Could not allocate memory for buffer"); gMemories.push_back(memory); + CHECK(initialize_memory(device, memory, ds.ds, ds.binding, ds.buffer.memorySize) == 0, + "Could not initialize memory for 
buffer"); + res = vkBindBufferMemory(device, buffer, memory, ds.buffer.bindOffset); CHECK_VK(res, "Could not bind buffer and memory"); @@ -292,6 +320,9 @@ static uint32_t handle_descriptor_set_image(vksp::vksp_descriptor_set &ds, VkDev CHECK_VK(res, "Could not allocate memory for image"); gMemories.push_back(memory); + CHECK(initialize_memory(device, memory, ds.ds, ds.binding, ds.image.memorySize) == 0, + "Could not initalize memory for image"); + res = vkBindImageMemory(device, image, memory, ds.image.bindOffset); CHECK_VK(res, "Could not bind image and memory"); @@ -757,6 +788,7 @@ static void help() printf("USAGE: vulkan-shader-profiler-runner [OPTIONS] -i \n" "\n" "OPTIONS:\n" + "\t-b\tBuffers file associated to the input generated when tracing\n" "\t-c\tDisable counters\n" "\t-e\tspv_target_env to use (default: vulkan1.3)\n" "\t-h\tDisplay this help and exit\n" @@ -770,8 +802,11 @@ static bool parse_args(int argc, char **argv) { bool bHelp = false; int c; - while ((c = getopt(argc, argv, "chvi:n:m:e:p:")) != -1) { + while ((c = getopt(argc, argv, "chvi:n:m:e:p:b:")) != -1) { switch (c) { + case 'b': + gBuffersInput = std::string(optarg); + break; case 'c': gDisableCounters = true; break; @@ -856,6 +891,13 @@ int main(int argc, char **argv) "Could not allocate descriptor set"); PRINT("Descriptor set allocated"); + if (gBuffersInput != "") { + gBuffersContents = std::make_unique(config.dispatchId); + if (gBuffersContents->ReadFromFile(gBuffersInput.c_str())) { + gBuffersMap = gBuffersContents->GetBuffers(); + } + } + for (auto &ds : dsVector) { switch (ds.type) { case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: From 14681402324ac24098a280f51e41e63864fecc5e Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 17 Apr 2024 16:04:29 +0200 Subject: [PATCH 06/12] runner: enable to dump a buffer after execution --- README.md | 1 + runner/runner.cpp | 80 +++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 71 insertions(+), 10 deletions(-) diff --git a/README.md 
// Splits `input` on every occurrence of `delimiter`.
//
// Every token, including the one after the last delimiter and empty tokens,
// is returned as a heap copy made with strdup(); the caller owns those
// strings. An input without any delimiter yields a single token equal to the
// input. A null or empty delimiter cannot split anything and also yields the
// whole input as one token (searching for "" would otherwise never advance).
static std::vector<const char *> split_string(std::string input, const char *delimiter)
{
    std::vector<const char *> tokens;
    const size_t delimiter_size = (delimiter != nullptr) ? strlen(delimiter) : 0;

    size_t start = 0;
    if (delimiter_size != 0) {
        size_t pos;
        while ((pos = input.find(delimiter, start)) != std::string::npos) {
            tokens.push_back(strdup(input.substr(start, pos - start).c_str()));
            start = pos + delimiter_size;
        }
    }
    // The text after the last delimiter is a token too; without it "1.2"
    // would split into just "1".
    tokens.push_back(strdup(input.substr(start).c_str()));
    return tokens;
}
&pDevice, VkDevice & VkDeviceQueueCreateInfo queueCreateInfo = { VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, globalPriority, 0, queueFamilyIndex, nbQueues, queuePriorities.data() }; - std::vector extensions; size_t pos = 0; std::string extensionsStr = std::string(enabledExtensionNames); - extensionsStr.erase(0, 1); - while ((pos = extensionsStr.find(".")) != std::string::npos) { - auto extension = extensionsStr.substr(0, pos); - extensions.push_back(strdup(extension.c_str())); - extensionsStr.erase(0, pos + 1); - } + extensionsStr.erase(0, 1); // remove first '.' + std::vector extensions = split_string(extensionsStr, "."); extensions.push_back(VK_KHR_SHADER_CLOCK_EXTENSION_NAME); const VkDeviceCreateInfo createInfo = { @@ -263,6 +276,11 @@ static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDe CHECK_VK(res, "Could not allocate memory for buffer"); gMemories.push_back(memory); + if (gOutputDs == ds.ds && gOutputBinding == ds.binding) { + gOutputMemory = memory; + gOutputMemorySize = ds.buffer.memorySize; + } + CHECK(initialize_memory(device, memory, ds.ds, ds.binding, ds.buffer.memorySize) == 0, "Could not initialize memory for buffer"); @@ -320,6 +338,11 @@ static uint32_t handle_descriptor_set_image(vksp::vksp_descriptor_set &ds, VkDev CHECK_VK(res, "Could not allocate memory for image"); gMemories.push_back(memory); + if (gOutputDs == ds.ds && gOutputBinding == ds.binding) { + gOutputMemory = memory; + gOutputMemorySize = ds.image.memorySize; + } + CHECK(initialize_memory(device, memory, ds.ds, ds.binding, ds.image.memorySize) == 0, "Could not initalize memory for image"); @@ -585,6 +608,25 @@ static uint32_t counters_size(std::vector &counters) return sizeof(uint64_t) * (2 + counters.size()); } +static uint32_t dump_output(VkDevice device) +{ + void *data; + VkResult res = vkMapMemory(device, gOutputMemory, 0, gOutputMemorySize, 0, &data); + CHECK_VK(res, "Could not map output memory"); + + std::string filename(gInput); + filename += 
"." + gOutputString + ".buffer"; + FILE *fd = fopen(filename.c_str(), "w"); + size_t byte_written = 0; + while (byte_written != gOutputMemorySize) { + byte_written += fwrite(&(((char *)data)[byte_written]), sizeof(char), gOutputMemorySize - byte_written, fd); + } + fclose(fd); + + vkUnmapMemory(device, gOutputMemory); + return 0; +} + static uint32_t execute(VkDevice device, VkCommandBuffer cmdBuffer, VkQueue queue, vksp::vksp_configuration &config, std::vector &counters, uint64_t *gpu_timestamps, std::chrono::steady_clock::time_point *host_timestamps) @@ -653,6 +695,10 @@ static uint32_t execute(VkDevice device, VkCommandBuffer cmdBuffer, VkQueue queu vkDestroyQueryPool(device, queryPool, nullptr); + if (gOutputMemory != VK_NULL_HANDLE) { + CHECK(dump_output(device) == 0, "Could not dump output memory"); + } + return 0; } @@ -794,6 +840,7 @@ static void help() "\t-h\tDisplay this help and exit\n" "\t-m\tNumber of cold run\n" "\t-n\tNumber of hot run\n" + "\t-o\tDescriptor set index and binding of a buffer to dump after the execution (example: '1.2')\n" "\t-p\tGlobal priority (0:low 1:medium 2:high 3:realtime default:unspecified)\n" "\t-v\tVerbose mode\n"); } @@ -802,7 +849,7 @@ static bool parse_args(int argc, char **argv) { bool bHelp = false; int c; - while ((c = getopt(argc, argv, "chvi:n:m:e:p:b:")) != -1) { + while ((c = getopt(argc, argv, "chvi:n:m:e:p:b:o:")) != -1) { switch (c) { case 'b': gBuffersInput = std::string(optarg); @@ -825,6 +872,17 @@ static bool parse_args(int argc, char **argv) case 'm': gColdRun = atoi(optarg); break; + case 'o': { + gOutputString = std::string(optarg); + auto splits = split_string(gOutputString, "."); + if (splits.size() == 2) { + gOutputDs = atoi(splits[0]); + gOutputBinding = atoi(splits[1]); + } else { + ERROR("'%s' does not match output (expected: 'ds.binding')", gOutputString.c_str()); + bHelp = true; + } + } break; case 'p': gPriority = atoi(optarg); break; @@ -856,8 +914,10 @@ int main(int argc, char **argv) if 
(!parse_args(argc, argv)) { return -1; } - PRINT("Arguments parsed: input '%s' verbose '%u' spv_target_env '%s' hot_runs '%u' cold_runs '%u'", gInput.c_str(), - gVerbose, spvTargetEnvDescription(gSpvTargetEnv), gHotRun, gColdRun); + PRINT("Arguments parsed: input '%s' verbose '%u' spv_target_env '%s' hot_runs '%u' cold_runs '%u' buffers '%s' " + "output_ds '%u' output_binding '%u' counters '%u' priority '%u'", + gInput.c_str(), gVerbose, spvTargetEnvDescription(gSpvTargetEnv), gHotRun, gColdRun, gBuffersInput.c_str(), + gOutputDs, gOutputBinding, gDisableCounters, gPriority); std::vector shader; std::vector dsVector; From 021c59d11190e2a4e4ae9f47a7956c5b62d818da Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 17 Apr 2024 17:13:41 +0200 Subject: [PATCH 07/12] fix runner string spliting routine --- runner/runner.cpp | 1 + tests/example-expectation.spvasm | 2 +- tests/example-vksp_s0-expectation.spvasm | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/runner/runner.cpp b/runner/runner.cpp index 40030fc..1d1719a 100644 --- a/runner/runner.cpp +++ b/runner/runner.cpp @@ -84,6 +84,7 @@ static std::vector split_string(std::string input, const char *del vector.push_back(strdup(extension.c_str())); input.erase(0, pos + delimiter_size); } + vector.push_back(strdup(input.c_str())); return vector; } diff --git a/tests/example-expectation.spvasm b/tests/example-expectation.spvasm index b9e3124..5c87496 100644 --- a/tests/example-expectation.spvasm +++ b/tests/example-expectation.spvasm @@ -79,7 +79,7 @@ %uint_8 = OpConstant %uint 8 %6 = OpVariable %_ptr_StorageBuffer__struct_19 StorageBuffer %ulong_0 = OpConstant %ulong 0 - %59 = OpExtInst %void %58 Configuration 
".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 0 + %59 = OpExtInst %void %58 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_EXT_descriptor_indexing.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 0 %60 = OpExtInst %void %58 PushConstants 0 28 "00000000000000000000000000000000000000000000000000000000" 32 %61 = OpExtInst %void %58 DescriptorSetBuffer 0 0 7 0 0 0 1024 63 1024 0 1024 0 0 %62 = OpExtInst %void %58 DescriptorSetBuffer 0 1 7 0 0 0 16 34 16 0 16 0 0 diff --git a/tests/example-vksp_s0-expectation.spvasm b/tests/example-vksp_s0-expectation.spvasm index 33563fa..df19a3e 100644 --- a/tests/example-vksp_s0-expectation.spvasm +++ b/tests/example-vksp_s0-expectation.spvasm @@ -79,7 +79,7 @@ %uint_8 = OpConstant %uint 8 %64 = OpVariable %_ptr_StorageBuffer__struct_59 StorageBuffer %ulong_0 = OpConstant %ulong 0 - %74 = OpExtInst %void %73 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" 
"test_simple" 128 1 1 0 + %74 = OpExtInst %void %73 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_EXT_descriptor_indexing.VK_KHR_shader_clock" 12 "020000000100000001000000" "vksp_s0" "test_simple" 128 1 1 0 %75 = OpExtInst %void %73 PushConstants 0 28 "00000000000000000000000000000000000000000000000000000000" 32 %76 = OpExtInst %void %73 DescriptorSetBuffer 0 0 7 0 0 0 1024 63 1024 0 1024 0 0 %77 = OpExtInst %void %73 DescriptorSetBuffer 0 1 7 0 0 0 16 34 16 0 16 0 0 From b19b37dbd146752d8d13bb32d4fb59a64c08edbc Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Wed, 17 Apr 2024 17:14:18 +0200 Subject: [PATCH 08/12] add test for buffers --- .github/workflows/presubmit.yml | 13 ++- tests/checksum.buffers | Bin 0 -> 4272 bytes tests/checksum.expected_output | 1 + tests/checksum.spvasm | 135 ++++++++++++++++++++++++++++++++ tests/test-buffers.sh | 25 ++++++ 5 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 tests/checksum.buffers create mode 100644 tests/checksum.expected_output create mode 100644 tests/checksum.spvasm create mode 100755 tests/test-buffers.sh diff --git a/.github/workflows/presubmit.yml b/.github/workflows/presubmit.yml index a4df313..a8dca81 100644 --- a/.github/workflows/presubmit.yml +++ b/.github/workflows/presubmit.yml @@ -36,8 +36,8 @@ jobs: sudo apt-get install ninja-build libvulkan-dev clang-15 mesa-vulkan-drivers - name: Build SPIRV-Tools run: | - git clone --depth 1 --branch main https://github.com/KhronosGroup/SPIRV-Tools.git third_party/spirv-tools - git clone --depth 1 --branch main https://github.com/KhronosGroup/SPIRV-Headers.git third_party/spirv-tools/external/spirv-headers + git clone --depth 1 --branch vksp 
https://github.com/rjodinchr/SPIRV-Tools.git third_party/spirv-tools + git clone --depth 1 --branch vksp https://github.com/rjodinchr/SPIRV-Headers.git third_party/spirv-tools/external/spirv-headers cmake -B third_party/spirv-tools/build -S third_party/spirv-tools/ -G Ninja \ -DCMAKE_CXX_COMPILER="$(which clang++)" \ -DCMAKE_BUILD_TYPE=Release @@ -112,3 +112,12 @@ jobs: PERFETTO_BINARY: ${{ github.workspace }}/third_party/perfetto/out/linux_clang_release/perfetto run: | $(pwd)/tests/test-extractor.sh + - name: Test buffers + env: + LD_LIBRARY_PATH: ${{ github.workspace }}/build_release/layer/:${{ github.workspace }}/third_party/vulkan-loader/build/loader/:$LD_LIBRARY_PATH + VKSP_RUNNER: ${{ github.workspace }}/build_release/runner/vulkan-shader-profiler-runner + PERFETTO_TRACED: ${{ github.workspace }}/third_party/perfetto/out/linux_clang_release/traced + PERFETTO_BINARY: ${{ github.workspace }}/third_party/perfetto/out/linux_clang_release/perfetto + run: | + $(pwd)/tests/test-buffers.sh + diff --git a/tests/checksum.buffers b/tests/checksum.buffers new file mode 100644 index 0000000000000000000000000000000000000000..1f43408c191e6edba1e806c5abc086a6766060de GIT binary patch literal 4272 zcmZ9Qc{CIZ_s7TBm$4fR8e@!oU$PTp9b*_XjCGh93}c(Y46+Q0N{NbWk+LL|C}b%m z5#=F-%A>LrWlJRy-rnEudCz&z`#tBL`~94I&;9qF^QFf}vN!<$BT)c=>Tou40G?tH zQDPYq?`#-`oxPKA7~mp{DqPlB_35u$iOq_ z$lNXFhSJpvaX$4(TF(0{He{)QFMrs(#qdp2kU1%deADK%7zG2T6qlWOrG@-myNbU6m@ihPi9W=1&Hgd)Xhw)oTanFV}=U zKiZ&7xHmEGmXCkIQ#;B-3J3jEnW3g;Ga_H*A4+k^km~*6XU>^9{aSH14rp$Z>Q)g? 
zsBbp{m0H3@McQdLMaKxF7J^Xv7vtNBUl+>>cRhskxzl%oHUjl@e+u3}0D8L=R!)wg zu6=ffnrCH7ED3Z34(E%A3epQgpQAXW#@NIlXZ>7$CtyW#h*KxegUfw(Q1SC%cwpC& zBQ{6E>MI8W<(&lZDeDs{Ax~dppYB_eBsG5u#;4ee`B`7Kdz!^5Ky2CKbA=l~k|#tg zD7s(5Ei%b2^mpySA?vj~$~=aWV#9~*O1dh9U+gnu)>lNd;e7{t(*Fxj)9dKiEipTO zIpS%6C`^^rT+fBJpo66i(?3X#d{Q!45S~l$c={#Wd1_nRX|lsn>WKy_fMeaP+x<5o zmgBLI(#fw#S&oh1F6D13rK$=c-$#dKfV&~tW!5_r2 zcKilo)#*XB9T{R;jgfJXT-LRCiNud-ZC(Lh!y0uqm`}eYmnW=fCE`O2gY7r`rlsn* z_shM|_9X?bC)gLaKYCS;ydH&MSGWYy`jVGVID}6zM2u$2CFoQUx)x$NULgb z@t30mI&RP=)HfB59x}ZPJziSf6~ANEJjACu--Phr{2eI0xKQj~DPY0v9=UI1a46*~ zm520p(XYhrqhhqO+OA1sbbh+sY;*MB%RL=P*W%NdY#t!C%FWB>X1`~uL+S|lfl{2a z#JXY`kE*@EvWZ~NHUaiZ0Yex5I!=(eeJEJ(WG!sJ>8#UqFy`X#x9<9_6YF)paa$Qp zKvSkGR+P2d^)<@-Tx9qw4%6(AOu*8&PZ2!J(Hi|bU>QTDcP7mh_l}QbU1e5dK9Nqk zrTDTdGexu~2MM>RF#X98IST7jTar_ZGjk4&i!qo3LFCU}4qdfUaYUU1sq{}w#_Q?C zCT!=<#VYDNv4Jl?a>+jWN-`>21iqKu5YxKKW5et0YrzF?i2AOZYO`u#ix3o)k?vHx zL{KCu26olI5X)&aaC_3kYh%6b6v>Uc7w|P>6ucT3A^-aJs0}qdTg321i&)2=l1{1P zC3O!U=fr9=mxP7!DtAdOg~$o&{qWa;Y#SrL_ZmOllMRT4czo5n4Y#dVaF}SOzeK&N z3;*`c2#C97+Ov?7tHZ(dQTgF(n#R4y4T3#Qtw-&%D-x4pT2Z^#cVRB0SbW4rCg~#} z#2TIslN=-PG9V*T9&eKE?IJ46AT$KgDgAEVQ8dGXmX1v^z{)v75u;t|cN_f!og;g( zCn+N7xU;UB=C%uQg^M(?z1Bh_ee)JTX^~w>0T@a#ur5XrVs8oC&kV(^e|8Bd|BmA6 zaZEK^$+J&idUC}Nar6K(dcNM0&+cK&k34Rn75|4q^W8vwqE?j(E>j*K99uwbsCX+n zK3>eznmffa7IoP?JN+AF_eYTreyrAXhTlfS*d*C?`JO0hvS$Jr#?U}#p>R4O6I&`J z&5s$cI{{YpjXhGQ$^8ng`l1ZdoB$&4gytvBO8cS0|LwZpl)Y=#a9f z-?W;WpOsH7oQSA>XNEYFabFuljieK3qFTOdnDB`uA|4xD$93z$cg3cS3QA5f)T1bl zJ2In*p}NY4l9ztqBfH2B!DQbT6#mNc^**Nw3lYLe2ZSMNWxp!;8K$OC)gBe+nWQ>i zQ0O)Y)>lFs+5i4MBC+To?G)DYH^-ZI4SJ+EpY;TUcu6 zjd?HZFG7Ui4XUm*&e!~4P4zIX$+P)MKR}h&)E;cX9d!w*?%vdk^yF)Md27dBkjR@b=Gu6q#OL1qlyGK@Q)ZIK+`T=q+Lv5cwpLEe@v`Y<;^Q0siQlKD6rlWN ztfJdRjt!XOiCLY_HpBT{j!|UeB$r{ou-mQ1;RUHO=n*w29=spQ4&NV2?7En%-b7r9 zJ)RojT6g?9cI$*b=^oIQQeV*I(N(~4`hG@D3a zS(#SrH3gjQxTP`AM1y5`X{x0@y0a3SjCf~u77!y2=y_+P^rRB%y(Vf@#$^edG=AeR z(;DmYvQ{|!0AGY!e5+&jS{q_Dl22kgSfAvr4$S1gaE;4z;AT9U!!lj|kV8p+=&wc( 
zjRs@Z>;43y)I)0vj^|3INUrx$x>K9U>&wh!DO;&{V5TEKB<2!^L`e8J_?);9T^~qFHBNU(FvYMVt$c zZ;k1G`I00{(t7N3KWK}G(-Z=9RRh`vFaR{ za~3Rvygl@TSGD9{jrok!WM<;IJryvGNiZ9{7H#bh9}cYnJ;z!ml&1hi=Ow;V(h#j; ztMoO%8=bfU#dvT@YtX8}C(CbEUzIpcY9_k{)r)Li_$vl5)$cB9x9@+}oxq}|3MaV1_F}SvB z9ZjYVw8OWjV}jFuRA^ftf53j)QNRVh&WSu8c5t z=7Gik7LqH5Npc{ML3#OO#6VWzt~0hVy!-EG8D|Z6lEFQiynQ9ntZS3}mNfy3iVHe$ z*U^M4Qsd1NsEc~PEXS8A;U%OqELZ7Jq_Ng16zNky;H#69@?PXI*h`EoHfn9&-mn}7 z{Cm$Lv5%ap)iaHU{7hf6TT8_m&Y`fX5@24!M(-uhC0wtmA}&^GkOSy9Z*)bYF`Fs( z_XBNG%?Llt&5upUgop!A|JI6Gi5H3% zLqYt>j?=bB$V-g+b~{YdhaKczAS{9=+j~5`(~bCTmPgCyvbc^T*HvzwZy-)5?>c~{ z57Ao1F%2Q?Q||}L+YKLozIE)2)RaZ5AC~}BB~B(vZ`sP6pwF`S^DrLmjtJmS5}+El zK>m#?R+z45^MaTQk|{f8{IS~??4$;#zgU8=tOr**XH()k7Xf0sA}BtxJ@IXvK@it8 zm=(O^Eu@tAGDKilIr#==U&w7#7p^aQ{YYC~E!96jLx^*>FsQ001rF-a0V@G{lR#?} zl9O<-P~gwPw2@ExQRNpqG*8uB_em0tHTU)}GkIm)fulPosiW6s(N{p#qB)CU0-ue^ zdLo0T#7gE^_^skP;V))g^eb(7C|tol1k`fY>jt|OxjVP3>X*@-G?R}f-^s2LX`5Z* zdKtQEv*mp@%Za;d{EKoN?lK*;ws$XcfiH3A5pw>tWH_xvoT8)5N?Bx##blAM3 zNW_$eIHgq+@NQWNWuAao{kzI3oJ~{9+z7;To1=2aJY%?7$O4f4&&U?1mF9_AI+)>XRaf#)9{nU+E z=mq*9txV+~-VQR95HzgQ`KIGVV;K-+rk-QbJ zZ<_;e5-nY*F^J6CPWgE|Hni&Cr3?G+Ih7qUTy){lT98u@(<h*F$Ge= z6fRh|iuq1H1#ua(WgmoHwU^>f_i|0HvT*(jM7!Kz5~tk*^jP! 
z@?q%AnC9}^9?|kC!7=SY@-=nsT0M?awzM0CN7q?9nq9Y;JnSZMA+yaADlqi~U5bE8 zr#9WU`|M0n{&ynq?_40`Pbqb@?DBg5G5-koXTtd(_|rN6@PB6rT>r!WS#9~ph{8(B hfPe12m%m!F#RdQh2*v-$u>Z^x{-1;Y**p9%{R`e@93cPz literal 0 HcmV?d00001 diff --git a/tests/checksum.expected_output b/tests/checksum.expected_output new file mode 100644 index 0000000..347a927 --- /dev/null +++ b/tests/checksum.expected_output @@ -0,0 +1 @@ +0lMS5ސWA`҉t<( Ee{Dc"Ӻ0pBV*l#6Zl9n+{$BԘiu_~gBl˭`j?"{O=> \ No newline at end of file diff --git a/tests/checksum.spvasm b/tests/checksum.spvasm new file mode 100644 index 0000000..05013ce --- /dev/null +++ b/tests/checksum.spvasm @@ -0,0 +1,135 @@ +; SPIR-V +; Version: 1.6 +; Generator: Khronos SPIR-V Tools Assembler; 0 +; Bound: 92 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "NonSemantic.ClspvReflection.5" + %82 = OpExtInstImport "NonSemantic.VkspReflection.2" + OpMemoryModel Logical GLSL450 + OpEntryPoint GLCompute %2 "test_simple" %gl_GlobalInvocationID %4 %5 %6 %7 + + ; Debug Information + OpSource OpenCL_C 120 + %8 = OpString "test_simple" + %9 = OpString " kernel" + %10 = OpString "input" + %11 = OpString "uint*" + %12 = OpString "output" + %13 = OpString "uint*" + %14 = OpString "loop" + %15 = OpString "uint" + + ; Annotations + OpDecorate %gl_GlobalInvocationID BuiltIn GlobalInvocationId + OpDecorate %gl_WorkGroupSize BuiltIn WorkgroupSize + OpMemberDecorate %_struct_17 0 Offset 0 + OpMemberDecorate %_struct_18 0 Offset 0 + OpMemberDecorate %_struct_18 1 Offset 16 + OpMemberDecorate %_struct_18 2 Offset 32 + OpDecorate %_struct_18 Block + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %_struct_20 0 Offset 0 + OpDecorate %_struct_20 Block + OpDecorate %5 DescriptorSet 0 + OpDecorate %5 Binding 0 + OpDecorate %6 DescriptorSet 0 + OpDecorate %6 Binding 1 + OpDecorate %21 SpecId 0 + OpDecorate %22 SpecId 1 + OpDecorate %23 SpecId 2 + + ; Types, variables and constants + %uint = OpTypeInt 32 0 + %v3uint = OpTypeVector %uint 3 +%_ptr_Input_v3uint = 
OpTypePointer Input %v3uint + %21 = OpSpecConstant %uint 1 + %22 = OpSpecConstant %uint 1 + %23 = OpSpecConstant %uint 1 +%gl_WorkGroupSize = OpSpecConstantComposite %v3uint %21 %22 %23 +%_ptr_Private_v3uint = OpTypePointer Private %v3uint + %_struct_17 = OpTypeStruct %uint + %_struct_18 = OpTypeStruct %v3uint %v3uint %_struct_17 +%_ptr_PushConstant__struct_18 = OpTypePointer PushConstant %_struct_18 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %_struct_20 = OpTypeStruct %_runtimearr_uint +%_ptr_StorageBuffer__struct_20 = OpTypePointer StorageBuffer %_struct_20 + %void = OpTypeVoid + %31 = OpTypeFunction %void +%_ptr_PushConstant_uint = OpTypePointer PushConstant %uint + %uint_2 = OpConstant %uint 2 + %uint_0 = OpConstant %uint 0 +%_ptr_Input_uint = OpTypePointer Input %uint + %uint_1 = OpConstant %uint 1 + %bool = OpTypeBool +%_ptr_StorageBuffer_uint = OpTypePointer StorageBuffer %uint + %uint_3 = OpConstant %uint 3 + %uint_4507 = OpConstant %uint 4507 + %uint_4515 = OpConstant %uint 4515 + %uint_4510 = OpConstant %uint 4510 + %uint_32 = OpConstant %uint 32 + %uint_4 = OpConstant %uint 4 + %uint_12 = OpConstant %uint 12 + %uint_16 = OpConstant %uint 16 +%gl_GlobalInvocationID = OpVariable %_ptr_Input_v3uint Input + %4 = OpVariable %_ptr_Private_v3uint Private %gl_WorkGroupSize + %7 = OpVariable %_ptr_PushConstant__struct_18 PushConstant + %5 = OpVariable %_ptr_StorageBuffer__struct_20 StorageBuffer + %6 = OpVariable %_ptr_StorageBuffer__struct_20 StorageBuffer + %83 = OpExtInst %void %82 Configuration ".VK_KHR_8bit_storage.VK_KHR_16bit_storage.VK_KHR_buffer_device_address.VK_KHR_driver_properties.VK_KHR_shader_float16_int8.VK_KHR_shader_float_controls.VK_KHR_shader_non_semantic_info.VK_KHR_storage_buffer_storage_class.VK_KHR_uniform_buffer_standard_layout.VK_KHR_variable_pointers.VK_KHR_vulkan_memory_model.VK_EXT_calibrated_timestamps.VK_EXT_descriptor_indexing" 12 "020000000100000001000000" "vksp_s0" "test_simple" 16 1 1 0 + %84 = OpExtInst %void %82 
PushConstants 16 12 "000000000000000000000000" 32 + %85 = OpExtInst %void %82 PushConstants 32 4 "20000000" 32 + %86 = OpExtInst %void %82 PushConstants 0 12 "000000000000000000000000" 32 + %87 = OpExtInst %void %82 DescriptorSetBuffer 0 0 7 0 0 0 4096 63 4096 0 4096 0 0 + %88 = OpExtInst %void %82 DescriptorSetBuffer 0 1 7 0 0 0 128 63 128 0 128 0 0 + %89 = OpExtInst %void %82 SpecializationMapEntry 0 0 4 + %90 = OpExtInst %void %82 SpecializationMapEntry 1 4 4 + %91 = OpExtInst %void %82 SpecializationMapEntry 2 8 4 + + ; Function 2 + %2 = OpFunction %void None %31 + %47 = OpLabel + %48 = OpAccessChain %_ptr_PushConstant_uint %7 %uint_2 %uint_0 + %49 = OpLoad %uint %48 + %50 = OpAccessChain %_ptr_Input_uint %gl_GlobalInvocationID %uint_0 + %51 = OpLoad %uint %50 + %52 = OpAccessChain %_ptr_PushConstant_uint %7 %uint_1 %uint_0 + %53 = OpLoad %uint %52 + %54 = OpIAdd %uint %53 %51 + %55 = OpINotEqual %bool %49 %uint_0 + OpSelectionMerge %56 None + OpBranchConditional %55 %57 %56 + %57 = OpLabel + %58 = OpIMul %uint %54 %49 + OpBranch %59 + %59 = OpLabel + %60 = OpPhi %uint %61 %59 %uint_0 %57 + %62 = OpPhi %uint %63 %59 %uint_0 %57 + %64 = OpIAdd %uint %58 %62 + %65 = OpAccessChain %_ptr_StorageBuffer_uint %5 %uint_0 %64 + %66 = OpLoad %uint %65 + %61 = OpIAdd %uint %66 %60 + %63 = OpIAdd %uint %62 %uint_1 + %67 = OpUGreaterThanEqual %bool %63 %49 + OpLoopMerge %68 %59 None + OpBranchConditional %67 %68 %59 + %68 = OpLabel + OpBranch %56 + %56 = OpLabel + %69 = OpPhi %uint %61 %68 %uint_0 %47 + OpBranch %70 + %70 = OpLabel + %71 = OpAccessChain %_ptr_StorageBuffer_uint %6 %uint_0 %54 + OpStore %71 %69 + OpReturn + OpFunctionEnd + %72 = OpExtInst %void %1 PushConstantGlobalOffset %uint_0 %uint_12 + %73 = OpExtInst %void %1 PushConstantRegionOffset %uint_16 %uint_12 + %74 = OpExtInst %void %1 Kernel %2 %8 %uint_3 %uint_0 %9 + %75 = OpExtInst %void %1 ArgumentInfo %10 %11 %uint_4507 %uint_4515 %uint_0 + %76 = OpExtInst %void %1 ArgumentStorageBuffer %74 %uint_0 
%uint_0 %uint_0 %75 + %77 = OpExtInst %void %1 ArgumentInfo %12 %13 %uint_4507 %uint_4515 %uint_0 + %78 = OpExtInst %void %1 ArgumentStorageBuffer %74 %uint_1 %uint_0 %uint_1 %77 + %79 = OpExtInst %void %1 ArgumentInfo %14 %15 %uint_4510 %uint_4515 %uint_0 + %80 = OpExtInst %void %1 ArgumentPodPushConstant %74 %uint_2 %uint_32 %uint_4 %79 + %81 = OpExtInst %void %1 SpecConstantWorkgroupSize %uint_0 %uint_1 %uint_2 diff --git a/tests/test-buffers.sh b/tests/test-buffers.sh new file mode 100755 index 0000000..e01e5e2 --- /dev/null +++ b/tests/test-buffers.sh @@ -0,0 +1,25 @@ +#!/usr/bin/bash + +set -xe + +SCRIPT_DIR=$(dirname $(realpath "${BASH_SOURCE[0]}")) + +VKSP_RUNNER=${VKSP_RUNNER:-"vulkan-shader-profiler-runner"} + +PERFETTO_OUTPUT_TRACE="${SCRIPT_DIR}/trace" +CHECKSUM_OUTPUT_BUFFER="${SCRIPT_DIR}/checksum.spvasm.0.1.buffer" +CHECKSUM_INPUT_BUFFERS="${SCRIPT_DIR}/checksum.spvasm.buffers" +function clean() { + rm -f ${PERFETTO_OUTPUT_TRACE} ${CHECKSUM_OUTPUT_BUFFER} ${CHECKSUM_INPUT_BUFFERS} +} +trap clean EXIT + +${VKSP_RUNNER} -i ${SCRIPT_DIR}/checksum.spvasm -b ${SCRIPT_DIR}/checksum.buffers -o 0.1 +diff ${CHECKSUM_OUTPUT_BUFFER} ${SCRIPT_DIR}/checksum.expected_output + +PERFETTO_OUTPUT_TRACE=${PERFETTO_OUTPUT_TRACE} \ +VK_ADD_LAYER_PATH="${SCRIPT_DIR}/../manifest/" \ +VK_LOADER_LAYERS_ENABLE="VK_LAYER_SHADER_PROFILER" \ +VKSP_EXTRACT_BUFFERS_FROM="${SCRIPT_DIR}/checksum.spvasm" \ +${SCRIPT_DIR}/perfetto.sh ${VKSP_RUNNER} -i ${SCRIPT_DIR}/checksum.spvasm -b ${SCRIPT_DIR}/checksum.buffers +diff ${CHECKSUM_INPUT_BUFFERS} ${SCRIPT_DIR}/checksum.buffers From d4841a8aa3ff8d7b7adb2562493e20024b7415ca Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Thu, 18 Apr 2024 12:36:16 +0200 Subject: [PATCH 09/12] chromeos-utils: allow to have other layer with vksp --- chromeos-utils/vulkan-shader-profiler.sh | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/chromeos-utils/vulkan-shader-profiler.sh b/chromeos-utils/vulkan-shader-profiler.sh index 
e4b6f75..d7a6b20 100755 --- a/chromeos-utils/vulkan-shader-profiler.sh +++ b/chromeos-utils/vulkan-shader-profiler.sh @@ -16,12 +16,18 @@ set -x +LAYERS="VK_LAYER_SHADER_PROFILER" +if [[ -n "${VK_LOADER_LAYERS_ENABLE}" ]] +then + LAYERS+=",${VK_LOADER_LAYERS_ENABLE}" +fi + if [[ "$1" == "-e" ]] then shift SPV_FILE="$1" shift - VKSP_EXTRACT_BUFFERS_FROM="${SPV_FILE}" VK_LOADER_LAYERS_ENABLE="VK_LAYER_SHADER_PROFILER" "$@" + VKSP_EXTRACT_BUFFERS_FROM="${SPV_FILE}" VK_LOADER_LAYERS_ENABLE="${LAYERS}" "$@" else - VK_LOADER_LAYERS_ENABLE="VK_LAYER_SHADER_PROFILER" "$@" + VK_LOADER_LAYERS_ENABLE="${LAYERS}" "$@" fi From c194e1636790db22fb4215e5d3599394c0083e6c Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Thu, 18 Apr 2024 12:36:57 +0200 Subject: [PATCH 10/12] vvl: make sure to enable timeline semaphore --- layer/layer.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/layer/layer.cpp b/layer/layer.cpp index 1a3cb7f..b466f3a 100644 --- a/layer/layer.cpp +++ b/layer/layer.cpp @@ -1401,11 +1401,16 @@ VkResult VKAPI_CALL vksp_CreateDevice(VkPhysicalDevice physicalDevice, const VkD ppEnabledExtensionNames.push_back(pCreateInfo->ppEnabledExtensionNames[i]); } ppEnabledExtensionNames.push_back(VK_EXT_CALIBRATED_TIMESTAMPS_EXTENSION_NAME); + ppEnabledExtensionNames.push_back(VK_KHR_TIMELINE_SEMAPHORE_EXTENSION_NAME); VkDeviceCreateInfo mCreateInfo = *pCreateInfo; mCreateInfo.enabledExtensionCount = ppEnabledExtensionNames.size(); mCreateInfo.ppEnabledExtensionNames = ppEnabledExtensionNames.data(); + VkPhysicalDeviceTimelineSemaphoreFeatures timelineSemaphoreFeature + = { VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES, (void *)mCreateInfo.pNext, VK_TRUE }; + mCreateInfo.pNext = &timelineSemaphoreFeature; + VkResult ret = createFunc(physicalDevice, &mCreateInfo, pAllocator, pDevice); if (ret != VK_SUCCESS) { return ret; From 3ec7f08917742d5031c8ecc98d19d5a3bde33aca Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Fri, 19 Apr 2024 14:49:12 +0200 
Subject: [PATCH 11/12] runner: fix initialization of input buffers --- runner/runner.cpp | 209 +++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 186 insertions(+), 23 deletions(-) diff --git a/runner/runner.cpp b/runner/runner.cpp index 1d1719a..c03b5d0 100644 --- a/runner/runner.cpp +++ b/runner/runner.cpp @@ -210,24 +210,69 @@ static int get_device_queue_and_cmd_buffer(VkPhysicalDevice &pDevice, VkDevice & return 0; } -static uint32_t initialize_memory( - VkDevice device, VkDeviceMemory memory, uint32_t ds, uint32_t binding, uint32_t memory_size) +static uint32_t initialize_buffer(VkDevice device, VkCommandBuffer cmdBuffer, + VkPhysicalDeviceMemoryProperties &memProperties, vksp::vksp_descriptor_set &ds, VkBuffer shaderBuffer) { - if (gBuffersMap != nullptr) { - vksp::buffer_map_key key = std::make_pair(ds, binding); - auto find = gBuffersMap->find(key); - if (find != gBuffersMap->end()) { - void *memory_data; - void *buffer_data = find->second.second; - uint32_t buffer_size = find->second.first; - CHECK(memory_size == buffer_size, "memory sizes does not match"); - - VkResult res = vkMapMemory(device, memory, 0, memory_size, 0, &memory_data); - CHECK_VK(res, "Could not map memory"); - memcpy(memory_data, buffer_data, memory_size); - vkUnmapMemory(device, memory); + if (gBuffersMap == nullptr) { + return 0; + } + uint32_t dstSet = ds.ds; + uint32_t dstBinding = ds.binding; + vksp::buffer_map_key key = std::make_pair(dstSet, dstBinding); + auto find = gBuffersMap->find(key); + if (find == gBuffersMap->end()) { + return 0; + } + void *buffer_data = find->second.second; + CHECK(find->second.first == ds.buffer.memorySize, "mismatch in memorySize during initialization buffer"); + + VkBuffer buffer; + const VkBufferCreateInfo pCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, ds.buffer.size, + VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr }; + + VkResult res = vkCreateBuffer(device, &pCreateInfo, nullptr, &buffer); + 
CHECK_VK(res, "Could not create initialization buffer"); + gBuffers.push_back(buffer); + + VkMemoryRequirements memreqs; + vkGetBufferMemoryRequirements(device, buffer, &memreqs); + bool memoryTypeFound = false; + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + auto dev_properties = memProperties.memoryTypes[i].propertyFlags; + bool valid = (1ULL << i) & memreqs.memoryTypeBits; + auto required_properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + bool satisfactory = (dev_properties & required_properties) == required_properties; + if (satisfactory && valid) { + ds.buffer.memoryType = i; + memoryTypeFound = true; + break; } } + CHECK(memoryTypeFound, "Could not find memoryType for initialization buffer"); + + const VkMemoryAllocateInfo pAllocateInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + nullptr, + ds.buffer.memorySize, + ds.buffer.memoryType, + }; + VkDeviceMemory memory; + res = vkAllocateMemory(device, &pAllocateInfo, nullptr, &memory); + CHECK_VK(res, "Could not allocate memory for initialization buffer"); + gMemories.push_back(memory); + + res = vkBindBufferMemory(device, buffer, memory, ds.buffer.bindOffset); + CHECK_VK(res, "Could not bind buffer memory for initialization buffer"); + + void *memory_data; + res = vkMapMemory(device, memory, 0, ds.buffer.memorySize, 0, &memory_data); + CHECK_VK(res, "Could not map memory for initialization buffer"); + memcpy(memory_data, buffer_data, ds.buffer.memorySize); + vkUnmapMemory(device, memory); + + const VkBufferCopy pRegion = { 0, 0, ds.buffer.size }; + vkCmdCopyBuffer(cmdBuffer, buffer, shaderBuffer, 1, &pRegion); + return 0; } @@ -282,12 +327,12 @@ static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDe gOutputMemorySize = ds.buffer.memorySize; } - CHECK(initialize_memory(device, memory, ds.ds, ds.binding, ds.buffer.memorySize) == 0, - "Could not initialize memory for buffer"); - res = vkBindBufferMemory(device, buffer, memory, 
ds.buffer.bindOffset); CHECK_VK(res, "Could not bind buffer and memory"); + CHECK( + initialize_buffer(device, cmdBuffer, memProperties, ds, buffer) == 0, "Could not initialize memory for buffer"); + const VkDescriptorBufferInfo bufferInfo = { buffer, ds.buffer.offset, ds.buffer.range }; const VkWriteDescriptorSet write = { VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, @@ -311,6 +356,102 @@ static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDe return 0; } +static uint32_t initialize_image(VkDevice device, VkCommandBuffer cmdBuffer, + VkPhysicalDeviceMemoryProperties &memProperties, vksp::vksp_descriptor_set &ds, VkImage shaderImage, + VkImageSubresourceRange &subresourceRange) +{ + if (gBuffersMap == nullptr) { + return 0; + } + uint32_t dstSet = ds.ds; + uint32_t dstBinding = ds.binding; + vksp::buffer_map_key key = std::make_pair(dstSet, dstBinding); + auto find = gBuffersMap->find(key); + if (find == gBuffersMap->end()) { + return 0; + } + void *image_data = find->second.second; + CHECK(find->second.first == ds.image.memorySize, "mismatch in memorySize during initialization buffer"); + + VkImage image; + VkExtent3D extent = { ds.image.width, ds.image.height, ds.image.depth }; + const VkImageCreateInfo pCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, nullptr, ds.image.imageFlags, + (VkImageType)ds.image.imageType, (VkFormat)ds.image.format, extent, ds.image.mipLevels, ds.image.arrayLayers, + (VkSampleCountFlagBits)ds.image.samples, (VkImageTiling)ds.image.tiling, VK_IMAGE_USAGE_TRANSFER_SRC_BIT, + VK_SHARING_MODE_EXCLUSIVE, 0, nullptr, (VkImageLayout)ds.image.initialLayout }; + + VkResult res = vkCreateImage(device, &pCreateInfo, nullptr, &image); + CHECK_VK(res, "Could not create initialization image"); + gImages.push_back(image); + + VkMemoryRequirements memreqs; + vkGetImageMemoryRequirements(device, image, &memreqs); + bool memoryTypeFound = false; + for (uint32_t i = 0; i < memProperties.memoryTypeCount; i++) { + auto dev_properties 
= memProperties.memoryTypes[i].propertyFlags; + bool valid = (1ULL << i) & memreqs.memoryTypeBits; + auto required_properties = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT; + bool satisfactory = (dev_properties & required_properties) == required_properties; + if (satisfactory && valid) { + ds.image.memoryType = i; + memoryTypeFound = true; + break; + } + } + CHECK(memoryTypeFound, "Could not find memoryType for initialization image"); + + const VkMemoryAllocateInfo pAllocateInfo = { + VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + nullptr, + ds.image.memorySize, + ds.image.memoryType, + }; + + VkDeviceMemory memory; + res = vkAllocateMemory(device, &pAllocateInfo, nullptr, &memory); + CHECK_VK(res, "Could not allocate memory for initialization image"); + gMemories.push_back(memory); + + res = vkBindImageMemory(device, image, memory, ds.image.bindOffset); + CHECK_VK(res, "Could not bind memory for initialization image"); + + void *memory_data; + res = vkMapMemory(device, memory, 0, ds.image.memorySize, 0, &memory_data); + CHECK_VK(res, "Could not map memory for initialization image"); + memcpy(memory_data, image_data, ds.image.memorySize); + vkUnmapMemory(device, memory); + + VkImageMemoryBarrier imageBarrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + 0, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + (VkImageLayout)ds.image.initialLayout, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + 0, + 0, + image, + subresourceRange, + }; + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, + nullptr, 0, nullptr, 1, &imageBarrier); + + const VkImageCopy pRegion = { + .srcSubresource + = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 }, + .srcOffset = { 0, 0, 0 }, + .dstSubresource + = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 }, + .dstOffset = { 0, 0, 0 }, + .extent = { 
ds.image.width, ds.image.height, ds.image.depth }, + }; + + vkCmdCopyImage( + cmdBuffer, image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, shaderImage, VK_IMAGE_LAYOUT_GENERAL, 1, &pRegion); + + return 0; +} + static uint32_t handle_descriptor_set_image(vksp::vksp_descriptor_set &ds, VkDevice device, VkCommandBuffer cmdBuffer, VkPhysicalDeviceMemoryProperties &memProperties, std::vector &descSet) { @@ -344,18 +485,40 @@ static uint32_t handle_descriptor_set_image(vksp::vksp_descriptor_set &ds, VkDev gOutputMemorySize = ds.image.memorySize; } - CHECK(initialize_memory(device, memory, ds.ds, ds.binding, ds.image.memorySize) == 0, - "Could not initalize memory for image"); - res = vkBindImageMemory(device, image, memory, ds.image.bindOffset); CHECK_VK(res, "Could not bind image and memory"); + VkImageSubresourceRange subresourceRange = { ds.image.aspectMask, ds.image.baseMipLevel, ds.image.levelCount, + ds.image.baseArrayLayer, ds.image.layerCount }; + + VkImageMemoryBarrier imageBarrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + 0, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + (VkImageLayout)ds.image.initialLayout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 0, + 0, + image, + subresourceRange, + }; + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, + nullptr, 0, nullptr, 1, &imageBarrier); + + CHECK(initialize_image(device, cmdBuffer, memProperties, ds, image, subresourceRange) == 0, "Could not initalize memory for image"); + + imageBarrier.srcAccessMask = imageBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; + imageBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + imageBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL; + + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, + nullptr, 0, nullptr, 1, &imageBarrier); + VkImageView image_view; VkComponentMapping components = { 
(VkComponentSwizzle)ds.image.component_r, (VkComponentSwizzle)ds.image.component_g, (VkComponentSwizzle)ds.image.component_b, (VkComponentSwizzle)ds.image.component_a }; - VkImageSubresourceRange subresourceRange = { ds.image.aspectMask, ds.image.baseMipLevel, ds.image.levelCount, - ds.image.baseArrayLayer, ds.image.layerCount }; const VkImageViewCreateInfo pViewInfo = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, nullptr, ds.image.viewFlags, image, (VkImageViewType)ds.image.viewType, (VkFormat)ds.image.viewFormat, components, subresourceRange }; res = vkCreateImageView(device, &pViewInfo, nullptr, &image_view); From e9778aa6646b11c4d8b02f376b8c4bdd826070f2 Mon Sep 17 00:00:00 2001 From: Romaric Jodin Date: Fri, 19 Apr 2024 18:12:04 +0200 Subject: [PATCH 12/12] runner: fix output buffer when output is not host_visible --- runner/runner.cpp | 197 ++++++++++++++++++++++++++++++++-------------- 1 file changed, 140 insertions(+), 57 deletions(-) diff --git a/runner/runner.cpp b/runner/runner.cpp index c03b5d0..4700146 100644 --- a/runner/runner.cpp +++ b/runner/runner.cpp @@ -58,9 +58,12 @@ static bool gDisableCounters = false; static uint32_t gPriority = UINT32_MAX; static uint32_t gOutputDs = UINT32_MAX; static uint32_t gOutputBinding = UINT32_MAX; -static VkDeviceMemory gOutputMemory = VK_NULL_HANDLE; -static VkDeviceSize gOutputMemorySize; +static VkBuffer gOutputBuffer = VK_NULL_HANDLE; +static VkImage gOutputImage = VK_NULL_HANDLE; +static vksp::vksp_descriptor_set *gOutputDsPtr = nullptr; static std::string gOutputString; +static VkDeviceMemory gOutputMemory; +static VkDeviceSize gOutputMemorySize; static const uint32_t gNbGpuTimestamps = 3; @@ -210,23 +213,9 @@ static int get_device_queue_and_cmd_buffer(VkPhysicalDevice &pDevice, VkDevice & return 0; } -static uint32_t initialize_buffer(VkDevice device, VkCommandBuffer cmdBuffer, - VkPhysicalDeviceMemoryProperties &memProperties, vksp::vksp_descriptor_set &ds, VkBuffer shaderBuffer) +static uint32_t 
allocate_buffer(VkDevice device, VkPhysicalDeviceMemoryProperties &memProperties, + vksp::vksp_descriptor_set &ds, VkBuffer &buffer, VkDeviceMemory &memory) { - if (gBuffersMap == nullptr) { - return 0; - } - uint32_t dstSet = ds.ds; - uint32_t dstBinding = ds.binding; - vksp::buffer_map_key key = std::make_pair(dstSet, dstBinding); - auto find = gBuffersMap->find(key); - if (find == gBuffersMap->end()) { - return 0; - } - void *buffer_data = find->second.second; - CHECK(find->second.first == ds.buffer.memorySize, "mismatch in memorySize during initialization buffer"); - - VkBuffer buffer; const VkBufferCreateInfo pCreateInfo = { VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, ds.buffer.size, VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr }; @@ -256,7 +245,6 @@ static uint32_t initialize_buffer(VkDevice device, VkCommandBuffer cmdBuffer, ds.buffer.memorySize, ds.buffer.memoryType, }; - VkDeviceMemory memory; res = vkAllocateMemory(device, &pAllocateInfo, nullptr, &memory); CHECK_VK(res, "Could not allocate memory for initialization buffer"); gMemories.push_back(memory); @@ -264,8 +252,31 @@ static uint32_t initialize_buffer(VkDevice device, VkCommandBuffer cmdBuffer, res = vkBindBufferMemory(device, buffer, memory, ds.buffer.bindOffset); CHECK_VK(res, "Could not bind buffer memory for initialization buffer"); + return 0; +} + +static uint32_t initialize_buffer(VkDevice device, VkCommandBuffer cmdBuffer, + VkPhysicalDeviceMemoryProperties &memProperties, vksp::vksp_descriptor_set &ds, VkBuffer shaderBuffer) +{ + if (gBuffersMap == nullptr) { + return 0; + } + uint32_t dstSet = ds.ds; + uint32_t dstBinding = ds.binding; + vksp::buffer_map_key key = std::make_pair(dstSet, dstBinding); + auto find = gBuffersMap->find(key); + if (find == gBuffersMap->end()) { + return 0; + } + void *buffer_data = find->second.second; + CHECK(find->second.first == ds.buffer.memorySize, "mismatch in memorySize during initialization buffer"); + + VkBuffer buffer; 
+ VkDeviceMemory memory; + CHECK(allocate_buffer(device, memProperties, ds, buffer, memory) == 0, "Could not allocate initialization buffer"); + void *memory_data; - res = vkMapMemory(device, memory, 0, ds.buffer.memorySize, 0, &memory_data); + VkResult res = vkMapMemory(device, memory, 0, ds.buffer.memorySize, 0, &memory_data); CHECK_VK(res, "Could not map memory for initialization buffer"); memcpy(memory_data, buffer_data, ds.buffer.memorySize); vkUnmapMemory(device, memory); @@ -291,6 +302,11 @@ static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDe CHECK_VK(res, "Could not create buffer"); gBuffers.push_back(buffer); + if (gOutputDs == ds.ds && gOutputBinding == ds.binding) { + gOutputBuffer = buffer; + gOutputDsPtr = &ds; + } + if (bCounter) { VkMemoryRequirements memreqs; vkGetBufferMemoryRequirements(device, buffer, &memreqs); @@ -322,11 +338,6 @@ static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDe CHECK_VK(res, "Could not allocate memory for buffer"); gMemories.push_back(memory); - if (gOutputDs == ds.ds && gOutputBinding == ds.binding) { - gOutputMemory = memory; - gOutputMemorySize = ds.buffer.memorySize; - } - res = vkBindBufferMemory(device, buffer, memory, ds.buffer.bindOffset); CHECK_VK(res, "Could not bind buffer and memory"); @@ -356,24 +367,9 @@ static uint32_t handle_descriptor_set_buffer(vksp::vksp_descriptor_set &ds, VkDe return 0; } -static uint32_t initialize_image(VkDevice device, VkCommandBuffer cmdBuffer, - VkPhysicalDeviceMemoryProperties &memProperties, vksp::vksp_descriptor_set &ds, VkImage shaderImage, - VkImageSubresourceRange &subresourceRange) +static uint32_t allocate_image(VkDevice device, VkPhysicalDeviceMemoryProperties &memProperties, + vksp::vksp_descriptor_set &ds, VkImage &image, VkDeviceMemory &memory) { - if (gBuffersMap == nullptr) { - return 0; - } - uint32_t dstSet = ds.ds; - uint32_t dstBinding = ds.binding; - vksp::buffer_map_key key = std::make_pair(dstSet, 
dstBinding); - auto find = gBuffersMap->find(key); - if (find == gBuffersMap->end()) { - return 0; - } - void *image_data = find->second.second; - CHECK(find->second.first == ds.image.memorySize, "mismatch in memorySize during initialization buffer"); - - VkImage image; VkExtent3D extent = { ds.image.width, ds.image.height, ds.image.depth }; const VkImageCreateInfo pCreateInfo = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, nullptr, ds.image.imageFlags, (VkImageType)ds.image.imageType, (VkFormat)ds.image.format, extent, ds.image.mipLevels, ds.image.arrayLayers, @@ -407,7 +403,6 @@ static uint32_t initialize_image(VkDevice device, VkCommandBuffer cmdBuffer, ds.image.memoryType, }; - VkDeviceMemory memory; res = vkAllocateMemory(device, &pAllocateInfo, nullptr, &memory); CHECK_VK(res, "Could not allocate memory for initialization image"); gMemories.push_back(memory); @@ -415,8 +410,32 @@ static uint32_t initialize_image(VkDevice device, VkCommandBuffer cmdBuffer, res = vkBindImageMemory(device, image, memory, ds.image.bindOffset); CHECK_VK(res, "Could not bind memory for initialization image"); + return 0; +} + +static uint32_t initialize_image(VkDevice device, VkCommandBuffer cmdBuffer, + VkPhysicalDeviceMemoryProperties &memProperties, vksp::vksp_descriptor_set &ds, VkImage shaderImage, + VkImageSubresourceRange &subresourceRange) +{ + if (gBuffersMap == nullptr) { + return 0; + } + uint32_t dstSet = ds.ds; + uint32_t dstBinding = ds.binding; + vksp::buffer_map_key key = std::make_pair(dstSet, dstBinding); + auto find = gBuffersMap->find(key); + if (find == gBuffersMap->end()) { + return 0; + } + void *image_data = find->second.second; + CHECK(find->second.first == ds.image.memorySize, "mismatch in memorySize during initialization buffer"); + + VkImage image; + VkDeviceMemory memory; + CHECK(allocate_image(device, memProperties, ds, image, memory) == 0, "Could not allocate initialization image"); + void *memory_data; - res = vkMapMemory(device, memory, 0, 
ds.image.memorySize, 0, &memory_data); + VkResult res = vkMapMemory(device, memory, 0, ds.image.memorySize, 0, &memory_data); CHECK_VK(res, "Could not map memory for initialization image"); memcpy(memory_data, image_data, ds.image.memorySize); vkUnmapMemory(device, memory); @@ -468,6 +487,11 @@ static uint32_t handle_descriptor_set_image(vksp::vksp_descriptor_set &ds, VkDev CHECK_VK(res, "Could not create image"); gImages.push_back(image); + if (gOutputDs == ds.ds && gOutputBinding == ds.binding) { + gOutputImage = image; + gOutputDsPtr = &ds; + } + const VkMemoryAllocateInfo pAllocateInfo = { VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, nullptr, @@ -480,11 +504,6 @@ static uint32_t handle_descriptor_set_image(vksp::vksp_descriptor_set &ds, VkDev CHECK_VK(res, "Could not allocate memory for image"); gMemories.push_back(memory); - if (gOutputDs == ds.ds && gOutputBinding == ds.binding) { - gOutputMemory = memory; - gOutputMemorySize = ds.image.memorySize; - } - res = vkBindImageMemory(device, image, memory, ds.image.bindOffset); CHECK_VK(res, "Could not bind image and memory"); @@ -506,7 +525,8 @@ static uint32_t handle_descriptor_set_image(vksp::vksp_descriptor_set &ds, VkDev vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, nullptr, 0, nullptr, 1, &imageBarrier); - CHECK(initialize_image(device, cmdBuffer, memProperties, ds, image, subresourceRange) == 0, "Could not initalize memory for image"); + CHECK(initialize_image(device, cmdBuffer, memProperties, ds, image, subresourceRange) == 0, + "Could not initalize memory for image"); imageBarrier.srcAccessMask = imageBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT; imageBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; @@ -772,6 +792,65 @@ static uint32_t counters_size(std::vector &counters) return sizeof(uint64_t) * (2 + counters.size()); } +static uint32_t record_dump_output( + VkDevice device, VkCommandBuffer cmdBuffer, 
VkPhysicalDeviceMemoryProperties &memProperties) +{ + switch (gOutputDsPtr->type) { + case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: { + VkBuffer buffer; + CHECK(allocate_buffer(device, memProperties, *gOutputDsPtr, buffer, gOutputMemory) == 0, + "Could not allocate output buffer"); + gOutputMemorySize = gOutputDsPtr->buffer.memorySize; + + const VkBufferCopy pRegion = { 0, 0, gOutputDsPtr->buffer.size }; + vkCmdCopyBuffer(cmdBuffer, gOutputBuffer, buffer, 1, &pRegion); + } break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: { + VkImage image; + CHECK(allocate_image(device, memProperties, *gOutputDsPtr, image, gOutputMemory) == 0, + "Could not allocate output image"); + gOutputMemorySize = gOutputDsPtr->image.memorySize; + + VkImageSubresourceRange subresourceRange = { gOutputDsPtr->image.aspectMask, gOutputDsPtr->image.baseMipLevel, + gOutputDsPtr->image.levelCount, gOutputDsPtr->image.baseArrayLayer, gOutputDsPtr->image.layerCount }; + + VkImageMemoryBarrier imageBarrier = { + VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + nullptr, + 0, + VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 0, + 0, + image, + subresourceRange, + }; + vkCmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT, 0, 0, + nullptr, 0, nullptr, 1, &imageBarrier); + + const VkImageCopy pRegion = { + .srcSubresource + = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 }, + .srcOffset = { 0, 0, 0 }, + .dstSubresource + = { .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, .mipLevel = 0, .baseArrayLayer = 0, .layerCount = 1 }, + .dstOffset = { 0, 0, 0 }, + .extent = { gOutputDsPtr->image.width, gOutputDsPtr->image.height, gOutputDsPtr->image.depth }, + }; + + vkCmdCopyImage( + cmdBuffer, gOutputImage, VK_IMAGE_LAYOUT_GENERAL, image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, 
&pRegion); + } break; + default: + PRINT("Unsupported descritpor set type"); + return -1; + } + return 0; +} + static uint32_t dump_output(VkDevice device) { void *data; @@ -793,7 +872,7 @@ static uint32_t dump_output(VkDevice device) static uint32_t execute(VkDevice device, VkCommandBuffer cmdBuffer, VkQueue queue, vksp::vksp_configuration &config, std::vector &counters, uint64_t *gpu_timestamps, - std::chrono::steady_clock::time_point *host_timestamps) + std::chrono::steady_clock::time_point *host_timestamps, VkPhysicalDeviceMemoryProperties &memProperties) { VkResult res; @@ -838,6 +917,10 @@ static uint32_t execute(VkDevice device, VkCommandBuffer cmdBuffer, VkQueue queu } vkCmdWriteTimestamp(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, queryPool, 2); + if (gOutputDsPtr != nullptr) { + CHECK(record_dump_output(device, cmdBuffer, memProperties) == 0, "Could not record dumping of output"); + } + res = vkEndCommandBuffer(cmdBuffer); CHECK_VK(res, "Could not end command buffer"); @@ -859,10 +942,6 @@ static uint32_t execute(VkDevice device, VkCommandBuffer cmdBuffer, VkQueue queu vkDestroyQueryPool(device, queryPool, nullptr); - if (gOutputMemory != VK_NULL_HANDLE) { - CHECK(dump_output(device) == 0, "Could not dump output memory"); - } - return 0; } @@ -1211,13 +1290,17 @@ int main(int argc, char **argv) uint64_t gpu_timestamps[gNbGpuTimestamps]; std::chrono::steady_clock::time_point host_timestamps[3]; - CHECK( - execute(device, cmdBuffer, queue, config, counters, gpu_timestamps, host_timestamps) == 0, "Could not execute"); + CHECK(execute(device, cmdBuffer, queue, config, counters, gpu_timestamps, host_timestamps, memProperties) == 0, + "Could not execute"); PRINT("Execution completed"); CHECK(print_results(pDevice, device, config, counters, gpu_timestamps, host_timestamps) == 0, "Could not print all results"); + if (gOutputDsPtr != nullptr) { + CHECK(dump_output(device) == 0, "Could not dump output memory"); + } + clean_vk_objects(device, cmdBuffer, 
descSet, descSetLayoutVector, pipelineLayout, pipeline); return 0;