diff --git a/include/ur_api.h b/include/ur_api.h index d9ec083906..4b45981460 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1629,8 +1629,8 @@ typedef enum ur_device_info_t { ///< `EnqueueDeviceGlobalVariableRead` entry points. UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000, ///< [::ur_bool_t] Returns true if the device supports the use of ///< command-buffers. - UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP = 0x1001, ///< [::ur_bool_t] Returns true if the device supports updating the kernel - ///< commands in a command-buffer. + UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP = 0x1002, ///< [::ur_device_command_buffer_update_capability_flags_t] Command-buffer + ///< update capabilities of the device UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP = 0x1111, ///< [::ur_bool_t] return true if enqueue Cluster Launch is supported UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP = 0x2000, ///< [::ur_bool_t] returns true if the device supports the creation of ///< bindless images @@ -8155,6 +8155,27 @@ urBindlessImagesSignalExternalSemaphoreExp( #if !defined(__GNUC__) #pragma region command_buffer_(experimental) #endif +/////////////////////////////////////////////////////////////////////////////// +/// @brief Device kernel execution capability +typedef uint32_t ur_device_command_buffer_update_capability_flags_t; +typedef enum ur_device_command_buffer_update_capability_flag_t { + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS = UR_BIT(0), ///< Device supports updating the kernel arguments in command-buffer + ///< commands. + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE = UR_BIT(1), ///< Device supports updating the local work-group size in command-buffer + ///< commands. + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE = UR_BIT(2), ///< Device supports updating the global work-group size in command-buffer + ///< commands. + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET = UR_BIT(3), ///< Device supports updating the global work offset in command-buffer + ///< commands. + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE = UR_BIT(4), ///< Device supports updating the kernel handle in command-buffer commands. + /// @cond + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_device_command_buffer_update_capability_flag_t; +/// @brief Bit Mask for validating ur_device_command_buffer_update_capability_flags_t +#define UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAGS_MASK 0xffffffe0 + /////////////////////////////////////////////////////////////////////////////// /// @brief Command-buffer query information type typedef enum ur_exp_command_buffer_info_t { @@ -8208,7 +8229,7 @@ typedef struct ur_exp_command_buffer_update_memobj_arg_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_MEMOBJ_ARG_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. - const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optinal] Pointer to memory object properties. + const ur_kernel_arg_mem_obj_properties_t *pProperties; ///< [in][optional] Pointer to memory object properties. ur_mem_handle_t hNewMemObjArg; ///< [in][optional] Handle of memory object to set at argument index. } ur_exp_command_buffer_update_memobj_arg_desc_t; @@ -8220,7 +8241,7 @@ typedef struct ur_exp_command_buffer_update_pointer_arg_desc_t { ///< ::UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. - const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optinal] Pointer to USM pointer properties. + const ur_kernel_arg_pointer_properties_t *pProperties; ///< [in][optional] Pointer to USM pointer properties. const void *pNewPointerArg; ///< [in][optional] USM pointer to memory location holding the argument ///< value to set at argument index. @@ -8234,7 +8255,7 @@ typedef struct ur_exp_command_buffer_update_value_arg_desc_t { const void *pNext; ///< [in][optional] pointer to extension-specific structure uint32_t argIndex; ///< [in] Argument index. uint32_t argSize; ///< [in] Argument size. - const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optinal] Pointer to value properties. + const ur_kernel_arg_value_properties_t *pProperties; ///< [in][optional] Pointer to value properties. const void *pNewValueArg; ///< [in][optional] Argument value representing matching kernel arg type to ///< set at argument index. @@ -8411,8 +8432,9 @@ urCommandBufferAppendKernelLaunchExp( ///< phKernelAlternatives. ur_kernel_handle_t *phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t *pSyncPointWaitList, ///< [in][optional] A list of sync points that this command depends on. May ///< be ignored if command-buffer is in-order. @@ -8928,8 +8950,9 @@ urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/include/ur_print.h b/include/ur_print.h index c70e661fb1..54082d5330 100644 --- a/include/ur_print.h +++ b/include/ur_print.h @@ -970,6 +970,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintExpExternalSemaphoreDesc(const struct /// - `buff_size < out_size` UR_APIEXPORT ur_result_t UR_APICALL urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, char *buffer, const size_t buff_size, size_t *out_size); +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_command_buffer_update_capability_flag_t enum +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// - `buff_size < out_size` +UR_APIEXPORT ur_result_t UR_APICALL urPrintDeviceCommandBufferUpdateCapabilityFlags(enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size); + /////////////////////////////////////////////////////////////////////////////// /// @brief Print ur_exp_command_buffer_info_t enum /// @returns diff --git a/include/ur_print.hpp b/include/ur_print.hpp index 6bf77e4023..1408a7dea7 100644 --- a/include/ur_print.hpp +++ b/include/ur_print.hpp @@ -197,6 +197,8 @@ inline ur_result_t printFlag(std::ostream &os, uint32_t template <> inline ur_result_t printFlag(std::ostream &os, uint32_t flag); +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag); template <> inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_exp_command_buffer_info_t value, size_t size); @@ -335,6 +337,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_mem_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_external_semaphore_desc_t params); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_image_copy_region_t params); +inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_info_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_exp_command_buffer_command_info_t value); inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct ur_exp_command_buffer_desc_t params); @@ -2541,8 +2544,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP"; break; - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: - os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP"; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: + os << "UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP"; break; case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: os << "UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP"; @@ -4049,15 +4052,16 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info os << ")"; } break; - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - const ur_bool_t *tptr = (const ur_bool_t *)ptr; - if (sizeof(ur_bool_t) > size) { - os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_bool_t) << ")"; + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { + const ur_device_command_buffer_update_capability_flags_t *tptr = (const ur_device_command_buffer_update_capability_flags_t *)ptr; + if (sizeof(ur_device_command_buffer_update_capability_flags_t) > size) { + os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_device_command_buffer_update_capability_flags_t) << ")"; return UR_RESULT_ERROR_INVALID_SIZE; } os << (const void *)(tptr) << " ("; - os << *tptr; + ur::details::printFlag(os, + *tptr); os << ")"; } break; @@ -9669,6 +9673,103 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_exp_image_copy return os; } /////////////////////////////////////////////////////////////////////////////// +/// @brief Print operator for the ur_device_command_buffer_update_capability_flag_t type +/// @returns +/// std::ostream & +inline std::ostream &operator<<(std::ostream &os, enum ur_device_command_buffer_update_capability_flag_t value) { + switch (value) { + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET"; + break; + case UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE: + os << "UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} + +namespace ur::details { +/////////////////////////////////////////////////////////////////////////////// +/// @brief Print ur_device_command_buffer_update_capability_flag_t flag +template <> +inline ur_result_t printFlag(std::ostream &os, uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + } + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + } + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE; + } + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + } + + if ((val & UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) == (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE) { + val ^= (uint32_t)UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } + return UR_RESULT_SUCCESS; +} +} // namespace ur::details +/////////////////////////////////////////////////////////////////////////////// /// @brief Print operator for the ur_exp_command_buffer_info_t type /// @returns /// std::ostream & diff --git a/scripts/core/EXP-COMMAND-BUFFER.rst b/scripts/core/EXP-COMMAND-BUFFER.rst index 94df623481..78e7337397 100644 --- a/scripts/core/EXP-COMMAND-BUFFER.rst +++ b/scripts/core/EXP-COMMAND-BUFFER.rst @@ -167,8 +167,9 @@ Updating Command-Buffer Commands An adapter implementing the command-buffer experimental feature can optionally support updating the configuration of kernel commands recorded to a -command-buffer. Support for this is reported by returning true in the -${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP query. +command-buffer. The attributes of kernel commands that can be updated are +device specific and can be queried using the +${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP query. Updating kernel commands is done by passing the new kernel configuration to ${x}CommandBufferUpdateKernelLaunchExp along with the command handle of @@ -259,7 +260,13 @@ Enums ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ${x}_device_info_t * ${X}_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP - * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP + * ${X}_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP +* ${x}_device_command_buffer_update_capability_flags_t + * UPDATE_KERNEL_ARGUMENTS + * LOCAL_WORK_SIZE + * GLOBAL_WORK_SIZE + * GLOBAL_WORK_OFFSET + * KERNEL_HANDLE * ${x}_result_t * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_EXP * ${X}_RESULT_ERROR_INVALID_COMMAND_BUFFER_SYNC_POINT_EXP diff --git a/scripts/core/exp-command-buffer.yml b/scripts/core/exp-command-buffer.yml index d3f5a95bc8..66db3c977a 100644 --- a/scripts/core/exp-command-buffer.yml +++ b/scripts/core/exp-command-buffer.yml @@ -21,9 +21,31 @@ etors: - name: COMMAND_BUFFER_SUPPORT_EXP value: "0x1000" desc: "[$x_bool_t] Returns true if the device supports the use of command-buffers." - - name: COMMAND_BUFFER_UPDATE_SUPPORT_EXP - value: "0x1001" - desc: "[$x_bool_t] Returns true if the device supports updating the kernel commands in a command-buffer." + - name: COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP + desc: "[$x_device_command_buffer_update_capability_flags_t] Command-buffer update capabilities of the device" + value: "0x1002" +--- #-------------------------------------------------------------------------- +type: enum +desc: "Device kernel execution capability" +class: $xDevice +name: $x_device_command_buffer_update_capability_flags_t +etors: + - name: KERNEL_ARGUMENTS + value: "$X_BIT(0)" + desc: "Device supports updating the kernel arguments in command-buffer commands." + - name: LOCAL_WORK_SIZE + value: "$X_BIT(1)" + desc: "Device supports updating the local work-group size in command-buffer commands." + - name: GLOBAL_WORK_SIZE + value: "$X_BIT(2)" + desc: "Device supports updating the global work-group size in command-buffer commands." + - name: GLOBAL_WORK_OFFSET + value: "$X_BIT(3)" + desc: "Device supports updating the global work offset in command-buffer commands." + - name: KERNEL_HANDLE + value: "$X_BIT(4)" + desc: "Device supports updating the kernel handle in command-buffer commands." + --- #-------------------------------------------------------------------------- type: enum extend: true @@ -127,7 +149,7 @@ members: desc: "[in] Argument index." - type: "const ur_kernel_arg_mem_obj_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to memory object properties." + desc: "[in][optional] Pointer to memory object properties." - type: $x_mem_handle_t name: hNewMemObjArg desc: "[in][optional] Handle of memory object to set at argument index." @@ -142,7 +164,7 @@ members: desc: "[in] Argument index." - type: "const ur_kernel_arg_pointer_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to USM pointer properties." + desc: "[in][optional] Pointer to USM pointer properties." - type: "const void *" name: pNewPointerArg desc: "[in][optional] USM pointer to memory location holding the argument value to set at argument index." @@ -160,7 +182,7 @@ members: desc: "[in] Argument size." - type: "const ur_kernel_arg_value_properties_t *" name: pProperties - desc: "[in][optinal] Pointer to value properties." + desc: "[in][optional] Pointer to value properties." - type: "const void *" name: pNewValueArg desc: "[in][optional] Argument value representing matching kernel arg type to set at argument index." @@ -319,7 +341,8 @@ params: name: "phKernelAlternatives" desc: | [in][optional][range(0, numKernelAlternatives)] List of kernels handles that might be used to update the kernel in this - command after the command-buffer is finalized. It's invalid to specify the default kernel `hKernel` as part of this list. + command after the command-buffer is finalized. The default kernel `hKernel` is implicitly marked as an alternative. It's + invalid to specify it as part of this list. - type: uint32_t name: numSyncPointsInWaitList desc: "[in] The number of sync points in the provided dependency list." @@ -931,8 +954,9 @@ returns: - $X_RESULT_ERROR_INVALID_OPERATION: - "If $x_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to." - "If the command-buffer `hCommand` belongs to has not been finalized." + - "If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero." + - "If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`." - "If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL." - - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`." - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size." - "If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size." - $X_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/source/adapters/cuda/command_buffer.cpp b/source/adapters/cuda/command_buffer.cpp index 0a6f0015e8..1305bae515 100644 --- a/source/adapters/cuda/command_buffer.cpp +++ b/source/adapters/cuda/command_buffer.cpp @@ -864,12 +864,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return commandHandleReleaseInternal(hCommand); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( - ur_exp_command_buffer_command_handle_t hCommand, - const ur_exp_command_buffer_update_kernel_launch_desc_t - *pUpdateKernelLaunch) { +/** + * Validates contents of the update command description. + * @param[in] Command The command which is being updated. + * @param[in] UpdateCommandDesc The update command description. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + auto CommandBuffer = Command->CommandBuffer; + // Update requires command-buffer to be finalized - ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; if (!CommandBuffer->CudaGraphExec) { return UR_RESULT_ERROR_INVALID_OPERATION; } @@ -879,38 +887,61 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } - if (pUpdateKernelLaunch->newWorkDim) { + const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; + if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (NewWorkDim) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + + if (NewWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } // Error If Local size and not global size - if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && - (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { + if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && + (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { return UR_RESULT_ERROR_INVALID_OPERATION; } // Error if local size non-nullptr and created with null // or if local size nullptr and created with non-null const bool IsNewLocalSizeNull = - pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = hCommand->isNullLocalSize(); + UpdateCommandDesc->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { return UR_RESULT_ERROR_INVALID_OPERATION; } } - // Kernel corresponding to the command to update - ur_kernel_handle_t NewKernel = pUpdateKernelLaunch->hNewKernel; - - if (hCommand->ValidKernelHandles.count(NewKernel)) { - hCommand->Kernel = NewKernel; - } else { + if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { return UR_RESULT_ERROR_INVALID_VALUE; } + return UR_RESULT_SUCCESS; +} + +/** + * Updates the arguments of CommandDesc->hNewKernel + * @param[in] Device The device associated with the kernel being updated. + * @param[in] UpdateCommandDesc The update command description that contains the + * new kernel and its arguments. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateKernelArguments(ur_device_handle_t Device, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + ur_kernel_handle_t NewKernel = UpdateCommandDesc->hNewKernel; // Update pointer arguments to the kernel - uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + uint32_t NumPointerArgs = UpdateCommandDesc->numNewPointerArgs; const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = - pUpdateKernelLaunch->pNewPointerArgList; + UpdateCommandDesc->pNewPointerArgList; for (uint32_t i = 0; i < NumPointerArgs; i++) { const auto &PointerArgDesc = ArgPointerList[i]; uint32_t ArgIndex = PointerArgDesc.argIndex; @@ -926,9 +957,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Update memobj arguments to the kernel - uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + uint32_t NumMemobjArgs = UpdateCommandDesc->numNewMemObjArgs; const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = - pUpdateKernelLaunch->pNewMemObjArgList; + UpdateCommandDesc->pNewMemObjArgList; for (uint32_t i = 0; i < NumMemobjArgs; i++) { const auto &MemobjArgDesc = ArgMemobjList[i]; uint32_t ArgIndex = MemobjArgDesc.argIndex; @@ -939,8 +970,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( if (ArgValue == nullptr) { NewKernel->setKernelArg(ArgIndex, 0, nullptr); } else { - CUdeviceptr CuPtr = - std::get(ArgValue->Mem).getPtr(CommandBuffer->Device); + CUdeviceptr CuPtr = std::get(ArgValue->Mem).getPtr(Device); NewKernel->setKernelArg(ArgIndex, sizeof(CUdeviceptr), (void *)&CuPtr); } } catch (ur_result_t Err) { @@ -950,9 +980,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Update value arguments to the kernel - uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + uint32_t NumValueArgs = UpdateCommandDesc->numNewValueArgs; const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = - pUpdateKernelLaunch->pNewValueArgList; + UpdateCommandDesc->pNewValueArgList; for (uint32_t i = 0; i < NumValueArgs; i++) { const auto &ValueArgDesc = ArgValueList[i]; uint32_t ArgIndex = ValueArgDesc.argIndex; @@ -960,7 +990,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( const void *ArgValue = ValueArgDesc.pNewValueArg; ur_result_t Result = UR_RESULT_SUCCESS; - try { NewKernel->setKernelArg(ArgIndex, ArgSize, ArgValue); } catch (ur_result_t Err) { @@ -969,45 +998,68 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } } - // Set the updated ND range - const uint32_t NewWorkDim = pUpdateKernelLaunch->newWorkDim; - if (NewWorkDim != 0) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - hCommand->WorkDim = NewWorkDim; + return UR_RESULT_SUCCESS; +} + +/** + * Updates the command buffer command with new values from the update + * description. + * @param[in] Command The command to be updated. + * @param[in] UpdateCommandDesc The update command description. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateCommand(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + Command->Kernel = UpdateCommandDesc->hNewKernel; + + if (UpdateCommandDesc->newWorkDim) { + Command->WorkDim = UpdateCommandDesc->newWorkDim; } - if (pUpdateKernelLaunch->pNewGlobalWorkOffset) { - hCommand->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); + if (UpdateCommandDesc->pNewGlobalWorkOffset) { + Command->setGlobalOffset(UpdateCommandDesc->pNewGlobalWorkOffset); } - if (pUpdateKernelLaunch->pNewGlobalWorkSize) { - hCommand->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); + if (UpdateCommandDesc->pNewGlobalWorkSize) { + Command->setGlobalSize(UpdateCommandDesc->pNewGlobalWorkSize); } - if (pUpdateKernelLaunch->pNewLocalWorkSize) { - hCommand->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); + if (UpdateCommandDesc->pNewLocalWorkSize) { + Command->setLocalSize(UpdateCommandDesc->pNewLocalWorkSize); } - size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; - size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { - // If no worksize is provided make sure we pass nullptr to setKernelParams so + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + + UR_CHECK_ERROR(validateCommandDesc(hCommand, pUpdateKernelLaunch)); + UR_CHECK_ERROR( + updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch)); + UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch)); + + // If no work-size is provided make sure we pass nullptr to setKernelParams so // it can guess the local work size. const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; - uint32_t WorkDim = hCommand->WorkDim; // Set the number of threads per block to the number of threads per warp - // by default unless user has provided a better number + // by default unless user has provided a better number. size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - CUfunction CuFunc = NewKernel->get(); - ur_context_handle_t Context = CommandBuffer->Context; - ur_device_handle_t Device = CommandBuffer->Device; - auto Result = setKernelParams(Context, Device, WorkDim, GlobalWorkOffset, - GlobalWorkSize, LocalWorkSize, NewKernel, - CuFunc, ThreadsPerBlock, BlocksPerGrid); + CUfunction CuFunc = hCommand->Kernel->get(); + auto Result = setKernelParams( + CommandBuffer->Context, CommandBuffer->Device, hCommand->WorkDim, + hCommand->GlobalWorkOffset, hCommand->GlobalWorkSize, LocalWorkSize, + hCommand->Kernel, CuFunc, ThreadsPerBlock, BlocksPerGrid); if (Result != UR_RESULT_SUCCESS) { return Result; } @@ -1021,8 +1073,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( Params.blockDimX = ThreadsPerBlock[0]; Params.blockDimY = ThreadsPerBlock[1]; Params.blockDimZ = ThreadsPerBlock[2]; - Params.sharedMemBytes = NewKernel->getLocalSize(); - Params.kernelParams = const_cast(NewKernel->getArgIndices().data()); + Params.sharedMemBytes = hCommand->Kernel->getLocalSize(); + Params.kernelParams = + const_cast(hCommand->Kernel->getArgIndices().data()); CUgraphNode Node = hCommand->Node; CUgraphExec CudaGraphExec = CommandBuffer->CudaGraphExec; diff --git a/source/adapters/cuda/command_buffer.hpp b/source/adapters/cuda/command_buffer.hpp index 49e3ba8b25..a936bad72f 100644 --- a/source/adapters/cuda/command_buffer.hpp +++ b/source/adapters/cuda/command_buffer.hpp @@ -99,11 +99,10 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t CommandBuffer; - /* The currently active kernel handle for this command */ + // The currently active kernel handle for this command. ur_kernel_handle_t Kernel; - /* Set of all the kernel handles that can be used when updating this command - */ + // Set of all the kernel handles that can be used when updating this command. std::unordered_set ValidKernelHandles; CUgraphNode Node; diff --git a/source/adapters/cuda/device.cpp b/source/adapters/cuda/device.cpp index bbaaa27cdb..7daf8bdbc8 100644 --- a/source/adapters/cuda/device.cpp +++ b/source/adapters/cuda/device.cpp @@ -1093,8 +1093,17 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: + /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP:*/ return ReturnValue(true); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + return ReturnValue(UpdateCapabilities); + } case UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP: { int Value = getAttribute(hDevice, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR) >= 9; diff --git a/source/adapters/hip/command_buffer.cpp b/source/adapters/hip/command_buffer.cpp index ef6e6fe83c..5ed0caf3df 100644 --- a/source/adapters/hip/command_buffer.cpp +++ b/source/adapters/hip/command_buffer.cpp @@ -78,7 +78,8 @@ ur_exp_command_buffer_command_handle_t_:: ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, hipGraphNode_t Node, hipKernelNodeParams Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr) + const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, + ur_kernel_handle_t *KernelAlternatives) : CommandBuffer(CommandBuffer), Kernel(Kernel), Node(Node), Params(Params), WorkDim(WorkDim), RefCountInternal(1), RefCountExternal(1) { CommandBuffer->incrementInternalReferenceCount(); @@ -98,6 +99,13 @@ ur_exp_command_buffer_command_handle_t_:: std::memset(GlobalWorkOffset + WorkDim, 0, ZeroSize); std::memset(GlobalWorkSize + WorkDim, 0, ZeroSize); } + + /* Add the default Kernel as a valid kernel handle for this command */ + ValidKernelHandles.insert(Kernel); + if (KernelAlternatives) { + ValidKernelHandles.insert(KernelAlternatives, + KernelAlternatives + NumKernelAlternatives); + } } /// Helper function for finding the HIP Nodes associated with the commands in a @@ -834,12 +842,20 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( return commandHandleReleaseInternal(hCommand); } -UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( - ur_exp_command_buffer_command_handle_t hCommand, - const ur_exp_command_buffer_update_kernel_launch_desc_t - *pUpdateKernelLaunch) { +/** + * Validates contents of the update command description. + * @param[in] Command The command which is being updated. + * @param[in] UpdateCommandDesc The update command description. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +validateCommandDesc(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + auto CommandBuffer = Command->CommandBuffer; + // Update requires command-buffer to be finalized - ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; if (!CommandBuffer->HIPGraphExec) { return UR_RESULT_ERROR_INVALID_OPERATION; } @@ -849,53 +865,78 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( return UR_RESULT_ERROR_INVALID_OPERATION; } - if (auto NewWorkDim = pUpdateKernelLaunch->newWorkDim) { - // Error if work dim changes - if (NewWorkDim != hCommand->WorkDim) { + const uint32_t NewWorkDim = UpdateCommandDesc->newWorkDim; + if (!NewWorkDim && Command->Kernel != UpdateCommandDesc->hNewKernel) { + return UR_RESULT_ERROR_INVALID_OPERATION; + } + + if (NewWorkDim) { + UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); + + if (NewWorkDim != Command->WorkDim && + Command->Kernel == UpdateCommandDesc->hNewKernel) { return UR_RESULT_ERROR_INVALID_OPERATION; } // Error If Local size and not global size - if ((pUpdateKernelLaunch->pNewLocalWorkSize != nullptr) && - (pUpdateKernelLaunch->pNewGlobalWorkSize == nullptr)) { + if ((UpdateCommandDesc->pNewLocalWorkSize != nullptr) && + (UpdateCommandDesc->pNewGlobalWorkSize == nullptr)) { return UR_RESULT_ERROR_INVALID_OPERATION; } // Error if local size non-nullptr and created with null // or if local size nullptr and created with non-null const bool IsNewLocalSizeNull = - pUpdateKernelLaunch->pNewLocalWorkSize == nullptr; - const bool IsOriginalLocalSizeNull = hCommand->isNullLocalSize(); + UpdateCommandDesc->pNewLocalWorkSize == nullptr; + const bool IsOriginalLocalSizeNull = Command->isNullLocalSize(); if (IsNewLocalSizeNull ^ IsOriginalLocalSizeNull) { return UR_RESULT_ERROR_INVALID_OPERATION; } } - // Kernel corresponding to the command to update - ur_kernel_handle_t Kernel = hCommand->Kernel; - ur_device_handle_t Device = CommandBuffer->Device; + if (!Command->ValidKernelHandles.count(UpdateCommandDesc->hNewKernel)) { + return UR_RESULT_ERROR_INVALID_VALUE; + } + + return UR_RESULT_SUCCESS; +} + +/** + * Updates the arguments of CommandDesc->hNewKernel + * @param[in] Device The device associated with the kernel being updated. + * @param[in] UpdateCommandDesc The update command description that contains the + * new kernel and its arguments. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateKernelArguments(ur_device_handle_t Device, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + ur_kernel_handle_t NewKernel = UpdateCommandDesc->hNewKernel; // Update pointer arguments to the kernel - uint32_t NumPointerArgs = pUpdateKernelLaunch->numNewPointerArgs; + uint32_t NumPointerArgs = UpdateCommandDesc->numNewPointerArgs; const ur_exp_command_buffer_update_pointer_arg_desc_t *ArgPointerList = - pUpdateKernelLaunch->pNewPointerArgList; + UpdateCommandDesc->pNewPointerArgList; for (uint32_t i = 0; i < NumPointerArgs; i++) { const auto &PointerArgDesc = ArgPointerList[i]; uint32_t ArgIndex = PointerArgDesc.argIndex; const void *ArgValue = PointerArgDesc.pNewPointerArg; try { - Kernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); + NewKernel->setKernelArg(ArgIndex, sizeof(ArgValue), ArgValue); } catch (ur_result_t Err) { return Err; } } // Update memobj arguments to the kernel - uint32_t NumMemobjArgs = pUpdateKernelLaunch->numNewMemObjArgs; + uint32_t NumMemobjArgs = UpdateCommandDesc->numNewMemObjArgs; const ur_exp_command_buffer_update_memobj_arg_desc_t *ArgMemobjList = - pUpdateKernelLaunch->pNewMemObjArgList; + UpdateCommandDesc->pNewMemObjArgList; for (uint32_t i = 0; i < NumMemobjArgs; i++) { const auto &MemobjArgDesc = ArgMemobjList[i]; uint32_t ArgIndex = MemobjArgDesc.argIndex; @@ -903,10 +944,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( try { if (ArgValue == nullptr) { - Kernel->setKernelArg(ArgIndex, 0, nullptr); + NewKernel->setKernelArg(ArgIndex, 0, nullptr); } else { void *HIPPtr = std::get(ArgValue->Mem).getVoid(Device); - Kernel->setKernelArg(ArgIndex, sizeof(void *), (void *)&HIPPtr); + NewKernel->setKernelArg(ArgIndex, sizeof(void *), (void *)&HIPPtr); } } catch (ur_result_t Err) { return Err; @@ -914,9 +955,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( } // Update value arguments to the kernel - uint32_t NumValueArgs = pUpdateKernelLaunch->numNewValueArgs; + uint32_t NumValueArgs = UpdateCommandDesc->numNewValueArgs; const ur_exp_command_buffer_update_value_arg_desc_t *ArgValueList = - pUpdateKernelLaunch->pNewValueArgList; + UpdateCommandDesc->pNewValueArgList; for (uint32_t i = 0; i < NumValueArgs; i++) { const auto &ValueArgDesc = ArgValueList[i]; uint32_t ArgIndex = ValueArgDesc.argIndex; @@ -924,49 +965,74 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( const void *ArgValue = ValueArgDesc.pNewValueArg; try { - Kernel->setKernelArg(ArgIndex, ArgSize, ArgValue); + NewKernel->setKernelArg(ArgIndex, ArgSize, ArgValue); } catch (ur_result_t Err) { return Err; } } - // Set the updated ND range - const uint32_t NewWorkDim = pUpdateKernelLaunch->newWorkDim; - if (NewWorkDim != 0) { - UR_ASSERT(NewWorkDim > 0, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - UR_ASSERT(NewWorkDim < 4, UR_RESULT_ERROR_INVALID_WORK_DIMENSION); - hCommand->WorkDim = NewWorkDim; + return UR_RESULT_SUCCESS; +} + +/** + * Updates the command buffer command with new values from the update + * description. + * @param[in] Command The command to be updated. + * @param[in] UpdateCommandDesc The update command description. + * @return UR_RESULT_SUCCESS or an error code on failure + */ +ur_result_t +updateCommand(ur_exp_command_buffer_command_handle_t Command, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *UpdateCommandDesc) { + + Command->Kernel = UpdateCommandDesc->hNewKernel; + + if (UpdateCommandDesc->hNewKernel) { + Command->WorkDim = UpdateCommandDesc->newWorkDim; } - if (pUpdateKernelLaunch->pNewGlobalWorkOffset) { - hCommand->setGlobalOffset(pUpdateKernelLaunch->pNewGlobalWorkOffset); + if (UpdateCommandDesc->pNewGlobalWorkOffset) { + Command->setGlobalOffset(UpdateCommandDesc->pNewGlobalWorkOffset); } - if (pUpdateKernelLaunch->pNewGlobalWorkSize) { - hCommand->setGlobalSize(pUpdateKernelLaunch->pNewGlobalWorkSize); + if (UpdateCommandDesc->pNewGlobalWorkSize) { + Command->setGlobalSize(UpdateCommandDesc->pNewGlobalWorkSize); } - if (pUpdateKernelLaunch->pNewLocalWorkSize) { - hCommand->setLocalSize(pUpdateKernelLaunch->pNewLocalWorkSize); + if (UpdateCommandDesc->pNewLocalWorkSize) { + Command->setLocalSize(UpdateCommandDesc->pNewLocalWorkSize); } - size_t *GlobalWorkOffset = hCommand->GlobalWorkOffset; - size_t *GlobalWorkSize = hCommand->GlobalWorkSize; + return UR_RESULT_SUCCESS; +} + +UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( + ur_exp_command_buffer_command_handle_t hCommand, + const ur_exp_command_buffer_update_kernel_launch_desc_t + *pUpdateKernelLaunch) { + + ur_exp_command_buffer_handle_t CommandBuffer = hCommand->CommandBuffer; + + UR_CHECK_ERROR(validateCommandDesc(hCommand, pUpdateKernelLaunch)); + UR_CHECK_ERROR( + updateKernelArguments(CommandBuffer->Device, pUpdateKernelLaunch)); + UR_CHECK_ERROR(updateCommand(hCommand, pUpdateKernelLaunch)); // If no worksize is provided make sure we pass nullptr to setKernelParams so // it can guess the local work size. const bool ProvidedLocalSize = !hCommand->isNullLocalSize(); size_t *LocalWorkSize = ProvidedLocalSize ? hCommand->LocalWorkSize : nullptr; - uint32_t WorkDim = hCommand->WorkDim; // Set the number of threads per block to the number of threads per warp // by default unless user has provided a better number size_t ThreadsPerBlock[3] = {32u, 1u, 1u}; size_t BlocksPerGrid[3] = {1u, 1u, 1u}; - hipFunction_t HIPFunc = Kernel->get(); - UR_CHECK_ERROR(setKernelParams(Device, WorkDim, GlobalWorkOffset, - GlobalWorkSize, LocalWorkSize, Kernel, HIPFunc, - ThreadsPerBlock, BlocksPerGrid)); + hipFunction_t HIPFunc = hCommand->Kernel->get(); + UR_CHECK_ERROR(setKernelParams( + CommandBuffer->Device, hCommand->WorkDim, hCommand->GlobalWorkOffset, + hCommand->GlobalWorkSize, LocalWorkSize, hCommand->Kernel, HIPFunc, + ThreadsPerBlock, BlocksPerGrid)); hipKernelNodeParams &Params = hCommand->Params; @@ -977,8 +1043,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferUpdateKernelLaunchExp( Params.blockDim.x = ThreadsPerBlock[0]; Params.blockDim.y = ThreadsPerBlock[1]; Params.blockDim.z = ThreadsPerBlock[2]; - Params.sharedMemBytes = Kernel->getLocalSize(); - Params.kernelParams = const_cast(Kernel->getArgIndices().data()); + Params.sharedMemBytes = hCommand->Kernel->getLocalSize(); + Params.kernelParams = + const_cast(hCommand->Kernel->getArgIndices().data()); hipGraphNode_t Node = hCommand->Node; hipGraphExec_t HipGraphExec = CommandBuffer->HIPGraphExec; diff --git a/source/adapters/hip/command_buffer.hpp b/source/adapters/hip/command_buffer.hpp index d744a3544d..c14fe50ff4 100644 --- a/source/adapters/hip/command_buffer.hpp +++ b/source/adapters/hip/command_buffer.hpp @@ -15,6 +15,7 @@ #include "context.hpp" #include #include +#include // Trace an internal UR call #define UR_TRACE(Call) \ @@ -43,7 +44,8 @@ struct ur_exp_command_buffer_command_handle_t_ { ur_exp_command_buffer_handle_t CommandBuffer, ur_kernel_handle_t Kernel, hipGraphNode_t Node, hipKernelNodeParams Params, uint32_t WorkDim, const size_t *GlobalWorkOffsetPtr, const size_t *GlobalWorkSizePtr, - const size_t *LocalWorkSizePtr); + const size_t *LocalWorkSizePtr, uint32_t NumKernelAlternatives, + ur_kernel_handle_t *KernelAlternatives); void setGlobalOffset(const size_t *GlobalWorkOffsetPtr) { const size_t CopySize = sizeof(size_t) * WorkDim; @@ -95,7 +97,13 @@ struct ur_exp_command_buffer_command_handle_t_ { } ur_exp_command_buffer_handle_t CommandBuffer; + + // The currently active kernel handle for this command. ur_kernel_handle_t Kernel; + + // Set of all the kernel handles that can be used when updating this command. + std::unordered_set ValidKernelHandles; + hipGraphNode_t Node; hipKernelNodeParams Params; diff --git a/source/adapters/hip/device.cpp b/source/adapters/hip/device.cpp index 3ae98e929d..fc75c07998 100644 --- a/source/adapters/hip/device.cpp +++ b/source/adapters/hip/device.cpp @@ -608,7 +608,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_SUCCESS; } - // Intel USM extensions + // Intel USM extensions case UR_DEVICE_INFO_USM_HOST_SUPPORT: { // from cl_intel_unified_shared_memory: "The host memory access capabilities // apply to any host allocation." @@ -889,7 +889,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GLOBAL_VARIABLE_SUPPORT: return ReturnValue(false); - // TODO: Investigate if this information is available on HIP. + // TODO: Investigate if this information is available on HIP. case UR_DEVICE_INFO_COMPONENT_DEVICES: case UR_DEVICE_INFO_COMPOSITE_DEVICE: case UR_DEVICE_INFO_GPU_EU_COUNT: @@ -905,17 +905,33 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + /*case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: */ { + int DriverVersion = 0; + UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); + + // Return supported for the UR command-buffer experimental feature on + // ROCM 5.5.1 and later. This is to workaround HIP driver bug + // https://github.com/ROCm/HIP/issues/2450 in older versions. + // + // The version is returned as (10000000 major + 1000000 minor + patch). + const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 + return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); + } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { int DriverVersion = 0; UR_CHECK_ERROR(hipDriverGetVersion(&DriverVersion)); - - // Return supported for the UR command-buffer experimental feature on - // ROCM 5.5.1 and later. This is to workaround HIP driver bug - // https://github.com/ROCm/HIP/issues/2450 in older versions. - // - // The version is returned as (10000000 major + 1000000 minor + patch). const int CmdBufDriverMinVersion = 50530202; // ROCM 5.5.1 - return ReturnValue(DriverVersion >= CmdBufDriverMinVersion); + if (DriverVersion < CmdBufDriverMinVersion) { + return ReturnValue( + static_cast(0)); + } + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE; + return ReturnValue(UpdateCapabilities); } default: break; diff --git a/source/adapters/level_zero/device.cpp b/source/adapters/level_zero/device.cpp index e6cb650420..9f1af0f457 100644 --- a/source/adapters/level_zero/device.cpp +++ b/source/adapters/level_zero/device.cpp @@ -66,14 +66,14 @@ ur_result_t urDeviceGet( ur_platform_handle_t Platform, ///< [in] handle of the platform instance ur_device_type_t DeviceType, ///< [in] the type of the devices. uint32_t NumEntries, ///< [in] the number of devices to be added to - ///< phDevices. If phDevices in not NULL then - ///< NumEntries should be greater than zero, otherwise - ///< ::UR_RESULT_ERROR_INVALID_SIZE, will be returned. + ///< phDevices. If phDevices in not NULL then + ///< NumEntries should be greater than zero, otherwise + ///< ::UR_RESULT_ERROR_INVALID_SIZE, will be returned. ur_device_handle_t *Devices, ///< [out][optional][range(0, NumEntries)] array of handle of - ///< devices. If NumEntries is less than the number of devices - ///< available, then platform shall only retrieve that number - ///< of devices. + ///< devices. If NumEntries is less than the number of devices + ///< available, then platform shall only retrieve that number + ///< of devices. uint32_t *NumDevices ///< [out][optional] pointer to the number of devices. ///< pNumDevices will be updated with the total number ///< of devices available. @@ -194,10 +194,10 @@ ur_result_t urDeviceGetInfo( ur_device_info_t ParamName, ///< [in] type of the info to retrieve size_t propSize, ///< [in] the number of bytes pointed to by ParamValue. void *ParamValue, ///< [out][optional] array of bytes holding the info. - ///< If propSize is not equal to or greater than the real - ///< number of bytes needed to return the info then the - ///< ::UR_RESULT_ERROR_INVALID_SIZE error is returned and - ///< pDeviceInfo is not used. + ///< If propSize is not equal to or greater than the real + ///< number of bytes needed to return the info then the + ///< ::UR_RESULT_ERROR_INVALID_SIZE error is returned and + ///< pDeviceInfo is not used. size_t *pSize ///< [out][optional] pointer to the actual size in bytes of ///< the queried infoType. ) { @@ -299,9 +299,9 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_NAME: return ReturnValue(Device->ZeDeviceProperties->name); - // zeModuleCreate allows using root device module for sub-devices: - // > The application must only use the module for the device, or its - // > sub-devices, which was provided during creation. + // zeModuleCreate allows using root device module for sub-devices: + // > The application must only use the module for the device, or its + // > sub-devices, which was provided during creation. case UR_DEVICE_INFO_BUILD_ON_SUBDEVICE: return ReturnValue(ur_bool_t{0}); case UR_DEVICE_INFO_COMPILER_AVAILABLE: @@ -472,7 +472,7 @@ ur_result_t urDeviceGetInfo( return ReturnValue(*Device->SubDeviceCreationProperty); } - // Everything under here is not supported yet + // Everything under here is not supported yet case UR_EXT_DEVICE_INFO_OPENCL_C_VERSION: return ReturnValue(""); case UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC: @@ -633,8 +633,8 @@ ur_result_t urDeviceGetInfo( case UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE: return ReturnValue( size_t{Device->ZeDeviceImageProperties->maxImageArraySlices}); - // Handle SIMD widths. - // TODO: can we do better than this? + // Handle SIMD widths. + // TODO: can we do better than this? case UR_DEVICE_INFO_NATIVE_VECTOR_WIDTH_CHAR: case UR_DEVICE_INFO_PREFERRED_VECTOR_WIDTH_CHAR: return ReturnValue(Device->ZeDeviceProperties->physicalEUSimdWidth / 1); @@ -994,20 +994,44 @@ ur_result_t urDeviceGetInfo( } case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: return ReturnValue(true); - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { - // Update support requires being able to update kernel arguments and all - // aspects of the kernel NDRange. - const ze_mutable_command_exp_flags_t UpdateMask = - ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | + // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + // // Update support requires being able to update kernel arguments and + // all + // // aspects of the kernel NDRange. + // const ze_mutable_command_exp_flags_t UpdateMask = + // ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS | + // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | + // ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | + // ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; + // + // const bool KernelArgUpdateSupport = + // (Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags & + // UpdateMask) == UpdateMask; + // return ReturnValue(KernelArgUpdateSupport && + // Device->Platform->ZeMutableCmdListExt.Supported); + // } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { + const bool ZeMutableCommandFlags = + Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags; + + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities = 0; + if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_KERNEL_ARGUMENTS) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + } + ze_mutable_command_exp_flags_t ReqUpdateWG = ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_COUNT | - ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE | - ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET; - - const bool KernelArgUpdateSupport = - (Device->ZeDeviceMutableCmdListsProperties->mutableCommandFlags & - UpdateMask) == UpdateMask; - return ReturnValue(KernelArgUpdateSupport && - Device->Platform->ZeMutableCmdListExt.Supported); + ZE_MUTABLE_COMMAND_EXP_FLAG_GROUP_SIZE; + if ((ZeMutableCommandFlags & ReqUpdateWG) == ReqUpdateWG) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + } + if (ZeMutableCommandFlags & ZE_MUTABLE_COMMAND_EXP_FLAG_GLOBAL_OFFSET) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + } + return ReturnValue(UpdateCapabilities); } case UR_DEVICE_INFO_BINDLESS_IMAGES_SUPPORT_EXP: { // On L0 bindless images are supported. @@ -1549,7 +1573,7 @@ ur_result_t ur_device_handle_t_::initialize(int SubSubDeviceOrdinal, QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeIndex = SubSubDeviceIndex; } else { // Proceed with initialization for root and sub-device - // How is it possible that there are no "compute" capabilities? + // How is it possible that there are no "compute" capabilities? if (QueueGroup[ur_device_handle_t_::queue_group_info_t::Compute].ZeOrdinal < 0) { return UR_RESULT_ERROR_UNKNOWN; diff --git a/source/adapters/mock/ur_mockddi.cpp b/source/adapters/mock/ur_mockddi.cpp index 876e895322..714bf7817c 100644 --- a/source/adapters/mock/ur_mockddi.cpp +++ b/source/adapters/mock/ur_mockddi.cpp @@ -8356,8 +8356,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/adapters/native_cpu/device.cpp b/source/adapters/native_cpu/device.cpp index c5652398e3..bed0898d04 100644 --- a/source/adapters/native_cpu/device.cpp +++ b/source/adapters/native_cpu/device.cpp @@ -388,8 +388,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(false); case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: return ReturnValue(false); + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: + return ReturnValue( + static_cast(0)); case UR_DEVICE_INFO_TIMESTAMP_RECORDING_SUPPORT_EXP: return ReturnValue(false); diff --git a/source/adapters/opencl/command_buffer.cpp b/source/adapters/opencl/command_buffer.cpp index 34cb7f1a3c..7490e7c833 100644 --- a/source/adapters/opencl/command_buffer.cpp +++ b/source/adapters/opencl/command_buffer.cpp @@ -71,10 +71,11 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferCreateExp( const bool IsUpdatable = pCommandBufferDesc ? pCommandBufferDesc->isUpdatable : false; - bool DeviceSupportsUpdate = false; + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; cl_device_id CLDevice = cl_adapter::cast(hDevice); - CL_RETURN_ON_FAILURE(deviceSupportsURCommandBufferKernelUpdate( - CLDevice, DeviceSupportsUpdate)); + CL_RETURN_ON_FAILURE( + deviceSupportsURCommandBufferKernelUpdate(CLDevice, UpdateCapabilities)); + bool DeviceSupportsUpdate = UpdateCapabilities > 0; if (IsUpdatable && !DeviceSupportsUpdate) { return UR_RESULT_ERROR_INVALID_OPERATION; diff --git a/source/adapters/opencl/common.cpp b/source/adapters/opencl/common.cpp index 03775fb87d..49b45e7dc1 100644 --- a/source/adapters/opencl/common.cpp +++ b/source/adapters/opencl/common.cpp @@ -116,8 +116,10 @@ ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle) { return UR_RESULT_SUCCESS; } -cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, - bool &Result) { +cl_int deviceSupportsURCommandBufferKernelUpdate( + cl_device_id Dev, + ur_device_command_buffer_update_capability_flags_t &UpdateCapabilities) { + size_t ExtSize = 0; CL_RETURN_ON_FAILURE( clGetDeviceInfo(Dev, CL_DEVICE_EXTENSIONS, 0, nullptr, &ExtSize)); @@ -129,21 +131,37 @@ cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, std::string SupportedExtensions(ExtStr.c_str()); if (ExtStr.find("cl_khr_command_buffer_mutable_dispatch") == std::string::npos) { - Result = false; + UpdateCapabilities = 0; return CL_SUCCESS; } - // All the CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR capabilities must - // be supported by a device for UR update. - cl_mutable_dispatch_fields_khr mutable_capabilities; + cl_mutable_dispatch_fields_khr MutableCapabilities; CL_RETURN_ON_FAILURE(clGetDeviceInfo( Dev, CL_DEVICE_MUTABLE_DISPATCH_CAPABILITIES_KHR, - sizeof(mutable_capabilities), &mutable_capabilities, nullptr)); - const cl_mutable_dispatch_fields_khr required_caps = - CL_MUTABLE_DISPATCH_ARGUMENTS_KHR | - CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR | - CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR | CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR | - CL_MUTABLE_DISPATCH_EXEC_INFO_KHR; - Result = (mutable_capabilities & required_caps) == required_caps; + sizeof(MutableCapabilities), &MutableCapabilities, nullptr)); + + if (!(MutableCapabilities & CL_MUTABLE_DISPATCH_EXEC_INFO_KHR)) { + UpdateCapabilities = 0; + return CL_SUCCESS; + } + + UpdateCapabilities = 0; + if (MutableCapabilities & CL_MUTABLE_DISPATCH_ARGUMENTS_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS; + } + if (MutableCapabilities & CL_MUTABLE_DISPATCH_GLOBAL_SIZE_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE; + } + if (MutableCapabilities & CL_MUTABLE_DISPATCH_LOCAL_SIZE_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE; + } + if (MutableCapabilities & CL_MUTABLE_DISPATCH_GLOBAL_OFFSET_KHR) { + UpdateCapabilities |= + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + } + return CL_SUCCESS; } diff --git a/source/adapters/opencl/common.hpp b/source/adapters/opencl/common.hpp index 18b08bf095..6993f54ed0 100644 --- a/source/adapters/opencl/common.hpp +++ b/source/adapters/opencl/common.hpp @@ -417,5 +417,6 @@ ur_result_t mapCLErrorToUR(cl_int Result); ur_result_t getNativeHandle(void *URObj, ur_native_handle_t *NativeHandle); -cl_int deviceSupportsURCommandBufferKernelUpdate(cl_device_id Dev, - bool &Result); +cl_int deviceSupportsURCommandBufferKernelUpdate( + cl_device_id Dev, + ur_device_command_buffer_update_capability_flags_t &UpdateCapabilities); diff --git a/source/adapters/opencl/device.cpp b/source/adapters/opencl/device.cpp index 071a3a7c5a..79ba8aa382 100644 --- a/source/adapters/opencl/device.cpp +++ b/source/adapters/opencl/device.cpp @@ -1028,10 +1028,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_KERNEL_SET_SPECIALIZATION_CONSTANTS: { return ReturnValue(false); } - /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU - * EU device-specific information extensions. Some of the queries are - * enabled by cl_intel_device_attribute_query extension, but it's not yet in - * the Registry. */ + /* TODO: Check regularly to see if support is enabled in OpenCL. Intel GPU + * EU device-specific information extensions. Some of the queries are + * enabled by cl_intel_device_attribute_query extension, but it's not yet in + * the Registry. */ case UR_DEVICE_INFO_COMPONENT_DEVICES: case UR_DEVICE_INFO_COMPOSITE_DEVICE: case UR_DEVICE_INFO_PCI_ADDRESS: @@ -1042,7 +1042,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE: case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: case UR_DEVICE_INFO_MAX_MEMORY_BANDWIDTH: - /* This enums have no equivalent in OpenCL */ + /* This enums have no equivalent in OpenCL */ case UR_DEVICE_INFO_MAX_REGISTERS_PER_WORK_GROUP: case UR_DEVICE_INFO_GLOBAL_MEM_FREE: case UR_DEVICE_INFO_MEMORY_CLOCK_RATE: @@ -1065,12 +1065,25 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, return ReturnValue(ExtStr.find("cl_khr_command_buffer") != std::string::npos); } - case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + // case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP: { + // cl_device_id Dev = cl_adapter::cast(hDevice); + // ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; + // CL_RETURN_ON_FAILURE( + // deviceSupportsURCommandBufferKernelUpdate(Dev, + // UpdateCapabilities)); + // ur_device_command_buffer_update_capability_flags_t + // RequiredCapabilities = + // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + // UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_WORKGROUP; + // return ReturnValue((UpdateCapabilities & RequiredCapabilities) == + // RequiredCapabilities); + // } + case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: { cl_device_id Dev = cl_adapter::cast(hDevice); - bool Supported = false; + ur_device_command_buffer_update_capability_flags_t UpdateCapabilities; CL_RETURN_ON_FAILURE( - deviceSupportsURCommandBufferKernelUpdate(Dev, Supported)); - return ReturnValue(Supported); + deviceSupportsURCommandBufferKernelUpdate(Dev, UpdateCapabilities)); + return ReturnValue(UpdateCapabilities); } default: { return UR_RESULT_ERROR_INVALID_ENUMERATION; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index f21320b830..a3f48fd533 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -6500,8 +6500,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index b05194bef1..542dfc3be5 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -8061,8 +8061,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/loader.def.in b/source/loader/loader.def.in index 5e628b4faf..63a5f1843d 100644 --- a/source/loader/loader.def.in +++ b/source/loader/loader.def.in @@ -235,6 +235,7 @@ EXPORTS urPrintContextSetExtendedDeleterParams urPrintDeviceAffinityDomainFlags urPrintDeviceBinary + urPrintDeviceCommandBufferUpdateCapabilityFlags urPrintDeviceCreateWithNativeHandleParams urPrintDeviceExecCapabilityFlags urPrintDeviceFpCapabilityFlags diff --git a/source/loader/loader.map.in b/source/loader/loader.map.in index 18e4018aee..a4ca4a713f 100644 --- a/source/loader/loader.map.in +++ b/source/loader/loader.map.in @@ -235,6 +235,7 @@ urPrintContextSetExtendedDeleterParams; urPrintDeviceAffinityDomainFlags; urPrintDeviceBinary; + urPrintDeviceCommandBufferUpdateCapabilityFlags; urPrintDeviceCreateWithNativeHandleParams; urPrintDeviceExecCapabilityFlags; urPrintDeviceFpCapabilityFlags; diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 20a5e8acfa..ddcb63cda1 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -7112,8 +7112,9 @@ __urdlllocal ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 36e61ba09d..81937709ca 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -7551,8 +7551,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -8313,8 +8314,9 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/source/loader/ur_print.cpp b/source/loader/ur_print.cpp index f9d510e95d..454dc6d436 100644 --- a/source/loader/ur_print.cpp +++ b/source/loader/ur_print.cpp @@ -980,6 +980,14 @@ urPrintExpImageCopyRegion(const struct ur_exp_image_copy_region_t params, return str_copy(&ss, buffer, buff_size, out_size); } +ur_result_t urPrintDeviceCommandBufferUpdateCapabilityFlags( + enum ur_device_command_buffer_update_capability_flag_t value, char *buffer, + const size_t buff_size, size_t *out_size) { + std::stringstream ss; + ss << value; + return str_copy(&ss, buffer, buff_size, out_size); +} + ur_result_t urPrintExpCommandBufferInfo(enum ur_exp_command_buffer_info_t value, char *buffer, const size_t buff_size, size_t *out_size) { diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 0babfaf8ae..f5f02bbee4 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -6407,8 +6407,9 @@ ur_result_t UR_APICALL urCommandBufferAppendKernelLaunchExp( ur_kernel_handle_t * phKernelAlternatives, ///< [in][optional][range(0, numKernelAlternatives)] List of kernels ///< handles that might be used to update the kernel in this - ///< command after the command-buffer is finalized. It's invalid to specify - ///< the default kernel `hKernel` as part of this list. + ///< command after the command-buffer is finalized. The default kernel + ///< `hKernel` is implicitly marked as an alternative. It's + ///< invalid to specify it as part of this list. uint32_t numSyncPointsInWaitList, ///< [in] The number of sync points in the provided dependency list. const ur_exp_command_buffer_sync_point_t * @@ -7026,8 +7027,9 @@ ur_result_t UR_APICALL urCommandBufferReleaseCommandExp( /// - ::UR_RESULT_ERROR_INVALID_OPERATION /// + If ::ur_exp_command_buffer_desc_t::isUpdatable was not set to true on creation of the command buffer `hCommand` belongs to. /// + If the command-buffer `hCommand` belongs to has not been finalized. +/// + If `pUpdateKernellaunch->hNewKernel` is different from the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is zero. +/// + If `pUpdateKernellaunch->hNewKernel` is equal to the currently active kernel in `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value, and `pUpdateKernelLaunch->pNewGlobalWorkSize` is NULL. -/// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero and different from the work-dim currently associated with `hCommand`. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a non-NULL value while `hCommand` is currently associated with a NULL local work size. /// + If `pUpdateKernellaunch->hNewKernel` is equal to the current kernel associated with `hCommand`, and `pUpdateKernellaunch->newWorkDim` is non-zero, and `pUpdateKernelLaunch->pNewLocalWorkSize` is set to a NULL value while `hCommand` is currently associated with a non-NULL local work size. /// - ::UR_RESULT_ERROR_INVALID_COMMAND_BUFFER_COMMAND_HANDLE_EXP diff --git a/test/conformance/exp_command_buffer/fixtures.h b/test/conformance/exp_command_buffer/fixtures.h index f81b664d7f..2cc91c4d3f 100644 --- a/test/conformance/exp_command_buffer/fixtures.h +++ b/test/conformance/exp_command_buffer/fixtures.h @@ -32,14 +32,20 @@ static void checkCommandBufferSupport(ur_device_handle_t device) { } } -static void checkCommandBufferUpdateSupport(ur_device_handle_t device) { - bool updatable_command_buffer_support; +static void checkCommandBufferUpdateSupport( + ur_device_handle_t device, + ur_device_command_buffer_update_capability_flags_t requiredCapabilities) { + ur_device_command_buffer_update_capability_flags_t update_capability_flags; ASSERT_SUCCESS(urDeviceGetInfo( - device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP, - sizeof(ur_bool_t), &updatable_command_buffer_support, nullptr)); + device, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP, + sizeof(update_capability_flags), &update_capability_flags, nullptr)); - if (!updatable_command_buffer_support) { + if (!update_capability_flags) { GTEST_SKIP() << "Updating EXP command-buffers is not supported."; + } else if ((update_capability_flags & requiredCapabilities) != + requiredCapabilities) { + GTEST_SKIP() << "Some of the command-buffer update capabilities " + "required are not supported by the device."; } } @@ -109,7 +115,14 @@ struct urUpdatableCommandBufferExpTest : uur::urQueueTest { UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::SetUp()); UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); - UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); + + auto requiredCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + UUR_RETURN_ON_FATAL_FAILURE( + checkCommandBufferUpdateSupport(device, requiredCapabilities)); // Create a command-buffer with update enabled. ur_exp_command_buffer_desc_t desc{ @@ -118,36 +131,16 @@ struct urUpdatableCommandBufferExpTest : uur::urQueueTest { ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, &desc, &updatable_cmd_buf_handle)); ASSERT_NE(updatable_cmd_buf_handle, nullptr); - - // Currently there are synchronization issue with immediate submission when used for command buffers. - // So, create queue with batched submission for this test suite if the backend is Level Zero. - if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO) { - ur_queue_flags_t flags = UR_QUEUE_FLAG_SUBMISSION_BATCHED; - ur_queue_properties_t props = { - /*.stype =*/UR_STRUCTURE_TYPE_QUEUE_PROPERTIES, - /*.pNext =*/nullptr, - /*.flags =*/flags, - }; - ASSERT_SUCCESS(urQueueCreate(context, device, &props, &queue)); - ASSERT_NE(queue, nullptr); - } else { - queue = urQueueTest::queue; - } } void TearDown() override { if (updatable_cmd_buf_handle) { EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); } - if (backend == UR_PLATFORM_BACKEND_LEVEL_ZERO && queue) { - ASSERT_SUCCESS(urQueueRelease(queue)); - } - UUR_RETURN_ON_FATAL_FAILURE(uur::urQueueTest::TearDown()); } ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; - ur_queue_handle_t queue = nullptr; ur_platform_backend_t backend{}; }; @@ -155,30 +148,17 @@ struct urUpdatableCommandBufferExpExecutionTest : uur::urKernelExecutionTest { void SetUp() override { UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); - ASSERT_SUCCESS(urCommandBufferCreateExp(context, device, nullptr, - &cmd_buf_handle)); - ASSERT_NE(cmd_buf_handle, nullptr); - } - - void TearDown() override { - if (cmd_buf_handle) { - EXPECT_SUCCESS(urCommandBufferReleaseExp(cmd_buf_handle)); - } - UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::TearDown()); - } - - ur_exp_command_buffer_handle_t cmd_buf_handle = nullptr; - -}; - -struct urUpdatableCommandBufferExpExecutionTest - : uur::urKernelExecutionTest { - void SetUp() override { - UUR_RETURN_ON_FATAL_FAILURE(uur::urKernelExecutionTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE(checkCommandBufferSupport(device)); + auto requiredCapabilities = + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_ARGUMENTS | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_LOCAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_SIZE | + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_GLOBAL_WORK_OFFSET; + UUR_RETURN_ON_FATAL_FAILURE( + checkCommandBufferUpdateSupport(device, requiredCapabilities)); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferSupport(device)); - ASSERT_NO_FATAL_FAILURE(checkCommandBufferUpdateSupport(device)); + UUR_RETURN_ON_FATAL_FAILURE( + checkCommandBufferUpdateSupport(device, requiredCapabilities)); // Create a command-buffer with update enabled. ur_exp_command_buffer_desc_t desc{ @@ -193,10 +173,10 @@ struct urUpdatableCommandBufferExpExecutionTest if (updatable_cmd_buf_handle) { EXPECT_SUCCESS(urCommandBufferReleaseExp(updatable_cmd_buf_handle)); } - UUR_RETURN_ON_FATAL_FAILURE( - urKernelExecutionTest::TearDown()); + UUR_RETURN_ON_FATAL_FAILURE(urKernelExecutionTest::TearDown()); } + ur_platform_backend_t backend{}; ur_exp_command_buffer_handle_t updatable_cmd_buf_handle = nullptr; }; diff --git a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp index a12a7903a3..e694465fd2 100644 --- a/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_fill_kernel_update.cpp @@ -124,7 +124,7 @@ TEST_P(BufferFillCommandTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -177,7 +177,7 @@ TEST_P(BufferFillCommandTest, UpdateGlobalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -228,7 +228,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t output_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 1, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -257,7 +257,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t input_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -276,7 +276,7 @@ TEST_P(BufferFillCommandTest, SeparateUpdateCalls) { ur_exp_command_buffer_update_kernel_launch_desc_t global_size_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -321,7 +321,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ur_exp_command_buffer_update_kernel_launch_desc_t first_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -349,7 +349,7 @@ TEST_P(BufferFillCommandTest, OverrideUpdate) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -406,7 +406,7 @@ TEST_P(BufferFillCommandTest, OverrideArgList) { ur_exp_command_buffer_update_kernel_launch_desc_t second_update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 2, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp index d33ba3a563..19da365084 100644 --- a/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/buffer_saxpy_kernel_update.cpp @@ -184,7 +184,7 @@ TEST_P(BufferSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 2, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/invalid_update.cpp b/test/conformance/exp_command_buffer/update/invalid_update.cpp index fd9a46c2aa..c5947e039f 100644 --- a/test/conformance/exp_command_buffer/update/invalid_update.cpp +++ b/test/conformance/exp_command_buffer/update/invalid_update.cpp @@ -90,7 +90,7 @@ TEST_P(InvalidUpdateTest, NotFinalizedCommandBuffer) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -141,7 +141,7 @@ TEST_P(InvalidUpdateTest, NotUpdatableCommandBuffer) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 1, // numNewValueArgs @@ -178,7 +178,7 @@ TEST_P(InvalidUpdateTest, GlobalLocalSizeMistach) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -216,7 +216,7 @@ TEST_P(InvalidUpdateTest, ImplToUserDefinedLocalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -249,7 +249,7 @@ TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -267,3 +267,32 @@ TEST_P(InvalidUpdateTest, UserToImplDefinedLocalSize) { urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); } + +// If the kernel handle is not being updated, then it's invalid to change +// the number of dimensions. +TEST_P(InvalidUpdateTest, InvalidDimensions) { + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + finalized = true; + + size_t new_global_size = 64; + ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + kernel, // hNewKernel + 0, // numNewMemObjArgs + 0, // numNewPointerArgs + 0, // numNewValueArgs + n_dimensions + 1, // newWorkDim + nullptr, // pNewMemObjArgList + nullptr, // pNewPointerArgList + nullptr, // pNewValueArgList + nullptr, // pNewGlobalWorkOffset + &new_global_size, // pNewGlobalWorkSize + nullptr, // pNewLocalWorkSize + }; + + // Update command local size to NULL when created with non-NULL value + ur_result_t result = + urCommandBufferUpdateKernelLaunchExp(command_handle, &update_desc); + ASSERT_EQ(UR_RESULT_ERROR_INVALID_OPERATION, result); +} diff --git a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp index a533786917..9aa7b83817 100644 --- a/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp +++ b/test/conformance/exp_command_buffer/update/kernel_handle_update.cpp @@ -73,7 +73,7 @@ struct TestSaxpyKernel : public TestKernel { ASSERT_NO_FATAL_FAILURE(buildKernel()); const size_t AllocationSize = sizeof(uint32_t) * GlobalSize; - for (auto &SharedPtr : Memory) { + for (auto &SharedPtr : Allocations) { ASSERT_SUCCESS(urUSMSharedAlloc(Context, Device, nullptr, nullptr, AllocationSize, &SharedPtr)); ASSERT_NE(SharedPtr, nullptr); @@ -84,28 +84,80 @@ struct TestSaxpyKernel : public TestKernel { } // Index 0 is the output - ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 0, nullptr, Memory[0])); + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 0, nullptr, Allocations[0])); // Index 1 is A ASSERT_SUCCESS(urKernelSetArgValue(Kernel, 1, sizeof(A), nullptr, &A)); // Index 2 is X - ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 2, nullptr, Memory[1])); + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 2, nullptr, Allocations[1])); // Index 3 is Y - ASSERT_SUCCESS(urKernelSetArgPointer(Kernel, 3, nullptr, Memory[2])); + ASSERT_SUCCESS( + urKernelSetArgPointer(Kernel, 3, nullptr, Allocations[2])); + + UpdatePointerDesc[0] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &Allocations[0], // pArgValue + }; + + UpdatePointerDesc[1] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 2, // argIndex + nullptr, // pProperties + &Allocations[1], // pArgValue + }; + + UpdatePointerDesc[2] = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_POINTER_ARG_DESC, // stype + nullptr, // pNext + 3, // argIndex + nullptr, // pProperties + &Allocations[2], // pArgValue + }; + + UpdateValDesc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_VALUE_ARG_DESC, // stype + nullptr, // pNext + 1, // argIndex + sizeof(A), // argSize + nullptr, // pProperties + &A, // hArgValue + }; + + UpdateDesc = { + UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype + nullptr, // pNext + Kernel, // hNewKernel + 0, // numNewMemObjArgs + 3, // numNewPointerArgs + 1, // numNewValueArgs + NDimensions, // newWorkDim + nullptr, // pNewMemObjArgList + UpdatePointerDesc.data(), // pNewPointerArgList + &UpdateValDesc, // pNewValueArgList + &GlobalOffset, // pNewGlobalWorkOffset + &GlobalSize, // pNewGlobalWorkSize + &LocalSize, // pNewLocalWorkSize + }; } void destroyKernel() override { - for (auto &shared_ptr : Memory) { - if (shared_ptr) { - EXPECT_SUCCESS(urUSMFree(Context, shared_ptr)); + for (auto &Allocation : Allocations) { + if (Allocation) { + EXPECT_SUCCESS(urUSMFree(Context, Allocation)); } } ASSERT_NO_FATAL_FAILURE(TestKernel::destroyKernel()); } void validate() override { - auto *output = static_cast(Memory[0]); - auto *X = static_cast(Memory[1]); - auto *Y = static_cast(Memory[2]); + auto *output = static_cast(Allocations[0]); + auto *X = static_cast(Allocations[1]); + auto *Y = static_cast(Allocations[2]); for (size_t i = 0; i < GlobalSize; i++) { uint32_t result = A * X[i] + Y[i]; @@ -113,13 +165,18 @@ struct TestSaxpyKernel : public TestKernel { } } - const size_t LocalSize = 4; - const size_t GlobalSize = 32; - const size_t GlobalOffset = 0; - const size_t NDimensions = 1; - const uint32_t A = 42; + std::array + UpdatePointerDesc; + ur_exp_command_buffer_update_value_arg_desc_t UpdateValDesc; + ur_exp_command_buffer_update_kernel_launch_desc_t UpdateDesc; + + size_t LocalSize = 4; + size_t GlobalSize = 32; + size_t GlobalOffset = 0; + uint32_t NDimensions = 1; + uint32_t A = 42; - std::array Memory = {nullptr, nullptr, nullptr}; + std::array Allocations = {nullptr, nullptr, nullptr}; }; struct TestFill2DKernel : public TestKernel { @@ -210,6 +267,11 @@ struct urCommandBufferKernelHandleUpdateTest UUR_RETURN_ON_FATAL_FAILURE(urUpdatableCommandBufferExpTest::SetUp()); + UUR_RETURN_ON_FATAL_FAILURE( + uur::command_buffer::checkCommandBufferUpdateSupport( + device, + UR_DEVICE_COMMAND_BUFFER_UPDATE_CAPABILITY_FLAG_KERNEL_HANDLE)); + ur_device_usm_access_capability_flags_t shared_usm_flags; ASSERT_SUCCESS( uur::GetDeviceUSMSingleSharedSupport(device, shared_usm_flags)); @@ -311,6 +373,42 @@ TEST_P(urCommandBufferKernelHandleUpdateTest, UpdateAgain) { ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->validate()); } +/* Test that it is possible to change the kernel handle in a command and later restore it to the original handle */ +TEST_P(urCommandBufferKernelHandleUpdateTest, RestoreOriginalKernel) { + + std::vector KernelAlternatives = { + FillUSM2DKernel->Kernel}; + + uur::raii::CommandBufferCommand CommandHandle; + ASSERT_SUCCESS(urCommandBufferAppendKernelLaunchExp( + updatable_cmd_buf_handle, SaxpyKernel->Kernel, SaxpyKernel->NDimensions, + &(SaxpyKernel->GlobalOffset), &(SaxpyKernel->GlobalSize), + &(SaxpyKernel->LocalSize), KernelAlternatives.size(), + KernelAlternatives.data(), 0, nullptr, nullptr, CommandHandle.ptr())); + ASSERT_NE(CommandHandle, nullptr); + + ASSERT_SUCCESS(urCommandBufferFinalizeExp(updatable_cmd_buf_handle)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &FillUSM2DKernel->UpdateDesc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + + ASSERT_NO_FATAL_FAILURE(SaxpyKernel->validate()); + ASSERT_NO_FATAL_FAILURE(FillUSM2DKernel->validate()); + + // Updating A, so that the second launch of the saxpy kernel actually has a different output. + SaxpyKernel->A = 20; + ASSERT_SUCCESS(urCommandBufferUpdateKernelLaunchExp( + CommandHandle, &SaxpyKernel->UpdateDesc)); + ASSERT_SUCCESS(urCommandBufferEnqueueExp(updatable_cmd_buf_handle, queue, 0, + nullptr, nullptr)); + ASSERT_SUCCESS(urQueueFinish(queue)); + ASSERT_NO_FATAL_FAILURE(SaxpyKernel->validate()); +} + TEST_P(urCommandBufferKernelHandleUpdateTest, KernelAlternativeNotRegistered) { uur::raii::CommandBufferCommand CommandHandle; diff --git a/test/conformance/exp_command_buffer/update/ndrange_update.cpp b/test/conformance/exp_command_buffer/update/ndrange_update.cpp index 4c5ff6449a..dd3f17a90a 100644 --- a/test/conformance/exp_command_buffer/update/ndrange_update.cpp +++ b/test/conformance/exp_command_buffer/update/ndrange_update.cpp @@ -128,7 +128,7 @@ TEST_P(NDRangeUpdateTest, Update3D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -173,7 +173,7 @@ TEST_P(NDRangeUpdateTest, Update2D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -218,7 +218,7 @@ TEST_P(NDRangeUpdateTest, Update1D) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs @@ -252,7 +252,7 @@ TEST_P(NDRangeUpdateTest, Invalid) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 0, // numNewPointerArgs 0, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp index 31b14e9016..b437971e9a 100644 --- a/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_fill_kernel_update.cpp @@ -120,7 +120,7 @@ TEST_P(USMFillCommandTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs @@ -174,7 +174,7 @@ TEST_P(USMFillCommandTest, UpdateBeforeEnqueue) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs @@ -326,7 +326,7 @@ TEST_P(USMMultipleFillCommandTest, UpdateAllKernels) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 1, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp index 93fc683127..21f21afa11 100644 --- a/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp +++ b/test/conformance/exp_command_buffer/update/usm_saxpy_kernel_update.cpp @@ -148,7 +148,7 @@ TEST_P(USMSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs @@ -254,7 +254,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateParameters) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs @@ -320,7 +320,7 @@ TEST_P(USMMultiSaxpyKernelTest, UpdateWithoutBlocking) { ur_exp_command_buffer_update_kernel_launch_desc_t update_desc = { UR_STRUCTURE_TYPE_EXP_COMMAND_BUFFER_UPDATE_KERNEL_LAUNCH_DESC, // stype nullptr, // pNext - kernel, //hNewKernel + kernel, // hNewKernel 0, // numNewMemObjArgs 2, // numNewPointerArgs 1, // numNewValueArgs diff --git a/test/conformance/exp_enqueue_native/CMakeLists.txt b/test/conformance/exp_enqueue_native/CMakeLists.txt index 403d3caa3c..8638fa1349 100644 --- a/test/conformance/exp_enqueue_native/CMakeLists.txt +++ b/test/conformance/exp_enqueue_native/CMakeLists.txt @@ -9,8 +9,8 @@ if (UR_BUILD_ADAPTER_CUDA) enqueue_native_cuda.cpp ) target_include_directories(test-exp_enqueue_native PRIVATE - ${PROJECT_SOURCE_DIR}/source - ${PROJECT_SOURCE_DIR}/source/adapters/cuda + ${PROJECT_SOURCE_DIR}/source + ${PROJECT_SOURCE_DIR}/source/adapters/cuda ) target_link_libraries(test-exp_enqueue_native PRIVATE cudadrv) endif() diff --git a/tools/urinfo/urinfo.hpp b/tools/urinfo/urinfo.hpp index 22f4ec6413..59f4a8e5b2 100644 --- a/tools/urinfo/urinfo.hpp +++ b/tools/urinfo/urinfo.hpp @@ -334,8 +334,8 @@ inline void printDeviceInfos(ur_device_handle_t hDevice, printDeviceInfo(hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP); std::cout << prefix; - printDeviceInfo( - hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_SUPPORT_EXP); + printDeviceInfo( + hDevice, UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP); std::cout << prefix; printDeviceInfo(hDevice, UR_DEVICE_INFO_CLUSTER_LAUNCH_EXP); std::cout << prefix;