Event wait test #2040

Open · wants to merge 2 commits into base: main
107 changes: 78 additions & 29 deletions test/conformance/enqueue/urEnqueueKernelLaunchAndMemcpyInOrder.cpp
@@ -99,6 +99,30 @@ struct urMultiQueueLaunchMemcpyTest : uur::urMultiDeviceContextTestTemplate<1>,
UUR_RETURN_ON_FATAL_FAILURE(
uur::urMultiDeviceContextTestTemplate<1>::TearDown());
}

void runBackgroundCheck(std::vector<uur::raii::Event> &Events) {
std::vector<std::thread> threads;
for (size_t i = 0; i < devices.size(); i++) {
threads.emplace_back([&, i] {
ur_event_status_t status;
do {
ASSERT_SUCCESS(urEventGetInfo(
Events[i * 2].get(),
UR_EVENT_INFO_COMMAND_EXECUTION_STATUS,
sizeof(ur_event_status_t), &status, nullptr));
} while (status != UR_EVENT_STATUS_COMPLETE);
Contributor: maybe sleep for a few milliseconds every iteration?
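For illustration, the polling loop could back off between queries roughly like this (a sketch only, assuming <thread> and <chrono> are available in the test; not part of this diff):

ur_event_status_t status;
do {
    ASSERT_SUCCESS(urEventGetInfo(
        Events[i * 2].get(), UR_EVENT_INFO_COMMAND_EXECUTION_STATUS,
        sizeof(ur_event_status_t), &status, nullptr));
    // Back off briefly so the busy-wait does not hammer the driver.
    if (status != UR_EVENT_STATUS_COMPLETE) {
        std::this_thread::sleep_for(std::chrono::milliseconds(1));
    }
} while (status != UR_EVENT_STATUS_COMPLETE);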


auto ExpectedValue = InitialValue + i + 1;
for (uint32_t j = 0; j < ArraySize; ++j) {
ASSERT_EQ(reinterpret_cast<uint32_t *>(SharedMem[i])[j],
ExpectedValue);
}
});
}
for (auto &thread : threads) {
thread.join();
}
}
};

template <typename Param>
@@ -189,7 +213,7 @@ TEST_P(urEnqueueKernelLaunchIncrementTest, Success) {

auto useEvents = std::get<1>(GetParam()).value;

std::vector<uur::raii::Event> Events(numOps * 2);
std::vector<uur::raii::Event> Events(numOps * 2 - 1);
Contributor: ditto

for (size_t i = 0; i < numOps; i++) {
size_t waitNum = 0;
ur_event_handle_t *lastEvent = nullptr;
@@ -202,7 +226,7 @@
lastEvent = i > 0 ? Events[i * 2 - 1].ptr() : nullptr;

kernelEvent = Events[i * 2].ptr();
memcpyEvent = Events[i * 2 + 1].ptr();
memcpyEvent = i < numOps - 1 ? Events[i * 2 + 1].ptr() : nullptr;
}

// execute kernel that increments each element by 1
@@ -220,9 +244,7 @@ }
}

if (useEvents) {
// TODO: just wait on the last event, once urEventWait is implemented
// by V2 L0 adapter
urQueueFinish(queue);
urEventWait(1, Events.back().ptr());
} else {
urQueueFinish(queue);
}
@@ -237,12 +259,41 @@ }
}
}

struct VoidParam {};
template <typename T>
inline std::string
printParams(const testing::TestParamInfo<typename T::ParamType> &info) {
std::stringstream ss;

auto param1 = std::get<0>(info.param);
ss << (param1.value ? "" : "No") << param1.name;

auto param2 = std::get<1>(info.param);
ss << (param2.value ? "" : "No") << param2.name;

if constexpr (std::tuple_size_v<typename T::ParamType> > 2) {
auto param3 = std::get<2>(info.param);
ss << (param3.value ? "" : "No") << param3.name;
}

return ss.str();
}

using urEnqueueKernelLaunchIncrementMultiDeviceTest =
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<VoidParam>;
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;

INSTANTIATE_TEST_SUITE_P(
, urEnqueueKernelLaunchIncrementMultiDeviceTest,
testing::Combine(
testing::ValuesIn(uur::BoolTestParam::makeBoolParam("UseEventWait")),
testing::ValuesIn(
uur::BoolTestParam::makeBoolParam("RunBackgroundCheck"))),
printParams<urEnqueueKernelLaunchIncrementMultiDeviceTest>);

// Do a chain of kernelLaunch(dev0) -> memcpy(dev0, dev1) -> kernelLaunch(dev1) ... ops
TEST_F(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
auto waitOnEvent = std::get<0>(GetParam()).value;
auto runBackgroundCheck = std::get<1>(GetParam()).value;

size_t returned_size;
ASSERT_SUCCESS(urDeviceGetInfo(devices[0], UR_DEVICE_INFO_EXTENSIONS, 0,
nullptr, &returned_size));
@@ -265,14 +316,15 @@ TEST_F(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
constexpr size_t global_offset = 0;
constexpr size_t n_dimensions = 1;

std::vector<uur::raii::Event> Events(devices.size() * 2);
std::vector<uur::raii::Event> Events(devices.size() * 2 - 1);
Contributor: This is going to be fragile and difficult to modify. I get what you are trying to do, but I suggest we try avoiding too much pointer arithmetic. Can you just create two vectors?
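One possible shape, sketched with separate kernelEvents/memcpyEvents vectors (these names are made up here, not taken from this PR):

// One event per kernel launch and one per copy; no index arithmetic is
// needed to recover which event belongs to which operation.
std::vector<uur::raii::Event> kernelEvents(devices.size());
std::vector<uur::raii::Event> memcpyEvents(devices.size() - 1);

for (size_t i = 0; i < devices.size(); i++) {
    size_t waitNum = i > 0 ? 1 : 0;
    ur_event_handle_t *lastEvent = i > 0 ? memcpyEvents[i - 1].ptr() : nullptr;
    ur_event_handle_t *kernelEvent = kernelEvents[i].ptr();
    ur_event_handle_t *memcpyEvent =
        i + 1 < devices.size() ? memcpyEvents[i].ptr() : nullptr;
    // ... enqueue the kernel and the device-to-device copy as above ...
}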

for (size_t i = 0; i < devices.size(); i++) {
// Events are: kernelEvent0, memcpyEvent0, kernelEvent1, ...
size_t waitNum = i > 0 ? 1 : 0;
ur_event_handle_t *lastEvent =
i > 0 ? Events[i * 2 - 1].ptr() : nullptr;
ur_event_handle_t *kernelEvent = Events[i * 2].ptr();
ur_event_handle_t *memcpyEvent = Events[i * 2 + 1].ptr();
ur_event_handle_t *memcpyEvent =
i < devices.size() - 1 ? Events[i * 2 + 1].ptr() : nullptr;

// execute kernel that increments each element by 1
ASSERT_SUCCESS(urEnqueueKernelLaunch(
@@ -287,9 +339,18 @@ TEST_F(urEnqueueKernelLaunchIncrementMultiDeviceTest, Success) {
}
}

// synchronize on the last queue only, this has to ensure all the operations
// While the device(s) execute, loop over the events and if completed, verify the results
if (runBackgroundCheck) {
this->runBackgroundCheck(Events);
}

// synchronize on the last queue/event only, this has to ensure all the operations
// are completed
urQueueFinish(queues.back());
if (waitOnEvent) {
Contributor: since the 'background check' function waits for its threads to finish, this is guaranteed to have been completed already. Was that intentional? That'd be odd for an event wait test :)
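If the intent is for the wait to overlap the verification, one hypothetical restructuring would be to join the checker threads only after the synchronization (startBackgroundCheck, returning unjoined threads, is an invented helper, not part of this PR):

std::vector<std::thread> checkers;
if (runBackgroundCheck) {
    // Hypothetical helper: starts the per-device checker threads without joining them.
    checkers = this->startBackgroundCheck(Events);
}

if (waitOnEvent) {
    urEventWait(1, Events.back().ptr());
} else {
    urQueueFinish(queues.back());
}

// Join only after the synchronization, so the event wait is exercised while
// work may still be in flight.
for (auto &t : checkers) {
    t.join();
}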

urEventWait(1, Events.back().ptr());
} else {
urQueueFinish(queues.back());
}

size_t ExpectedValue = InitialValue;
for (size_t i = 0; i < devices.size(); i++) {
@@ -301,20 +362,6 @@ }
}
}

template <typename T>
inline std::string
printParams(const testing::TestParamInfo<typename T::ParamType> &info) {
std::stringstream ss;

auto param1 = std::get<0>(info.param);
auto param2 = std::get<1>(info.param);

ss << (param1.value ? "" : "No") << param1.name;
ss << (param2.value ? "" : "No") << param2.name;

return ss.str();
}

using urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest =
urEnqueueKernelLaunchIncrementMultiDeviceTestWithParam<
std::tuple<uur::BoolTestParam, uur::BoolTestParam>>;
@@ -374,9 +421,11 @@ TEST_P(urEnqueueKernelLaunchIncrementMultiDeviceMultiThreadTest, Success) {
ArraySize * sizeof(uint32_t), useEvents,
lastEvent, signalEvent));

urQueueFinish(queue);
// TODO: when useEvents is implemented for L0 v2 adapter
// wait on event instead
if (useEvents) {
urEventWait(1, Events.back().ptr());
} else {
urQueueFinish(queue);
}

size_t ExpectedValue = InitialValue;
ExpectedValue += numOpsPerThread;