Skip to content

Commit

Permalink
[xla:cpu] Add FFI custom call thunk runtime support to PJRT CPU client.
Browse files Browse the repository at this point in the history
Also add a benchmark that uses PJRT CPU Client.

PiperOrigin-RevId: 647916282
  • Loading branch information
penpornk authored and tensorflower-gardener committed Jun 29, 2024
1 parent 2c51dd3 commit c17e6e6
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 5 deletions.
1 change: 0 additions & 1 deletion third_party/xla/xla/pjrt/cpu/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,6 @@ cc_library(
"//xla/pjrt:semaphore",
"//xla/pjrt:transpose",
"//xla/pjrt:utils",
"//xla/pjrt/distributed:key_value_store_interface",
"//xla/service:buffer_assignment",
"//xla/service:compiler",
"//xla/service:computation_placer_hdr",
Expand Down
23 changes: 19 additions & 4 deletions third_party/xla/xla/pjrt/cpu/cpu_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1588,10 +1588,18 @@ absl::StatusOr<PjRtLoadedExecutable::Result> TfrtCpuExecutable::ExecuteHelper(
cpu::Thunk::CollectiveExecuteParams collective_params,
cpu::Thunk::CollectiveExecuteParams::Create(&run_options));

// TODO(penporn): Consolidate with other thunk parameter set up calls.
TF_ASSIGN_OR_RETURN(
cpu::Thunk::CustomCallExecuteParams custom_call_execute_params,
cpu::Thunk::CustomCallExecuteParams::Create(&run_options));

cpu::Thunk::ExecuteParams execute_params = {
&cpu_executable->host_kernels(), &allocations,
&cpu_executable->host_kernels(),
&allocations,
cpu::runtime::GetXfeedManager(run_options.device_ordinal()),
run_options.intra_op_thread_pool(), &collective_params};
run_options.intra_op_thread_pool(),
&collective_params,
&custom_call_execute_params};

auto execute_event = cpu_executable->thunks().Execute(
execute_params, [&](cpu::ThunkExecutor::Task task) {
Expand Down Expand Up @@ -1714,11 +1722,18 @@ absl::StatusOr<PjRtLoadedExecutable::Result> TfrtCpuExecutable::ExecuteHelper(
collective_params =
cpu::Thunk::CollectiveExecuteParams::Create(&run_options);

absl::StatusOr<cpu::Thunk::CustomCallExecuteParams>
custom_call_params =
cpu::Thunk::CustomCallExecuteParams::Create(&run_options);

if (collective_params.ok()) {
cpu::Thunk::ExecuteParams execute_params = {
&cpu_executable->host_kernels(), &allocations,
&cpu_executable->host_kernels(),
&allocations,
cpu::runtime::GetXfeedManager(run_options.device_ordinal()),
run_options.intra_op_thread_pool(), &*collective_params};
run_options.intra_op_thread_pool(),
&*collective_params,
&*custom_call_params};

auto execute_event = cpu_executable->thunks().Execute(
execute_params, [&](cpu::ThunkExecutor::Task task) {
Expand Down
17 changes: 17 additions & 0 deletions third_party/xla/xla/service/cpu/benchmarks/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,23 @@ xla_cc_test(
],
)

# Benchmark target built from custom_call_benchmark_test.cc. It links the
# shared HLO benchmark runner and the XLA FFI libraries used by that source.
xla_cc_test(
name = "custom_call_benchmark_test",
srcs = ["custom_call_benchmark_test.cc"],
deps = [
":hlo_benchmark_runner",
"//xla/ffi",
"//xla/ffi:ffi_api",
"//xla/tests:hlo_test_base",
"//xla/tests:test_macros_header",
"@com_google_absl//absl/status",
"@com_google_absl//absl/types:span",
"@local_tsl//tsl/platform:logging",
"@local_tsl//tsl/platform:test_benchmark",
"@local_tsl//tsl/platform:test_main",
],
)

xla_cc_test(
name = "gather_benchmark_test",
srcs = ["gather_benchmark_test.cc"],
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/* Copyright 2024 The OpenXLA Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include <string_view>

#include "absl/status/status.h"
#include "absl/types/span.h"
#include "xla/ffi/ffi.h"
#include "xla/ffi/ffi_api.h"
#include "xla/service/cpu/benchmarks/hlo_benchmark_runner.h"
#include "tsl/platform/logging.h"
#include "tsl/platform/test_benchmark.h"

namespace xla::cpu {
namespace {

// Handler body for the "__xla_bm$$minimal" custom call. It receives a single
// F32 result buffer, never touches it, and always reports success.
// NOTE(review): presumably kept empty so that the benchmark measures the cost
// of issuing a custom call rather than any work done inside it -- confirm.
absl::Status Minimal(
    ffi::Result<ffi::BufferR0<PrimitiveType::F32>> /*unused*/) {
  return absl::OkStatus();
}

// Defines the FFI handler symbol `kMinimal`, which wraps `Minimal`. The
// binding declares no operands and no attributes, only one F32 result buffer,
// mirroring the single parameter of `Minimal`.
XLA_FFI_DEFINE_HANDLER(
kMinimal, Minimal,
ffi::Ffi::Bind()
.Ret<ffi::BufferR0<PrimitiveType::F32>>()); // Unused out buffer

// Registers `kMinimal` with the FFI API returned by ffi::GetXlaFfiApi() under
// the target name "__xla_bm$$minimal" for the "Host" platform. The name has to
// match the `custom_call_target` used in the benchmark's HLO module.
XLA_FFI_REGISTER_HANDLER(ffi::GetXlaFfiApi(), "__xla_bm$$minimal", "Host",
kMinimal);

// Benchmarks an HLO module whose entry computation is a single typed-FFI
// custom call to the "__xla_bm$$minimal" target, producing an `f32[]` result.
// Aborts via CHECK_OK if the benchmark runner returns a non-OK status.
void BM_CustomCall_Minimal(benchmark::State& state) {
  // HLO text handed to the runner; the custom call takes no operands.
  static constexpr char kHloText[] = R"(
HloModule module
ENTRY custom_call {
ROOT custom-call = f32[] custom-call(),
custom_call_target="__xla_bm$$minimal",
api_version=API_VERSION_TYPED_FFI
}
)";

  // No input literals and no textual replacements are supplied.
  CHECK_OK(RunHloBenchmark(state, kHloText, /*args=*/{}, /*replacements=*/{}));

  // Report one processed item per benchmark iteration.
  state.SetItemsProcessed(state.iterations());
}

// Registers the benchmark with Google Benchmark. MeasureProcessCPUTime() makes
// the reported CPU time cover the whole process instead of only the thread
// running the benchmark loop.
// NOTE(review): presumably chosen because execution may use other threads --
// confirm against the benchmark runner.
BENCHMARK(BM_CustomCall_Minimal)->MeasureProcessCPUTime();

} // namespace
} // namespace xla::cpu
3 changes: 3 additions & 0 deletions third_party/xla/xla/service/cpu/runtime/custom_call_thunk.cc
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,9 @@ tsl::AsyncValueRef<Thunk::ExecuteEvent> CustomCallThunk::CallTypedFFI(
"No registered implementation for FFI custom call to %s for Host",
target_name_);
}
if (params.custom_call_params == nullptr) {
return Internal("CustomCallExecuteParams cannot be nullptr.");
}

// Build the FFI call frame.
ffi::CallFrameBuilder builder(
Expand Down

0 comments on commit c17e6e6

Please sign in to comment.