Skip to content

Commit

Permalink
Enable stream_prefetch when using the local-prefetch scheduling variant
Browse files Browse the repository at this point in the history
  • Loading branch information
antiagainst committed Nov 16, 2024
1 parent f4d0548 commit 1210b34
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 8 deletions.
1 change: 1 addition & 0 deletions include/triton/Tools/Sys/GetEnv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ inline const std::set<std::string> CACHE_INVALIDATING_ENV_VARS = {
"TRITON_DISABLE_LINE_INFO",
"TRITON_DISABLE_RESHAPE_ENCODING_INFERENCE",
"TRITON_ENABLE_LLVM_DEBUG",
"TRITON_HIP_STREAM_PREFETCH",
"TRITON_LLVM_DEBUG_ONLY",
"USE_IR_LOC",
"NVPTX_ENABLE_DUMP",
Expand Down
16 changes: 8 additions & 8 deletions third_party/amd/backend/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,20 @@ def make_ttgir(mod, metadata, options):
amd.passes.ttgpuir.add_optimize_epilogue(pm)
passes.ttgpuir.add_optimize_dot_operands(pm, True)

stream_prefetch = os.getenv("TRITON_HIP_STREAM_PREFETCH", "0") == "1"
use_buffer_ops = os.environ.get("AMDGCN_USE_BUFFER_OPS", "0") == "1"

# The `local-prefetch` scheduling variant requires turning on both stream prefetch and buffer ops.
if options.instruction_sched_variant == "local-prefetch":
stream_prefetch = use_buffer_ops = True

if amd.has_matrix_core_feature(options.arch):
assert options.num_stages != 0, ("Triton AMD backend pipeliner has been updated. "
"We used to trigger software pipelining with "
"num_stages == 0. Now it will not happen anymore; "
"please update to use num_stages == 2 for "
"equivalent behavior in the past.")
prefetch = os.getenv("TRITON_HIP_STREAM_PREFETCH", "0") == "1"
amd.passes.ttgpuir.add_stream_pipelinev2(pm, options.num_stages, prefetch)
amd.passes.ttgpuir.add_stream_pipelinev2(pm, options.num_stages, stream_prefetch)
passes.common.add_canonicalizer(pm)
amd.passes.ttgpuir.insert_instruction_sched_hints(pm)
passes.ttgpuir.add_optimize_dot_operands(pm, True)
Expand All @@ -244,12 +250,6 @@ def make_ttgir(mod, metadata, options):
if amd.has_matrix_core_feature(options.arch):
amd.passes.ttgpuir.add_reorder_instructions(pm)

use_buffer_ops = os.environ.get("AMDGCN_USE_BUFFER_OPS", "0") == "1"

# The `local-prefetch` scheduling variant requires turning on buffer ops.
if options.instruction_sched_variant == "local-prefetch":
use_buffer_ops = True

if use_buffer_ops:
amd.passes.ttgpuir.add_canonicalize_pointers(pm)
passes.common.add_canonicalizer(pm)
Expand Down

0 comments on commit 1210b34

Please sign in to comment.