Skip to content

Commit

Permalink
implement OpenMPSIMDTag, vectorization fallback mechanisms
Browse files Browse the repository at this point in the history
  • Loading branch information
kaushikcfd committed May 11, 2022
1 parent 590d262 commit b2e1eb1
Show file tree
Hide file tree
Showing 5 changed files with 109 additions and 16 deletions.
5 changes: 3 additions & 2 deletions loopy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
AddressSpace,
TemporaryVariable,
SubstitutionRule,
CallMangleInfo)
CallMangleInfo,
OpenMPSIMDTag, VectorizeTag)
from loopy.kernel.function_interface import (
CallableKernel, ScalarCallable)
from loopy.translation_unit import (
Expand Down Expand Up @@ -190,7 +191,7 @@
"AddressSpace",
"TemporaryVariable",
"SubstitutionRule",
"CallMangleInfo",
"CallMangleInfo", "OpenMPSIMDTag", "VectorizeTag",

"make_kernel", "UniqueName", "make_function",

Expand Down
47 changes: 37 additions & 10 deletions loopy/codegen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,21 +395,48 @@ def try_vectorized(self, what, func):
return self.unvectorize(func)

def unvectorize(self, func):
from loopy.kernel.data import VectorizeTag, UnrollTag, OpenMPSIMDTag
from loopy.codegen.result import (merge_codegen_results,
CodeGenerationResult)
vinf = self.vectorization_info
vec_tag, = self.kernel.inames[vinf.iname].tags_of_type(VectorizeTag)
result = []
novec_self = self.copy(vectorization_info=False)

for i in range(vinf.length):
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
generated = func(new_codegen_state)

if isinstance(generated, list):
result.extend(generated)
if isinstance(vec_tag.fallback_impl_tag, UnrollTag):
novec_self = self.copy(vectorization_info=False)

for i in range(vinf.length):
idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
generated = func(new_codegen_state)

if isinstance(generated, list):
result.extend(generated)
else:
result.append(generated)
elif isinstance(vec_tag.fallback_impl_tag, OpenMPSIMDTag):
novec_self = self.copy(vectorization_info=False)
astb = self.ast_builder
inner = func(novec_self)
if isinstance(inner, list):
inner = merge_codegen_results(novec_self, inner)
assert isinstance(inner, CodeGenerationResult)
if isinstance(inner.current_ast(novec_self),
astb.ast_comment_class):
# loop body is a comment => do not emit the loop
loop_cgr = inner
else:
result.append(generated)
result.append(astb.emit_pragma("omp simd"))
loop_cgr = inner.with_new_ast(
novec_self,
astb.emit_sequential_loop(
novec_self, vinf.iname, self.kernel.index_dtype,
0, vinf.length-1, inner.current_ast(novec_self)))
result.append(loop_cgr)
elif vec_tag.fallback_impl_tag is None:
raise RuntimeError("Could not vectorize all statements"
f" in name {vinf.iname}")

from loopy.codegen.result import merge_codegen_results
return merge_codegen_results(self, result)

@property
Expand Down
6 changes: 5 additions & 1 deletion loopy/codegen/control.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def generate_code_for_sched_index(codegen_state, sched_index):
elif isinstance(sched_item, EnterLoop):
from loopy.kernel.data import (UnrolledIlpTag, UnrollTag,
ForceSequentialTag, LoopedIlpTag, VectorizeTag,
OpenMPSIMDTag,
InameImplementationTag,
InOrderSequentialSequentialTag, filter_iname_tags_by_type)

Expand All @@ -117,12 +118,15 @@ def generate_code_for_sched_index(codegen_state, sched_index):
from loopy.codegen.loop import (
generate_unroll_loop,
generate_vectorize_loop,
generate_sequential_loop_dim_code)
generate_sequential_loop_dim_code,
generate_openmp_simd_loop)

if filter_iname_tags_by_type(tags, (UnrollTag, UnrolledIlpTag)):
func = generate_unroll_loop
elif filter_iname_tags_by_type(tags, VectorizeTag):
func = generate_vectorize_loop
elif filter_iname_tags_by_type(tags, OpenMPSIMDTag):
func = generate_openmp_simd_loop
elif not tags or filter_iname_tags_by_type(tags, (LoopedIlpTag,
ForceSequentialTag, InOrderSequentialSequentialTag)):
func = generate_sequential_loop_dim_code
Expand Down
34 changes: 32 additions & 2 deletions loopy/codegen/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,27 @@ def generate_unroll_loop(codegen_state, sched_index):

# {{{ vectorized loops

def raise_for_unvectorizable_loop(codegen_state, sched_index):
kernel = codegen_state.kernel
raise RuntimeError(f"Cannot vectorize {kernel.schedule[sched_index]}")


def generate_vectorize_loop(codegen_state, sched_index):
from loopy.kernel.data import VectorizeTag, UnrollTag, OpenMPSIMDTag
kernel = codegen_state.kernel

iname = kernel.linearization[sched_index].iname
vec_tag, = kernel.inames[iname].tags_of_type(VectorizeTag)
fallback_impl_tag = vec_tag.fallback_impl_tag

if isinstance(fallback_impl_tag, UnrollTag):
fallback_codegen_routine = generate_unroll_loop
elif isinstance(fallback_impl_tag, OpenMPSIMDTag):
fallback_codegen_routine = generate_openmp_simd_loop
elif fallback_impl_tag is None:
fallback_codegen_routine = raise_for_unvectorizable_loop
else:
raise NotImplementedError(fallback_impl_tag)

bounds = kernel.get_iname_bounds(iname, constants_only=True)

Expand All @@ -177,7 +194,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
warn(kernel, "vec_upper_not_const",
"upper bound for vectorized loop '%s' is not a constant, "
"cannot vectorize--unrolling instead")
return generate_unroll_loop(codegen_state, sched_index)
return fallback_codegen_routine(codegen_state, sched_index)

length = int(pw_aff_to_expr(length_aff))

Expand All @@ -192,7 +209,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
warn(kernel, "vec_lower_not_0",
"lower bound for vectorized loop '%s' is not zero, "
"cannot vectorize--unrolling instead")
return generate_unroll_loop(codegen_state, sched_index)
return fallback_codegen_routine(codegen_state, sched_index)

# {{{ 'implement' vectorization bounds

Expand Down Expand Up @@ -484,4 +501,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):

# }}}


# {{{ omp simd loop

def generate_openmp_simd_loop(codegen_state, sched_index):
return merge_codegen_results(
codegen_state,
[codegen_state.ast_builder.emit_pragma("omp simd"),
generate_sequential_loop_dim_code(codegen_state,
sched_index)])

# }}}


# vim: foldmethod=marker
33 changes: 32 additions & 1 deletion loopy/kernel/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,29 @@ def __str__(self):
# }}}


class _NotProvided:
pass


class VectorizeTag(UniqueInameTag, HardwareConcurrentTag):
"""
.. attribute:: fallback_impl_tag
If the loop contains instructions that are not vectorizable, the code
generator will implement the loop as directed by `fallback_impl_tag`.
If *None*, then a :class:`RuntimeError` would be raised while
generating code for an unvectorizable instruction within the loop.
"""
def __init__(self, fallback_impl_tag=_NotProvided):
if fallback_impl_tag is _NotProvided:
from warnings import warn
warn("`fallback_impl_tag` not provided to VectorizeTag."
" This will be an error from 2023. To keep the current"
" behavior, instantiate as `VectorizeTag(UnrollTag())`",
DeprecationWarning, stacklevel=2)
fallback_impl_tag = UnrollTag()
super().__init__(fallback_impl_tag=fallback_impl_tag)

def __str__(self):
return "vec"

Expand All @@ -223,6 +245,15 @@ def __str__(self):
return "ord"


class OpenMPSIMDTag(InameImplementationTag):
"""
Directs the code generator to emit code with ``#pragma omp simd``
directive atop the loop.
"""
def __str__(self):
return "omp.simd"


def parse_tag(tag):
from pytools.tag import Tag as TagBase
if tag is None:
Expand All @@ -241,7 +272,7 @@ def parse_tag(tag):
elif tag in ["unr"]:
return UnrollTag()
elif tag in ["vec"]:
return VectorizeTag()
return VectorizeTag(UnrollTag())
elif tag in ["ilp", "ilp.unr"]:
return UnrolledIlpTag()
elif tag == "ilp.seq":
Expand Down

0 comments on commit b2e1eb1

Please sign in to comment.