Skip to content

Commit

Permalink
implement vectorization fallback mechanisms
Browse files Browse the repository at this point in the history
  • Loading branch information
kaushikcfd committed Jul 14, 2022
1 parent fc63161 commit ed9a54a
Show file tree
Hide file tree
Showing 3 changed files with 66 additions and 13 deletions.
5 changes: 3 additions & 2 deletions loopy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@
AddressSpace,
TemporaryVariable,
SubstitutionRule,
CallMangleInfo)
CallMangleInfo,
VectorizeTag)
from loopy.kernel.function_interface import (
CallableKernel, ScalarCallable)
from loopy.translation_unit import (
Expand Down Expand Up @@ -190,7 +191,7 @@
"AddressSpace",
"TemporaryVariable",
"SubstitutionRule",
"CallMangleInfo",
"CallMangleInfo", "VectorizeTag",

"make_kernel", "UniqueName", "make_function",

Expand Down
42 changes: 33 additions & 9 deletions loopy/codegen/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,23 +281,47 @@ def try_vectorized(self, what, func):
return self.unvectorize(func)

def unvectorize(self, func):
    """Generate code for *func* without vectorizing over the vector iname.

    The strategy is chosen by ``self.target.vectorization_fallback``:

    * ``VectorizationFallback.UNROLL``: *func* is called once for every
      index in ``range(vinf.length)``, each time on a copy of this state
      with vectorization disabled and the vectorized iname fixed to that
      index. The individual results are collected.
    * ``VectorizationFallback.OMP_SIMD``: *func* is called once on a copy
      of this state with vectorization disabled. Unless the resulting AST
      is a comment, it is wrapped in a sequential loop over the vectorized
      iname running from ``0`` to ``vinf.length-1`` and preceded by an
      ``omp simd`` pragma.

    :arg func: a callable accepting a code generation state and returning
        either a single code generation result or a :class:`list` of them.
    :returns: the output of ``merge_codegen_results`` applied to
        everything that was generated.
    :raises NotImplementedError: if the target's fallback mode is neither
        of the two modes above.
    """
    from loopy.codegen.result import (merge_codegen_results,
            CodeGenerationResult)
    from loopy.target import VectorizationFallback

    vinf = self.vectorization_info
    assert vinf is not None

    result = []
    novec_self = self.copy(vectorization_info=None)
    fallback = self.target.vectorization_fallback

    if fallback == VectorizationFallback.UNROLL:
        for i in range(vinf.length):
            # Constant affine expression with value i: pins the vector
            # iname to a single index for this copy of the body.
            idx_aff = isl.Aff.zero_on_domain(vinf.space.params()) + i
            new_codegen_state = novec_self.fix(vinf.iname, idx_aff)
            generated = func(new_codegen_state)

            if isinstance(generated, list):
                result.extend(generated)
            else:
                result.append(generated)

    elif fallback == VectorizationFallback.OMP_SIMD:
        astb = self.ast_builder
        inner = func(novec_self)
        if isinstance(inner, list):
            inner = merge_codegen_results(novec_self, inner)
        assert isinstance(inner, CodeGenerationResult)

        inner_ast = inner.current_ast(novec_self)
        if isinstance(inner_ast, astb.ast_comment_class):
            # loop body is a comment => do not emit the loop
            loop_cgr = inner
        else:
            result.append(astb.emit_pragma("omp simd"))
            loop_cgr = inner.with_new_ast(
                    novec_self,
                    astb.emit_sequential_loop(
                        novec_self, vinf.iname, self.kernel.index_dtype,
                        0, vinf.length-1, inner_ast))
        result.append(loop_cgr)

    else:
        raise NotImplementedError(fallback)

    return merge_codegen_results(self, result)

@property
Expand Down
32 changes: 30 additions & 2 deletions loopy/codegen/loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,10 +160,25 @@ def generate_unroll_loop(codegen_state, sched_index):

# {{{ vectorized loops

def raise_for_unvectorizable_loop(codegen_state, sched_index):
    """Unconditionally signal that a loop cannot be vectorized.

    :arg codegen_state: the code generation state; only its ``kernel``
        attribute is read.
    :arg sched_index: index into ``kernel.linearization`` of the item to
        name in the error message.
    :raises RuntimeError: always.
    """
    kernel = codegen_state.kernel
    # Read 'linearization' (not 'schedule') so this agrees with
    # generate_vectorize_loop, which indexes the same attribute.
    raise RuntimeError(f"Cannot vectorize {kernel.linearization[sched_index]}")


def generate_vectorize_loop(codegen_state, sched_index):
from loopy.kernel.data import VectorizeTag
from loopy.target import VectorizationFallback
kernel = codegen_state.kernel

iname = kernel.linearization[sched_index].iname
vec_tag, = kernel.inames[iname].tags_of_type(VectorizeTag)

if kernel.target.vectorization_fallback == VectorizationFallback.UNROLL:
fallback_codegen_routine = generate_unroll_loop
elif kernel.target.vectorization_fallback == VectorizationFallback.OMP_SIMD:
fallback_codegen_routine = generate_openmp_simd_loop
else:
raise NotImplementedError(kernel.target.vectorization_fallback)

bounds = kernel.get_iname_bounds(iname, constants_only=True)

Expand All @@ -177,7 +192,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
warn(kernel, "vec_upper_not_const",
"upper bound for vectorized loop '%s' is not a constant, "
"cannot vectorize--unrolling instead")
return generate_unroll_loop(codegen_state, sched_index)
return fallback_codegen_routine(codegen_state, sched_index)

length = int(pw_aff_to_expr(length_aff))

Expand All @@ -192,7 +207,7 @@ def generate_vectorize_loop(codegen_state, sched_index):
warn(kernel, "vec_lower_not_0",
"lower bound for vectorized loop '%s' is not zero, "
"cannot vectorize--unrolling instead")
return generate_unroll_loop(codegen_state, sched_index)
return fallback_codegen_routine(codegen_state, sched_index)

# {{{ 'implement' vectorization bounds

Expand Down Expand Up @@ -484,4 +499,17 @@ def generate_sequential_loop_dim_code(codegen_state, sched_index):

# }}}


# {{{ omp simd loop

def generate_openmp_simd_loop(codegen_state, sched_index):
    """Emit an ``omp simd`` pragma followed by the sequential loop code.

    :arg codegen_state: the current code generation state.
    :arg sched_index: index of the loop's entry, passed through unchanged
        to ``generate_sequential_loop_dim_code``.
    :returns: the result of merging the pragma with the generated loop.
    """
    simd_pragma = codegen_state.ast_builder.emit_pragma("omp simd")
    loop_code = generate_sequential_loop_dim_code(codegen_state, sched_index)
    return merge_codegen_results(codegen_state, [simd_pragma, loop_code])

# }}}


# vim: foldmethod=marker

0 comments on commit ed9a54a

Please sign in to comment.