diff --git a/loopy/codegen/instruction.py b/loopy/codegen/instruction.py index 713254075..383988863 100644 --- a/loopy/codegen/instruction.py +++ b/loopy/codegen/instruction.py @@ -127,6 +127,13 @@ def generate_assignment_instruction_code(codegen_state, insn): raise UnvectorizableError( "LHS is scalar, RHS is vector, cannot assign") + if (lhs_is_vector + and (not rhs_is_vector) + and (not + kernel.target.broadcasts_scalar_assignment_to_vec_types)): + raise UnvectorizableError( + "LHS is vector, RHS is not vector, cannot assign") + is_vector = lhs_is_vector del lhs_is_vector diff --git a/loopy/kernel/array.py b/loopy/kernel/array.py index d41a18152..a1f714dde 100644 --- a/loopy/kernel/array.py +++ b/loopy/kernel/array.py @@ -1401,7 +1401,9 @@ def eval_expr_assert_integer_constant(i, expr): # We'll do absolutely nothing here, which will result # in the vector being returned. pass - + elif (vectorization_info is None + and target.allows_non_constant_indexing_for_vec_types): + vector_index = eval_expr(idx) else: idx = eval_expr_assert_integer_constant(i, idx) diff --git a/loopy/target/c/__init__.py b/loopy/target/c/__init__.py index 022308852..6f5a6175f 100644 --- a/loopy/target/c/__init__.py +++ b/loopy/target/c/__init__.py @@ -1012,6 +1012,11 @@ def ast_block_class(self): from cgen import Block return Block + @property + def ast_comment_class(self): + from cgen import Comment + return Comment + @property def ast_block_scope_class(self): return ScopingBlock @@ -1255,6 +1260,10 @@ def emit_comment(self, s): from cgen import Comment return Comment(s) + def emit_pragma(self, s): + from cgen import Pragma + return Pragma(s) + @property def can_implement_conditionals(self): return True @@ -1334,6 +1343,10 @@ def get_dtype_registry(self): fill_registry_with_c99_complex_types(result) return DTypeRegistryWrapper(result) + @property + def allows_non_constant_indexing_for_vec_types(self): + return False + class CASTBuilder(CFamilyASTBuilder): def preamble_generators(self): diff --git a/loopy/target/c/codegen/expression.py b/loopy/target/c/codegen/expression.py index 05c6b3b92..2e8ca1870 100644 --- a/loopy/target/c/codegen/expression.py +++ b/loopy/target/c/codegen/expression.py @@ -217,15 +217,24 @@ def make_var(name): ary = self.find_array(expr) from loopy.kernel.array import get_access_info - from pymbolic import evaluate + from pymbolic import evaluate, substitute from loopy.symbolic import simplify_using_aff index_tuple = tuple( simplify_using_aff(self.kernel, idx) for idx in expr.index_tuple) - access_info = get_access_info(self.kernel.target, ary, index_tuple, - lambda expr: evaluate(expr, self.codegen_state.var_subst_map), - self.codegen_state.vectorization_info) + if self.kernel.target.allows_non_constant_indexing_for_vec_types: + access_info = get_access_info(self.kernel.target, ary, index_tuple, + lambda expr: substitute( + expr, + self.codegen_state.var_subst_map), + self.codegen_state.vectorization_info) + else: + access_info = get_access_info(self.kernel.target, ary, index_tuple, + lambda expr: evaluate( + expr, + self.codegen_state.var_subst_map), + self.codegen_state.vectorization_info) from loopy.kernel.data import ( ImageArg, ArrayArg, TemporaryVariable, ConstantArg) diff --git a/loopy/target/cuda.py b/loopy/target/cuda.py index 08413b615..a39a9fb0f 100644 --- a/loopy/target/cuda.py +++ b/loopy/target/cuda.py @@ -265,6 +265,14 @@ def vector_dtype(self, base, count): # }}} + @property + def allows_non_constant_indexing_for_vec_types(self): + return False + + @property + def broadcasts_scalar_assignment_to_vec_types(self): + return True + # }}} diff --git a/loopy/target/ispc.py b/loopy/target/ispc.py index 3c6ff52b0..dd05c6148 100644 --- a/loopy/target/ispc.py +++ b/loopy/target/ispc.py @@ -199,6 +199,10 @@ def get_dtype_registry(self): # }}} + @property + def allows_non_constant_indexing_for_vec_types(self): + return False + class ISPCASTBuilder(CFamilyASTBuilder): def _arg_names_and_decls(self, codegen_state): diff --git a/loopy/target/opencl.py b/loopy/target/opencl.py index 1e558e99b..22a30e44b 100644 --- a/loopy/target/opencl.py +++ b/loopy/target/opencl.py @@ -594,6 +594,14 @@ def vector_dtype(self, base, count): vec.types[base.numpy_dtype, count], target=self) + @property + def allows_non_constant_indexing_for_vec_types(self): + return False + + @property + def broadcasts_scalar_assignment_to_vec_types(self): + return True + # }}} diff --git a/loopy/target/python.py b/loopy/target/python.py index 4cba32c1c..d6c24d242 100644 --- a/loopy/target/python.py +++ b/loopy/target/python.py @@ -218,6 +218,11 @@ def ast_block_scope_class(self): # and delete the implementation above. return Collection + @property + def ast_comment_class(self): + from genpy import Comment + return Comment + def emit_sequential_loop(self, codegen_state, iname, iname_dtype, lbound, ubound, inner): ecm = codegen_state.expression_to_code_mapper