diff --git a/lib/LowerPrivatePointerPHIPass.cpp b/lib/LowerPrivatePointerPHIPass.cpp
index 3ee672f99..1a743b3d6 100644
--- a/lib/LowerPrivatePointerPHIPass.cpp
+++ b/lib/LowerPrivatePointerPHIPass.cpp
@@ -76,6 +76,32 @@ Value *makeNewGEP(const DataLayout &DL, IRBuilder<> &B, Instruction *Src,
                                 clspv::AddressSpace::Private);
   return B.CreateGEP(SrcTy, Src, Idxs, "", true);
 }
+
+// Returns the type with the smallest bit size reachable from the members of
+// |Ty|. Struct members are searched recursively; any other member is
+// repeatedly replaced by BitcastUtils::GetEleType() of itself until that call
+// no longer changes it. |Ty| itself is returned when no candidate is strictly
+// smaller than it.
+Type *getSmallestTypeInStruct(const DataLayout &DL, StructType *Ty) {
+  Type *SmallestTy = Ty;
+  for (auto ElTy : Ty->elements()) {
+    // Reduce the member until a fixed point is reached.
+    Type *Prev = nullptr;
+    while (Prev != ElTy) {
+      Prev = ElTy;
+      if (auto ElSTy = dyn_cast<StructType>(ElTy)) {
+        ElTy = getSmallestTypeInStruct(DL, ElSTy);
+      } else {
+        ElTy = BitcastUtils::GetEleType(ElTy);
+      }
+    }
+    if (BitcastUtils::SizeInBits(DL, SmallestTy) >
+        BitcastUtils::SizeInBits(DL, ElTy)) {
+      SmallestTy = ElTy;
+    }
+  }
+  return SmallestTy;
+}
 } // namespace
 
 llvm::PreservedAnalyses
@@ -115,6 +141,34 @@ void clspv::LowerPrivatePointerPHIPass::runOnFunction(Function &F) {
   WeakInstructions ToBeErased;
   DenseMap PHIMap;
   for (auto alloca : worklist) {
+    auto allocaSTy = dyn_cast<StructType>(alloca->getAllocatedType());
+    if (allocaSTy && BitcastUtils::IsComplexStruct(DL, allocaSTy)) {
+      // Replace an alloca of a complex struct with a flat array of the
+      // smallest type found in that struct, sized to cover the same number of
+      // bits, and continue the lowering on the replacement.
+      Type *SmallestTypeInStruct = getSmallestTypeInStruct(DL, allocaSTy);
+      const auto smallest_size =
+          BitcastUtils::SizeInBits(DL, SmallestTypeInStruct);
+      // The allocation size is unknown when the alloca has a non-constant
+      // array size; a zero-sized element type would divide by zero below. In
+      // both cases the alloca is left as it is.
+      const auto alloca_size = alloca->getAllocationSizeInBits(DL);
+      if (alloca_size && smallest_size != 0) {
+        IRBuilder<> B(alloca);
+        auto nb_elem = alloca_size.value() / smallest_size;
+        if (nb_elem > 1) {
+          SmallestTypeInStruct = ArrayType::get(SmallestTypeInStruct, nb_elem);
+        }
+        auto new_alloca =
+            B.CreateAlloca(SmallestTypeInStruct, alloca->getAddressSpace());
+        // Keep the original alignment: users of the old alloca may rely on it.
+        new_alloca->setAlignment(alloca->getAlign());
+        alloca->replaceAllUsesWith(new_alloca);
+        ToBeErased.push_back(alloca);
+        alloca = new_alloca;
+      }
+    }
+
     SmallVector> nodes;
     for (auto use : alloca->users()) {
       nodes.push_back(std::make_tuple(use, alloca, 0, nullptr));
diff --git a/test/PrivatePointerPHI/complex_struct.ll b/test/PrivatePointerPHI/complex_struct.ll
new file mode 100644
index 000000000..9ec82d645
--- /dev/null
+++ b/test/PrivatePointerPHI/complex_struct.ll
@@ -0,0 +1,34 @@
+; RUN: clspv-opt %s -o %t.ll --passes=lower-private-pointer-phi
+; RUN: FileCheck %s < %t.ll
+
+; The private alloca of a struct made of two arrays is expected to become a
+; single flat [68 x i32] array (4 + 64 elements), with the pointer PHI turned
+; into an i32 index PHI feeding a GEP into that array.
+
+; CHECK: [[alloca:%[^ ]+]] = alloca [68 x i32], align 4
+; CHECK: loop:
+; CHECK-NEXT: [[phi:%[^ ]+]] = phi i32 [ 0, %entry ], [ [[add:%[^ ]+]], %loop ]
+; CHECK: [[gep:%[^ ]+]] = getelementptr inbounds [68 x i32], ptr [[alloca]], i32 0, i32 [[phi]]
+; CHECK: load i32, ptr [[gep]], align 4
+; CHECK: [[add]] = add i32 1, [[phi]]
+
+target datalayout = "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
+target triple = "spir-unknown-unknown"
+
+define void @test() {
+entry:
+  %alloca = alloca { [4 x i32], [64 x i32] }
+  br label %loop
+
+loop:
+  %phi = phi ptr [ %alloca, %entry ], [ %gep, %loop ]
+  %count = phi i32 [ 0, %entry ], [ %next, %loop ]
+  %load = load i32, ptr %phi
+  %gep = getelementptr { [64 x i32], [32 x i32] }, ptr %phi, i32 0, i32 0, i32 1
+  %next = add i32 %count, 1
+  %cmp = icmp eq i32 %next, 64
+  br i1 %cmp, label %exit, label %loop
+
+exit:
+  ret void
+}