From 65dd67a36141133510c9e5c2c71e30069524e0d7 Mon Sep 17 00:00:00 2001 From: skc7 Date: Fri, 8 Mar 2024 16:43:57 +0530 Subject: [PATCH] [AMDGPU] Enable asan LDS instrumentation --- .../Instrumentation/AddressSanitizer.cpp | 387 +++++++++++-- .../asan-dynamic-lds-indirect-access.ll | 529 ++++++++++++++++++ .../AMDGPU/asan-dynamic-lds-test.ll | 231 ++++++++ .../AMDGPU/asan-static-indirect-access.ll | 476 ++++++++++++++++ ...atic-lds-indirect-access-function-param.ll | 203 +++++++ .../AMDGPU/asan-static-lds-test.ll | 249 +++++++++ .../AMDGPU/asan_do_not_instrument_lds.ll | 27 - .../asan_instrument_generic_address_space.ll | 96 ++-- 8 files changed, 2071 insertions(+), 127 deletions(-) create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll create mode 100755 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll delete mode 100644 llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 9cc978dc6c16ef..a312f1d50fe9a9 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -176,6 +176,7 @@ const char kAsanAllocasUnpoison[] = "__asan_allocas_unpoison"; const char kAMDGPUAddressSharedName[] = "llvm.amdgcn.is.shared"; const char kAMDGPUAddressPrivateName[] = "llvm.amdgcn.is.private"; const char kAMDGPUBallotName[] = "llvm.amdgcn.ballot.i64"; +const char kAMDGPULDSKernelId[] = "llvm.amdgcn.lds.kernel.id"; const char kAMDGPUUnreachableName[] = 
/// Returns the minimum redzone size, in bytes, for shadow scale
/// \p MappingScale.
///
/// The redzone used for stack objects and globals is never smaller than
/// 32 bytes; scales 6 and 7 need 64 and 128 bytes respectively, which is
/// what 1 << MappingScale yields.
static uint64_t getRedzoneSizeForScale(int MappingScale) {
  const unsigned ScaleBytes = 1U << MappingScale;
  return ScaleBytes > 32U ? ScaleBytes : 32U;
}

/// Minimum redzone size for a global under shadow scale \p Scale. This is a
/// thin alias of getRedzoneSizeForScale().
static uint64_t getMinRedzoneSizeForGlobal(int Scale) {
  return getRedzoneSizeForScale(Scale);
}

/// Computes the size of the right redzone to append to a global of
/// \p SizeInBytes bytes under shadow scale \p Scale.
///
/// \returns a redzone size RZ such that (RZ + SizeInBytes) is a multiple of
/// the minimum redzone size for \p Scale.
static uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes) {
  const uint64_t MinRZ = getMinRedzoneSizeForGlobal(Scale);
  constexpr uint64_t kMaxRZ = 1 << 18;

  uint64_t RZ = 0;
  if (SizeInBytes > MinRZ / 2) {
    // Aim for roughly a quarter of the object size, expressed in whole MinRZ
    // units and kept inside [MinRZ, kMaxRZ].
    const uint64_t Quarter = (SizeInBytes / MinRZ / 4) * MinRZ;
    RZ = std::min(std::max(Quarter, MinRZ), kMaxRZ);

    // Pad so that object + redzone ends on a MinRZ boundary.
    const uint64_t Tail = SizeInBytes % MinRZ;
    if (Tail != 0)
      RZ += MinRZ - Tail;
  } else {
    // Small objects (e.g. int, char[1]): object and redzone together occupy
    // exactly one MinRZ-sized slot.
    RZ = MinRZ - SizeInBytes;
  }

  assert((RZ + SizeInBytes) % MinRZ == 0);
  return RZ;
}
+ RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ); + + // Round up to multiple of MinRZ. + if (SizeInBytes % MinRZ) + RZ += MinRZ - (SizeInBytes % MinRZ); + } + + assert((RZ + SizeInBytes) % MinRZ == 0); + + return RZ; +} + +static GlobalVariable *getKernelSwLDSGlobal(Module &M, Function &F) { + SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds."); + KernelLDSName += F.getName(); + return M.getNamedGlobal(KernelLDSName); +} + +static GlobalVariable *getKernelSwLDSMetadataGlobal(Module &M, Function &F) { + SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds."); + KernelLDSName += F.getName(); + KernelLDSName += ".md"; + return M.getNamedGlobal(KernelLDSName); +} + +static GlobalVariable *getKernelSwDynLDSGlobal(Module &M, Function &F) { + SmallString<64> KernelLDSName("llvm.amdgcn."); + KernelLDSName += F.getName(); + KernelLDSName += ".dynlds"; + return M.getNamedGlobal(KernelLDSName); +} + +static GlobalVariable *getKernelSwLDSBaseGlobal(Module &M) { + SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.base.table"); + return M.getNamedGlobal(KernelLDSName); +} + +static void updateLDSSizeFnAttr(Function *Func, uint32_t Offset, + bool UsesDynLDS) { + if (Offset != 0) { + std::string Buffer; + raw_string_ostream SS{Buffer}; + SS << format("%u", Offset); + if (UsesDynLDS) + SS << format(",%u", Offset); + Func->addFnAttr("amdgpu-lds-size", Buffer); + } +} + +static void recordLDSAbsoluteAddress(Module &M, GlobalVariable *GV, + uint32_t Address) { + LLVMContext &Ctx = M.getContext(); + auto *IntTy = M.getDataLayout().getIntPtrType(Ctx, 3); + auto *MinC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address)); + auto *MaxC = ConstantAsMetadata::get(ConstantInt::get(IntTy, Address + 1)); + GV->setMetadata(LLVMContext::MD_absolute_symbol, + MDNode::get(Ctx, {MinC, MaxC})); +} + +static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) { + Module *M = F.getParent(); + GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F); 
+ GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F); + if (!SwLDSMetadataGlobal || !SwLDSGlobal) + return; + + LLVMContext &Ctx = M->getContext(); + Type *Int32Ty = Type::getInt32Ty(Ctx); + + Constant *MdInit = SwLDSMetadataGlobal->getInitializer(); + Align MdAlign = Align(SwLDSMetadataGlobal->getAlign().valueOrOne()); + Align LDSAlign = Align(SwLDSGlobal->getAlign().valueOrOne()); + + StructType *MDStructType = + cast(SwLDSMetadataGlobal->getValueType()); + assert(MDStructType); + unsigned NumStructs = MDStructType->getNumElements(); + + std::vector Items; + std::vector Initializers; + uint32_t MallocSize = 0; + //{GV.start, Align(GV.size + Redzone.size), Redzone.start, Redzone.size} + StructType *LDSItemTy = StructType::create( + Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty, Int32Ty}, ""); + for (unsigned i = 0; i < NumStructs; i++) { + Items.push_back(LDSItemTy); + ConstantStruct *member = + dyn_cast(MdInit->getAggregateElement(i)); + Constant *NewInitItem; + if (member) { + ConstantInt *GlobalSize = + cast(member->getAggregateElement(1U)); + unsigned GlobalSizeValue = GlobalSize->getZExtValue(); + Constant *NewItemStartOffset = ConstantInt::get(Int32Ty, MallocSize); + if (GlobalSizeValue) { + Constant *NewItemGlobalSizeConst = + ConstantInt::get(Int32Ty, GlobalSizeValue); + const uint64_t RightRedzoneSize = + getRedzoneSizeForGlobal(Scale, GlobalSizeValue); + MallocSize += GlobalSizeValue; + Constant *NewItemRedzoneStartOffset = + ConstantInt::get(Int32Ty, MallocSize); + MallocSize += RightRedzoneSize; + Constant *NewItemRedzoneSize = + ConstantInt::get(Int32Ty, RightRedzoneSize); + + unsigned NewItemAlignGlobalPlusRedzoneSize = + alignTo(GlobalSizeValue + RightRedzoneSize, LDSAlign); + Constant *NewItemAlignGlobalPlusRedzoneSizeConst = + ConstantInt::get(Int32Ty, NewItemAlignGlobalPlusRedzoneSize); + NewInitItem = ConstantStruct::get( + LDSItemTy, {NewItemStartOffset, NewItemGlobalSizeConst, + NewItemAlignGlobalPlusRedzoneSizeConst, + 
NewItemRedzoneStartOffset, NewItemRedzoneSize}); + MallocSize = alignTo(MallocSize, LDSAlign); + } else { + Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize); + Constant *zero = ConstantInt::get(Int32Ty, 0); + NewInitItem = ConstantStruct::get( + LDSItemTy, {CurrMallocSize, zero, zero, zero, zero}); + } + } else { + Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize); + Constant *zero = ConstantInt::get(Int32Ty, 0); + NewInitItem = ConstantStruct::get( + LDSItemTy, {CurrMallocSize, zero, zero, zero, zero}); + } + Initializers.push_back(NewInitItem); + } + GlobalVariable *SwDynLDS = getKernelSwDynLDSGlobal(*M, F); + bool usesDynLDS = SwDynLDS ? true : false; + updateLDSSizeFnAttr(&F, MallocSize, usesDynLDS); + if (usesDynLDS) + recordLDSAbsoluteAddress(*M, SwDynLDS, MallocSize); + + StructType *MetadataStructType = StructType::create(Ctx, Items, ""); + + GlobalVariable *NewSwLDSMetadataGlobal = new GlobalVariable( + *M, MetadataStructType, false, GlobalValue::InternalLinkage, + PoisonValue::get(MetadataStructType), "", nullptr, + GlobalValue::NotThreadLocal, 1, false); + Constant *Data = ConstantStruct::get(MetadataStructType, Initializers); + NewSwLDSMetadataGlobal->setInitializer(Data); + NewSwLDSMetadataGlobal->setAlignment(MdAlign); + GlobalValue::SanitizerMetadata MD; + MD.NoAddress = true; + NewSwLDSMetadataGlobal->setSanitizerMetadata(MD); + + for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) { + if (GEPOperator *GEP = dyn_cast(U.getUser())) { + SmallVector Indices; + for (Use &Idx : GEP->indices()) { + Indices.push_back(cast(Idx)); + } + Constant *NewGEP = ConstantExpr::getGetElementPtr( + MetadataStructType, NewSwLDSMetadataGlobal, Indices, true); + GEP->replaceAllUsesWith(NewGEP); + } else if (LoadInst *Load = dyn_cast(U.getUser())) { + Constant *zero = ConstantInt::get(Int32Ty, 0); + SmallVector Indices{zero, zero, zero}; + Constant *NewGEP = ConstantExpr::getGetElementPtr( + MetadataStructType, 
NewSwLDSMetadataGlobal, Indices, true); + IRBuilder<> IRB(Load); + LoadInst *NewLoad = IRB.CreateLoad(Load->getType(), NewGEP); + Load->replaceAllUsesWith(NewLoad); + Load->eraseFromParent(); + } else if (StoreInst *Store = dyn_cast(U.getUser())) { + Constant *zero = ConstantInt::get(Int32Ty, 0); + SmallVector Indices{zero, zero, zero}; + Constant *NewGEP = ConstantExpr::getGetElementPtr( + MetadataStructType, NewSwLDSMetadataGlobal, Indices, true); + IRBuilder<> IRB(Store); + StoreInst *NewStore = IRB.CreateStore(Store->getValueOperand(), NewGEP); + Store->replaceAllUsesWith(NewStore); + Store->eraseFromParent(); + } else + report_fatal_error("AMDGPU Sw LDS Metadata User instruction not handled"); + } + SwLDSMetadataGlobal->replaceAllUsesWith(NewSwLDSMetadataGlobal); + NewSwLDSMetadataGlobal->takeName(SwLDSMetadataGlobal); + SwLDSMetadataGlobal->eraseFromParent(); + return; +} + +static void poisonRedzonesForSwLDS(Function &F) { + Module *M = F.getParent(); + GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F); + GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F); + + if (!SwLDSGlobal || !SwLDSMetadataGlobal) + return; + + LLVMContext &Ctx = M->getContext(); + Type *Int64Ty = Type::getInt64Ty(Ctx); + Type *VoidTy = Type::getVoidTy(Ctx); + FunctionCallee AsanPoisonRegion = M->getOrInsertFunction( + StringRef("__asan_poison_region"), + FunctionType::get(VoidTy, {Int64Ty, Int64Ty}, false)); + Constant *MdInit = SwLDSMetadataGlobal->getInitializer(); + + for (User *U : SwLDSGlobal->users()) { + StoreInst *SI = dyn_cast(U); + if (!SI) + continue; + + Type *PtrTy = + cast(SI->getValueOperand()->getType()->getScalarType()); + unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + if (AddrSpace != 1) + report_fatal_error("AMDGPU illegal store to SW LDS"); + + StructType *MDStructType = + cast(SwLDSMetadataGlobal->getValueType()); + assert(MDStructType); + unsigned NumStructs = MDStructType->getNumElements(); + Value *StoreMallocPointer = 
SI->getValueOperand(); + + for (unsigned i = 0; i < NumStructs; i++) { + ConstantStruct *member = + dyn_cast(MdInit->getAggregateElement(i)); + if (!member) + continue; + + ConstantInt *GlobalSize = + cast(member->getAggregateElement(1U)); + unsigned GlobalSizeValue = GlobalSize->getZExtValue(); + + if (!GlobalSizeValue) + continue; + IRBuilder<> IRB(SI); + IRB.SetInsertPoint(SI->getNextNode()); + + auto *GEPForOffset = IRB.CreateInBoundsGEP( + MDStructType, SwLDSMetadataGlobal, + {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(3)}); + + auto *GEPForSize = IRB.CreateInBoundsGEP( + MDStructType, SwLDSMetadataGlobal, + {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(4)}); + + Value *RedzoneOffset = IRB.CreateLoad(IRB.getInt32Ty(), GEPForOffset); + RedzoneOffset = IRB.CreateZExt(RedzoneOffset, IRB.getInt64Ty()); + Value *RedzoneAddrOffset = IRB.CreateInBoundsGEP( + IRB.getInt8Ty(), StoreMallocPointer, {RedzoneOffset}); + Value *RedzoneAddress = + IRB.CreatePtrToInt(RedzoneAddrOffset, IRB.getInt64Ty()); + Value *RedzoneSize = IRB.CreateLoad(IRB.getInt32Ty(), GEPForSize); + RedzoneSize = IRB.CreateZExt(RedzoneSize, IRB.getInt64Ty()); + IRB.CreateCall(AsanPoisonRegion, {RedzoneAddress, RedzoneSize}); + } + } + return; +} + +static void preProcessAMDGPULDSAccesses(Module &M, int Scale) { + for (Function &F : M) { + UpdateSwLDSMetadataWithRedzoneInfo(F, Scale); + poisonRedzonesForSwLDS(F); + } + return; +} + AddressSanitizerPass::AddressSanitizerPass( const AddressSanitizerOptions &Options, bool UseGlobalGC, bool UseOdrIndicator, AsanDtorKind DestructorKind, @@ -1249,6 +1525,13 @@ AddressSanitizerPass::AddressSanitizerPass( PreservedAnalyses AddressSanitizerPass::run(Module &M, ModuleAnalysisManager &MAM) { + Triple TargetTriple = Triple(M.getTargetTriple()); + + if (TargetTriple.isAMDGPU()) { + unsigned LongSize = M.getDataLayout().getPointerSizeInBits(); + ShadowMapping Mapping = getShadowMapping(TargetTriple, LongSize, false); + preProcessAMDGPULDSAccesses(M, 
Mapping.Scale); + } ModuleAddressSanitizer ModuleSanitizer( M, Options.InsertVersionCheck, Options.CompileKernel, Options.Recover, UseGlobalGC, UseOdrIndicator, DestructorKind, ConstructorKind); @@ -1304,7 +1587,15 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) { static bool isUnsupportedAMDGPUAddrspace(Value *Addr) { Type *PtrTy = cast(Addr->getType()->getScalarType()); unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); - if (AddrSpace == 3 || AddrSpace == 5) + if (AddrSpace == 5) + return true; + return false; +} + +static bool isGlobalInAMDGPULdsAddrspace(Value *Addr) { + Type *PtrTy = cast(Addr->getType()->getScalarType()); + unsigned int AddrSpace = PtrTy->getPointerAddressSpace(); + if (AddrSpace == 3) return true; return false; } @@ -1795,10 +2086,8 @@ Instruction *AddressSanitizer::instrumentAMDGPUAddress( return InsertBefore; // Instrument generic addresses in supported addressspaces. IRBuilder<> IRB(InsertBefore); - Value *IsShared = IRB.CreateCall(AMDGPUAddressShared, {Addr}); Value *IsPrivate = IRB.CreateCall(AMDGPUAddressPrivate, {Addr}); - Value *IsSharedOrPrivate = IRB.CreateOr(IsShared, IsPrivate); - Value *Cmp = IRB.CreateNot(IsSharedOrPrivate); + Value *Cmp = IRB.CreateNot(IsPrivate); Value *AddrSpaceZeroLanding = SplitBlockAndInsertIfThen(Cmp, InsertBefore, false); InsertBefore = cast(AddrSpaceZeroLanding); @@ -1857,7 +2146,38 @@ void AddressSanitizer::instrumentAddress(Instruction *OrigIns, return; } - Value *AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + Value *AddrLong; + if (TargetTriple.isAMDGCN()) { + Type *PtrTy = cast(Addr->getType()->getScalarType()); + if (PtrTy->getPointerAddressSpace() == 3) { + Module *M = IRB.GetInsertBlock()->getParent()->getParent(); + Function *Func = IRB.GetInsertBlock()->getParent(); + Value *SwLDS; + if (Func->getCallingConv() == CallingConv::AMDGPU_KERNEL) { + SwLDS = getKernelSwLDSGlobal(*M, *Func); + } else { + GlobalVariable *LDSBaseTable = getKernelSwLDSBaseGlobal(*M); + if 
(LDSBaseTable) { + auto *KernelId = IRB.CreateCall( + M->getOrInsertFunction(kAMDGPULDSKernelId, IRB.getInt32Ty()), {}); + Value *BaseGEP = + IRB.CreateInBoundsGEP(LDSBaseTable->getValueType(), LDSBaseTable, + {IRB.getInt32(0), KernelId}); + SwLDS = IRB.CreateLoad(IRB.getPtrTy(3), BaseGEP); + } else { + SwLDS = IRB.CreateIntToPtr(IRB.getInt32(0), IRB.getPtrTy(3)); + } + } + Value *PtrToInt = IRB.CreatePtrToInt(Addr, IRB.getInt32Ty()); + Value *LoadMallocPtr = IRB.CreateLoad(IRB.getPtrTy(1), SwLDS); + Value *GEP = + IRB.CreateInBoundsGEP(IRB.getInt8Ty(), LoadMallocPtr, {PtrToInt}); + AddrLong = IRB.CreatePointerCast(GEP, IntptrTy); + } else + AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + } else + AddrLong = IRB.CreatePointerCast(Addr, IntptrTy); + if (UseCalls) { if (Exp == 0) RTCI.createRuntimeCall( @@ -2021,7 +2341,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { if (!G->hasInitializer()) return false; // Globals in address space 1 and 4 are supported for AMDGPU. if (G->getAddressSpace() && - !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G))) + (!(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)) || + !(TargetTriple.isAMDGPU() && !isGlobalInAMDGPULdsAddrspace(G)))) return false; if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals. // Two problems with thread-locals: @@ -2029,7 +2350,9 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const { // - Need to poison all copies, not just the main thread's one. if (G->isThreadLocal()) return false; // For now, just ignore this Global if the alignment is large. - if (G->getAlign() && *G->getAlign() > getMinRedzoneSizeForGlobal()) return false; + if (G->getAlign() && + *G->getAlign() > getMinRedzoneSizeForGlobal(Mapping.Scale)) + return false; // For non-COFF targets, only instrument globals known to be defined by this // TU. 
@@ -2552,7 +2875,8 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M, Type *Ty = G->getValueType(); const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty); - const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes); + const uint64_t RightRedzoneSize = + getRedzoneSizeForGlobal(Mapping.Scale, SizeInBytes); Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize); StructType *NewTy = StructType::get(Ty, RightRedZoneTy); @@ -2568,7 +2892,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M, G->getThreadLocalMode(), G->getAddressSpace()); NewGlobal->copyAttributesFrom(G); NewGlobal->setComdat(G->getComdat()); - NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal())); + NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal(Mapping.Scale))); // Don't fold globals with redzones. ODR violation detector and redzone // poisoning implicitly creates a dependence on the global's address, so it // is no longer valid for it to be marked unnamed_addr. @@ -2688,31 +3012,6 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M, LLVM_DEBUG(dbgs() << M); } -uint64_t -ModuleAddressSanitizer::getRedzoneSizeForGlobal(uint64_t SizeInBytes) const { - constexpr uint64_t kMaxRZ = 1 << 18; - const uint64_t MinRZ = getMinRedzoneSizeForGlobal(); - - uint64_t RZ = 0; - if (SizeInBytes <= MinRZ / 2) { - // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is - // at least 32 bytes, optimize when SizeInBytes is less than or equal to - // half of MinRZ. - RZ = MinRZ - SizeInBytes; - } else { - // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes. - RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ); - - // Round up to multiple of MinRZ. 
- if (SizeInBytes % MinRZ) - RZ += MinRZ - (SizeInBytes % MinRZ); - } - - assert((RZ + SizeInBytes) % MinRZ == 0); - - return RZ; -} - int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const { int LongSize = M.getDataLayout().getPointerSizeInBits(); bool isAndroid = Triple(M.getTargetTriple()).isAndroid(); diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll new file mode 100755 index 00000000000000..f37fbf350ffde1 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-indirect-access.ll @@ -0,0 +1,529 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s + +%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item } +%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 } + +@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0 +@llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol !1 +@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 0, i32 0 } }, no_sanitize_address +@llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +@llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr 
addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address + +; Function Attrs: sanitize_address +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 8, !absolute_symbol [[META1:![0-9]+]] +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 }, %1 { i32 96, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1 +; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata" +; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0 +; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr 
@asan.module_ctor }] +;. +define void @use_variables() #0 { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(1) [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 2147450880 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 7 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp sge i8 [[TMP11]], [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP8]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[TMP15]], label %[[ASAN_REPORT:.*]], label %[[BB18:.*]], !prof [[PROF2:![0-9]+]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP13]], label %[[BB16:.*]], label %[[BB17:.*]] +; CHECK: [[BB16]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP3]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB17]] +; CHECK: [[BB17]]: +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i64 [[TMP21]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 
[[TMP22]], 2147450880 +; CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr +; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i8 [[TMP25]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP26]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[TMP28]], label %[[ASAN_REPORT1:.*]], label %[[BB31:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT1]]: +; CHECK-NEXT: br i1 [[TMP26]], label %[[BB29:.*]], label %[[BB30:.*]] +; CHECK: [[BB29]]: +; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP21]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB30]] +; CHECK: [[BB30]]: +; CHECK-NEXT: br label %[[BB31]] +; CHECK: [[BB31]]: +; CHECK-NEXT: [[TMP32:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP20]], align 8 +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr addrspace(1) [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = lshr i64 [[TMP33]], 3 +; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[TMP34]], 2147450880 +; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i8 [[TMP37]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP33]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[TMP39]], 3 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[TMP40]] to i8 +; CHECK-NEXT: [[TMP42:%.*]] = icmp sge i8 [[TMP41]], [[TMP37]] +; CHECK-NEXT: [[TMP43:%.*]] = and i1 [[TMP38]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i64 [[TMP44]], 0 +; CHECK-NEXT: br i1 [[TMP45]], label %[[ASAN_REPORT2:.*]], label %[[BB48:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT2]]: +; CHECK-NEXT: br i1 [[TMP43]], label %[[BB46:.*]], label %[[BB47:.*]] +; CHECK: [[BB46]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP33]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; 
CHECK-NEXT: br label %[[BB47]] +; CHECK: [[BB47]]: +; CHECK-NEXT: br label %[[BB48]] +; CHECK: [[BB48]]: +; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(1) [[TMP32]], align 4 +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP49]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = lshr i64 [[TMP52]], 3 +; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880 +; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr +; CHECK-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1 +; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0 +; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP57]]) +; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP58]], 0 +; CHECK-NEXT: br i1 [[TMP59]], label %[[ASAN_REPORT3:.*]], label %[[BB62:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT3]]: +; CHECK-NEXT: br i1 [[TMP57]], label %[[BB60:.*]], label %[[BB61:.*]] +; CHECK: [[BB60]]: +; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP52]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB61]] +; CHECK: [[BB61]]: +; CHECK-NEXT: br label %[[BB62]] +; CHECK: [[BB62]]: +; CHECK-NEXT: [[TMP63:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP51]], align 8 +; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64 +; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP64]], 3 +; CHECK-NEXT: [[TMP66:%.*]] = add i64 [[TMP65]], 2147450880 +; CHECK-NEXT: [[TMP67:%.*]] = inttoptr i64 [[TMP66]] to ptr +; CHECK-NEXT: [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1 +; CHECK-NEXT: [[TMP69:%.*]] = icmp ne i8 [[TMP68]], 0 +; CHECK-NEXT: [[TMP70:%.*]] = and i64 [[TMP64]], 7 +; CHECK-NEXT: [[TMP71:%.*]] = add i64 [[TMP70]], 3 +; CHECK-NEXT: [[TMP72:%.*]] = trunc i64 
[[TMP71]] to i8 +; CHECK-NEXT: [[TMP73:%.*]] = icmp sge i8 [[TMP72]], [[TMP68]] +; CHECK-NEXT: [[TMP74:%.*]] = and i1 [[TMP69]], [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP74]]) +; CHECK-NEXT: [[TMP76:%.*]] = icmp ne i64 [[TMP75]], 0 +; CHECK-NEXT: br i1 [[TMP76]], label %[[ASAN_REPORT4:.*]], label %[[BB79:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT4]]: +; CHECK-NEXT: br i1 [[TMP74]], label %[[BB77:.*]], label %[[BB78:.*]] +; CHECK: [[BB77]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP64]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB78]] +; CHECK: [[BB78]]: +; CHECK-NEXT: br label %[[BB79]] +; CHECK: [[BB79]]: +; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(1) [[TMP63]], align 4 +; CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP83:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP82]] +; CHECK-NEXT: [[TMP84:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP83]], align 4 +; CHECK-NEXT: [[TMP85:%.*]] = ptrtoint ptr addrspace(3) [[TMP50]] to i32 +; CHECK-NEXT: [[TMP86:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP84]], align 8 +; CHECK-NEXT: [[TMP87:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP86]], i32 [[TMP85]] +; CHECK-NEXT: [[TMP88:%.*]] = ptrtoint ptr addrspace(1) [[TMP87]] to i64 +; CHECK-NEXT: [[TMP89:%.*]] = lshr i64 [[TMP88]], 3 +; CHECK-NEXT: [[TMP90:%.*]] = add i64 [[TMP89]], 2147450880 +; CHECK-NEXT: [[TMP91:%.*]] = inttoptr i64 [[TMP90]] to ptr +; CHECK-NEXT: [[TMP92:%.*]] = load i8, ptr [[TMP91]], align 1 +; CHECK-NEXT: [[TMP93:%.*]] = icmp ne i8 [[TMP92]], 0 +; CHECK-NEXT: [[TMP94:%.*]] = and i64 [[TMP88]], 7 +; CHECK-NEXT: [[TMP95:%.*]] = trunc i64 [[TMP94]] to i8 +; CHECK-NEXT: [[TMP96:%.*]] = icmp sge i8 [[TMP95]], 
[[TMP92]] +; CHECK-NEXT: [[TMP97:%.*]] = and i1 [[TMP93]], [[TMP96]] +; CHECK-NEXT: [[TMP98:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP97]]) +; CHECK-NEXT: [[TMP99:%.*]] = icmp ne i64 [[TMP98]], 0 +; CHECK-NEXT: br i1 [[TMP99]], label %[[ASAN_REPORT5:.*]], label %[[BB102:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT5]]: +; CHECK-NEXT: br i1 [[TMP97]], label %[[BB100:.*]], label %[[BB101:.*]] +; CHECK: [[BB100]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP88]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB101]] +; CHECK: [[BB101]]: +; CHECK-NEXT: br label %[[BB102]] +; CHECK: [[BB102]]: +; CHECK-NEXT: store i8 3, ptr addrspace(3) [[TMP50]], align 4 +; CHECK-NEXT: [[TMP103:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP104:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP103]] +; CHECK-NEXT: [[TMP105:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP104]], align 4 +; CHECK-NEXT: [[TMP106:%.*]] = ptrtoint ptr addrspace(3) [[TMP81]] to i32 +; CHECK-NEXT: [[TMP107:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP105]], align 8 +; CHECK-NEXT: [[TMP108:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP107]], i32 [[TMP106]] +; CHECK-NEXT: [[TMP109:%.*]] = ptrtoint ptr addrspace(1) [[TMP108]] to i64 +; CHECK-NEXT: [[TMP110:%.*]] = lshr i64 [[TMP109]], 3 +; CHECK-NEXT: [[TMP111:%.*]] = add i64 [[TMP110]], 2147450880 +; CHECK-NEXT: [[TMP112:%.*]] = inttoptr i64 [[TMP111]] to ptr +; CHECK-NEXT: [[TMP113:%.*]] = load i8, ptr [[TMP112]], align 1 +; CHECK-NEXT: [[TMP114:%.*]] = icmp ne i8 [[TMP113]], 0 +; CHECK-NEXT: [[TMP115:%.*]] = and i64 [[TMP109]], 7 +; CHECK-NEXT: [[TMP116:%.*]] = trunc i64 [[TMP115]] to i8 +; CHECK-NEXT: [[TMP117:%.*]] = icmp sge i8 [[TMP116]], [[TMP113]] +; CHECK-NEXT: [[TMP118:%.*]] = and i1 [[TMP114]], [[TMP117]] +; CHECK-NEXT: [[TMP119:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 
[[TMP118]]) +; CHECK-NEXT: [[TMP120:%.*]] = icmp ne i64 [[TMP119]], 0 +; CHECK-NEXT: br i1 [[TMP120]], label %[[ASAN_REPORT6:.*]], label %[[BB123:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT6]]: +; CHECK-NEXT: br i1 [[TMP118]], label %[[BB121:.*]], label %[[BB122:.*]] +; CHECK: [[BB121]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP109]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB122]] +; CHECK: [[BB122]]: +; CHECK-NEXT: br label %[[BB123]] +; CHECK: [[BB123]]: +; CHECK-NEXT: store i8 3, ptr addrspace(3) [[TMP81]], align 8 +; CHECK-NEXT: ret void +; + %1 = call i32 @llvm.amdgcn.lds.kernel.id() + %2 = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 %1 + %3 = load ptr addrspace(3), ptr addrspace(1) %2, align 4 + %4 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 0 + %5 = load ptr addrspace(1), ptr addrspace(1) %4, align 8 + %6 = load i32, ptr addrspace(1) %5, align 4 + %7 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %6 + %8 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 1 + %9 = load ptr addrspace(1), ptr addrspace(1) %8, align 8 + %10 = load i32, ptr addrspace(1) %9, align 4 + %11 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %10 + store i8 3, ptr addrspace(3) %7, align 4 + store i8 3, ptr addrspace(3) %11, align 8 + ret void +} + +; Function Attrs: sanitize_address +define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !2 { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META3:![0-9]+]] { +; CHECK-NEXT: [[WID:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 
@llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB60:.*]] +; CHECK: [[MALLOC]]: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP11]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 3 +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8 +; CHECK-NEXT: [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP21]], label %[[BB24:.*]], label 
%[[BB25:.*]] +; CHECK: [[BB24]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB25]] +; CHECK: [[BB25]]: +; CHECK-NEXT: br label %[[BB26]] +; CHECK: [[BB26]]: +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 7 +; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 8 +; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 8 +; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 +; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]] +; CHECK-NEXT: store i32 [[TMP31]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP32]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 +; CHECK-NEXT: [[TMP33:%.*]] = add i32 [[TMP32]], 7 +; CHECK-NEXT: [[TMP34:%.*]] = udiv i32 [[TMP33]], 8 +; CHECK-NEXT: [[TMP35:%.*]] = mul i32 [[TMP34]], 8 +; CHECK-NEXT: store i32 [[TMP35]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP36:%.*]] = add i32 [[TMP31]], [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i64 +; CHECK-NEXT: [[TMP38:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP39:%.*]] = ptrtoint ptr [[TMP38]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP37]], i64 [[TMP39]]) +; CHECK-NEXT: [[TMP41:%.*]] = inttoptr i64 [[TMP40]] to ptr 
addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP41]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4 +; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP43]] +; CHECK-NEXT: [[TMP45:%.*]] = ptrtoint ptr addrspace(1) [[TMP44]] to i64 +; CHECK-NEXT: [[TMP46:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4 +; CHECK-NEXT: [[TMP47:%.*]] = zext i32 [[TMP46]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP45]], i64 [[TMP47]]) +; CHECK-NEXT: [[TMP48:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4 +; CHECK-NEXT: [[TMP49:%.*]] = zext i32 [[TMP48]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP49]] +; CHECK-NEXT: [[TMP51:%.*]] = ptrtoint ptr addrspace(1) [[TMP50]] to i64 +; CHECK-NEXT: [[TMP52:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4 +; CHECK-NEXT: [[TMP53:%.*]] = zext i32 [[TMP52]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP51]], i64 [[TMP53]]) +; CHECK-NEXT: [[TMP54:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4 +; CHECK-NEXT: [[TMP55:%.*]] = zext i32 [[TMP54]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP41]], i64 [[TMP55]] +; CHECK-NEXT: [[TMP57:%.*]] = ptrtoint ptr addrspace(1) [[TMP56]] to i64 +; CHECK-NEXT: [[TMP58:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr 
addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4 +; CHECK-NEXT: [[TMP59:%.*]] = zext i32 [[TMP58]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP57]], i64 [[TMP59]]) +; CHECK-NEXT: br label %[[BB60]] +; CHECK: [[BB60]]: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP61:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP64:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP63]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP65:%.*]] = ptrtoint ptr addrspace(3) [[TMP62]] to i32 +; CHECK-NEXT: [[TMP66:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP67:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP66]], i32 [[TMP65]] +; CHECK-NEXT: [[TMP68:%.*]] = ptrtoint ptr addrspace(1) [[TMP67]] to i64 +; CHECK-NEXT: [[TMP69:%.*]] = lshr i64 [[TMP68]], 3 +; CHECK-NEXT: [[TMP70:%.*]] = add i64 [[TMP69]], 2147450880 +; CHECK-NEXT: [[TMP71:%.*]] = inttoptr i64 [[TMP70]] to ptr +; CHECK-NEXT: [[TMP72:%.*]] = load i8, ptr [[TMP71]], align 1 +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i8 [[TMP72]], 0 +; CHECK-NEXT: [[TMP74:%.*]] = and i64 [[TMP68]], 7 +; CHECK-NEXT: [[TMP75:%.*]] = trunc i64 [[TMP74]] to i8 +; CHECK-NEXT: [[TMP76:%.*]] = icmp sge i8 [[TMP75]], [[TMP72]] +; CHECK-NEXT: [[TMP77:%.*]] = and i1 [[TMP73]], [[TMP76]] +; CHECK-NEXT: [[TMP78:%.*]] = 
call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP77]]) +; CHECK-NEXT: [[TMP79:%.*]] = icmp ne i64 [[TMP78]], 0 +; CHECK-NEXT: br i1 [[TMP79]], label %[[ASAN_REPORT1:.*]], label %[[BB82:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT1]]: +; CHECK-NEXT: br i1 [[TMP77]], label %[[BB80:.*]], label %[[BB81:.*]] +; CHECK: [[BB80]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP68]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB81]] +; CHECK: [[BB81]]: +; CHECK-NEXT: br label %[[BB82]] +; CHECK: [[BB82]]: +; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP62]], align 1 +; CHECK-NEXT: [[TMP83:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i64 +; CHECK-NEXT: [[TMP84:%.*]] = add i64 [[TMP83]], 3 +; CHECK-NEXT: [[TMP85:%.*]] = inttoptr i64 [[TMP84]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP86:%.*]] = ptrtoint ptr addrspace(3) [[TMP64]] to i32 +; CHECK-NEXT: [[TMP87:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP88:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP87]], i32 [[TMP86]] +; CHECK-NEXT: [[TMP89:%.*]] = ptrtoint ptr addrspace(1) [[TMP88]] to i64 +; CHECK-NEXT: [[TMP90:%.*]] = lshr i64 [[TMP89]], 3 +; CHECK-NEXT: [[TMP91:%.*]] = add i64 [[TMP90]], 2147450880 +; CHECK-NEXT: [[TMP92:%.*]] = inttoptr i64 [[TMP91]] to ptr +; CHECK-NEXT: [[TMP93:%.*]] = load i8, ptr [[TMP92]], align 1 +; CHECK-NEXT: [[TMP94:%.*]] = icmp ne i8 [[TMP93]], 0 +; CHECK-NEXT: [[TMP95:%.*]] = and i64 [[TMP89]], 7 +; CHECK-NEXT: [[TMP96:%.*]] = trunc i64 [[TMP95]] to i8 +; CHECK-NEXT: [[TMP97:%.*]] = icmp sge i8 [[TMP96]], [[TMP93]] +; CHECK-NEXT: [[TMP98:%.*]] = and i1 [[TMP94]], [[TMP97]] +; CHECK-NEXT: [[TMP99:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP98]]) +; CHECK-NEXT: [[TMP100:%.*]] = icmp ne i64 [[TMP99]], 0 +; CHECK-NEXT: br i1 [[TMP100]], label %[[ASAN_REPORT2:.*]], label %[[BB103:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT2]]: +; CHECK-NEXT: br i1 [[TMP98]], label 
%[[BB101:.*]], label %[[BB102:.*]] +; CHECK: [[BB101]]: +; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP89]], i64 4) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB102]] +; CHECK: [[BB102]]: +; CHECK-NEXT: br label %[[BB103]] +; CHECK: [[BB103]]: +; CHECK-NEXT: [[TMP104:%.*]] = ptrtoint ptr addrspace(3) [[TMP85]] to i32 +; CHECK-NEXT: [[TMP105:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP105]], i32 [[TMP104]] +; CHECK-NEXT: [[TMP107:%.*]] = ptrtoint ptr addrspace(1) [[TMP106]] to i64 +; CHECK-NEXT: [[TMP108:%.*]] = lshr i64 [[TMP107]], 3 +; CHECK-NEXT: [[TMP109:%.*]] = add i64 [[TMP108]], 2147450880 +; CHECK-NEXT: [[TMP110:%.*]] = inttoptr i64 [[TMP109]] to ptr +; CHECK-NEXT: [[TMP111:%.*]] = load i8, ptr [[TMP110]], align 1 +; CHECK-NEXT: [[TMP112:%.*]] = icmp ne i8 [[TMP111]], 0 +; CHECK-NEXT: [[TMP113:%.*]] = and i64 [[TMP107]], 7 +; CHECK-NEXT: [[TMP114:%.*]] = trunc i64 [[TMP113]] to i8 +; CHECK-NEXT: [[TMP115:%.*]] = icmp sge i8 [[TMP114]], [[TMP111]] +; CHECK-NEXT: [[TMP116:%.*]] = and i1 [[TMP112]], [[TMP115]] +; CHECK-NEXT: [[TMP117:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP116]]) +; CHECK-NEXT: [[TMP118:%.*]] = icmp ne i64 [[TMP117]], 0 +; CHECK-NEXT: br i1 [[TMP118]], label %[[ASAN_REPORT3:.*]], label %[[BB121:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT3]]: +; CHECK-NEXT: br i1 [[TMP116]], label %[[BB119:.*]], label %[[BB120:.*]] +; CHECK: [[BB119]]: +; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP107]], i64 4) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB120]] +; CHECK: [[BB120]]: +; CHECK-NEXT: br label %[[BB121]] +; CHECK: [[BB121]]: +; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP64]], align 2 +; CHECK-NEXT: br label %[[CONDFREE:.*]] +; CHECK: [[CONDFREE]]: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; 
CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]] +; CHECK: [[FREE]]: +; CHECK-NEXT: [[TMP122:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP123:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP124:%.*]] = ptrtoint ptr [[TMP123]] to i64 +; CHECK-NEXT: [[TMP125:%.*]] = ptrtoint ptr addrspace(1) [[TMP122]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP125]], i64 [[TMP124]]) +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; +WId: + %0 = call i32 @llvm.amdgcn.workitem.id.x() + %1 = call i32 @llvm.amdgcn.workitem.id.y() + %2 = call i32 @llvm.amdgcn.workitem.id.z() + %3 = or i32 %0, %1 + %4 = or i32 %3, %2 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %Malloc, label %26 + +Malloc: ; preds = %WId + %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 + %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 + %8 = add i32 %6, %7 + %9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15 + store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), align 4 + %11 = load i32, ptr addrspace(4) %10, align 4 + store i32 %11, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 1), align 4 + %12 = add i32 %11, 7 + %13 = udiv i32 %12, 8 + %14 = mul i32 %13, 8 + store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 2), align 4 + %15 = add i32 %8, %14 + store i32 %15, ptr addrspace(1) getelementptr 
inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 + %16 = load i32, ptr addrspace(4) %10, align 4 + store i32 %16, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 1), align 4 + %17 = add i32 %16, 7 + %18 = udiv i32 %17, 8 + %19 = mul i32 %18, 8 + store i32 %19, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 + %20 = add i32 %15, %19 + %21 = zext i32 %20 to i64 + %22 = call ptr @llvm.returnaddress(i32 0) + %23 = ptrtoint ptr %22 to i64 + %24 = call i64 @__asan_malloc_impl(i64 %21, i64 %23) + %25 = inttoptr i64 %24 to ptr addrspace(1) + store ptr addrspace(1) %25, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + br label %26 + +26: ; preds = %Malloc, %WId + %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ] + call void @llvm.amdgcn.s.barrier() + %27 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 + %28 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %27 + %29 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 + %30 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %29 + call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] + call void @use_variables() + store i8 7, ptr addrspace(3) %28, align 1 + store i32 8, ptr addrspace(3) %30, align 2 + br label %CondFree + +CondFree: ; preds = %26 + call void @llvm.amdgcn.s.barrier() + br i1 %xyzCond, label %Free, label %End + +Free: ; preds = %CondFree + %31 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + %32 = call ptr @llvm.returnaddress(i32 0) + %33 
= ptrtoint ptr %32 to i64 + %34 = ptrtoint ptr addrspace(1) %31 to i64 + call void @__asan_free_impl(i64 %34, i64 %33) + br label %End + +End: ; preds = %Free, %CondFree + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare void @llvm.donothing() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.x() #3 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.y() #3 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.z() #3 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.returnaddress(i32 immarg) #2 + +declare i64 @__asan_malloc_impl(i64, i64) + +; Function Attrs: convergent nocallback nofree nounwind willreturn +declare void @llvm.amdgcn.s.barrier() #4 + +declare void @__asan_free_impl(i64, i64) + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.lds.kernel.id() #3 + +attributes #0 = { sanitize_address } +attributes #1 = { sanitize_address "amdgpu-lds-size"="24,24" } +attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #3 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #4 = { convergent nocallback nofree nounwind willreturn } + +!0 = !{i32 0, i32 1} +!1 = !{i32 24, i32 25} +!2 = !{i32 0} +;. 
+; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="96,96" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR8]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 96, i32 97} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META3]] = !{i32 0} +;. diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll new file mode 100755 index 00000000000000..1be50f48aa6f63 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-dynamic-lds-test.ll @@ -0,0 +1,231 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s + +%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item } +%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 } + +@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol !0 +@llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol !1 +@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, 
%llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 0, i32 0 } }, no_sanitize_address + +; Function Attrs: sanitize_address +;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol [[META1:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 0, i32 0, i32 0, i32 0 } }, no_sanitize_address, align 1 +; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata" +; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0 +; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] +;. 
+define amdgpu_kernel void @k0() #0 { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[WID:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB43:.*]] +; CHECK: [[MALLOC]]: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() +; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) [[TMP9]], i64 15 +; CHECK-NEXT: store i32 [[TMP8]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr addrspace(4) [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = lshr i64 [[TMP11]], 3 +; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP12]], 2147450880 +; CHECK-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP13]] to ptr +; CHECK-NEXT: [[TMP15:%.*]] = load i8, ptr [[TMP14]], align 1 +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i8 [[TMP15]], 0 +; CHECK-NEXT: [[TMP17:%.*]] = and i64 [[TMP11]], 7 +; CHECK-NEXT: [[TMP18:%.*]] = add i64 [[TMP17]], 3 +; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[TMP18]] to i8 +; CHECK-NEXT: [[TMP20:%.*]] = icmp sge i8 [[TMP19]], [[TMP15]] +; CHECK-NEXT: [[TMP21:%.*]] = and i1 [[TMP16]], [[TMP20]] +; CHECK-NEXT: [[TMP22:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 
[[TMP21]]) +; CHECK-NEXT: [[TMP23:%.*]] = icmp ne i64 [[TMP22]], 0 +; CHECK-NEXT: br i1 [[TMP23]], label %[[ASAN_REPORT:.*]], label %[[BB26:.*]], !prof [[PROF2:![0-9]+]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP21]], label %[[BB24:.*]], label %[[BB25:.*]] +; CHECK: [[BB24]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP11]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB25]] +; CHECK: [[BB25]]: +; CHECK-NEXT: br label %[[BB26]] +; CHECK: [[BB26]]: +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(4) [[TMP10]], align 4 +; CHECK-NEXT: store i32 [[TMP27]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], 0 +; CHECK-NEXT: [[TMP29:%.*]] = udiv i32 [[TMP28]], 1 +; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 1 +; CHECK-NEXT: store i32 [[TMP30]], ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP31:%.*]] = add i32 [[TMP8]], [[TMP30]] +; CHECK-NEXT: [[TMP32:%.*]] = zext i32 [[TMP31]] to i64 +; CHECK-NEXT: [[TMP33:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP34:%.*]] = ptrtoint ptr [[TMP33]] to i64 +; CHECK-NEXT: [[TMP35:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP32]], i64 [[TMP34]]) +; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP36]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP37:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4 +; CHECK-NEXT: [[TMP38:%.*]] = zext i32 [[TMP37]] to i64 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP36]], i64 [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64 +; 
CHECK-NEXT: [[TMP41:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4 +; CHECK-NEXT: [[TMP42:%.*]] = zext i32 [[TMP41]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP40]], i64 [[TMP42]]) +; CHECK-NEXT: br label %[[BB43]] +; CHECK: [[BB43]]: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[BB26]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP44:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP44]] +; CHECK-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] +; CHECK-NEXT: [[TMP46:%.*]] = ptrtoint ptr addrspace(3) [[TMP45]] to i32 +; CHECK-NEXT: [[TMP47:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP47]], i32 [[TMP46]] +; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(1) [[TMP48]] to i64 +; CHECK-NEXT: [[TMP50:%.*]] = lshr i64 [[TMP49]], 3 +; CHECK-NEXT: [[TMP51:%.*]] = add i64 [[TMP50]], 2147450880 +; CHECK-NEXT: [[TMP52:%.*]] = inttoptr i64 [[TMP51]] to ptr +; CHECK-NEXT: [[TMP53:%.*]] = load i8, ptr [[TMP52]], align 1 +; CHECK-NEXT: [[TMP54:%.*]] = icmp ne i8 [[TMP53]], 0 +; CHECK-NEXT: [[TMP55:%.*]] = and i64 [[TMP49]], 7 +; CHECK-NEXT: [[TMP56:%.*]] = trunc i64 [[TMP55]] to i8 +; CHECK-NEXT: [[TMP57:%.*]] = icmp sge i8 [[TMP56]], [[TMP53]] +; CHECK-NEXT: [[TMP58:%.*]] = and i1 [[TMP54]], [[TMP57]] +; CHECK-NEXT: [[TMP59:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP58]]) +; CHECK-NEXT: [[TMP60:%.*]] = icmp ne i64 [[TMP59]], 0 +; CHECK-NEXT: br i1 [[TMP60]], label %[[ASAN_REPORT1:.*]], label %[[BB63:.*]], !prof [[PROF2]] +; CHECK: [[ASAN_REPORT1]]: +; 
CHECK-NEXT: br i1 [[TMP58]], label %[[BB61:.*]], label %[[BB62:.*]] +; CHECK: [[BB61]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP49]]) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB62]] +; CHECK: [[BB62]]: +; CHECK-NEXT: br label %[[BB63]] +; CHECK: [[BB63]]: +; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP45]], align 4 +; CHECK-NEXT: br label %[[CONDFREE:.*]] +; CHECK: [[CONDFREE]]: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]] +; CHECK: [[FREE]]: +; CHECK-NEXT: [[TMP64:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP65:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP66:%.*]] = ptrtoint ptr [[TMP65]] to i64 +; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(1) [[TMP64]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP67]], i64 [[TMP66]]) +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; +WId: + %0 = call i32 @llvm.amdgcn.workitem.id.x() + %1 = call i32 @llvm.amdgcn.workitem.id.y() + %2 = call i32 @llvm.amdgcn.workitem.id.z() + %3 = or i32 %0, %1 + %4 = or i32 %3, %2 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %Malloc, label %21 + +Malloc: ; preds = %WId + %6 = load i32, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, align 4 + %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 2), align 4 + %8 = add i32 %6, %7 + %9 = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() + %10 = getelementptr inbounds ptr addrspace(4), ptr addrspace(4) %9, i64 15 + store i32 %8, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 + %11 = load i32, ptr addrspace(4) %10, align 4 + store i32 %11, ptr addrspace(1) getelementptr inbounds 
(%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 1), align 4 + %12 = add i32 %11, 0 + %13 = udiv i32 %12, 1 + %14 = mul i32 %13, 1 + store i32 %14, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 2), align 4 + %15 = add i32 %8, %14 + %16 = zext i32 %15 to i64 + %17 = call ptr @llvm.returnaddress(i32 0) + %18 = ptrtoint ptr %17 to i64 + %19 = call i64 @__asan_malloc_impl(i64 %16, i64 %18) + %20 = inttoptr i64 %19 to ptr addrspace(1) + store ptr addrspace(1) %20, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + br label %21 + +21: ; preds = %Malloc, %WId + %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ] + call void @llvm.amdgcn.s.barrier() + %22 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 + %23 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %22 + call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.k0.dynlds) ] + store i8 7, ptr addrspace(3) %23, align 4 + br label %CondFree + +CondFree: ; preds = %21 + call void @llvm.amdgcn.s.barrier() + br i1 %xyzCond, label %Free, label %End + +Free: ; preds = %CondFree + %24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + %25 = call ptr @llvm.returnaddress(i32 0) + %26 = ptrtoint ptr %25 to i64 + %27 = ptrtoint ptr addrspace(1) %24 to i64 + call void @__asan_free_impl(i64 %27, i64 %26) + br label %End + +End: ; preds = %Free, %CondFree + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare void @llvm.donothing() #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.x() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 
@llvm.amdgcn.workitem.id.y() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.z() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #2 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.returnaddress(i32 immarg) #1 + +declare i64 @__asan_malloc_impl(i64, i64) + +; Function Attrs: convergent nocallback nofree nounwind willreturn +declare void @llvm.amdgcn.s.barrier() #3 + +declare void @__asan_free_impl(i64, i64) + +attributes #0 = { sanitize_address "amdgpu-lds-size"="8,8" } +attributes #1 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { convergent nocallback nofree nounwind willreturn } + +!0 = !{i32 0, i32 1} +!1 = !{i32 8, i32 9} +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="32,32" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR7]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[META1]] = !{i32 32, i32 33} +; CHECK: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +;. 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll new file mode 100755 index 00000000000000..23f27aa797e73e --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-indirect-access.ll @@ -0,0 +1,476 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s + +%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item } +%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 } + +@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0 +@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 24, i32 3, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 32, i32 4, i32 8 } }, no_sanitize_address +@llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +@llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address + +; Function Attrs: sanitize_address +;. 
+; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; @llvm.amdgcn.sw.lds.base.table = internal addrspace(1) constant [1 x ptr addrspace(3)] [ptr addrspace(3) @llvm.amdgcn.sw.lds.k0], no_sanitize_address +; @llvm.amdgcn.sw.lds.offset.table = internal addrspace(1) constant [1 x [2 x ptr addrspace(1)]] [[2 x ptr addrspace(1)] [ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0), ptr addrspace(1) getelementptr inbounds (%0, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0)]], no_sanitize_address +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 }, %1 { i32 96, i32 3, i32 32, i32 99, i32 29 }, %1 { i32 128, i32 4, i32 32, i32 132, i32 28 } }, no_sanitize_address, align 1 +; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata" +; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0 +; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] +;. 
+define void @use_variables() #0 { +; CHECK-LABEL: define void @use_variables( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(1) [[TMP2]] to i64 +; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 3 +; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 2147450880 +; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr +; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0 +; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 7 +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 3 +; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i8 +; CHECK-NEXT: [[TMP12:%.*]] = icmp sge i8 [[TMP11]], [[TMP7]] +; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP8]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP13]]) +; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0 +; CHECK-NEXT: br i1 [[TMP15]], label %[[ASAN_REPORT:.*]], label %[[BB18:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP13]], label %[[BB16:.*]], label %[[BB17:.*]] +; CHECK: [[BB16]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP3]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB17]] +; CHECK: [[BB17]]: +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: [[TMP19:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP2]], align 4 +; CHECK-NEXT: [[TMP20:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP21:%.*]] = ptrtoint ptr addrspace(1) [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = lshr i64 [[TMP21]], 3 +; CHECK-NEXT: [[TMP23:%.*]] = add i64 [[TMP22]], 2147450880 +; 
CHECK-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP23]] to ptr +; CHECK-NEXT: [[TMP25:%.*]] = load i8, ptr [[TMP24]], align 1 +; CHECK-NEXT: [[TMP26:%.*]] = icmp ne i8 [[TMP25]], 0 +; CHECK-NEXT: [[TMP27:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP26]]) +; CHECK-NEXT: [[TMP28:%.*]] = icmp ne i64 [[TMP27]], 0 +; CHECK-NEXT: br i1 [[TMP28]], label %[[ASAN_REPORT1:.*]], label %[[BB31:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT1]]: +; CHECK-NEXT: br i1 [[TMP26]], label %[[BB29:.*]], label %[[BB30:.*]] +; CHECK: [[BB29]]: +; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP21]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB30]] +; CHECK: [[BB30]]: +; CHECK-NEXT: br label %[[BB31]] +; CHECK: [[BB31]]: +; CHECK-NEXT: [[TMP32:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP20]], align 8 +; CHECK-NEXT: [[TMP33:%.*]] = ptrtoint ptr addrspace(1) [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = lshr i64 [[TMP33]], 3 +; CHECK-NEXT: [[TMP35:%.*]] = add i64 [[TMP34]], 2147450880 +; CHECK-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP35]] to ptr +; CHECK-NEXT: [[TMP37:%.*]] = load i8, ptr [[TMP36]], align 1 +; CHECK-NEXT: [[TMP38:%.*]] = icmp ne i8 [[TMP37]], 0 +; CHECK-NEXT: [[TMP39:%.*]] = and i64 [[TMP33]], 7 +; CHECK-NEXT: [[TMP40:%.*]] = add i64 [[TMP39]], 3 +; CHECK-NEXT: [[TMP41:%.*]] = trunc i64 [[TMP40]] to i8 +; CHECK-NEXT: [[TMP42:%.*]] = icmp sge i8 [[TMP41]], [[TMP37]] +; CHECK-NEXT: [[TMP43:%.*]] = and i1 [[TMP38]], [[TMP42]] +; CHECK-NEXT: [[TMP44:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP43]]) +; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i64 [[TMP44]], 0 +; CHECK-NEXT: br i1 [[TMP45]], label %[[ASAN_REPORT2:.*]], label %[[BB48:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT2]]: +; CHECK-NEXT: br i1 [[TMP43]], label %[[BB46:.*]], label %[[BB47:.*]] +; CHECK: [[BB46]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP33]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label 
%[[BB47]] +; CHECK: [[BB47]]: +; CHECK-NEXT: br label %[[BB48]] +; CHECK: [[BB48]]: +; CHECK-NEXT: [[TMP49:%.*]] = load i32, ptr addrspace(1) [[TMP32]], align 4 +; CHECK-NEXT: [[TMP50:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP49]] +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = lshr i64 [[TMP52]], 3 +; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880 +; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr +; CHECK-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1 +; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0 +; CHECK-NEXT: [[TMP58:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP57]]) +; CHECK-NEXT: [[TMP59:%.*]] = icmp ne i64 [[TMP58]], 0 +; CHECK-NEXT: br i1 [[TMP59]], label %[[ASAN_REPORT3:.*]], label %[[BB62:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT3]]: +; CHECK-NEXT: br i1 [[TMP57]], label %[[BB60:.*]], label %[[BB61:.*]] +; CHECK: [[BB60]]: +; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP52]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB61]] +; CHECK: [[BB61]]: +; CHECK-NEXT: br label %[[BB62]] +; CHECK: [[BB62]]: +; CHECK-NEXT: [[TMP63:%.*]] = load ptr addrspace(1), ptr addrspace(1) [[TMP51]], align 8 +; CHECK-NEXT: [[TMP64:%.*]] = ptrtoint ptr addrspace(1) [[TMP63]] to i64 +; CHECK-NEXT: [[TMP65:%.*]] = lshr i64 [[TMP64]], 3 +; CHECK-NEXT: [[TMP66:%.*]] = add i64 [[TMP65]], 2147450880 +; CHECK-NEXT: [[TMP67:%.*]] = inttoptr i64 [[TMP66]] to ptr +; CHECK-NEXT: [[TMP68:%.*]] = load i8, ptr [[TMP67]], align 1 +; CHECK-NEXT: [[TMP69:%.*]] = icmp ne i8 [[TMP68]], 0 +; CHECK-NEXT: [[TMP70:%.*]] = and i64 [[TMP64]], 7 +; CHECK-NEXT: [[TMP71:%.*]] = add i64 [[TMP70]], 3 +; CHECK-NEXT: [[TMP72:%.*]] = trunc i64 [[TMP71]] to i8 +; 
CHECK-NEXT: [[TMP73:%.*]] = icmp sge i8 [[TMP72]], [[TMP68]] +; CHECK-NEXT: [[TMP74:%.*]] = and i1 [[TMP69]], [[TMP73]] +; CHECK-NEXT: [[TMP75:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP74]]) +; CHECK-NEXT: [[TMP76:%.*]] = icmp ne i64 [[TMP75]], 0 +; CHECK-NEXT: br i1 [[TMP76]], label %[[ASAN_REPORT4:.*]], label %[[BB79:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT4]]: +; CHECK-NEXT: br i1 [[TMP74]], label %[[BB77:.*]], label %[[BB78:.*]] +; CHECK: [[BB77]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP64]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB78]] +; CHECK: [[BB78]]: +; CHECK-NEXT: br label %[[BB79]] +; CHECK: [[BB79]]: +; CHECK-NEXT: [[TMP80:%.*]] = load i32, ptr addrspace(1) [[TMP63]], align 4 +; CHECK-NEXT: [[TMP81:%.*]] = getelementptr inbounds i8, ptr addrspace(3) [[TMP19]], i32 [[TMP80]] +; CHECK-NEXT: [[X:%.*]] = addrspacecast ptr addrspace(3) [[TMP50]] to ptr +; CHECK-NEXT: [[TMP82:%.*]] = addrspacecast ptr addrspace(3) [[TMP50]] to ptr +; CHECK-NEXT: [[TMP83:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[TMP82]]) +; CHECK-NEXT: [[TMP84:%.*]] = xor i1 [[TMP83]], true +; CHECK-NEXT: br i1 [[TMP84]], label %[[BB85:.*]], label %[[BB101:.*]] +; CHECK: [[BB85]]: +; CHECK-NEXT: [[TMP86:%.*]] = ptrtoint ptr [[TMP82]] to i64 +; CHECK-NEXT: [[TMP87:%.*]] = lshr i64 [[TMP86]], 3 +; CHECK-NEXT: [[TMP88:%.*]] = add i64 [[TMP87]], 2147450880 +; CHECK-NEXT: [[TMP89:%.*]] = inttoptr i64 [[TMP88]] to ptr +; CHECK-NEXT: [[TMP90:%.*]] = load i8, ptr [[TMP89]], align 1 +; CHECK-NEXT: [[TMP91:%.*]] = icmp ne i8 [[TMP90]], 0 +; CHECK-NEXT: [[TMP92:%.*]] = and i64 [[TMP86]], 7 +; CHECK-NEXT: [[TMP93:%.*]] = trunc i64 [[TMP92]] to i8 +; CHECK-NEXT: [[TMP94:%.*]] = icmp sge i8 [[TMP93]], [[TMP90]] +; CHECK-NEXT: [[TMP95:%.*]] = and i1 [[TMP91]], [[TMP94]] +; CHECK-NEXT: [[TMP96:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP95]]) +; CHECK-NEXT: [[TMP97:%.*]] = icmp ne i64 [[TMP96]], 0 +; CHECK-NEXT: br i1 
[[TMP97]], label %[[ASAN_REPORT5:.*]], label %[[BB100:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT5]]: +; CHECK-NEXT: br i1 [[TMP95]], label %[[BB98:.*]], label %[[BB99:.*]] +; CHECK: [[BB98]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP86]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB99]] +; CHECK: [[BB99]]: +; CHECK-NEXT: br label %[[BB100]] +; CHECK: [[BB100]]: +; CHECK-NEXT: br label %[[BB101]] +; CHECK: [[BB101]]: +; CHECK-NEXT: store i8 3, ptr [[TMP82]], align 4 +; CHECK-NEXT: [[TMP102:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id() +; CHECK-NEXT: [[TMP103:%.*]] = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP102]] +; CHECK-NEXT: [[TMP104:%.*]] = load ptr addrspace(3), ptr addrspace(1) [[TMP103]], align 4 +; CHECK-NEXT: [[TMP105:%.*]] = ptrtoint ptr addrspace(3) [[TMP81]] to i32 +; CHECK-NEXT: [[TMP106:%.*]] = load ptr addrspace(1), ptr addrspace(3) [[TMP104]], align 8 +; CHECK-NEXT: [[TMP107:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP106]], i32 [[TMP105]] +; CHECK-NEXT: [[TMP108:%.*]] = ptrtoint ptr addrspace(1) [[TMP107]] to i64 +; CHECK-NEXT: [[TMP109:%.*]] = lshr i64 [[TMP108]], 3 +; CHECK-NEXT: [[TMP110:%.*]] = add i64 [[TMP109]], 2147450880 +; CHECK-NEXT: [[TMP111:%.*]] = inttoptr i64 [[TMP110]] to ptr +; CHECK-NEXT: [[TMP112:%.*]] = load i8, ptr [[TMP111]], align 1 +; CHECK-NEXT: [[TMP113:%.*]] = icmp ne i8 [[TMP112]], 0 +; CHECK-NEXT: [[TMP114:%.*]] = and i64 [[TMP108]], 7 +; CHECK-NEXT: [[TMP115:%.*]] = trunc i64 [[TMP114]] to i8 +; CHECK-NEXT: [[TMP116:%.*]] = icmp sge i8 [[TMP115]], [[TMP112]] +; CHECK-NEXT: [[TMP117:%.*]] = and i1 [[TMP113]], [[TMP116]] +; CHECK-NEXT: [[TMP118:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP117]]) +; CHECK-NEXT: [[TMP119:%.*]] = icmp ne i64 [[TMP118]], 0 +; CHECK-NEXT: br i1 [[TMP119]], label %[[ASAN_REPORT6:.*]], label %[[BB122:.*]], !prof [[PROF1]] +; CHECK: 
[[ASAN_REPORT6]]: +; CHECK-NEXT: br i1 [[TMP117]], label %[[BB120:.*]], label %[[BB121:.*]] +; CHECK: [[BB120]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP108]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB121]] +; CHECK: [[BB121]]: +; CHECK-NEXT: br label %[[BB122]] +; CHECK: [[BB122]]: +; CHECK-NEXT: store i8 3, ptr addrspace(3) [[TMP81]], align 8 +; CHECK-NEXT: ret void +; + %1 = call i32 @llvm.amdgcn.lds.kernel.id() + %2 = getelementptr inbounds [1 x ptr addrspace(3)], ptr addrspace(1) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 %1 + %3 = load ptr addrspace(3), ptr addrspace(1) %2, align 4 + %4 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 0 + %5 = load ptr addrspace(1), ptr addrspace(1) %4, align 8 + %6 = load i32, ptr addrspace(1) %5, align 4 + %7 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %6 + %8 = getelementptr inbounds [1 x [2 x ptr addrspace(1)]], ptr addrspace(1) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 %1, i32 1 + %9 = load ptr addrspace(1), ptr addrspace(1) %8, align 8 + %10 = load i32, ptr addrspace(1) %9, align 4 + %11 = getelementptr inbounds i8, ptr addrspace(3) %3, i32 %10 + %X = addrspacecast ptr addrspace(3) %7 to ptr + %12 = addrspacecast ptr addrspace(3) %7 to ptr + store i8 3, ptr %12, align 4 + store i8 3, ptr addrspace(3) %11, align 8 + ret void +} + +; Function Attrs: sanitize_address +define amdgpu_kernel void @k0() #1 !llvm.amdgcn.lds.kernel.id !1 { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] { +; CHECK-NEXT: [[WID:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] 
= or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB44:.*]] +; CHECK: [[MALLOC]]: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 3), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 4), align 4 +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]]) +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 3), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr 
addrspace(1) [[TMP13]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 4), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]]) +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4 +; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]]) +; CHECK-NEXT: [[TMP32:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4 +; CHECK-NEXT: [[TMP33:%.*]] = zext i32 [[TMP32]] to i64 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = ptrtoint ptr addrspace(1) [[TMP34]] to i64 +; CHECK-NEXT: [[TMP36:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4 +; CHECK-NEXT: [[TMP37:%.*]] = zext i32 [[TMP36]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP35]], i64 [[TMP37]]) +; CHECK-NEXT: [[TMP38:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4 +; CHECK-NEXT: 
[[TMP39:%.*]] = zext i32 [[TMP38]] to i64 +; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = ptrtoint ptr addrspace(1) [[TMP40]] to i64 +; CHECK-NEXT: [[TMP42:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4 +; CHECK-NEXT: [[TMP43:%.*]] = zext i32 [[TMP42]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP41]], i64 [[TMP43]]) +; CHECK-NEXT: br label %[[BB44]] +; CHECK: [[BB44]]: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP45:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP46:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP45]] +; CHECK-NEXT: [[TMP47:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP47]] +; CHECK-NEXT: call void @use_variables() +; CHECK-NEXT: [[TMP49:%.*]] = ptrtoint ptr addrspace(3) [[TMP46]] to i32 +; CHECK-NEXT: [[TMP50:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP50]], i32 [[TMP49]] +; CHECK-NEXT: [[TMP52:%.*]] = ptrtoint ptr addrspace(1) [[TMP51]] to i64 +; CHECK-NEXT: [[TMP53:%.*]] = lshr i64 [[TMP52]], 3 +; CHECK-NEXT: [[TMP54:%.*]] = add i64 [[TMP53]], 2147450880 +; CHECK-NEXT: [[TMP55:%.*]] = inttoptr i64 [[TMP54]] to ptr +; CHECK-NEXT: [[TMP56:%.*]] = load i8, ptr [[TMP55]], align 1 +; CHECK-NEXT: [[TMP57:%.*]] = icmp ne i8 [[TMP56]], 0 +; CHECK-NEXT: [[TMP58:%.*]] = and i64 [[TMP52]], 7 
+; CHECK-NEXT: [[TMP59:%.*]] = trunc i64 [[TMP58]] to i8 +; CHECK-NEXT: [[TMP60:%.*]] = icmp sge i8 [[TMP59]], [[TMP56]] +; CHECK-NEXT: [[TMP61:%.*]] = and i1 [[TMP57]], [[TMP60]] +; CHECK-NEXT: [[TMP62:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP61]]) +; CHECK-NEXT: [[TMP63:%.*]] = icmp ne i64 [[TMP62]], 0 +; CHECK-NEXT: br i1 [[TMP63]], label %[[ASAN_REPORT:.*]], label %[[BB66:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP61]], label %[[BB64:.*]], label %[[BB65:.*]] +; CHECK: [[BB64]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP52]]) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB65]] +; CHECK: [[BB65]]: +; CHECK-NEXT: br label %[[BB66]] +; CHECK: [[BB66]]: +; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP46]], align 1 +; CHECK-NEXT: [[TMP67:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i64 +; CHECK-NEXT: [[TMP68:%.*]] = add i64 [[TMP67]], 3 +; CHECK-NEXT: [[TMP69:%.*]] = inttoptr i64 [[TMP68]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP70:%.*]] = ptrtoint ptr addrspace(3) [[TMP48]] to i32 +; CHECK-NEXT: [[TMP71:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP72:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP71]], i32 [[TMP70]] +; CHECK-NEXT: [[TMP73:%.*]] = ptrtoint ptr addrspace(1) [[TMP72]] to i64 +; CHECK-NEXT: [[TMP74:%.*]] = lshr i64 [[TMP73]], 3 +; CHECK-NEXT: [[TMP75:%.*]] = add i64 [[TMP74]], 2147450880 +; CHECK-NEXT: [[TMP76:%.*]] = inttoptr i64 [[TMP75]] to ptr +; CHECK-NEXT: [[TMP77:%.*]] = load i8, ptr [[TMP76]], align 1 +; CHECK-NEXT: [[TMP78:%.*]] = icmp ne i8 [[TMP77]], 0 +; CHECK-NEXT: [[TMP79:%.*]] = and i64 [[TMP73]], 7 +; CHECK-NEXT: [[TMP80:%.*]] = trunc i64 [[TMP79]] to i8 +; CHECK-NEXT: [[TMP81:%.*]] = icmp sge i8 [[TMP80]], [[TMP77]] +; CHECK-NEXT: [[TMP82:%.*]] = and i1 [[TMP78]], [[TMP81]] +; CHECK-NEXT: [[TMP83:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP82]]) +; CHECK-NEXT: 
[[TMP84:%.*]] = icmp ne i64 [[TMP83]], 0 +; CHECK-NEXT: br i1 [[TMP84]], label %[[ASAN_REPORT1:.*]], label %[[BB87:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT1]]: +; CHECK-NEXT: br i1 [[TMP82]], label %[[BB85:.*]], label %[[BB86:.*]] +; CHECK: [[BB85]]: +; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP73]], i64 4) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB86]] +; CHECK: [[BB86]]: +; CHECK-NEXT: br label %[[BB87]] +; CHECK: [[BB87]]: +; CHECK-NEXT: [[TMP88:%.*]] = ptrtoint ptr addrspace(3) [[TMP69]] to i32 +; CHECK-NEXT: [[TMP89:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP89]], i32 [[TMP88]] +; CHECK-NEXT: [[TMP91:%.*]] = ptrtoint ptr addrspace(1) [[TMP90]] to i64 +; CHECK-NEXT: [[TMP92:%.*]] = lshr i64 [[TMP91]], 3 +; CHECK-NEXT: [[TMP93:%.*]] = add i64 [[TMP92]], 2147450880 +; CHECK-NEXT: [[TMP94:%.*]] = inttoptr i64 [[TMP93]] to ptr +; CHECK-NEXT: [[TMP95:%.*]] = load i8, ptr [[TMP94]], align 1 +; CHECK-NEXT: [[TMP96:%.*]] = icmp ne i8 [[TMP95]], 0 +; CHECK-NEXT: [[TMP97:%.*]] = and i64 [[TMP91]], 7 +; CHECK-NEXT: [[TMP98:%.*]] = trunc i64 [[TMP97]] to i8 +; CHECK-NEXT: [[TMP99:%.*]] = icmp sge i8 [[TMP98]], [[TMP95]] +; CHECK-NEXT: [[TMP100:%.*]] = and i1 [[TMP96]], [[TMP99]] +; CHECK-NEXT: [[TMP101:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP100]]) +; CHECK-NEXT: [[TMP102:%.*]] = icmp ne i64 [[TMP101]], 0 +; CHECK-NEXT: br i1 [[TMP102]], label %[[ASAN_REPORT2:.*]], label %[[BB105:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT2]]: +; CHECK-NEXT: br i1 [[TMP100]], label %[[BB103:.*]], label %[[BB104:.*]] +; CHECK: [[BB103]]: +; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP91]], i64 4) #[[ATTR8]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB104]] +; CHECK: [[BB104]]: +; CHECK-NEXT: br label %[[BB105]] +; CHECK: [[BB105]]: +; CHECK-NEXT: store 
i32 8, ptr addrspace(3) [[TMP48]], align 2 +; CHECK-NEXT: br label %[[CONDFREE:.*]] +; CHECK: [[CONDFREE]]: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]] +; CHECK: [[FREE]]: +; CHECK-NEXT: [[TMP106:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP107:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP108:%.*]] = ptrtoint ptr [[TMP107]] to i64 +; CHECK-NEXT: [[TMP109:%.*]] = ptrtoint ptr addrspace(1) [[TMP106]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP109]], i64 [[TMP108]]) +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; +WId: + %0 = call i32 @llvm.amdgcn.workitem.id.x() + %1 = call i32 @llvm.amdgcn.workitem.id.y() + %2 = call i32 @llvm.amdgcn.workitem.id.z() + %3 = or i32 %0, %1 + %4 = or i32 %3, %2 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %Malloc, label %14 + +Malloc: ; preds = %WId + %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 0), align 4 + %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 4, i32 2), align 4 + %8 = add i32 %6, %7 + %9 = zext i32 %8 to i64 + %10 = call ptr @llvm.returnaddress(i32 0) + %11 = ptrtoint ptr %10 to i64 + %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11) + %13 = inttoptr i64 %12 to ptr addrspace(1) + store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + br label %14 + +14: ; preds = %Malloc, %WId + %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ] + call void @llvm.amdgcn.s.barrier() + %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 + %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 
%15 + %17 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 + %18 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %17 + call void @use_variables() + store i8 7, ptr addrspace(3) %16, align 1 + store i32 8, ptr addrspace(3) %18, align 2 + br label %CondFree + +CondFree: ; preds = %14 + call void @llvm.amdgcn.s.barrier() + br i1 %xyzCond, label %Free, label %End + +Free: ; preds = %CondFree + %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + %20 = call ptr @llvm.returnaddress(i32 0) + %21 = ptrtoint ptr %20 to i64 + %22 = ptrtoint ptr addrspace(1) %19 to i64 + call void @__asan_free_impl(i64 %22, i64 %21) + br label %End + +End: ; preds = %Free, %CondFree + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.x() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.y() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.z() #2 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.returnaddress(i32 immarg) #3 + +declare i64 @__asan_malloc_impl(i64, i64) + +; Function Attrs: convergent nocallback nofree nounwind willreturn +declare void @llvm.amdgcn.s.barrier() #4 + +declare void @__asan_free_impl(i64, i64) + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.lds.kernel.id() #2 + +attributes #0 = { sanitize_address } +attributes #1 = { sanitize_address "amdgpu-lds-size"="40" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) } 
+attributes #4 = { convergent nocallback nofree nounwind willreturn } + +!0 = !{i32 0, i32 1} +!1 = !{i32 0} +;. +; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="160" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR8]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +; CHECK: [[META2]] = !{i32 0} +;. diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll new file mode 100755 index 00000000000000..072174db11f257 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-indirect-access-function-param.ll @@ -0,0 +1,203 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s + +%llvm.amdgcn.sw.lds.my_kernel.md.type = type { %llvm.amdgcn.sw.lds.my_kernel.md.item, %llvm.amdgcn.sw.lds.my_kernel.md.item } +%llvm.amdgcn.sw.lds.my_kernel.md.item = type { i32, i32, i32 } + +@llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol !0 +@llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global 
%llvm.amdgcn.sw.lds.my_kernel.md.type { %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.my_kernel.md.item { i32 8, i32 4096, i32 4096 } }, no_sanitize_address + +; Function Attrs: sanitize_address +;. +; CHECK: @llvm.amdgcn.sw.lds.my_kernel = internal addrspace(3) global ptr poison, no_sanitize_address, align 4, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.my_kernel.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 4096, i32 5120, i32 4128, i32 1024 } }, no_sanitize_address, align 1 +; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata" +; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0 +; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] +;. 
+define void @my_function(ptr addrspace(3) %lds_arg) #0 { +; CHECK-LABEL: define void @my_function( +; CHECK-SAME: ptr addrspace(3) [[LDS_ARG:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint ptr addrspace(3) [[LDS_ARG]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr addrspace(1), ptr addrspace(3) null, align 8 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP2]], i32 [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = ptrtoint ptr addrspace(1) [[TMP3]] to i64 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i64 [[TMP4]], 3 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[TMP5]], 2147450880 +; CHECK-NEXT: [[TMP7:%.*]] = inttoptr i64 [[TMP6]] to ptr +; CHECK-NEXT: [[TMP8:%.*]] = load i8, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne i8 [[TMP8]], 0 +; CHECK-NEXT: [[TMP10:%.*]] = and i64 [[TMP4]], 7 +; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[TMP10]], 3 +; CHECK-NEXT: [[TMP12:%.*]] = trunc i64 [[TMP11]] to i8 +; CHECK-NEXT: [[TMP13:%.*]] = icmp sge i8 [[TMP12]], [[TMP8]] +; CHECK-NEXT: [[TMP14:%.*]] = and i1 [[TMP9]], [[TMP13]] +; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP14]]) +; CHECK-NEXT: [[TMP16:%.*]] = icmp ne i64 [[TMP15]], 0 +; CHECK-NEXT: br i1 [[TMP16]], label %[[ASAN_REPORT:.*]], label %[[BB19:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP14]], label %[[BB17:.*]], label %[[BB18:.*]] +; CHECK: [[BB17]]: +; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP4]]) #[[ATTR8:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB18]] +; CHECK: [[BB18]]: +; CHECK-NEXT: br label %[[BB19]] +; CHECK: [[BB19]]: +; CHECK-NEXT: [[LDS_VAL:%.*]] = load i32, ptr addrspace(3) [[LDS_ARG]], align 4 +; CHECK-NEXT: [[NEW_LDS_VAL:%.*]] = add i32 [[LDS_VAL]], 1 +; CHECK-NEXT: store i32 [[NEW_LDS_VAL]], ptr addrspace(3) [[LDS_ARG]], align 4 +; CHECK-NEXT: ret void +; + %lds_val = load i32, ptr addrspace(3) %lds_arg, align 4 + %new_lds_val = add i32 %lds_val, 1 + 
store i32 %new_lds_val, ptr addrspace(3) %lds_arg, align 4 + ret void +} + +; Function Attrs: sanitize_address +define amdgpu_kernel void @my_kernel() #1 { +; CHECK-LABEL: define amdgpu_kernel void @my_kernel( +; CHECK-SAME: ) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[WID:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB26:.*]] +; CHECK: [[MALLOC]]: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 3), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64 +; 
CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 4), align 4 +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]]) +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 3), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 0, i32 4), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]]) +; CHECK-NEXT: br label %[[BB26]] +; CHECK: [[BB26]]: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP27:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP28:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 [[TMP27]] +; CHECK-NEXT: [[LDS_PTR:%.*]] = getelementptr [1024 x i32], ptr addrspace(3) [[TMP28]], i32 0, i32 0 +; CHECK-NEXT: call void @my_function(ptr addrspace(3) [[LDS_PTR]]) +; CHECK-NEXT: br label %[[CONDFREE:.*]] +; CHECK: [[CONDFREE]]: +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]] +; CHECK: [[FREE]]: +; CHECK-NEXT: [[TMP29:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 +; CHECK-NEXT: 
[[TMP30:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP31:%.*]] = ptrtoint ptr [[TMP30]] to i64 +; CHECK-NEXT: [[TMP32:%.*]] = ptrtoint ptr addrspace(1) [[TMP29]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP32]], i64 [[TMP31]]) +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; +WId: + %0 = call i32 @llvm.amdgcn.workitem.id.x() + %1 = call i32 @llvm.amdgcn.workitem.id.y() + %2 = call i32 @llvm.amdgcn.workitem.id.z() + %3 = or i32 %0, %1 + %4 = or i32 %3, %2 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %Malloc, label %14 + +Malloc: ; preds = %WId + %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 + %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 2), align 4 + %8 = add i32 %6, %7 + %9 = zext i32 %8 to i64 + %10 = call ptr @llvm.returnaddress(i32 0) + %11 = ptrtoint ptr %10 to i64 + %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11) + %13 = inttoptr i64 %12 to ptr addrspace(1) + store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 + br label %14 + +14: ; preds = %Malloc, %WId + %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ] + call void @llvm.amdgcn.s.barrier() + %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.my_kernel.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.my_kernel.md, i32 0, i32 1, i32 0), align 4 + %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, i32 %15 + %lds_ptr = getelementptr [1024 x i32], ptr addrspace(3) %16, i32 0, i32 0 + call void @my_function(ptr addrspace(3) %lds_ptr) + br label %CondFree + +CondFree: ; preds = %14 + call void @llvm.amdgcn.s.barrier() + br i1 %xyzCond, label %Free, label %End + +Free: ; preds = %CondFree + %17 = load ptr addrspace(1), 
ptr addrspace(3) @llvm.amdgcn.sw.lds.my_kernel, align 8 + %18 = call ptr @llvm.returnaddress(i32 0) + %19 = ptrtoint ptr %18 to i64 + %20 = ptrtoint ptr addrspace(1) %17 to i64 + call void @__asan_free_impl(i64 %20, i64 %19) + br label %End + +End: ; preds = %Free, %CondFree + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.x() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.y() #2 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.z() #2 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.returnaddress(i32 immarg) #3 + +declare i64 @__asan_malloc_impl(i64, i64) + +; Function Attrs: convergent nocallback nofree nounwind willreturn +declare void @llvm.amdgcn.s.barrier() #4 + +declare void @__asan_free_impl(i64, i64) + +attributes #0 = { sanitize_address } +attributes #1 = { sanitize_address "amdgpu-lds-size"="4104" } +attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #3 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #4 = { convergent nocallback nofree nounwind willreturn } + +!0 = !{i32 0, i32 1} +;. 
+; CHECK: attributes #[[ATTR0]] = { sanitize_address } +; CHECK: attributes #[[ATTR1]] = { sanitize_address "amdgpu-lds-size"="5152" } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR7:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR8]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll new file mode 100755 index 00000000000000..99dbf6c607ab58 --- /dev/null +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-static-lds-test.ll @@ -0,0 +1,249 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals all --version 5 +; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s + +%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item } +%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 } + +@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol !0 +@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 1, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 4, i32 8 } }, no_sanitize_address + +; Function Attrs: sanitize_address 
+;. +; CHECK: @llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 8, !absolute_symbol [[META0:![0-9]+]] +; CHECK: @llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %0 { %1 { i32 0, i32 8, i32 32, i32 8, i32 24 }, %1 { i32 32, i32 1, i32 32, i32 33, i32 31 }, %1 { i32 64, i32 4, i32 32, i32 68, i32 28 } }, no_sanitize_address, align 1 +; CHECK: @llvm.used = appending addrspace(1) global [1 x ptr] [ptr @asan.module_ctor], section "llvm.metadata" +; CHECK: @___asan_globals_registered = common hidden addrspace(1) global i64 0 +; CHECK: @__start_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @__stop_asan_globals = extern_weak hidden addrspace(1) global i64 +; CHECK: @llvm.global_ctors = appending addrspace(1) global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 1, ptr @asan.module_ctor, ptr @asan.module_ctor }] +;. +define amdgpu_kernel void @k0() #0 { +; CHECK-LABEL: define amdgpu_kernel void @k0( +; CHECK-SAME: ) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[WID:.*]]: +; CHECK-NEXT: [[TMP0:%.*]] = call i32 @llvm.amdgcn.workitem.id.x() +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.y() +; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.z() +; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP4:%.*]] = or i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[TMP4]], 0 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MALLOC:.*]], label %[[BB32:.*]] +; CHECK: [[MALLOC]]: +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 +; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP6]], [[TMP7]] +; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 +; CHECK-NEXT: [[TMP10:%.*]] = call ptr 
@llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP11:%.*]] = ptrtoint ptr [[TMP10]] to i64 +; CHECK-NEXT: [[TMP12:%.*]] = call i64 @__asan_malloc_impl(i64 [[TMP9]], i64 [[TMP11]]) +; CHECK-NEXT: [[TMP13:%.*]] = inttoptr i64 [[TMP12]] to ptr addrspace(1) +; CHECK-NEXT: store ptr addrspace(1) [[TMP13]], ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP14:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 3), align 4 +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = ptrtoint ptr addrspace(1) [[TMP16]] to i64 +; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 4), align 4 +; CHECK-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP17]], i64 [[TMP19]]) +; CHECK-NEXT: [[TMP20:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 3), align 4 +; CHECK-NEXT: [[TMP21:%.*]] = zext i32 [[TMP20]] to i64 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP21]] +; CHECK-NEXT: [[TMP23:%.*]] = ptrtoint ptr addrspace(1) [[TMP22]] to i64 +; CHECK-NEXT: [[TMP24:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 4), align 4 +; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP23]], i64 [[TMP25]]) +; CHECK-NEXT: [[TMP26:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 3), align 4 +; CHECK-NEXT: [[TMP27:%.*]] = zext i32 [[TMP26]] to i64 +; CHECK-NEXT: [[TMP28:%.*]] = 
getelementptr inbounds i8, ptr addrspace(1) [[TMP13]], i64 [[TMP27]] +; CHECK-NEXT: [[TMP29:%.*]] = ptrtoint ptr addrspace(1) [[TMP28]] to i64 +; CHECK-NEXT: [[TMP30:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 0, i32 4), align 4 +; CHECK-NEXT: [[TMP31:%.*]] = zext i32 [[TMP30]] to i64 +; CHECK-NEXT: call void @__asan_poison_region(i64 [[TMP29]], i64 [[TMP31]]) +; CHECK-NEXT: br label %[[BB32]] +; CHECK: [[BB32]]: +; CHECK-NEXT: [[XYZCOND:%.*]] = phi i1 [ false, %[[WID]] ], [ true, %[[MALLOC]] ] +; CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: [[TMP33:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 +; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP33]] +; CHECK-NEXT: [[TMP35:%.*]] = load i32, ptr addrspace(1) getelementptr inbounds ([[TMP0]], ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 +; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 [[TMP35]] +; CHECK-NEXT: [[TMP37:%.*]] = ptrtoint ptr addrspace(3) [[TMP34]] to i32 +; CHECK-NEXT: [[TMP38:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP38]], i32 [[TMP37]] +; CHECK-NEXT: [[TMP40:%.*]] = ptrtoint ptr addrspace(1) [[TMP39]] to i64 +; CHECK-NEXT: [[TMP41:%.*]] = lshr i64 [[TMP40]], 3 +; CHECK-NEXT: [[TMP42:%.*]] = add i64 [[TMP41]], 2147450880 +; CHECK-NEXT: [[TMP43:%.*]] = inttoptr i64 [[TMP42]] to ptr +; CHECK-NEXT: [[TMP44:%.*]] = load i8, ptr [[TMP43]], align 1 +; CHECK-NEXT: [[TMP45:%.*]] = icmp ne i8 [[TMP44]], 0 +; CHECK-NEXT: [[TMP46:%.*]] = and i64 [[TMP40]], 7 +; CHECK-NEXT: [[TMP47:%.*]] = trunc i64 [[TMP46]] to i8 +; CHECK-NEXT: [[TMP48:%.*]] = icmp sge i8 [[TMP47]], 
[[TMP44]] +; CHECK-NEXT: [[TMP49:%.*]] = and i1 [[TMP45]], [[TMP48]] +; CHECK-NEXT: [[TMP50:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP49]]) +; CHECK-NEXT: [[TMP51:%.*]] = icmp ne i64 [[TMP50]], 0 +; CHECK-NEXT: br i1 [[TMP51]], label %[[ASAN_REPORT:.*]], label %[[BB54:.*]], !prof [[PROF1:![0-9]+]] +; CHECK: [[ASAN_REPORT]]: +; CHECK-NEXT: br i1 [[TMP49]], label %[[BB52:.*]], label %[[BB53:.*]] +; CHECK: [[BB52]]: +; CHECK-NEXT: call void @__asan_report_store1(i64 [[TMP40]]) #[[ATTR7:[0-9]+]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB53]] +; CHECK: [[BB53]]: +; CHECK-NEXT: br label %[[BB54]] +; CHECK: [[BB54]]: +; CHECK-NEXT: store i8 7, ptr addrspace(3) [[TMP34]], align 4 +; CHECK-NEXT: [[TMP55:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i64 +; CHECK-NEXT: [[TMP56:%.*]] = add i64 [[TMP55]], 3 +; CHECK-NEXT: [[TMP57:%.*]] = inttoptr i64 [[TMP56]] to ptr addrspace(3) +; CHECK-NEXT: [[TMP58:%.*]] = ptrtoint ptr addrspace(3) [[TMP36]] to i32 +; CHECK-NEXT: [[TMP59:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP60:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP59]], i32 [[TMP58]] +; CHECK-NEXT: [[TMP61:%.*]] = ptrtoint ptr addrspace(1) [[TMP60]] to i64 +; CHECK-NEXT: [[TMP62:%.*]] = lshr i64 [[TMP61]], 3 +; CHECK-NEXT: [[TMP63:%.*]] = add i64 [[TMP62]], 2147450880 +; CHECK-NEXT: [[TMP64:%.*]] = inttoptr i64 [[TMP63]] to ptr +; CHECK-NEXT: [[TMP65:%.*]] = load i8, ptr [[TMP64]], align 1 +; CHECK-NEXT: [[TMP66:%.*]] = icmp ne i8 [[TMP65]], 0 +; CHECK-NEXT: [[TMP67:%.*]] = and i64 [[TMP61]], 7 +; CHECK-NEXT: [[TMP68:%.*]] = trunc i64 [[TMP67]] to i8 +; CHECK-NEXT: [[TMP69:%.*]] = icmp sge i8 [[TMP68]], [[TMP65]] +; CHECK-NEXT: [[TMP70:%.*]] = and i1 [[TMP66]], [[TMP69]] +; CHECK-NEXT: [[TMP71:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP70]]) +; CHECK-NEXT: [[TMP72:%.*]] = icmp ne i64 [[TMP71]], 0 +; CHECK-NEXT: br i1 [[TMP72]], label %[[ASAN_REPORT1:.*]], 
label %[[BB75:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT1]]: +; CHECK-NEXT: br i1 [[TMP70]], label %[[BB73:.*]], label %[[BB74:.*]] +; CHECK: [[BB73]]: +; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP61]], i64 4) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB74]] +; CHECK: [[BB74]]: +; CHECK-NEXT: br label %[[BB75]] +; CHECK: [[BB75]]: +; CHECK-NEXT: [[TMP76:%.*]] = ptrtoint ptr addrspace(3) [[TMP57]] to i32 +; CHECK-NEXT: [[TMP77:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[TMP77]], i32 [[TMP76]] +; CHECK-NEXT: [[TMP79:%.*]] = ptrtoint ptr addrspace(1) [[TMP78]] to i64 +; CHECK-NEXT: [[TMP80:%.*]] = lshr i64 [[TMP79]], 3 +; CHECK-NEXT: [[TMP81:%.*]] = add i64 [[TMP80]], 2147450880 +; CHECK-NEXT: [[TMP82:%.*]] = inttoptr i64 [[TMP81]] to ptr +; CHECK-NEXT: [[TMP83:%.*]] = load i8, ptr [[TMP82]], align 1 +; CHECK-NEXT: [[TMP84:%.*]] = icmp ne i8 [[TMP83]], 0 +; CHECK-NEXT: [[TMP85:%.*]] = and i64 [[TMP79]], 7 +; CHECK-NEXT: [[TMP86:%.*]] = trunc i64 [[TMP85]] to i8 +; CHECK-NEXT: [[TMP87:%.*]] = icmp sge i8 [[TMP86]], [[TMP83]] +; CHECK-NEXT: [[TMP88:%.*]] = and i1 [[TMP84]], [[TMP87]] +; CHECK-NEXT: [[TMP89:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP88]]) +; CHECK-NEXT: [[TMP90:%.*]] = icmp ne i64 [[TMP89]], 0 +; CHECK-NEXT: br i1 [[TMP90]], label %[[ASAN_REPORT2:.*]], label %[[BB93:.*]], !prof [[PROF1]] +; CHECK: [[ASAN_REPORT2]]: +; CHECK-NEXT: br i1 [[TMP88]], label %[[BB91:.*]], label %[[BB92:.*]] +; CHECK: [[BB91]]: +; CHECK-NEXT: call void @__asan_report_store_n(i64 [[TMP79]], i64 4) #[[ATTR7]] +; CHECK-NEXT: call void @llvm.amdgcn.unreachable() +; CHECK-NEXT: br label %[[BB92]] +; CHECK: [[BB92]]: +; CHECK-NEXT: br label %[[BB93]] +; CHECK: [[BB93]]: +; CHECK-NEXT: store i32 8, ptr addrspace(3) [[TMP36]], align 2 +; CHECK-NEXT: br label %[[CONDFREE:.*]] +; CHECK: [[CONDFREE]]: +; 
CHECK-NEXT: call void @llvm.amdgcn.s.barrier() +; CHECK-NEXT: br i1 [[XYZCOND]], label %[[FREE:.*]], label %[[END:.*]] +; CHECK: [[FREE]]: +; CHECK-NEXT: [[TMP94:%.*]] = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 +; CHECK-NEXT: [[TMP95:%.*]] = call ptr @llvm.returnaddress(i32 0) +; CHECK-NEXT: [[TMP96:%.*]] = ptrtoint ptr [[TMP95]] to i64 +; CHECK-NEXT: [[TMP97:%.*]] = ptrtoint ptr addrspace(1) [[TMP94]] to i64 +; CHECK-NEXT: call void @__asan_free_impl(i64 [[TMP97]], i64 [[TMP96]]) +; CHECK-NEXT: br label %[[END]] +; CHECK: [[END]]: +; CHECK-NEXT: ret void +; +WId: + %0 = call i32 @llvm.amdgcn.workitem.id.x() + %1 = call i32 @llvm.amdgcn.workitem.id.y() + %2 = call i32 @llvm.amdgcn.workitem.id.z() + %3 = or i32 %0, %1 + %4 = or i32 %3, %2 + %5 = icmp eq i32 %4, 0 + br i1 %5, label %Malloc, label %14 + +Malloc: ; preds = %WId + %6 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 + %7 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 2), align 4 + %8 = add i32 %6, %7 + %9 = zext i32 %8 to i64 + %10 = call ptr @llvm.returnaddress(i32 0) + %11 = ptrtoint ptr %10 to i64 + %12 = call i64 @__asan_malloc_impl(i64 %9, i64 %11) + %13 = inttoptr i64 %12 to ptr addrspace(1) + store ptr addrspace(1) %13, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + br label %14 + +14: ; preds = %Malloc, %WId + %xyzCond = phi i1 [ false, %WId ], [ true, %Malloc ] + call void @llvm.amdgcn.s.barrier() + %15 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 1, i32 0), align 4 + %16 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %15 + %17 = load i32, ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) 
@llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0), align 4 + %18 = getelementptr inbounds i8, ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, i32 %17 + store i8 7, ptr addrspace(3) %16, align 4 + store i32 8, ptr addrspace(3) %18, align 2 + br label %CondFree + +CondFree: ; preds = %14 + call void @llvm.amdgcn.s.barrier() + br i1 %xyzCond, label %Free, label %End + +Free: ; preds = %CondFree + %19 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 + %20 = call ptr @llvm.returnaddress(i32 0) + %21 = ptrtoint ptr %20 to i64 + %22 = ptrtoint ptr addrspace(1) %19 to i64 + call void @__asan_free_impl(i64 %22, i64 %21) + br label %End + +End: ; preds = %Free, %CondFree + ret void +} + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.y() #1 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i32 @llvm.amdgcn.workitem.id.z() #1 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.returnaddress(i32 immarg) #2 + +declare i64 @__asan_malloc_impl(i64, i64) + +; Function Attrs: convergent nocallback nofree nounwind willreturn +declare void @llvm.amdgcn.s.barrier() #3 + +declare void @__asan_free_impl(i64, i64) + +attributes #0 = { sanitize_address "amdgpu-lds-size"="24" } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #2 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #3 = { convergent nocallback nofree nounwind willreturn } + +!0 = !{i32 0, i32 1} +;. 
+; CHECK: attributes #[[ATTR0]] = { sanitize_address "amdgpu-lds-size"="96" } +; CHECK: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +; CHECK: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nounwind willreturn } +; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nounwind willreturn memory(none) } +; CHECK: attributes #[[ATTR5:[0-9]+]] = { convergent nocallback nofree nounwind } +; CHECK: attributes #[[ATTR6:[0-9]+]] = { nounwind } +; CHECK: attributes #[[ATTR7]] = { nomerge } +;. +; CHECK: [[META0]] = !{i32 0, i32 1} +; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 1048575} +;. diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll deleted file mode 100644 index 44149b28fd9f9d..00000000000000 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_do_not_instrument_lds.ll +++ /dev/null @@ -1,27 +0,0 @@ -; RUN: opt < %s -passes=asan -S | FileCheck %s -target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1-ni:7:8:9" -target triple = "amdgcn-amd-amdhsa" - -; Memory access to lds are not instrumented - -@count = addrspace(3) global [100 x i32] undef, align 16 - -define protected amdgpu_kernel void @lds_store(i32 %i) sanitize_address { -entry: - ; CHECK-LABEL: @lds_store( - ; CHECK-NOT: call {{[a-zA-Z]}} - %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i - store i32 0, ptr addrspace(3) %arrayidx1, align 4 - ret void -} - -define protected amdgpu_kernel void @lds_load(i32 %i) sanitize_address { -entry: - ; CHECK-LABEL: @lds_load( - ; CHECK-NOT: 
call {{[a-zA-Z]}} - %arrayidx1 = getelementptr inbounds [100 x i32], ptr addrspace(3) @count, i32 0, i32 %i - %0 = load i32, ptr addrspace(3) %arrayidx1, align 4 - ret void -} - -; CHECK-LABEL: define internal void @asan.module_ctor() diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll index cb37ba24f1c74e..e5ce018ba0f403 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan_instrument_generic_address_space.ll @@ -9,12 +9,10 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i) ; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]]) ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]] -; CHECK: 4: +; CHECK: 2: ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -31,15 +29,15 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i) ; CHECK-NEXT: br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0:![0-9]+]] ; CHECK: asan.report: ; CHECK-NEXT: br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]] -; CHECK: 18: +; CHECK: 16: ; CHECK-NEXT: call void @__asan_report_store4(i64 [[TMP5]]) #[[ATTR5:[0-9]+]] ; CHECK-NEXT: call void @llvm.amdgcn.unreachable() ; CHECK-NEXT: br label [[TMP19]] -; CHECK: 19: 
+; CHECK: 17: ; CHECK-NEXT: br label [[TMP20]] -; CHECK: 20: +; CHECK: 18: ; CHECK-NEXT: br label [[TMP21]] -; CHECK: 21: +; CHECK: 19: ; CHECK-NEXT: store i32 0, ptr [[Q]], align 4 ; CHECK-NEXT: ret void ; @@ -47,12 +45,10 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i) ; RECOV-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0:[0-9]+]] { ; RECOV-NEXT: entry: ; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; RECOV-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]]) ; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true ; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]] -; RECOV: 4: +; RECOV: 2: ; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -68,9 +64,9 @@ define protected amdgpu_kernel void @generic_store(ptr addrspace(1) %p, i32 %i) ; RECOV: asan.report: ; RECOV-NEXT: call void @__asan_report_store4_noabort(i64 [[TMP5]]) #[[ATTR3:[0-9]+]] ; RECOV-NEXT: br label [[TMP16]] -; RECOV: 16: +; RECOV: 14: ; RECOV-NEXT: br label [[TMP17]] -; RECOV: 17: +; RECOV: 15: ; RECOV-NEXT: store i32 0, ptr [[Q]], align 4 ; RECOV-NEXT: ret void ; @@ -86,12 +82,10 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s ; CHECK-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]]) ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], 
true ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP21:%.*]] -; CHECK: 4: +; CHECK: 2: ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -108,15 +102,15 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s ; CHECK-NEXT: br i1 [[TMP17]], label [[ASAN_REPORT:%.*]], label [[TMP20:%.*]], !prof [[PROF0]] ; CHECK: asan.report: ; CHECK-NEXT: br i1 [[TMP15]], label [[TMP18:%.*]], label [[TMP19:%.*]] -; CHECK: 18: +; CHECK: 16: ; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP5]]) #[[ATTR5]] ; CHECK-NEXT: call void @llvm.amdgcn.unreachable() ; CHECK-NEXT: br label [[TMP19]] -; CHECK: 19: +; CHECK: 17: ; CHECK-NEXT: br label [[TMP20]] -; CHECK: 20: +; CHECK: 18: ; CHECK-NEXT: br label [[TMP21]] -; CHECK: 21: +; CHECK: 19: ; CHECK-NEXT: [[R:%.*]] = load i32, ptr [[Q]], align 4 ; CHECK-NEXT: ret void ; @@ -124,12 +118,10 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s ; RECOV-SAME: ptr addrspace(1) [[P:%.*]], i32 [[I:%.*]]) #[[ATTR0]] { ; RECOV-NEXT: entry: ; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; RECOV-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]]) ; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true ; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP17:%.*]] -; RECOV: 4: +; RECOV: 2: ; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -145,9 +137,9 @@ define protected amdgpu_kernel void @generic_load(ptr addrspace(1) %p, i32 %i) s ; RECOV: asan.report: ; RECOV-NEXT: call void @__asan_report_load4_noabort(i64 [[TMP5]]) #[[ATTR3]] ; 
RECOV-NEXT: br label [[TMP16]] -; RECOV: 16: +; RECOV: 14: ; RECOV-NEXT: br label [[TMP17]] -; RECOV: 17: +; RECOV: 15: ; RECOV-NEXT: [[R:%.*]] = load i32, ptr [[Q]], align 4 ; RECOV-NEXT: ret void ; @@ -163,12 +155,10 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti ; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]]) ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]] -; CHECK: 4: +; CHECK: 2: ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -180,15 +170,15 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti ; CHECK-NEXT: br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] ; CHECK: asan.report: ; CHECK-NEXT: br i1 [[TMP10]], label [[TMP13:%.*]], label [[TMP14:%.*]] -; CHECK: 13: +; CHECK: 11: ; CHECK-NEXT: call void @__asan_report_store8(i64 [[TMP5]]) #[[ATTR5]] ; CHECK-NEXT: call void @llvm.amdgcn.unreachable() ; CHECK-NEXT: br label [[TMP14]] -; CHECK: 14: +; CHECK: 12: ; CHECK-NEXT: br label [[TMP15]] -; CHECK: 15: +; CHECK: 13: ; CHECK-NEXT: br label [[TMP16]] -; CHECK: 16: +; CHECK: 14: ; CHECK-NEXT: store i64 0, ptr [[Q]], align 8 ; CHECK-NEXT: ret void ; @@ -196,12 +186,10 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti ; RECOV-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] { ; RECOV-NEXT: entry: ; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; RECOV-NEXT: [[TMP0:%.*]] = call i1 
@llvm.amdgcn.is.shared(ptr [[Q]]) ; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true ; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] -; RECOV: 4: +; RECOV: 2: ; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -212,9 +200,9 @@ define protected amdgpu_kernel void @generic_store_8(ptr addrspace(1) %p) saniti ; RECOV: asan.report: ; RECOV-NEXT: call void @__asan_report_store8_noabort(i64 [[TMP5]]) #[[ATTR3]] ; RECOV-NEXT: br label [[TMP11]] -; RECOV: 11: +; RECOV: 9: ; RECOV-NEXT: br label [[TMP12]] -; RECOV: 12: +; RECOV: 10: ; RECOV-NEXT: store i64 0, ptr [[Q]], align 8 ; RECOV-NEXT: ret void ; @@ -229,12 +217,10 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz ; CHECK-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]]) ; CHECK-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; CHECK-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; CHECK-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true ; CHECK-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP16:%.*]] -; CHECK: 4: +; CHECK: 2: ; CHECK-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; CHECK-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; CHECK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -246,15 +232,15 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz ; CHECK-NEXT: br i1 [[TMP12]], label [[ASAN_REPORT:%.*]], label [[TMP15:%.*]], !prof [[PROF0]] ; CHECK: asan.report: ; CHECK-NEXT: br i1 [[TMP10]], label 
[[TMP13:%.*]], label [[TMP14:%.*]] -; CHECK: 13: +; CHECK: 11: ; CHECK-NEXT: call void @__asan_report_load8(i64 [[TMP5]]) #[[ATTR5]] ; CHECK-NEXT: call void @llvm.amdgcn.unreachable() ; CHECK-NEXT: br label [[TMP14]] -; CHECK: 14: +; CHECK: 12: ; CHECK-NEXT: br label [[TMP15]] -; CHECK: 15: +; CHECK: 13: ; CHECK-NEXT: br label [[TMP16]] -; CHECK: 16: +; CHECK: 14: ; CHECK-NEXT: [[R:%.*]] = load i64, ptr [[Q]], align 8 ; CHECK-NEXT: ret void ; @@ -262,12 +248,10 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz ; RECOV-SAME: ptr addrspace(1) [[P:%.*]]) #[[ATTR0]] { ; RECOV-NEXT: entry: ; RECOV-NEXT: [[Q:%.*]] = addrspacecast ptr addrspace(1) [[P]] to ptr -; RECOV-NEXT: [[TMP0:%.*]] = call i1 @llvm.amdgcn.is.shared(ptr [[Q]]) ; RECOV-NEXT: [[TMP1:%.*]] = call i1 @llvm.amdgcn.is.private(ptr [[Q]]) -; RECOV-NEXT: [[TMP2:%.*]] = or i1 [[TMP0]], [[TMP1]] -; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP2]], true +; RECOV-NEXT: [[TMP3:%.*]] = xor i1 [[TMP1]], true ; RECOV-NEXT: br i1 [[TMP3]], label [[TMP4:%.*]], label [[TMP12:%.*]] -; RECOV: 4: +; RECOV: 2: ; RECOV-NEXT: [[TMP5:%.*]] = ptrtoint ptr [[Q]] to i64 ; RECOV-NEXT: [[TMP6:%.*]] = lshr i64 [[TMP5]], 3 ; RECOV-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 2147450880 @@ -278,9 +262,9 @@ define protected amdgpu_kernel void @generic_load_8(ptr addrspace(1) %p) sanitiz ; RECOV: asan.report: ; RECOV-NEXT: call void @__asan_report_load8_noabort(i64 [[TMP5]]) #[[ATTR3]] ; RECOV-NEXT: br label [[TMP11]] -; RECOV: 11: +; RECOV: 9: ; RECOV-NEXT: br label [[TMP12]] -; RECOV: 12: +; RECOV: 10: ; RECOV-NEXT: [[R:%.*]] = load i64, ptr [[Q]], align 8 ; RECOV-NEXT: ret void ;