-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Introduce address sanitizer instrumentation for LDS lowered by amdgpu-sw-lower-lds pass #89208
Conversation
@llvm/pr-subscribers-backend-amdgpu @llvm/pr-subscribers-llvm-transforms Author: Chaitanya (skc7) ChangesThis is WIP PR for instrumenting "LDS accesses lowered by amdgpu-sw-lower-lds".
Patch is 187.16 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/89208.diff 8 Files Affected:
diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
index 26fedbfd65dd41..a24f26eada3b0a 100644
--- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
+++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp
@@ -628,12 +628,6 @@ ASanAccessInfo::ASanAccessInfo(bool IsWrite, bool CompileKernel,
} // namespace llvm
-static uint64_t getRedzoneSizeForScale(int MappingScale) {
- // Redzone used for stack and globals is at least 32 bytes.
- // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
- return std::max(32U, 1U << MappingScale);
-}
-
static uint64_t GetCtorAndDtorPriority(Triple &TargetTriple) {
if (TargetTriple.isOSEmscripten()) {
return kAsanEmscriptenCtorAndDtorPriority;
@@ -939,10 +933,7 @@ class ModuleAddressSanitizer {
StringRef getGlobalMetadataSection() const;
void poisonOneInitializer(Function &GlobalInit, GlobalValue *ModuleName);
void createInitializerPoisonCalls(Module &M, GlobalValue *ModuleName);
- uint64_t getMinRedzoneSizeForGlobal() const {
- return getRedzoneSizeForScale(Mapping.Scale);
- }
- uint64_t getRedzoneSizeForGlobal(uint64_t SizeInBytes) const;
+
int GetAsanVersion(const Module &M) const;
bool CompileKernel;
@@ -1239,6 +1230,246 @@ void AddressSanitizerPass::printPipeline(
OS << '>';
}
+static uint64_t getRedzoneSizeForScale(int MappingScale) {
+ // Redzone used for stack and globals is at least 32 bytes.
+ // For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
+ return std::max(32U, 1U << MappingScale);
+}
+
+static uint64_t getMinRedzoneSizeForGlobal(int Scale) {
+ return getRedzoneSizeForScale(Scale);
+}
+
+static uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes) {
+ constexpr uint64_t kMaxRZ = 1 << 18;
+ const uint64_t MinRZ = getMinRedzoneSizeForGlobal(Scale);
+
+ uint64_t RZ = 0;
+ if (SizeInBytes <= MinRZ / 2) {
+ // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
+ // at least 32 bytes, optimize when SizeInBytes is less than or equal to
+ // half of MinRZ.
+ RZ = MinRZ - SizeInBytes;
+ } else {
+ // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
+ RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
+
+ // Round up to multiple of MinRZ.
+ if (SizeInBytes % MinRZ)
+ RZ += MinRZ - (SizeInBytes % MinRZ);
+ }
+
+ assert((RZ + SizeInBytes) % MinRZ == 0);
+
+ return RZ;
+}
+
+static GlobalVariable *getKernelSwLDSGlobal(Module &M, Function &F) {
+ SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.");
+ KernelLDSName += F.getName();
+ return M.getNamedGlobal(KernelLDSName);
+}
+
+static GlobalVariable *getKernelSwLDSMetadataGlobal(Module &M, Function &F) {
+ SmallString<64> KernelLDSName("llvm.amdgcn.sw.lds.");
+ KernelLDSName += F.getName();
+ KernelLDSName += ".md";
+ return M.getNamedGlobal(KernelLDSName);
+}
+
+static void UpdateSwLDSMetadataWithRedzoneInfo(Function &F, int Scale) {
+ Module *M = F.getParent();
+ GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
+ GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
+ if (!SwLDSMetadataGlobal || !SwLDSGlobal)
+ return;
+
+ LLVMContext &Ctx = M->getContext();
+ Type *Int32Ty = Type::getInt32Ty(Ctx);
+
+ Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
+ Align MdAlign = Align(SwLDSMetadataGlobal->getAlign().valueOrOne());
+ Align LDSAlign = Align(SwLDSGlobal->getAlign().valueOrOne());
+
+ StructType *MDStructType =
+ cast<StructType>(SwLDSMetadataGlobal->getValueType());
+ assert(MDStructType);
+ unsigned NumStructs = MDStructType->getNumElements();
+
+ std::vector<Type *> Items;
+ std::vector<Constant *> Initializers;
+ uint32_t MallocSize = 0;
+ //{GV.start, Align(GV.size + Redzone.size), Redzone.start, Redzone.size}
+ StructType *LDSItemTy =
+ StructType::create(Ctx, {Int32Ty, Int32Ty, Int32Ty, Int32Ty}, "");
+ for (unsigned i = 0; i < NumStructs; i++) {
+ Items.push_back(LDSItemTy);
+ ConstantStruct *member =
+ dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
+ Constant *NewInitItem;
+ if (member) {
+ ConstantInt *GlobalSize =
+ cast<ConstantInt>(member->getAggregateElement(1U));
+ unsigned GlobalSizeValue = GlobalSize->getZExtValue();
+ Constant *NewItemStartOffset = ConstantInt::get(Int32Ty, MallocSize);
+ if (GlobalSizeValue) {
+ const uint64_t RightRedzoneSize =
+ getRedzoneSizeForGlobal(Scale, GlobalSizeValue);
+ MallocSize += GlobalSizeValue;
+ Constant *NewItemRedzoneStartOffset =
+ ConstantInt::get(Int32Ty, MallocSize);
+ MallocSize += RightRedzoneSize;
+ Constant *NewItemRedzoneSize =
+ ConstantInt::get(Int32Ty, RightRedzoneSize);
+
+ unsigned NewItemAlignGlobalPlusRedzoneSize =
+ alignTo(GlobalSizeValue + RightRedzoneSize, LDSAlign);
+ Constant *NewItemAlignGlobalPlusRedzoneSizeConst =
+ ConstantInt::get(Int32Ty, NewItemAlignGlobalPlusRedzoneSize);
+ NewInitItem = ConstantStruct::get(
+ LDSItemTy,
+ {NewItemStartOffset, NewItemAlignGlobalPlusRedzoneSizeConst,
+ NewItemRedzoneStartOffset, NewItemRedzoneSize});
+ MallocSize = alignTo(MallocSize, LDSAlign);
+ } else {
+ Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ NewInitItem =
+ ConstantStruct::get(LDSItemTy, {CurrMallocSize, zero, zero, zero});
+ }
+ } else {
+ Constant *CurrMallocSize = ConstantInt::get(Int32Ty, MallocSize);
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ NewInitItem =
+ ConstantStruct::get(LDSItemTy, {CurrMallocSize, zero, zero, zero});
+ }
+ Initializers.push_back(NewInitItem);
+ }
+
+ StructType *MetadataStructType = StructType::create(Ctx, Items, "");
+
+ GlobalVariable *NewSwLDSMetadataGlobal = new GlobalVariable(
+ *M, MetadataStructType, false, GlobalValue::InternalLinkage,
+ PoisonValue::get(MetadataStructType), "", nullptr,
+ GlobalValue::NotThreadLocal, 1, false);
+ Constant *Data = ConstantStruct::get(MetadataStructType, Initializers);
+ NewSwLDSMetadataGlobal->setInitializer(Data);
+ NewSwLDSMetadataGlobal->setAlignment(MdAlign);
+ GlobalValue::SanitizerMetadata MD;
+ MD.NoAddress = true;
+ NewSwLDSMetadataGlobal->setSanitizerMetadata(MD);
+
+ for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) {
+ if (GEPOperator *GEP = dyn_cast<GEPOperator>(U.getUser())) {
+ SmallVector<Constant *> Indices;
+ for (Use &Idx : GEP->indices()) {
+ Indices.push_back(cast<Constant>(Idx));
+ }
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
+ MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+ GEP->replaceAllUsesWith(NewGEP);
+ }
+ if (LoadInst *Load = dyn_cast<LoadInst>(U.getUser())) {
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ SmallVector<Constant *> Indices{zero, zero, zero};
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
+ MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+ IRBuilder<> IRB(Load);
+ LoadInst *NewLoad = IRB.CreateLoad(Load->getType(), NewGEP);
+ Load->replaceAllUsesWith(NewLoad);
+ Load->eraseFromParent();
+ }
+ if (StoreInst *Store = dyn_cast<StoreInst>(U.getUser())) {
+ Constant *zero = ConstantInt::get(Int32Ty, 0);
+ SmallVector<Constant *> Indices{zero, zero, zero};
+ Constant *NewGEP = ConstantExpr::getGetElementPtr(
+ MetadataStructType, NewSwLDSMetadataGlobal, Indices, true);
+ IRBuilder<> IRB(Store);
+ StoreInst *NewStore = IRB.CreateStore(Store->getValueOperand(), NewGEP);
+ Store->replaceAllUsesWith(NewStore);
+ Store->eraseFromParent();
+ }
+ }
+ SwLDSMetadataGlobal->replaceAllUsesWith(NewSwLDSMetadataGlobal);
+ NewSwLDSMetadataGlobal->takeName(SwLDSMetadataGlobal);
+ SwLDSMetadataGlobal->eraseFromParent();
+ return;
+}
+
+static void poisonRedzonesForSwLDS(Function& F) {
+ Module *M = F.getParent();
+ GlobalVariable *SwLDSGlobal = getKernelSwLDSGlobal(*M, F);
+ GlobalVariable *SwLDSMetadataGlobal = getKernelSwLDSMetadataGlobal(*M, F);
+
+ if (!SwLDSGlobal || !SwLDSMetadataGlobal) return;
+
+ LLVMContext &Ctx = M->getContext();
+ Type *Int64Ty = Type::getInt64Ty(Ctx);
+ Type *VoidTy = Type::getVoidTy(Ctx);
+ FunctionCallee AsanPoisonRegion = M->getOrInsertFunction(
+ StringRef("__asan_poison_region"),
+ FunctionType::get(VoidTy,
+ {Int64Ty, Int64Ty}, false));
+ Constant *MdInit = SwLDSMetadataGlobal->getInitializer();
+
+ for (User *U : SwLDSGlobal->users()) {
+ StoreInst *SI = dyn_cast<StoreInst>(U);
+ if (!SI) continue;
+
+ // Check if the value being stored is the result of a malloc
+ CallInst *CI = dyn_cast<CallInst>(SI->getValueOperand());
+ if (!CI) continue;
+
+ Function *F = CI->getCalledFunction();
+ if (!F && !(F->getName() == "malloc"))
+ continue;
+
+ StructType *MDStructType =
+ cast<StructType>(SwLDSMetadataGlobal->getValueType());
+ assert(MDStructType);
+ unsigned NumStructs = MDStructType->getNumElements();
+ Value* StoreMallocPointer = SI->getValueOperand();
+
+ for (unsigned i = 0; i < NumStructs; i++) {
+ ConstantStruct *member = dyn_cast<ConstantStruct>(MdInit->getAggregateElement(i));
+ if (!member) continue;
+
+ ConstantInt *GlobalSize =
+ cast<ConstantInt>(member->getAggregateElement(1U));
+ unsigned GlobalSizeValue = GlobalSize->getZExtValue();
+
+ if (!GlobalSizeValue) continue;
+ IRBuilder<> IRB(SI);
+ IRB.SetInsertPoint(SI->getNextNode());
+
+ auto *GEPForOffset = IRB.CreateInBoundsGEP(
+ MDStructType, SwLDSMetadataGlobal,
+ {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(2)});
+
+ auto *GEPForSize = IRB.CreateInBoundsGEP(
+ MDStructType, SwLDSMetadataGlobal,
+ {IRB.getInt32(0), IRB.getInt32(i), IRB.getInt32(3)});
+
+ Value *RedzoneOffset =
+ IRB.CreateLoad(IRB.getInt64Ty(), GEPForOffset);
+ Value* RedzoneAddrOffset = IRB.CreateInBoundsGEP(
+ IRB.getInt8Ty(), StoreMallocPointer, {RedzoneOffset});
+ Value* RedzoneAddress = IRB.CreatePtrToInt(RedzoneAddrOffset, IRB.getInt64Ty());
+ Value *Redzonesize = IRB.CreateLoad(IRB.getInt64Ty(), GEPForSize);
+ IRB.CreateCall(AsanPoisonRegion, {RedzoneAddress, Redzonesize});
+ }
+ }
+ return;
+}
+
+static void preProcessAMDGPULDSAccesses(Module &M, int Scale) {
+ for (Function &F : M) {
+ UpdateSwLDSMetadataWithRedzoneInfo(F, Scale);
+ poisonRedzonesForSwLDS(F);
+ }
+ return;
+}
+
AddressSanitizerPass::AddressSanitizerPass(
const AddressSanitizerOptions &Options, bool UseGlobalGC,
bool UseOdrIndicator, AsanDtorKind DestructorKind,
@@ -1249,6 +1480,13 @@ AddressSanitizerPass::AddressSanitizerPass(
PreservedAnalyses AddressSanitizerPass::run(Module &M,
ModuleAnalysisManager &MAM) {
+ Triple TargetTriple = Triple(M.getTargetTriple());
+
+ if (TargetTriple.isAMDGPU()) {
+ unsigned LongSize = M.getDataLayout().getPointerSizeInBits();
+ ShadowMapping Mapping = getShadowMapping(TargetTriple, LongSize, false);
+ preProcessAMDGPULDSAccesses(M, Mapping.Scale);
+ }
ModuleAddressSanitizer ModuleSanitizer(
M, Options.InsertVersionCheck, Options.CompileKernel, Options.Recover,
UseGlobalGC, UseOdrIndicator, DestructorKind, ConstructorKind);
@@ -1304,7 +1542,15 @@ static bool GlobalWasGeneratedByCompiler(GlobalVariable *G) {
static bool isUnsupportedAMDGPUAddrspace(Value *Addr) {
Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
- if (AddrSpace == 3 || AddrSpace == 5)
+ if (AddrSpace == 5)
+ return true;
+ return false;
+}
+
+static bool isGlobalInAMDGPULdsAddrspace(Value *Addr) {
+ Type *PtrTy = cast<PointerType>(Addr->getType()->getScalarType());
+ unsigned int AddrSpace = PtrTy->getPointerAddressSpace();
+ if (AddrSpace == 3)
return true;
return false;
}
@@ -2021,7 +2267,8 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
if (!G->hasInitializer()) return false;
// Globals in address space 1 and 4 are supported for AMDGPU.
if (G->getAddressSpace() &&
- !(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)))
+ (!(TargetTriple.isAMDGPU() && !isUnsupportedAMDGPUAddrspace(G)) ||
+ !(TargetTriple.isAMDGPU() && !isGlobalInAMDGPULdsAddrspace(G))))
return false;
if (GlobalWasGeneratedByCompiler(G)) return false; // Our own globals.
// Two problems with thread-locals:
@@ -2029,7 +2276,9 @@ bool ModuleAddressSanitizer::shouldInstrumentGlobal(GlobalVariable *G) const {
// - Need to poison all copies, not just the main thread's one.
if (G->isThreadLocal()) return false;
// For now, just ignore this Global if the alignment is large.
- if (G->getAlign() && *G->getAlign() > getMinRedzoneSizeForGlobal()) return false;
+ if (G->getAlign() &&
+ *G->getAlign() > getMinRedzoneSizeForGlobal(Mapping.Scale))
+ return false;
// For non-COFF targets, only instrument globals known to be defined by this
// TU.
@@ -2552,7 +2801,8 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
Type *Ty = G->getValueType();
const uint64_t SizeInBytes = DL.getTypeAllocSize(Ty);
- const uint64_t RightRedzoneSize = getRedzoneSizeForGlobal(SizeInBytes);
+ const uint64_t RightRedzoneSize =
+ getRedzoneSizeForGlobal(Mapping.Scale, SizeInBytes);
Type *RightRedZoneTy = ArrayType::get(IRB.getInt8Ty(), RightRedzoneSize);
StructType *NewTy = StructType::get(Ty, RightRedZoneTy);
@@ -2568,7 +2818,7 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
G->getThreadLocalMode(), G->getAddressSpace());
NewGlobal->copyAttributesFrom(G);
NewGlobal->setComdat(G->getComdat());
- NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal()));
+ NewGlobal->setAlignment(Align(getMinRedzoneSizeForGlobal(Mapping.Scale)));
// Don't fold globals with redzones. ODR violation detector and redzone
// poisoning implicitly creates a dependence on the global's address, so it
// is no longer valid for it to be marked unnamed_addr.
@@ -2688,31 +2938,6 @@ void ModuleAddressSanitizer::instrumentGlobals(IRBuilder<> &IRB, Module &M,
LLVM_DEBUG(dbgs() << M);
}
-uint64_t
-ModuleAddressSanitizer::getRedzoneSizeForGlobal(uint64_t SizeInBytes) const {
- constexpr uint64_t kMaxRZ = 1 << 18;
- const uint64_t MinRZ = getMinRedzoneSizeForGlobal();
-
- uint64_t RZ = 0;
- if (SizeInBytes <= MinRZ / 2) {
- // Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
- // at least 32 bytes, optimize when SizeInBytes is less than or equal to
- // half of MinRZ.
- RZ = MinRZ - SizeInBytes;
- } else {
- // Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
- RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);
-
- // Round up to multiple of MinRZ.
- if (SizeInBytes % MinRZ)
- RZ += MinRZ - (SizeInBytes % MinRZ);
- }
-
- assert((RZ + SizeInBytes) % MinRZ == 0);
-
- return RZ;
-}
-
int ModuleAddressSanitizer::GetAsanVersion(const Module &M) const {
int LongSize = M.getDataLayout().getPointerSizeInBits();
bool isAndroid = Triple(M.getTargetTriple()).isAndroid();
diff --git a/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-instr-lds-dynamic-indirect-access.ll b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-instr-lds-dynamic-indirect-access.ll
new file mode 100755
index 00000000000000..d22583f5b3c2af
--- /dev/null
+++ b/llvm/test/Instrumentation/AddressSanitizer/AMDGPU/asan-instr-lds-dynamic-indirect-access.ll
@@ -0,0 +1,451 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt < %s -passes=asan -S -mtriple=amdgcn-- | FileCheck %s
+
+%llvm.amdgcn.sw.lds.k0.md.type = type { %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item, %llvm.amdgcn.sw.lds.k0.md.item }
+%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32 }
+@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, align 8
+@llvm.amdgcn.sw.lds.k0.md = internal addrspace(1) global %llvm.amdgcn.sw.lds.k0.md.type { %llvm.amdgcn.sw.lds.k0.md.item { i32 0, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 8, i32 8 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 0 }, %llvm.amdgcn.sw.lds.k0.md.item { i32 16, i32 0 } }, no_sanitize_address
+@llvm.amdgcn.sw.lds.base.table = internal addrspace(4) constant [1 x i32] [i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.sw.lds.k0 to i32)]
+@llvm.amdgcn.sw.lds.offset.table = internal addrspace(4) constant [1 x [2 x i32]] [[2 x i32] [i32 ptrtoint (ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 2, i32 0) to i32), i32 ptrtoint (ptr addrspace(1) getelementptr inbounds (%llvm.amdgcn.sw.lds.k0.md.type, ptr addrspace(1) @llvm.amdgcn.sw.lds.k0.md, i32 0, i32 3, i32 0) to i32)]]
+
+define void @use_variables() sanitize_address {
+; CHECK-LABEL: define void @use_variables(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1 x i32], ptr addrspace(4) @llvm.amdgcn.sw.lds.base.table, i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = ptrtoint ptr addrspace(4) [[TMP2]] to i64
+; CHECK-NEXT: [[TMP4:%.*]] = lshr i64 [[TMP3]], 3
+; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 2147450880
+; CHECK-NEXT: [[TMP6:%.*]] = inttoptr i64 [[TMP5]] to ptr
+; CHECK-NEXT: [[TMP7:%.*]] = load i8, ptr [[TMP6]], align 1
+; CHECK-NEXT: [[TMP8:%.*]] = icmp ne i8 [[TMP7]], 0
+; CHECK-NEXT: [[TMP9:%.*]] = and i64 [[TMP3]], 7
+; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[TMP9]], 3
+; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[TMP10]] to i8
+; CHECK-NEXT: [[TMP12:%.*]] = icmp sge i8 [[TMP11]], [[TMP7]]
+; CHECK-NEXT: [[TMP13:%.*]] = and i1 [[TMP8]], [[TMP12]]
+; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP13]])
+; CHECK-NEXT: [[TMP15:%.*]] = icmp ne i64 [[TMP14]], 0
+; CHECK-NEXT: br i1 [[TMP15]], label [[ASAN_REPORT:%.*]], label [[TMP18:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK: asan.report:
+; CHECK-NEXT: br i1 [[TMP13]], label [[TMP16:%.*]], label [[TMP17:%.*]]
+; CHECK: 16:
+; CHECK-NEXT: call void @__asan_report_load4(i64 [[TMP3]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: call void @llvm.amdgcn.unreachable()
+; CHECK-NEXT: br label [[TMP17]]
+; CHECK: 17:
+; CHECK-NEXT: br label [[TMP18]]
+; CHECK: 18:
+; CHECK-NEXT: [[TMP19:%.*]] = load i32, ptr addrspace(4) [[TMP2]], align 4
+; CHECK-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP19]] to ptr addrspace(3)
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [1 x [2 x i32]], ptr addrspace(4) @llvm.amdgcn.sw.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[TMP22:%.*]] = ptrtoint ptr addrspace(4) [[TMP21]] to i64
+; CHECK-NEXT: [[TMP23:%.*]] = lshr i64 [[TMP22]], 3
+; CHECK-NEXT: [[TMP24:%.*]] = add i64 [[TMP23]], 2147450880
+; CHECK-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP24]] to ptr
+; CHECK-NEXT: [[TMP26:%.*]] = load i8, ptr [[TMP25]], align 1
+; CHECK-NEXT: [[TMP27:%.*]] = icmp ne i8 [[TMP26]], 0
+; CHECK-NEXT: [[TMP28:%.*]] = and i64 [[TMP22]], 7
+; CHECK-NEXT: [[TMP29:%.*]] = add i64 [[TMP28]], 3
+; CHECK-NEXT: [[TMP30:%.*]] = trunc i64 [[TMP29]] to i8
+; CHECK-NEXT: [[TMP31:%.*]] = icmp sge i8 [[TMP30]], [[TMP26]]
+; CHECK-NEXT: [[TMP32:%.*]] = and i1 [[TMP27]], [[TMP31]]
+; CHECK-NEXT: [[TMP33:%.*]] = call i64 @llvm.amdgcn.ballot.i64(i1 [[TMP32]])
+; CHECK-NEXT: [[TMP34:%.*]] = icmp ne i64 [[TMP33]], 0
+; CHECK-NEXT: br i1 [[TMP34]], label [[ASAN_REPORT1:%.*]], label [[TMP37:%.*]], !prof [[PROF0]]
+; CHECK: ...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
e442d7b
to
973812e
Compare
973812e
to
65dd67a
Compare
544a3b4
to
91caffb
Compare
%llvm.amdgcn.sw.lds.k0.md.item = type { i32, i32, i32 } | ||
|
||
@llvm.amdgcn.sw.lds.k0 = internal addrspace(3) global ptr poison, no_sanitize_address, align 1, !absolute_symbol !0 | ||
@llvm.amdgcn.k0.dynlds = external addrspace(3) global [0 x i8], no_sanitize_address, align 1, !absolute_symbol !1 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Based on the test name, the case without no_sanitize_address is more interesting? Skipping this would be a separate test?
static void updateLDSSizeFnAttr(Function *Func, uint32_t Offset, | ||
bool UsesDynLDS) { | ||
if (Offset != 0) { | ||
std::string Buffer; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SmallString + raw_svector_ostream
MD.NoAddress = true; | ||
NewSwLDSMetadataGlobal->setSanitizerMetadata(MD); | ||
|
||
for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why do you need to reconstruct every possible user? This handling is nowhere near complete, and can't work for an arbitrary call
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're also going straight to the user, should use the user iterator. What about cases with repeated uses?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the latest patch, I'm updating just the initializer of the metadata global (with redzone size value) and not replacing its uses.
|
||
StructType *MDStructType = | ||
cast<StructType>(SwLDSMetadataGlobal->getValueType()); | ||
assert(MDStructType); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is redundant with cast
br i1 %xyzCond, label %Free, label %End | ||
|
||
Free: ; preds = %CondFree | ||
%24 = load ptr addrspace(1), ptr addrspace(3) @llvm.amdgcn.sw.lds.k0, align 8 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use named values in tests
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The test IR input is generated by running amdgpu-sw-lower-lds pass. Unnamed values are seen because of this. Will update them with named value if needed.
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) | ||
declare i32 @llvm.amdgcn.workitem.id.y() #2 | ||
|
||
; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Drop comments on declarations (could drop the intrinsic declarations too)
; CHECK-NEXT: ret void | ||
; | ||
WId: | ||
%0 = call i32 @llvm.amdgcn.workitem.id.x() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use named values in tests
%4 = or i32 %3, %2 | ||
%5 = icmp eq i32 %4, 0 | ||
br i1 %5, label %Malloc, label %14 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There's a lot going on in these test functions. Can you use simple tests that show single operations? Also need to test an arbitrary call user of a pointer
GlobalVariable *NewSwLDSMetadataGlobal = new GlobalVariable( | ||
*M, MetadataStructType, false, GlobalValue::InternalLinkage, | ||
PoisonValue::get(MetadataStructType), "", nullptr, | ||
GlobalValue::NotThreadLocal, 1, false); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use DefaultGlobalsAddresSpace from DataLayout
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In the latest patch, I'm updating just the initializer of the metadata global (with redzone size value) and not replacing its uses. So new global here is not needed with latest patch.
Initializers.push_back(NewInitItem); | ||
} | ||
GlobalVariable *SwDynLDS = getKernelSwDynLDSGlobal(*M, F); | ||
bool usesDynLDS = SwDynLDS ? true : false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SwDynLDS != nullptr?
MD.NoAddress = true; | ||
NewSwLDSMetadataGlobal->setSanitizerMetadata(MD); | ||
|
||
for (Use &U : make_early_inc_range(SwLDSMetadataGlobal->uses())) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You're also going straight to the user, should use the user iterator. What about cases with repeated uses?
91caffb
to
b761645
Compare
[AMDGPU] Update the MD initializer and donot replace uses of MD global.
b761645
to
70a38ea
Compare
PR for asan instrumentation of "LDS accesses lowered by amdgpu-sw-lower-lds".
Pre-requisite PR: #87265