[AMDGPU] Utilities to asan instrument memory instructions. (llvm#98863)
This change adds the utilities required to ASan-instrument memory
instructions. In the "amdgpu-sw-lower-lds" pass (llvm#87265), lowering from
LDS to global memory creates new global-memory instructions that need to be
ASan-instrumented.
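
For illustration, here is a minimal sketch (not part of this commit) of how a lowering pass might drive these utilities. The wrapper function name, the per-function iteration, and the scale/offset arguments are assumptions; InterestingMemoryOperand comes from llvm/Transforms/Instrumentation/AddressSanitizerCommon.h.

// Hypothetical driver, assuming the API declared in AMDGPUAsanInstrumentation.h.
#include "AMDGPUAsanInstrumentation.h"

using namespace llvm;

static void instrumentGlobalAccesses(Module &M, Function &F, int AsanScale,
                                     int AsanOffset) {
  SmallVector<InterestingMemoryOperand, 16> Interesting;
  for (BasicBlock &BB : F)
    for (Instruction &I : BB)
      AMDGPU::getInterestingMemoryOperands(M, &I, Interesting);

  for (InterestingMemoryOperand &Op : Interesting) {
    Instruction *Insn = Op.getInsn();
    IRBuilder<> IRB(Insn);
    // TypeStoreSize is carried in bits; instrumentAddress divides by 8
    // internally wherever it needs a byte count.
    AMDGPU::instrumentAddress(M, IRB, Insn, Insn, Op.getPtr(), Op.Alignment,
                              Op.TypeStoreSize.getFixedValue(), Op.IsWrite,
                              /*SizeArgument=*/nullptr, /*UseCalls=*/false,
                              /*Recover=*/true, AsanScale, AsanOffset);
  }
}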
skc7 authored Jul 24, 2024
1 parent 455990d commit ddb75ca
Showing 3 changed files with 393 additions and 0 deletions.
332 changes: 332 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.cpp
@@ -0,0 +1,332 @@
//===AMDGPUAsanInstrumentation.cpp - ASAN related helper functions===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===-------------------------------------------------------------===//

#include "AMDGPUAsanInstrumentation.h"

#define DEBUG_TYPE "amdgpu-asan-instrumentation"

using namespace llvm;

namespace llvm {
namespace AMDGPU {

static uint64_t getRedzoneSizeForScale(int AsanScale) {
// Redzone used for stack and globals is at least 32 bytes.
// For scales 6 and 7, the redzone has to be 64 and 128 bytes respectively.
return std::max(32U, 1U << AsanScale);
}

static uint64_t getMinRedzoneSizeForGlobal(int AsanScale) {
return getRedzoneSizeForScale(AsanScale);
}

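// For example, with AsanScale = 3 (so MinRZ = 32): a 4-byte global gets
// RZ = 32 - 4 = 28, making the padded object exactly one 32-byte granule,
// while a 100-byte global gets RZ = 32 (clamped) plus 28 bytes of round-up,
// i.e. 60, so the padded size of 160 bytes is again a multiple of MinRZ.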
uint64_t getRedzoneSizeForGlobal(int AsanScale, uint64_t SizeInBytes) {
constexpr uint64_t kMaxRZ = 1 << 18;
const uint64_t MinRZ = getMinRedzoneSizeForGlobal(AsanScale);

uint64_t RZ = 0;
if (SizeInBytes <= MinRZ / 2) {
// Reduce redzone size for small size objects, e.g. int, char[1]. MinRZ is
// at least 32 bytes, optimize when SizeInBytes is less than or equal to
// half of MinRZ.
RZ = MinRZ - SizeInBytes;
} else {
// Calculate RZ, where MinRZ <= RZ <= MaxRZ, and RZ ~ 1/4 * SizeInBytes.
RZ = std::clamp((SizeInBytes / MinRZ / 4) * MinRZ, MinRZ, kMaxRZ);

// Round up to multiple of MinRZ.
if (SizeInBytes % MinRZ)
RZ += MinRZ - (SizeInBytes % MinRZ);
}

assert((RZ + SizeInBytes) % MinRZ == 0);

return RZ;
}

static size_t TypeStoreSizeToSizeIndex(uint32_t TypeSize) {
size_t Res = llvm::countr_zero(TypeSize / 8);
return Res;
}

static Instruction *genAMDGPUReportBlock(Module &M, IRBuilder<> &IRB,
Value *Cond, bool Recover) {
Value *ReportCond = Cond;
if (!Recover) {
auto *Ballot =
IRB.CreateIntrinsic(Intrinsic::amdgcn_ballot, IRB.getInt64Ty(), {Cond});
ReportCond = IRB.CreateIsNotNull(Ballot);
}

auto *Trm = SplitBlockAndInsertIfThen(
ReportCond, &*IRB.GetInsertPoint(), false,
MDBuilder(M.getContext()).createUnlikelyBranchWeights());
Trm->getParent()->setName("asan.report");

if (Recover)
return Trm;

Trm = SplitBlockAndInsertIfThen(Cond, Trm, false);
IRB.SetInsertPoint(Trm);
return IRB.CreateIntrinsic(Intrinsic::amdgcn_unreachable, {}, {});
}

static Value *createSlowPathCmp(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *AddrLong, Value *ShadowValue,
uint32_t TypeStoreSize, int AsanScale) {
uint64_t Granularity = static_cast<uint64_t>(1) << AsanScale;
// Addr & (Granularity - 1)
Value *LastAccessedByte =
IRB.CreateAnd(AddrLong, ConstantInt::get(IntptrTy, Granularity - 1));
// (Addr & (Granularity - 1)) + size - 1
if (TypeStoreSize / 8 > 1)
LastAccessedByte = IRB.CreateAdd(
LastAccessedByte, ConstantInt::get(IntptrTy, TypeStoreSize / 8 - 1));
// (uint8_t) ((Addr & (Granularity-1)) + size - 1)
LastAccessedByte =
IRB.CreateIntCast(LastAccessedByte, ShadowValue->getType(), false);
// ((uint8_t) ((Addr & (Granularity-1)) + size - 1)) >= ShadowValue
return IRB.CreateICmpSGE(LastAccessedByte, ShadowValue);
}

static Instruction *generateCrashCode(Module &M, IRBuilder<> &IRB,
Type *IntptrTy, Instruction *InsertBefore,
Value *Addr, bool IsWrite,
size_t AccessSizeIndex,
Value *SizeArgument, bool Recover) {
IRB.SetInsertPoint(InsertBefore);
CallInst *Call = nullptr;
SmallString<128> kAsanReportErrorTemplate{"__asan_report_"};
SmallString<64> TypeStr{IsWrite ? "store" : "load"};
SmallString<64> EndingStr{Recover ? "_noabort" : ""};

SmallString<128> AsanErrorCallbackSizedString;
raw_svector_ostream AsanErrorCallbackSizedOS(AsanErrorCallbackSizedString);
AsanErrorCallbackSizedOS << kAsanReportErrorTemplate << TypeStr << "_n"
<< EndingStr;

SmallVector<Type *, 3> Args2 = {IntptrTy, IntptrTy};
AttributeList AL2;
FunctionCallee AsanErrorCallbackSized = M.getOrInsertFunction(
AsanErrorCallbackSizedOS.str(),
FunctionType::get(IRB.getVoidTy(), Args2, false), AL2);
SmallVector<Type *, 2> Args1{1, IntptrTy};
AttributeList AL1;

SmallString<128> AsanErrorCallbackString;
raw_svector_ostream AsanErrorCallbackOS(AsanErrorCallbackString);
AsanErrorCallbackOS << kAsanReportErrorTemplate << TypeStr
<< (1ULL << AccessSizeIndex) << EndingStr;

FunctionCallee AsanErrorCallback = M.getOrInsertFunction(
AsanErrorCallbackOS.str(),
FunctionType::get(IRB.getVoidTy(), Args1, false), AL1);
if (SizeArgument) {
Call = IRB.CreateCall(AsanErrorCallbackSized, {Addr, SizeArgument});
} else {
Call = IRB.CreateCall(AsanErrorCallback, Addr);
}

Call->setCannotMerge();
return Call;
}

static Value *memToShadow(Module &M, IRBuilder<> &IRB, Type *IntptrTy,
Value *Shadow, int AsanScale, uint32_t AsanOffset) {
// Shadow >> scale
Shadow = IRB.CreateLShr(Shadow, AsanScale);
if (AsanOffset == 0)
return Shadow;
// (Shadow >> scale) + offset
Value *ShadowBase = ConstantInt::get(IntptrTy, AsanOffset);
return IRB.CreateAdd(Shadow, ShadowBase);
}

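// Emits the standard ASan shadow check for a single access: the address is
// mapped to its shadow byte via memToShadow, and the report path is taken
// only when that shadow byte is non-zero and the slow-path compare
// (createSlowPathCmp) shows the access reaching past the partially
// addressable bytes recorded in the shadow.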
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
MaybeAlign Alignment, uint32_t TypeStoreSize,
bool IsWrite, Value *SizeArgument, bool UseCalls,
bool Recover, int AsanScale, int AsanOffset) {
Type *AddrTy = Addr->getType();
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), AddrTy->getPointerAddressSpace());
IRB.SetInsertPoint(InsertBefore);
size_t AccessSizeIndex = TypeStoreSizeToSizeIndex(TypeStoreSize);
Type *ShadowTy = IntegerType::get(M.getContext(),
std::max(8U, TypeStoreSize >> AsanScale));
Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
Value *AddrLong = IRB.CreatePtrToInt(Addr, IntptrTy);
Value *ShadowPtr =
memToShadow(M, IRB, IntptrTy, AddrLong, AsanScale, AsanOffset);
const uint64_t ShadowAlign =
std::max<uint64_t>(Alignment.valueOrOne().value() >> AsanScale, 1);
Value *ShadowValue = IRB.CreateAlignedLoad(
ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy), Align(ShadowAlign));
Value *Cmp = IRB.CreateIsNotNull(ShadowValue);
auto *Cmp2 = createSlowPathCmp(M, IRB, IntptrTy, AddrLong, ShadowValue,
TypeStoreSize, AsanScale);
Cmp = IRB.CreateAnd(Cmp, Cmp2);
Instruction *CrashTerm = genAMDGPUReportBlock(M, IRB, Cmp, Recover);
Instruction *Crash =
generateCrashCode(M, IRB, IntptrTy, CrashTerm, AddrLong, IsWrite,
AccessSizeIndex, SizeArgument, Recover);
Crash->setDebugLoc(OrigIns->getDebugLoc());
return;
}

void getInterestingMemoryOperands(
Module &M, Instruction *I,
SmallVectorImpl<InterestingMemoryOperand> &Interesting) {
const DataLayout &DL = M.getDataLayout();
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
Interesting.emplace_back(I, LI->getPointerOperandIndex(), false,
LI->getType(), LI->getAlign());
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
Interesting.emplace_back(I, SI->getPointerOperandIndex(), true,
SI->getValueOperand()->getType(), SI->getAlign());
} else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
Interesting.emplace_back(I, RMW->getPointerOperandIndex(), true,
RMW->getValOperand()->getType(), std::nullopt);
} else if (AtomicCmpXchgInst *XCHG = dyn_cast<AtomicCmpXchgInst>(I)) {
Interesting.emplace_back(I, XCHG->getPointerOperandIndex(), true,
XCHG->getCompareOperand()->getType(),
std::nullopt);
} else if (auto CI = dyn_cast<CallInst>(I)) {
switch (CI->getIntrinsicID()) {
case Intrinsic::masked_load:
case Intrinsic::masked_store:
case Intrinsic::masked_gather:
case Intrinsic::masked_scatter: {
bool IsWrite = CI->getType()->isVoidTy();
// Masked store has an initial operand for the value.
unsigned OpOffset = IsWrite ? 1 : 0;
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = Align(1);
// Otherwise no alignment guarantees. We probably got Undef.
if (auto *Op = dyn_cast<ConstantInt>(CI->getOperand(1 + OpOffset)))
Alignment = Op->getMaybeAlignValue();
Value *Mask = CI->getOperand(2 + OpOffset);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, Mask);
break;
}
case Intrinsic::masked_expandload:
case Intrinsic::masked_compressstore: {
bool IsWrite = CI->getIntrinsicID() == Intrinsic::masked_compressstore;
unsigned OpOffset = IsWrite ? 1 : 0;
auto BasePtr = CI->getOperand(OpOffset);
MaybeAlign Alignment = BasePtr->getPointerAlignment(DL);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
IRBuilder<> IB(I);
Value *Mask = CI->getOperand(1 + OpOffset);
Type *IntptrTy = M.getDataLayout().getIntPtrType(
M.getContext(), BasePtr->getType()->getPointerAddressSpace());
// Use the popcount of Mask as the effective vector length.
Type *ExtTy = VectorType::get(IntptrTy, cast<VectorType>(Ty));
Value *ExtMask = IB.CreateZExt(Mask, ExtTy);
Value *EVL = IB.CreateAddReduce(ExtMask);
Value *TrueMask = ConstantInt::get(Mask->getType(), 1);
Interesting.emplace_back(I, OpOffset, IsWrite, Ty, Alignment, TrueMask,
EVL);
break;
}
case Intrinsic::vp_load:
case Intrinsic::vp_store:
case Intrinsic::experimental_vp_strided_load:
case Intrinsic::experimental_vp_strided_store: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = CI->getType()->isVoidTy();
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getOperand(PtrOpNo)->getPointerAlignment(DL);
Value *Stride = nullptr;
if (IID == Intrinsic::experimental_vp_strided_store ||
IID == Intrinsic::experimental_vp_strided_load) {
Stride = VPI->getOperand(PtrOpNo + 1);
// Use the pointer alignment as the element alignment if the stride is a
// multiple of the pointer alignment. Otherwise, the element alignment
// should be Align(1).
unsigned PointerAlign = Alignment.valueOrOne().value();
if (!isa<ConstantInt>(Stride) ||
cast<ConstantInt>(Stride)->getZExtValue() % PointerAlign != 0)
Alignment = Align(1);
}
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(), VPI->getVectorLengthParam(),
Stride);
break;
}
case Intrinsic::vp_gather:
case Intrinsic::vp_scatter: {
auto *VPI = cast<VPIntrinsic>(CI);
unsigned IID = CI->getIntrinsicID();
bool IsWrite = IID == Intrinsic::vp_scatter;
unsigned PtrOpNo = *VPI->getMemoryPointerParamPos(IID);
Type *Ty = IsWrite ? CI->getArgOperand(0)->getType() : CI->getType();
MaybeAlign Alignment = VPI->getPointerAlignment();
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment,
VPI->getMaskParam(),
VPI->getVectorLengthParam());
break;
}
case Intrinsic::amdgcn_raw_buffer_load:
case Intrinsic::amdgcn_raw_ptr_buffer_load:
case Intrinsic::amdgcn_raw_buffer_load_format:
case Intrinsic::amdgcn_raw_ptr_buffer_load_format:
case Intrinsic::amdgcn_raw_tbuffer_load:
case Intrinsic::amdgcn_raw_ptr_tbuffer_load:
case Intrinsic::amdgcn_struct_buffer_load:
case Intrinsic::amdgcn_struct_ptr_buffer_load:
case Intrinsic::amdgcn_struct_buffer_load_format:
case Intrinsic::amdgcn_struct_ptr_buffer_load_format:
case Intrinsic::amdgcn_struct_tbuffer_load:
case Intrinsic::amdgcn_struct_ptr_tbuffer_load:
case Intrinsic::amdgcn_s_buffer_load:
case Intrinsic::amdgcn_global_load_tr_b64:
case Intrinsic::amdgcn_global_load_tr_b128: {
unsigned PtrOpNo = 0;
bool IsWrite = false;
Type *Ty = CI->getType();
Value *Ptr = CI->getArgOperand(PtrOpNo);
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
case Intrinsic::amdgcn_raw_tbuffer_store:
case Intrinsic::amdgcn_raw_ptr_tbuffer_store:
case Intrinsic::amdgcn_raw_buffer_store:
case Intrinsic::amdgcn_raw_ptr_buffer_store:
case Intrinsic::amdgcn_raw_buffer_store_format:
case Intrinsic::amdgcn_raw_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_buffer_store:
case Intrinsic::amdgcn_struct_ptr_buffer_store:
case Intrinsic::amdgcn_struct_buffer_store_format:
case Intrinsic::amdgcn_struct_ptr_buffer_store_format:
case Intrinsic::amdgcn_struct_tbuffer_store:
case Intrinsic::amdgcn_struct_ptr_tbuffer_store: {
unsigned PtrOpNo = 1;
bool IsWrite = true;
Value *Ptr = CI->getArgOperand(PtrOpNo);
Type *Ty = Ptr->getType();
MaybeAlign Alignment = Ptr->getPointerAlignment(DL);
Interesting.emplace_back(I, PtrOpNo, IsWrite, Ty, Alignment);
break;
}
default:
for (unsigned ArgNo = 0; ArgNo < CI->arg_size(); ArgNo++) {
if (Type *Ty = CI->getParamByRefType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
} else if (Type *Ty = CI->getParamByValType(ArgNo)) {
Interesting.emplace_back(I, ArgNo, false, Ty, Align(1));
}
}
}
}
}
} // end namespace AMDGPU
} // end namespace llvm
60 changes: 60 additions & 0 deletions llvm/lib/Target/AMDGPU/Utils/AMDGPUAsanInstrumentation.h
@@ -0,0 +1,60 @@
//===- AMDGPUAsanInstrumentation.h - ASan helper functions -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===--------------------------------------------------------------===//

#ifndef LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
#define LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H

#include "AMDGPU.h"
#include "AMDGPUBaseInfo.h"
#include "AMDGPUMemoryUtils.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/OptimizedStructLayout.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
#include "llvm/Transforms/Instrumentation/AddressSanitizerCommon.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"

namespace llvm {
namespace AMDGPU {

/// Given SizeInBytes of the value to be instrumented, returns the
/// corresponding redzone size.
uint64_t getRedzoneSizeForGlobal(int Scale, uint64_t SizeInBytes);

/// Instrument the memory operand Addr, generating report blocks that catch
/// addressing errors.
void instrumentAddress(Module &M, IRBuilder<> &IRB, Instruction *OrigIns,
Instruction *InsertBefore, Value *Addr,
MaybeAlign Alignment, uint32_t TypeStoreSize,
bool IsWrite, Value *SizeArgument, bool UseCalls,
bool Recover, int Scale, int Offset);

/// Collect all the memory operands of the instruction that need to be
/// instrumented.
void getInterestingMemoryOperands(
Module &M, Instruction *I,
SmallVectorImpl<InterestingMemoryOperand> &Interesting);

} // end namespace AMDGPU
} // end namespace llvm

#endif // LLVM_LIB_TARGET_AMDGPU_UTILS_AMDGPU_ASAN_INSTRUMENTATION_H
1 change: 1 addition & 0 deletions llvm/lib/Target/AMDGPU/Utils/CMakeLists.txt
@@ -1,4 +1,5 @@
add_llvm_component_library(LLVMAMDGPUUtils
AMDGPUAsanInstrumentation.cpp
AMDGPUAsmUtils.cpp
AMDGPUBaseInfo.cpp
AMDGPUDelayedMCExpr.cpp
