[clang] Add builtin to clear padding bytes (prework for P0528R3) #75371

Open · wants to merge 3 commits into base: main

Changes from 1 commit
6 changes: 6 additions & 0 deletions clang/include/clang/Basic/Builtins.td
@@ -932,6 +932,12 @@ def IsConstantEvaluated : LangBuiltin<"CXX_LANG"> {
let Prototype = "bool()";
}

def ClearPadding : LangBuiltin<"CXX_LANG"> {
let Spellings = ["__builtin_clear_padding"];
let Attributes = [NoThrow];
let Prototype = "void(...)";
}
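
For orientation, a hedged usage sketch of the new builtin (the struct and function names here are illustrative only; the zeroing behaviour is what the CodeGen changes below implement):

  struct S {
    char c;   // typically followed by 3 padding bytes before 'i'
    int i;
  };

  void PrepareForBytewiseCompare(S *Obj) {
    __builtin_clear_padding(Obj);  // zeroes only the padding bytes of *Obj
  }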

// GCC exception builtins
def EHReturn : Builtin {
let Spellings = ["__builtin_eh_return"];
207 changes: 207 additions & 0 deletions clang/lib/CodeGen/CGBuiltin.cpp
@@ -63,6 +63,7 @@
#include "llvm/Support/ScopedPrinter.h"
#include "llvm/TargetParser/AArch64TargetParser.h"
#include "llvm/TargetParser/X86TargetParser.h"
#include <algorithm>
#include <optional>
#include <sstream>

@@ -2538,6 +2539,205 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}

template <class T>
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
size_t CurrentStartOffset,
size_t &RunningOffset, T &&WriteZeroAtOffset,
bool VisitVirtualBase);

template <class T>
void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
StructType *ST, size_t CurrentStartOffset,
size_t &RunningOffset, T &&WriteZeroAtOffset,
bool VisitVirtualBase) {
llvm::dbgs() << "clear padding struct: " << ST->getName().data() << '\n';
const auto &DL = CGF.CGM.getModule().getDataLayout();
auto *SL = DL.getStructLayout(ST);
auto *R = dyn_cast<CXXRecordDecl>(Ty->getAsRecordDecl());
if (!R) {
llvm::dbgs() << "Not a CXXRecordDecl\n";
return;
}
const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
if (ASTLayout.hasOwnVFPtr()) {
llvm::dbgs() << "vtable ptr. Incrementing RunningOffset from "
<< RunningOffset << " to "
<< RunningOffset + DL.getPointerSizeInBits() / 8 << '\n';
RunningOffset += DL.getPointerSizeInBits() / 8;
}
std::vector<std::pair<size_t, CXXBaseSpecifier>> Bases;
Bases.reserve(R->getNumBases());
// TODO: get vbases
for (auto Base : R->bases()) {
Collaborator: Is iterating over bases/fields like this actually guaranteed to return them in order of offset? Do we need to worry about vtable pointers?

Contributor Author: Bases can be reordered to the end for a virtual base; anyway, I sort the bases by offset now. For fields, IIRC, C++ requires them to be laid out in declaration order. I also added vtable pointer support.
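A small illustration of the reordering being discussed (hedged; exact offsets depend on the target ABI):

  struct VBase { int v; };
  struct Derived : virtual VBase { int d; };
  // In the complete object, the VBase subobject is typically placed after
  // Derived's own members (and after the vtable pointer), so visiting bases()
  // in declaration order does not necessarily match offset order.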

Collaborator: Empty fields that are [[no_unique_address]] can be out-of-order, if that matters.

Contributor Author: Anyway, I have changed the approach now; the new approach should work regardless of the order.
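For example (a hedged sketch; the actual placement is up to the implementation):

  struct Empty {};
  struct S {
    int i;                          // offset 0
    [[no_unique_address]] Empty e;  // may also be placed at offset 0, i.e.
                                    // out of declaration order
  };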

auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
if (!Base.isVirtual()) {
auto Offset = static_cast<size_t>(
Collaborator: Don't use size_t for target offsets. Prefer CharUnits, or if you need a raw number for some reason, uint64_t. (We want to make sure cross-compilation works correctly on 32-bit hosts.)

Contributor Author: Removed size_t.
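A minimal sketch of the CharUnits-based form being suggested (variable names are illustrative, not part of this patch):

  // getBaseClassOffset() already returns CharUnits; keep it in CharUnits as
  // long as possible and only convert to a raw integer at the boundary.
  CharUnits BaseOffset = ASTLayout.getBaseClassOffset(BaseRecord);
  Bases.emplace_back(static_cast<uint64_t>(BaseOffset.getQuantity()), Base);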

ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
Bases.emplace_back(Offset, Base);
}
}

auto VisitBases =
[&](std::vector<std::pair<size_t, CXXBaseSpecifier>> &BasesToVisit) {
std::sort(
BasesToVisit.begin(), BasesToVisit.end(),
[](const auto &P1, const auto &P2) { return P1.first < P2.first; });
for (const auto &Pair : BasesToVisit) {
// Is it OK to use structured bindings in Clang? What is the minimum
// language version?
auto Offset = Pair.first;
auto Base = Pair.second;

llvm::dbgs() << "visiting base at offset " << Offset << '\n';
// Recursively zero out base classes.
auto Index = SL->getElementContainingOffset(Offset);
Value *Idx = CGF.Builder.getSize(Index);
llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(),
CurrentStartOffset + Offset,
RunningOffset, WriteZeroAtOffset, false);
}
};

VisitBases(Bases);

size_t NumFields = std::distance(R->field_begin(), R->field_end());
std::vector<size_t> FieldOffsets;
FieldOffsets.reserve(NumFields);
auto CurrentField = R->field_begin();
for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
// Size needs to be in bytes so we can compare it later.
auto Offset = ASTLayout.getFieldOffset(I) / 8;
llvm::dbgs() << "visiting field at offset " << Offset << '\n';
auto Index = SL->getElementContainingOffset(Offset);
Collaborator: Do we need to handle bitfields?

Contributor Author: No, not at the moment. I need to figure out how to do it.

Contributor Author: It is almost working now. I can compute all of the padding in bits; the remaining piece is zeroing them out. Instead of the current store instruction with zeros, for bits that don't occupy an entire byte I basically need to do

byte &= ~PaddingBitMask

How can I generate that IR?
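One possible answer, as a hedged sketch: alongside WriteZeroAtOffset in RecursivelyClearPadding, emit a load/and/store sequence. ClearBitsAtOffset and its Mask parameter are hypothetical names, not part of this patch:

  // Clears only the padding bits selected by Mask within the byte at Offset,
  // leaving the value bits untouched (byte &= ~Mask).
  auto ClearBitsAtOffset = [&](uint64_t Offset, uint8_t Mask) {
    auto *Index = ConstantInt::get(CGF.IntTy, Offset);
    auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
    auto ByteAlign =
        CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset));
    llvm::Value *Old =
        CGF.Builder.CreateAlignedLoad(CGF.Int8Ty, Element, ByteAlign);
    llvm::Value *New = CGF.Builder.CreateAnd(
        Old, ConstantInt::get(CGF.Int8Ty, static_cast<uint8_t>(~Mask)));
    CGF.Builder.CreateAlignedStore(New, Element, ByteAlign);
  };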

Value *Idx = CGF.Builder.getSize(Index);
llvm::Type *CurrentFieldType =
CGF.ConvertTypeForMem(CurrentField->getType());
Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(),
CurrentStartOffset + Offset, RunningOffset,
WriteZeroAtOffset, true);
}

if (VisitVirtualBase) {

std::vector<std::pair<size_t, CXXBaseSpecifier>> VBases;
VBases.reserve(R->getNumVBases());
for (auto VBase : R->vbases()) {
auto *BaseRecord =
cast<CXXRecordDecl>(VBase.getType()->getAsRecordDecl());
auto Offset = static_cast<size_t>(
ASTLayout.getVBaseClassOffset(BaseRecord).getQuantity());
VBases.emplace_back(Offset, VBase);
}

VisitBases(VBases);
}
}

template <class T>
void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr,
llvm::Type *Type, ConstantArrayType const *AT,
size_t CurrentStartOffset, size_t &RunningOffset,
T &&WriteZeroAtOffset) {
llvm::dbgs() << "clear padding constant array\n";
for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
++ArrIndex) {

QualType ElementQualType = AT->getElementType();

auto *ElementRecord = ElementQualType->getAsRecordDecl();
if (!ElementRecord) {
llvm::dbgs() << "null!\n";
}
auto ElementAlign =
ElementRecord
? CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment()
: CGF.getContext().getTypeAlignInChars(ElementQualType);

Address FieldElementAddr{Ptr, Type, ElementAlign};

auto Element = CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
auto AllocSize =
CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
llvm::dbgs() << "clearing array index! " << ArrIndex << '\n';
RecursivelyClearPaddingImpl(CGF, Element.getBasePointer(), ElementQualType,
CurrentStartOffset +
ArrIndex * AllocSize.getKnownMinValue(),
RunningOffset, WriteZeroAtOffset, true);
}
}

template <class T>
void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty,
size_t CurrentStartOffset,
size_t &RunningOffset, T &&WriteZeroAtOffset,
bool VisitVirtualBase) {

llvm::dbgs() << "clear padding before current [" << RunningOffset << ", "
<< CurrentStartOffset << ")\n";
for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
WriteZeroAtOffset(RunningOffset);
}
auto *Type = CGF.ConvertTypeForMem(Ty);
auto Size = CGF.CGM.getModule()
.getDataLayout()
.getTypeSizeInBits(Type)
.getKnownMinValue() /
8;

if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset,
RunningOffset, WriteZeroAtOffset);
} else if (auto *ST = dyn_cast<StructType>(Type); ST && Ty->isRecordType()) {
ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset,
WriteZeroAtOffset, VisitVirtualBase);
} else if (Ty->isAtomicType()) {
RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(),
CurrentStartOffset, RunningOffset,
WriteZeroAtOffset, true);
} else {
llvm::dbgs() << "increment running offset from: " << RunningOffset << " to "
<< RunningOffset + Size << '\n';
RunningOffset =
std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size));
}
}

static void RecursivelyClearPadding(CodeGenFunction &CGF, Value *Ptr,
QualType Ty) {
auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
auto WriteZeroAtOffset = [&](uint64_t Offset) {
auto *Index = ConstantInt::get(CGF.IntTy, Offset);
auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
CGF.Builder.CreateAlignedStore(
Zero, Element,
CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
};

size_t RunningOffset = 0;

RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset,
true);

// Clear tail padding
auto *Type = CGF.ConvertTypeForMem(Ty);

auto Size = CGF.CGM.getModule()
.getDataLayout()
.getTypeAllocSize(Type)
.getKnownMinValue();

llvm::dbgs() << "clear tail padding [" << RunningOffset << ", " << Size
<< ")\n";
for (; RunningOffset < Size; ++RunningOffset) {
WriteZeroAtOffset(RunningOffset);
}
}

RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -4462,6 +4662,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,

return RValue::get(Ptr);
}
case Builtin::BI__builtin_clear_padding: {
const Expr *Op = E->getArg(0);
Value *Address = EmitScalarExpr(Op);
Collaborator: EmitPointerWithAlignment()?
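A hedged sketch of what that suggestion might look like (not part of the current patch); EmitPointerWithAlignment returns an Address carrying the pointee's alignment instead of a raw scalar pointer:

    // Preserve the pointee's alignment instead of emitting a bare scalar.
    Address OpAddr = EmitPointerWithAlignment(Op);
    RecursivelyClearPadding(*this, OpAddr.getBasePointer(),
                            Op->getType()->getPointeeType());
    return RValue::get(nullptr);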

auto PointeeTy = Op->getType()->getPointeeType();
RecursivelyClearPadding(*this, Address, PointeeTy);
return RValue::get(nullptr);
}
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_sub:
case Builtin::BI__sync_fetch_and_or:
31 changes: 31 additions & 0 deletions clang/lib/Sema/SemaChecking.cpp
@@ -2655,6 +2655,37 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
}
case Builtin::BI__builtin_launder:
return BuiltinLaunder(*this, TheCall);
case Builtin::BI__builtin_clear_padding: {
const auto numArgs = TheCall->getNumArgs();
if (numArgs < 1) {
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_few_args_one)
<< 0 /*function call*/ << "T*" << 0;
return ExprError();
}
if (numArgs > 1) {
Diag(TheCall->getEndLoc(), diag::err_typecheck_call_too_many_args_one)
<< 0 /*function call*/ << "T*" << numArgs << 0;
return ExprError();
}

const Expr *PtrArg = TheCall->getArg(0);
const QualType PtrArgType = PtrArg->getType();
if (!PtrArgType->isPointerType()) {
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
<< PtrArgType << "pointer" << 1 << 0 << 3 << 1 << PtrArgType
<< "pointer";
return ExprError();
}
if (PtrArgType->getPointeeType().isConstQualified()) {
Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
<< TheCall->getSourceRange() << 5 /*ConstUnknown*/;
return ExprError();
}
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
diag::err_typecheck_decl_incomplete_type))
Collaborator: Please define an error message that explains what's actually going wrong here, instead of reusing err_typecheck_decl_incomplete_type. (The other errors could also be improved a bit.)
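For illustration only, a hedged sketch of a dedicated diagnostic (the name and wording are hypothetical, not part of this patch); it would be declared in clang/include/clang/Basic/DiagnosticSemaKinds.td and passed to RequireCompleteType in place of err_typecheck_decl_incomplete_type:

  def err_builtin_clear_padding_incomplete : Error<
    "argument to '__builtin_clear_padding' must be a pointer to a complete "
    "type; %0 is incomplete">;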

return ExprError();
break;
Collaborator: Do you need to check the number of arguments somewhere?

Contributor Author: Yes, added.

}
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_add_1:
case Builtin::BI__sync_fetch_and_add_2:
112 changes: 112 additions & 0 deletions clang/test/CodeGenCXX/builtin-clear-padding-codegen.cpp
@@ -0,0 +1,112 @@
// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s

struct alignas(4) Foo {
char a;
alignas(2) char b;
};

struct alignas(4) Bar {
char c;
alignas(2) char d;
};

struct alignas(4) Baz : Foo {
char e;
Bar f;
};

// Baz structure:
// "a", PAD_1, "b", PAD_2, "c", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
// %struct.Foo = type { i8, i8, i8, i8 }
// %struct.Bar = type { i8, i8, i8, i8 }

// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*

// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
// CHECK: store i8 0, i8* [[PAD_1]]
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
// CHECK: store i8 0, i8* [[PAD_2]]

// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
// CHECK: store i8 0, i8* [[PAD_3]]
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
// CHECK: store i8 0, i8* [[PAD_4]]
// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
// CHECK: store i8 0, i8* [[PAD_5]]

// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
// CHECK: store i8 0, i8* [[PAD_6]]
// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
// CHECK: store i8 0, i8* [[PAD_7]]
// CHECK: ret void
void testBaz(Baz *baz) {
__builtin_clear_padding(baz);
}

struct UnsizedTail {
int size;
alignas(8) char buf[];

UnsizedTail(int size) : size(size) {}
};

// UnsizedTail structure:
// "size", PAD_1, PAD_2, PAD_3, PAD_4
// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }

// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
// CHECK: store i8 0, i8* [[PAD_1]]
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
// CHECK: store i8 0, i8* [[PAD_2]]
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
// CHECK: store i8 0, i8* [[PAD_3]]
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
// CHECK: store i8 0, i8* [[PAD_4]]
// CHECK: ret void
void testUnsizedTail(UnsizedTail *u) {
__builtin_clear_padding(u);
}

struct ArrOfStructsWithPadding {
Bar bars[2];
};

// ArrOfStructsWithPadding structure:
// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }

// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
// CHECK: store i8 0, i8* [[PAD_1]]
// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 3
// CHECK: store i8 0, i8* [[PAD_2]]
// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
// CHECK: store i8 0, i8* [[PAD_3]]
// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
// CHECK: store i8 0, i8* [[PAD_4]]
// CHECK: ret void
void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
__builtin_clear_padding(arr);
}