-
Notifications
You must be signed in to change notification settings - Fork 11.9k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[clang] Add builtin to clear padding bytes (prework for P0528R3) #75371
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-libcxx @llvm/pr-subscribers-clang Author: Hui (huixie90) ChangesPatch is 23.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75371.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index ec39e926889b93..ba944a6f04fdc0 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -633,6 +633,7 @@ BUILTIN(__builtin_vsscanf, "icC*RcC*Ra", "FS:1:")
BUILTIN(__builtin_thread_pointer, "v*", "nc")
BUILTIN(__builtin_launder, "v*v*", "ntE")
LANGBUILTIN(__builtin_is_constant_evaluated, "b", "nE", CXX_LANG)
+LANGBUILTIN(__builtin_zero_non_value_bits, "v.", "n", CXX_LANG)
// GCC exception builtins
BUILTIN(__builtin_eh_return, "vzv*", "r") // FIXME: Takes intptr_t, not size_t!
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061d..638af25fb7a41e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -63,6 +63,10 @@
#include <optional>
#include <sstream>
+
+
+#include <iostream>
+
using namespace clang;
using namespace CodeGen;
using namespace llvm;
@@ -2456,6 +2460,132 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty, size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset);
+
+template <class T>
+void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty, StructType *ST,
+ size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset) {
+ std::cerr << "\n struct! " << ST->getName().data() << std::endl;
+ auto SL = CGF.CGM.getModule().getDataLayout().getStructLayout(ST);
+ auto R = cast<CXXRecordDecl>(Ty->getAsRecordDecl());
+ const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+ for (auto Base : R->bases()) {
+ std::cerr << "\n\n base!" << std::endl;
+ // Zero padding between base elements.
+ auto BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
+ auto Offset = static_cast<size_t>(
+ ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
+ // Recursively zero out base classes.
+ auto Index = SL->getElementContainingOffset(Offset);
+ Value *Idx = CGF.Builder.getSize(Index);
+ llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
+ Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
+ RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(), CurrentStartOffset + Offset, RunningOffset, WriteZeroAtOffset);
+ }
+
+ size_t NumFields = std::distance(R->field_begin(), R->field_end());
+ auto CurrentField = R->field_begin();
+ for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
+ // Size needs to be in bytes so we can compare it later.
+ auto Offset = ASTLayout.getFieldOffset(I) / 8;
+ auto Index = SL->getElementContainingOffset(Offset);
+ Value *Idx = CGF.Builder.getSize(Index);
+ llvm::Type *CurrentFieldType = CGF.ConvertTypeForMem(CurrentField->getType());
+ Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
+ RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(), CurrentStartOffset + Offset, RunningOffset, WriteZeroAtOffset);
+ }
+}
+
+template <class T>
+void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr, llvm::Type *Type, ConstantArrayType const *AT,
+ size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset) {
+ for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
+ ++ArrIndex) {
+
+ QualType ElementQualType = AT->getElementType();
+
+ auto *ElementRecord = ElementQualType->getAsRecordDecl();
+ if(!ElementRecord){
+ std::cerr<< "\n\n null!" << std::endl;
+ }
+ auto ElementAlign =ElementRecord?
+ CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment():
+ CGF.getContext().getTypeAlignInChars(ElementQualType);
+
+ std::cerr<< "\n\n align: " << ElementAlign.getQuantity() << std::endl;
+
+ // Value *Idx = CGF.Builder.getSize(0);
+ // Value *ArrayElement = CGF.Builder.CreateGEP(ElementType, Ptr, Idx);
+
+ Address FieldElementAddr{Ptr, Type, ElementAlign};
+
+ auto Element =
+ CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
+ auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
+ auto AllocSize = CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
+ std::cerr << "\n\n clearing array index! " << ArrIndex << std::endl;
+ RecursivelyClearPaddingImpl(CGF, Element.getPointer(), ElementQualType, CurrentStartOffset + ArrIndex * AllocSize.getKnownMinValue(), RunningOffset, WriteZeroAtOffset);
+ }
+}
+
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty, size_t CurrentStartOffset,
+ size_t& RunningOffset, T&& WriteZeroAtOffset) {
+
+ std::cerr << "\n\n zero padding before current ["<< RunningOffset << ", " << CurrentStartOffset<< ")"<< std::endl;
+ for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
+ WriteZeroAtOffset(RunningOffset);
+ }
+ auto Type = CGF.ConvertTypeForMem(Ty);
+ auto Size = CGF.CGM.getModule()
+ .getDataLayout()
+ .getTypeSizeInBits(Type)
+ .getKnownMinValue() / 8;
+
+ if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
+ ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset, RunningOffset, WriteZeroAtOffset);
+ }
+ else if (auto *ST = dyn_cast<StructType>(Type)) {
+ ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset, WriteZeroAtOffset);
+ } else {
+ std::cerr << "\n\n increment running offset from: " << RunningOffset << " to " << RunningOffset + Size << std::endl;
+ RunningOffset += Size;
+ }
+
+}
+
+static void RecursivelyZeroNonValueBits(CodeGenFunction &CGF, Value *Ptr,
+ QualType Ty) {
+ auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+ auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+ auto WriteZeroAtOffset = [&](size_t Offset) {
+ auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+ auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+ CGF.Builder.CreateAlignedStore(
+ Zero, Element,
+ CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
+ };
+
+
+ size_t RunningOffset = 0;
+
+ RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset);
+
+ // Clear tail padding
+ auto Type = CGF.ConvertTypeForMem(Ty);
+
+ auto Size = CGF.CGM.getModule()
+ .getDataLayout()
+ .getTypeSizeInBits(Type)
+ .getKnownMinValue() / 8;
+
+ std::cerr << "\n\n zero tail padding ["<< RunningOffset << ", " << Size << ")"<< std::endl;
+ for (; RunningOffset < Size; ++RunningOffset) {
+ WriteZeroAtOffset(RunningOffset);
+ }
+}
+
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -4315,6 +4445,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Ptr);
}
+ case Builtin::BI__builtin_zero_non_value_bits: {
+ const Expr *Op = E->getArg(0);
+ Value *Address = EmitScalarExpr(Op);
+ auto PointeeTy = Op->getType()->getPointeeType();
+ RecursivelyZeroNonValueBits(*this, Address, PointeeTy);
+ return RValue::get(nullptr);
+ }
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_sub:
case Builtin::BI__sync_fetch_and_or:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 77c8334f3ca25d..3470dfca3ed6c7 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2327,6 +2327,26 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
}
case Builtin::BI__builtin_launder:
return SemaBuiltinLaunder(*this, TheCall);
+ case Builtin::BI__builtin_zero_non_value_bits: {
+ // const Expr *PtrArg = TheCall->getArg(0)->IgnoreParenImpCasts();
+ // const QualType PtrArgType = PtrArg->getType();
+ // if (!PtrArgType->isPointerType() ||
+ // !PtrArgType->getPointeeType()->isRecordType()) {
+ // Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+ // << PtrArgType << "structure pointer" << 1 << 0 << 3 << 1 << PtrArgType
+ // << "structure pointer";
+ // return ExprError();
+ // }
+ // if (PtrArgType->getPointeeType().isConstQualified()) {
+ // Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
+ // << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
+ // return ExprError();
+ // }
+ // if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
+ // diag::err_typecheck_decl_incomplete_type))
+ // return ExprError();
+ break;
+ }
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_add_1:
case Builtin::BI__sync_fetch_and_add_2:
diff --git a/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp b/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp
new file mode 100644
index 00000000000000..8ad1e148ded861
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+struct alignas(4) Foo {
+ char a;
+ alignas(2) char b;
+};
+
+struct alignas(4) Bar {
+ char c;
+ alignas(2) char d;
+};
+
+struct alignas(4) Baz : Foo {
+ char e;
+ Bar f;
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "c", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
+// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
+// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
+
+// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
+// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_5]]
+
+// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
+// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
+// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_6]]
+// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_7]]
+// CHECK: ret void
+void testBaz(Baz *baz) {
+ __builtin_zero_non_value_bits(baz);
+}
+
+struct UnsizedTail {
+ int size;
+ alignas(8) char buf[];
+
+ UnsizedTail(int size) : size(size) {}
+};
+
+// UnsizedTail structure:
+// "size", PAD_1, PAD_2, PAD_3, PAD_4
+// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
+
+// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
+// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
+// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testUnsizedTail(UnsizedTail *u) {
+ __builtin_zero_non_value_bits(u);
+}
+
+struct ArrOfStructsWithPadding {
+ Bar bars[2];
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
+// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
+// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
+// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* %4, i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
+// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
+ __builtin_zero_non_value_bits(arr);
+}
diff --git a/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
new file mode 100644
index 00000000000000..4373dcbdb720ba
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
@@ -0,0 +1,359 @@
+// RUN: mkdir -p %t
+// RUN: %clang++ %s -o %t/run
+// RUN: %t/run
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <new>
+
+template <class T>
+void print_bytes(const T *object)
+{
+ auto size = sizeof(T);
+ const unsigned char * const bytes = reinterpret_cast<const unsigned char *>(object);
+ size_t i;
+
+ fprintf(stderr, "[ ");
+ for(i = 0; i < size; i++)
+ {
+ fprintf(stderr, "%02x ", bytes[i]);
+ }
+ fprintf(stderr, "]\n");
+}
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) BasicWithPadding {
+ T x;
+ alignas(A2) T y;
+};
+
+template <size_t A1, size_t A2, size_t N, class T>
+struct alignas(A1) SpacedArrayMembers {
+ T x[N];
+ alignas(A2) char c;
+ T y[N];
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) PaddedPointerMembers {
+ T *x;
+ alignas(A2) T *y;
+};
+
+template <size_t A1, size_t A2, size_t A3, class T>
+struct alignas(A1) ThreeMembers {
+ T x;
+ alignas(A2) T y;
+ alignas(A3) T z;
+};
+
+template <class T>
+struct Normal {
+ T a;
+ T b;
+};
+
+template <class T>
+struct X {
+ T x;
+};
+
+template <class T>
+struct Z {
+ T z;
+};
+
+template <size_t A, class T>
+struct YZ : public Z<T> {
+ alignas(A) T y;
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) HasBase : public X<T>, public YZ<A2, T> {
+ T a;
+ alignas(A2) T b;
+};
+
+template <size_t A1, size_t A2, class T>
+void testAllForType(T a, T b, T c, T d) {
+ using B = BasicWithPadding<A1, A2, T>;
+ B basic1;
+ memset(&basic1, 0, sizeof(B));
+ basic1.x = a;
+ basic1.y = b;
+ B basic2;
+ memset(&basic2, 42, sizeof(B));
+ basic2.x = a;
+ basic2.y = b;
+ assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+ __builtin_zero_non_value_bits(&basic2);
+ assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+ using A = SpacedArrayMembers<A1, A2, 2, T>;
+ A arr1;
+ memset(&arr1, 0, sizeof(A));
+ arr1.x[0] = a;
+ arr1.x[1] = b;
+ arr1.y[0] = c;
+ arr1.y[1] = d;
+ A arr2;
+ memset(&arr2, 42, sizeof(A));
+ arr2.x[0] = a;
+ arr2.x[1] = b;
+ arr2.y[0] = c;
+ arr2.y[1] = d;
+ arr2.c = 0;
+ assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+ __builtin_zero_non_value_bits(&arr2);
+ assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+
+ using P = PaddedPointerMembers<A1, A2, T>;
+ P ptr1;
+ memset(&ptr1, 0, sizeof(P));
+ ptr1.x = &a;
+ ptr1.y = &b;
+ P ptr2;
+ memset(&ptr2, 42, sizeof(P));
+ ptr2.x = &a;
+ ptr2.y = &b;
+ assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+ __builtin_zero_non_value_bits(&ptr2);
+ assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+
+ using Three = ThreeMembers<A1, A2, A2, T>;
+ Three three1;
+ memset(&three1, 0, sizeof(Three));
+ three1.x = a;
+ three1.y = b;
+ three1.z = c;
+ Three three2;
+ memset(&three2, 42, sizeof(Three));
+ three2.x = a;
+ three2.y = b;
+ three2.z = c;
+ __builtin_zero_non_value_bits(&three2);
+ assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+
+ using N = Normal<T>;
+ N normal1;
+ memset(&normal1, 0, sizeof(N));
+ normal1.a = a;
+ normal1.b = b;
+ N normal2;
+ memset(&normal2, 42, sizeof(N));
+ normal2.a = a;
+ normal2.b = b;
+ __builtin_zero_non_value_bits(&normal2);
+ assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+
+ using H = HasBase<A1, A2, T>;
+ H base1;
+ memset(&base1, 0, sizeof(H));
+ base1.a = a;
+ base1.b = b;
+ base1.x = c;
+ base1.y = d;
+ base1.z = a;
+ H base2;
+ memset(&base2, 42, sizeof(H));
+ base2.a = a;
+ base2.b = b;
+ base2.x = c;
+ base2.y = d;
+ base2.z = a;
+ assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+ __builtin_zero_non_value_bits(&base2);
+ unsigned i = 0;
+ assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+}
+
+struct UnsizedTail {
+ int size;
+ alignas(8) char buf[];
+
+ UnsizedTail(int size) : size(size) {}
+};
+
+void otherTests() {
+ const size_t size1 = sizeof(UnsizedTail) + 4;
+ char buff1[size1];
+ char buff2[size1];
+ memset(buff1, 0, size1);
+ memset(buff2, 42, size1);
+ auto *u1 = new (buff1) UnsizedTail(4);
+ u1->buf[0] = 1;
+ u1->buf[1] = 2;
+ u1->buf[2] = 3;
+ u1->buf[3] = 4;
+ auto *u2 = new (buff2) UnsizedTail(4);
+ u2->buf[0] = 1;
+ u2->buf[1] = 2;
+ u2->buf[2] = 3;
+ u2->buf[3] = 4;
+ assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+ __builtin_zero_non_value_bits(u2);
+ assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+
+ using B = BasicWithPadding<8, 4, char>;
+ auto *basic1 = new B;
+ memset(basic1, 0, sizeof(B));
+ basic1->x = 1;
+ basic1->y = 2;
+ auto *basic2 = new B;
+ memset(basic2, 42, sizeof(B));
+ basic2->x = 1;
+ basic2->y = 2;
+ assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+ __builtin_zero_non_value_bits(basic2);
+ assert(memcmp(basic1, basic2, sizeof(B)) == 0);
+ delete basic2;
+ delete basic1;
+
+ using B = BasicWithPadding<8, 4, char>;
+ B *basic3 = new B;
+ memset(basic3, 0, sizeof(B));
+ basic3->x = 1;
+ basic3->y = 2;
+ B *basic4 = new B;
+ memset(basic4, 42, sizeof(B));
+ basic4->x = 1;
+ basic4->y = 2;
+ assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+ __builtin_zero_non_value_bits(const_cast<volatile B *>(basic4));
+ assert(memcmp(basic3, basic4, sizeof(B)) == 0);
+ delete basic4;
+ delete basic3;
+}
+
+struct Foo {
+ int x;
+ int y;
+};
+
+typedef float Float4Vec __attribute__((ext_vector_type(4)));
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+
+struct S1 {
+ int x = 0;
+ char c = 0;
+};
+
+struct S2{
+ [[no_unique_address]] S1 s1;
+ bool b;
+ long double l;
+ bool b2;
+};
+
+struct S3{
+ [[no_unique_address]] S1 s1;
+ bool b;
+};
+
+struct alignas(32) S4 {
+ int i;
+};
+struct B1{
+
+};
+
+struct B2 {
+ int x;
+};
+struct B3{
+ char c;
+};
+
+struct B4{
+ bool b;
+};
+
+struct B5{
+ int x2;
+};
+
+struct D:B1,B2,B3,B4,B5{
+ long double l;
+ bool b2;
+}...
[truncated]
|
@llvm/pr-subscribers-clang-codegen Author: Hui (huixie90) ChangesPatch is 23.59 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/75371.diff 6 Files Affected:
diff --git a/clang/include/clang/Basic/Builtins.def b/clang/include/clang/Basic/Builtins.def
index ec39e926889b93..ba944a6f04fdc0 100644
--- a/clang/include/clang/Basic/Builtins.def
+++ b/clang/include/clang/Basic/Builtins.def
@@ -633,6 +633,7 @@ BUILTIN(__builtin_vsscanf, "icC*RcC*Ra", "FS:1:")
BUILTIN(__builtin_thread_pointer, "v*", "nc")
BUILTIN(__builtin_launder, "v*v*", "ntE")
LANGBUILTIN(__builtin_is_constant_evaluated, "b", "nE", CXX_LANG)
+LANGBUILTIN(__builtin_zero_non_value_bits, "v.", "n", CXX_LANG)
// GCC exception builtins
BUILTIN(__builtin_eh_return, "vzv*", "r") // FIXME: Takes intptr_t, not size_t!
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061d..638af25fb7a41e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -63,6 +63,10 @@
#include <optional>
#include <sstream>
+
+
+#include <iostream>
+
using namespace clang;
using namespace CodeGen;
using namespace llvm;
@@ -2456,6 +2460,132 @@ static RValue EmitHipStdParUnsupportedBuiltin(CodeGenFunction *CGF,
return RValue::get(CGF->Builder.CreateCall(UBF, Args));
}
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty, size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset);
+
+template <class T>
+void ClearPaddingStruct(CodeGenFunction &CGF, Value *Ptr, QualType Ty, StructType *ST,
+ size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset) {
+ std::cerr << "\n struct! " << ST->getName().data() << std::endl;
+ auto SL = CGF.CGM.getModule().getDataLayout().getStructLayout(ST);
+ auto R = cast<CXXRecordDecl>(Ty->getAsRecordDecl());
+ const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R);
+ for (auto Base : R->bases()) {
+ std::cerr << "\n\n base!" << std::endl;
+ // Zero padding between base elements.
+ auto BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl());
+ auto Offset = static_cast<size_t>(
+ ASTLayout.getBaseClassOffset(BaseRecord).getQuantity());
+ // Recursively zero out base classes.
+ auto Index = SL->getElementContainingOffset(Offset);
+ Value *Idx = CGF.Builder.getSize(Index);
+ llvm::Type *CurrentBaseType = CGF.ConvertTypeForMem(Base.getType());
+ Value *BaseElement = CGF.Builder.CreateGEP(CurrentBaseType, Ptr, Idx);
+ RecursivelyClearPaddingImpl(CGF, BaseElement, Base.getType(), CurrentStartOffset + Offset, RunningOffset, WriteZeroAtOffset);
+ }
+
+ size_t NumFields = std::distance(R->field_begin(), R->field_end());
+ auto CurrentField = R->field_begin();
+ for (size_t I = 0; I < NumFields; ++I, ++CurrentField) {
+ // Size needs to be in bytes so we can compare it later.
+ auto Offset = ASTLayout.getFieldOffset(I) / 8;
+ auto Index = SL->getElementContainingOffset(Offset);
+ Value *Idx = CGF.Builder.getSize(Index);
+ llvm::Type *CurrentFieldType = CGF.ConvertTypeForMem(CurrentField->getType());
+ Value *Element = CGF.Builder.CreateGEP(CurrentFieldType, Ptr, Idx);
+ RecursivelyClearPaddingImpl(CGF, Element, CurrentField->getType(), CurrentStartOffset + Offset, RunningOffset, WriteZeroAtOffset);
+ }
+}
+
+template <class T>
+void ClearPaddingConstantArray(CodeGenFunction &CGF, Value *Ptr, llvm::Type *Type, ConstantArrayType const *AT,
+ size_t CurrentStartOffset, size_t &RunningOffset, T&& WriteZeroAtOffset) {
+ for (size_t ArrIndex = 0; ArrIndex < AT->getSize().getLimitedValue();
+ ++ArrIndex) {
+
+ QualType ElementQualType = AT->getElementType();
+
+ auto *ElementRecord = ElementQualType->getAsRecordDecl();
+ if(!ElementRecord){
+ std::cerr<< "\n\n null!" << std::endl;
+ }
+ auto ElementAlign =ElementRecord?
+ CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment():
+ CGF.getContext().getTypeAlignInChars(ElementQualType);
+
+ std::cerr<< "\n\n align: " << ElementAlign.getQuantity() << std::endl;
+
+ // Value *Idx = CGF.Builder.getSize(0);
+ // Value *ArrayElement = CGF.Builder.CreateGEP(ElementType, Ptr, Idx);
+
+ Address FieldElementAddr{Ptr, Type, ElementAlign};
+
+ auto Element =
+ CGF.Builder.CreateConstArrayGEP(FieldElementAddr, ArrIndex);
+ auto *ElementType = CGF.ConvertTypeForMem(ElementQualType);
+ auto AllocSize = CGF.CGM.getModule().getDataLayout().getTypeAllocSize(ElementType);
+ std::cerr << "\n\n clearing array index! " << ArrIndex << std::endl;
+ RecursivelyClearPaddingImpl(CGF, Element.getPointer(), ElementQualType, CurrentStartOffset + ArrIndex * AllocSize.getKnownMinValue(), RunningOffset, WriteZeroAtOffset);
+ }
+}
+
+template <class T>
+void RecursivelyClearPaddingImpl(CodeGenFunction &CGF, Value *Ptr, QualType Ty, size_t CurrentStartOffset,
+ size_t& RunningOffset, T&& WriteZeroAtOffset) {
+
+ std::cerr << "\n\n zero padding before current ["<< RunningOffset << ", " << CurrentStartOffset<< ")"<< std::endl;
+ for (; RunningOffset < CurrentStartOffset; ++RunningOffset) {
+ WriteZeroAtOffset(RunningOffset);
+ }
+ auto Type = CGF.ConvertTypeForMem(Ty);
+ auto Size = CGF.CGM.getModule()
+ .getDataLayout()
+ .getTypeSizeInBits(Type)
+ .getKnownMinValue() / 8;
+
+ if (auto *AT = dyn_cast<ConstantArrayType>(Ty)) {
+ ClearPaddingConstantArray(CGF, Ptr, Type, AT, CurrentStartOffset, RunningOffset, WriteZeroAtOffset);
+ }
+ else if (auto *ST = dyn_cast<StructType>(Type)) {
+ ClearPaddingStruct(CGF, Ptr, Ty, ST, CurrentStartOffset, RunningOffset, WriteZeroAtOffset);
+ } else {
+ std::cerr << "\n\n increment running offset from: " << RunningOffset << " to " << RunningOffset + Size << std::endl;
+ RunningOffset += Size;
+ }
+
+}
+
+static void RecursivelyZeroNonValueBits(CodeGenFunction &CGF, Value *Ptr,
+ QualType Ty) {
+ auto *I8Ptr = CGF.Builder.CreateBitCast(Ptr, CGF.Int8PtrTy);
+ auto *Zero = ConstantInt::get(CGF.Int8Ty, 0);
+ auto WriteZeroAtOffset = [&](size_t Offset) {
+ auto *Index = ConstantInt::get(CGF.IntTy, Offset);
+ auto *Element = CGF.Builder.CreateGEP(CGF.Int8Ty, I8Ptr, Index);
+ CGF.Builder.CreateAlignedStore(
+ Zero, Element,
+ CharUnits::One().alignmentAtOffset(CharUnits::fromQuantity(Offset)));
+ };
+
+
+ size_t RunningOffset = 0;
+
+ RecursivelyClearPaddingImpl(CGF, Ptr, Ty, 0, RunningOffset, WriteZeroAtOffset);
+
+ // Clear tail padding
+ auto Type = CGF.ConvertTypeForMem(Ty);
+
+ auto Size = CGF.CGM.getModule()
+ .getDataLayout()
+ .getTypeSizeInBits(Type)
+ .getKnownMinValue() / 8;
+
+ std::cerr << "\n\n zero tail padding ["<< RunningOffset << ", " << Size << ")"<< std::endl;
+ for (; RunningOffset < Size; ++RunningOffset) {
+ WriteZeroAtOffset(RunningOffset);
+ }
+}
+
RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
const CallExpr *E,
ReturnValueSlot ReturnValue) {
@@ -4315,6 +4445,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
return RValue::get(Ptr);
}
+ case Builtin::BI__builtin_zero_non_value_bits: {
+ const Expr *Op = E->getArg(0);
+ Value *Address = EmitScalarExpr(Op);
+ auto PointeeTy = Op->getType()->getPointeeType();
+ RecursivelyZeroNonValueBits(*this, Address, PointeeTy);
+ return RValue::get(nullptr);
+ }
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_sub:
case Builtin::BI__sync_fetch_and_or:
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 77c8334f3ca25d..3470dfca3ed6c7 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -2327,6 +2327,26 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
}
case Builtin::BI__builtin_launder:
return SemaBuiltinLaunder(*this, TheCall);
+ case Builtin::BI__builtin_zero_non_value_bits: {
+ // const Expr *PtrArg = TheCall->getArg(0)->IgnoreParenImpCasts();
+ // const QualType PtrArgType = PtrArg->getType();
+ // if (!PtrArgType->isPointerType() ||
+ // !PtrArgType->getPointeeType()->isRecordType()) {
+ // Diag(PtrArg->getBeginLoc(), diag::err_typecheck_convert_incompatible)
+ // << PtrArgType << "structure pointer" << 1 << 0 << 3 << 1 << PtrArgType
+ // << "structure pointer";
+ // return ExprError();
+ // }
+ // if (PtrArgType->getPointeeType().isConstQualified()) {
+ // Diag(PtrArg->getBeginLoc(), diag::err_typecheck_assign_const)
+ // << TheCall->getSourceRange() << 5 /*ConstUnknown*/;
+ // return ExprError();
+ // }
+ // if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(),
+ // diag::err_typecheck_decl_incomplete_type))
+ // return ExprError();
+ break;
+ }
case Builtin::BI__sync_fetch_and_add:
case Builtin::BI__sync_fetch_and_add_1:
case Builtin::BI__sync_fetch_and_add_2:
diff --git a/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp b/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp
new file mode 100644
index 00000000000000..8ad1e148ded861
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-zero-non-value-bits-codegen.cpp
@@ -0,0 +1,112 @@
+// RUN: %clang_cc1 -triple=x86_64-linux-gnu -emit-llvm -o - %s | FileCheck %s
+
+struct alignas(4) Foo {
+ char a;
+ alignas(2) char b;
+};
+
+struct alignas(4) Bar {
+ char c;
+ alignas(2) char d;
+};
+
+struct alignas(4) Baz : Foo {
+ char e;
+ Bar f;
+};
+
+// Baz structure:
+// "a", PAD_1, "b", PAD_2, "c", PAD_3, PAD_4, PAD_5, "c", PAD_6, "d", PAD_7
+// %struct.Baz = type { %struct.Foo, i8, [3 x i8], %struct.Bar }
+// %struct.Foo = type { i8, i8, i8, i8 }
+// %struct.Bar = type { i8, i8, i8, i8 }
+
+// CHECK-LABEL: define void @_Z7testBazP3Baz(%struct.Baz* %baz)
+// CHECK: [[ADDR:%.*]] = alloca %struct.Baz*
+// CHECK: store %struct.Baz* %baz, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ:%.*]] = load %struct.Baz*, %struct.Baz** [[ADDR]]
+// CHECK: [[BAZ_RAW_PTR:%.*]] = bitcast %struct.Baz* [[BAZ]] to i8*
+
+// CHECK: [[FOO_BASE:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 0
+// CHECK: [[FOO_RAW_PTR:%.*]] = bitcast %struct.Foo* [[FOO_BASE]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[FOO_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: [[PAD_5:%.*]] = getelementptr i8, i8* [[BAZ_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_5]]
+
+// CHECK: [[BAR_MEMBER:%.*]] = getelementptr inbounds %struct.Baz, %struct.Baz* [[BAZ]], i32 0, i32 3
+// CHECK: [[BAR_RAW_PTR:%.*]] = bitcast %struct.Bar* [[BAR_MEMBER]] to i8*
+// CHECK: [[PAD_6:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_6]]
+// CHECK: [[PAD_7:%.*]] = getelementptr i8, i8* [[BAR_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_7]]
+// CHECK: ret void
+void testBaz(Baz *baz) {
+ __builtin_zero_non_value_bits(baz);
+}
+
+struct UnsizedTail {
+ int size;
+ alignas(8) char buf[];
+
+ UnsizedTail(int size) : size(size) {}
+};
+
+// UnsizedTail structure:
+// "size", PAD_1, PAD_2, PAD_3, PAD_4
+// %struct.UnsizedTail = type { i32, [4 x i8], [0 x i8] }
+
+// CHECK-LABEL: define void @_Z15testUnsizedTailP11UnsizedTail(%struct.UnsizedTail* %u)
+// CHECK: [[U_ADDR:%.*]] = alloca %struct.UnsizedTail*
+// CHECK: store %struct.UnsizedTail* %u, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U:%.*]] = load %struct.UnsizedTail*, %struct.UnsizedTail** [[U_ADDR]]
+// CHECK: [[U_RAW_PTR:%.*]] = bitcast %struct.UnsizedTail* [[U]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 4
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 5
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 6
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[U_RAW_PTR]], i32 7
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testUnsizedTail(UnsizedTail *u) {
+ __builtin_zero_non_value_bits(u);
+}
+
+struct ArrOfStructsWithPadding {
+ Bar bars[2];
+};
+
+// ArrOfStructsWithPadding structure:
+// "c" (1), PAD_1, "d" (1), PAD_2, "c" (2), PAD_3, "d" (2), PAD_4
+// %struct.ArrOfStructsWithPadding = type { [2 x %struct.Bar] }
+
+// CHECK-LABEL: define void @_Z27testArrOfStructsWithPaddingP23ArrOfStructsWithPadding(%struct.ArrOfStructsWithPadding* %arr)
+// CHECK: [[ARR_ADDR:%.*]] = alloca %struct.ArrOfStructsWithPadding*
+// CHECK: store %struct.ArrOfStructsWithPadding* %arr, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[ARR:%.*]] = load %struct.ArrOfStructsWithPadding*, %struct.ArrOfStructsWithPadding** [[ARR_ADDR]]
+// CHECK: [[BARS:%.*]] = getelementptr inbounds %struct.ArrOfStructsWithPadding, %struct.ArrOfStructsWithPadding* [[ARR]], i32 0, i32 0
+// CHECK: [[FIRST:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 0
+// CHECK: [[FIRST_RAW_PTR:%.*]] = bitcast %struct.Bar* [[FIRST]] to i8*
+// CHECK: [[PAD_1:%.*]] = getelementptr i8, i8* [[FIRST_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_1]]
+// CHECK: [[PAD_2:%.*]] = getelementptr i8, i8* %4, i32 3
+// CHECK: store i8 0, i8* [[PAD_2]]
+// CHECK: [[SECOND:%.*]] = getelementptr inbounds [2 x %struct.Bar], [2 x %struct.Bar]* [[BARS]], i64 0, i64 1
+// CHECK: [[SECOND_RAW_PTR:%.*]] = bitcast %struct.Bar* [[SECOND]] to i8*
+// CHECK: [[PAD_3:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 1
+// CHECK: store i8 0, i8* [[PAD_3]]
+// CHECK: [[PAD_4:%.*]] = getelementptr i8, i8* [[SECOND_RAW_PTR]], i32 3
+// CHECK: store i8 0, i8* [[PAD_4]]
+// CHECK: ret void
+void testArrOfStructsWithPadding(ArrOfStructsWithPadding *arr) {
+ __builtin_zero_non_value_bits(arr);
+}
diff --git a/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
new file mode 100644
index 00000000000000..4373dcbdb720ba
--- /dev/null
+++ b/clang/test/CodeGenCXX/builtin-zero-non-value-bits.cpp
@@ -0,0 +1,359 @@
+// RUN: mkdir -p %t
+// RUN: %clang++ %s -o %t/run
+// RUN: %t/run
+
+#include <cassert>
+#include <cstdio>
+#include <cstring>
+#include <new>
+
+template <class T>
+void print_bytes(const T *object)
+{
+ auto size = sizeof(T);
+ const unsigned char * const bytes = reinterpret_cast<const unsigned char *>(object);
+ size_t i;
+
+ fprintf(stderr, "[ ");
+ for(i = 0; i < size; i++)
+ {
+ fprintf(stderr, "%02x ", bytes[i]);
+ }
+ fprintf(stderr, "]\n");
+}
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) BasicWithPadding {
+ T x;
+ alignas(A2) T y;
+};
+
+template <size_t A1, size_t A2, size_t N, class T>
+struct alignas(A1) SpacedArrayMembers {
+ T x[N];
+ alignas(A2) char c;
+ T y[N];
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) PaddedPointerMembers {
+ T *x;
+ alignas(A2) T *y;
+};
+
+template <size_t A1, size_t A2, size_t A3, class T>
+struct alignas(A1) ThreeMembers {
+ T x;
+ alignas(A2) T y;
+ alignas(A3) T z;
+};
+
+template <class T>
+struct Normal {
+ T a;
+ T b;
+};
+
+template <class T>
+struct X {
+ T x;
+};
+
+template <class T>
+struct Z {
+ T z;
+};
+
+template <size_t A, class T>
+struct YZ : public Z<T> {
+ alignas(A) T y;
+};
+
+template <size_t A1, size_t A2, class T>
+struct alignas(A1) HasBase : public X<T>, public YZ<A2, T> {
+ T a;
+ alignas(A2) T b;
+};
+
+template <size_t A1, size_t A2, class T>
+void testAllForType(T a, T b, T c, T d) {
+ using B = BasicWithPadding<A1, A2, T>;
+ B basic1;
+ memset(&basic1, 0, sizeof(B));
+ basic1.x = a;
+ basic1.y = b;
+ B basic2;
+ memset(&basic2, 42, sizeof(B));
+ basic2.x = a;
+ basic2.y = b;
+ assert(memcmp(&basic1, &basic2, sizeof(B)) != 0);
+ __builtin_zero_non_value_bits(&basic2);
+ assert(memcmp(&basic1, &basic2, sizeof(B)) == 0);
+ using A = SpacedArrayMembers<A1, A2, 2, T>;
+ A arr1;
+ memset(&arr1, 0, sizeof(A));
+ arr1.x[0] = a;
+ arr1.x[1] = b;
+ arr1.y[0] = c;
+ arr1.y[1] = d;
+ A arr2;
+ memset(&arr2, 42, sizeof(A));
+ arr2.x[0] = a;
+ arr2.x[1] = b;
+ arr2.y[0] = c;
+ arr2.y[1] = d;
+ arr2.c = 0;
+ assert(memcmp(&arr1, &arr2, sizeof(A)) != 0);
+ __builtin_zero_non_value_bits(&arr2);
+ assert(memcmp(&arr1, &arr2, sizeof(A)) == 0);
+
+ using P = PaddedPointerMembers<A1, A2, T>;
+ P ptr1;
+ memset(&ptr1, 0, sizeof(P));
+ ptr1.x = &a;
+ ptr1.y = &b;
+ P ptr2;
+ memset(&ptr2, 42, sizeof(P));
+ ptr2.x = &a;
+ ptr2.y = &b;
+ assert(memcmp(&ptr1, &ptr2, sizeof(P)) != 0);
+ __builtin_zero_non_value_bits(&ptr2);
+ assert(memcmp(&ptr1, &ptr2, sizeof(P)) == 0);
+
+ using Three = ThreeMembers<A1, A2, A2, T>;
+ Three three1;
+ memset(&three1, 0, sizeof(Three));
+ three1.x = a;
+ three1.y = b;
+ three1.z = c;
+ Three three2;
+ memset(&three2, 42, sizeof(Three));
+ three2.x = a;
+ three2.y = b;
+ three2.z = c;
+ __builtin_zero_non_value_bits(&three2);
+ assert(memcmp(&three1, &three2, sizeof(Three)) == 0);
+
+ using N = Normal<T>;
+ N normal1;
+ memset(&normal1, 0, sizeof(N));
+ normal1.a = a;
+ normal1.b = b;
+ N normal2;
+ memset(&normal2, 42, sizeof(N));
+ normal2.a = a;
+ normal2.b = b;
+ __builtin_zero_non_value_bits(&normal2);
+ assert(memcmp(&normal1, &normal2, sizeof(N)) == 0);
+
+ using H = HasBase<A1, A2, T>;
+ H base1;
+ memset(&base1, 0, sizeof(H));
+ base1.a = a;
+ base1.b = b;
+ base1.x = c;
+ base1.y = d;
+ base1.z = a;
+ H base2;
+ memset(&base2, 42, sizeof(H));
+ base2.a = a;
+ base2.b = b;
+ base2.x = c;
+ base2.y = d;
+ base2.z = a;
+ assert(memcmp(&base1, &base2, sizeof(H)) != 0);
+ __builtin_zero_non_value_bits(&base2);
+ unsigned i = 0;
+ assert(memcmp(&base1, &base2, sizeof(H)) == 0);
+}
+
+struct UnsizedTail {
+ int size;
+ alignas(8) char buf[];
+
+ UnsizedTail(int size) : size(size) {}
+};
+
+void otherTests() {
+ const size_t size1 = sizeof(UnsizedTail) + 4;
+ char buff1[size1];
+ char buff2[size1];
+ memset(buff1, 0, size1);
+ memset(buff2, 42, size1);
+ auto *u1 = new (buff1) UnsizedTail(4);
+ u1->buf[0] = 1;
+ u1->buf[1] = 2;
+ u1->buf[2] = 3;
+ u1->buf[3] = 4;
+ auto *u2 = new (buff2) UnsizedTail(4);
+ u2->buf[0] = 1;
+ u2->buf[1] = 2;
+ u2->buf[2] = 3;
+ u2->buf[3] = 4;
+ assert(memcmp(u1, u2, sizeof(UnsizedTail)) != 0);
+ __builtin_zero_non_value_bits(u2);
+ assert(memcmp(u1, u2, sizeof(UnsizedTail)) == 0);
+
+ using B = BasicWithPadding<8, 4, char>;
+ auto *basic1 = new B;
+ memset(basic1, 0, sizeof(B));
+ basic1->x = 1;
+ basic1->y = 2;
+ auto *basic2 = new B;
+ memset(basic2, 42, sizeof(B));
+ basic2->x = 1;
+ basic2->y = 2;
+ assert(memcmp(basic1, basic2, sizeof(B)) != 0);
+ __builtin_zero_non_value_bits(basic2);
+ assert(memcmp(basic1, basic2, sizeof(B)) == 0);
+ delete basic2;
+ delete basic1;
+
+ using B = BasicWithPadding<8, 4, char>;
+ B *basic3 = new B;
+ memset(basic3, 0, sizeof(B));
+ basic3->x = 1;
+ basic3->y = 2;
+ B *basic4 = new B;
+ memset(basic4, 42, sizeof(B));
+ basic4->x = 1;
+ basic4->y = 2;
+ assert(memcmp(basic3, basic4, sizeof(B)) != 0);
+ __builtin_zero_non_value_bits(const_cast<volatile B *>(basic4));
+ assert(memcmp(basic3, basic4, sizeof(B)) == 0);
+ delete basic4;
+ delete basic3;
+}
+
+struct Foo {
+ int x;
+ int y;
+};
+
+typedef float Float4Vec __attribute__((ext_vector_type(4)));
+typedef float Float3Vec __attribute__((ext_vector_type(3)));
+
+struct S1 {
+ int x = 0;
+ char c = 0;
+};
+
+struct S2{
+ [[no_unique_address]] S1 s1;
+ bool b;
+ long double l;
+ bool b2;
+};
+
+struct S3{
+ [[no_unique_address]] S1 s1;
+ bool b;
+};
+
+struct alignas(32) S4 {
+ int i;
+};
+struct B1{
+
+};
+
+struct B2 {
+ int x;
+};
+struct B3{
+ char c;
+};
+
+struct B4{
+ bool b;
+};
+
+struct B5{
+ int x2;
+};
+
+struct D:B1,B2,B3,B4,B5{
+ long double l;
+ bool b2;
+}...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
I don't think this should be tagged as libc++, since it doesn't actually touch anything of libc++. |
@@ -0,0 +1,556 @@ | |||
// RUN: mkdir -p %t |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are these files missing a license?
@@ -0,0 +1,556 @@ | |||
// RUN: mkdir -p %t | |||
// RUN: %clang++ %s -o %t/run | |||
// RUN: %t/run |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm not sure if this is going to work on all configurations, for example when they cross compile. Is there a precedent for these kinds of runtime tests in the codegen tests? Probably worth making sure that this is how it's done elsewhere.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tests that need to actually run generated code go into the test-suite repo. (I don't think that's really necessary here, though; if you're going to start using this in libc++, libc++ runtime tests will give us equivalent test coverage.)
@AaronBallman We're struggling a bit to find the right person to help review this -- do you know who would be a good candidate? Hui is very familiar with libc++ but he's less familiar with codegen, and I'm not very useful in that area of the project :-). This work is required to implement https://wg21.link/P0528R3 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please clang-format changes.
clang/lib/Sema/SemaChecking.cpp
Outdated
@@ -2327,6 +2327,25 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID, | |||
} | |||
case Builtin::BI__builtin_launder: | |||
return SemaBuiltinLaunder(*this, TheCall); | |||
case Builtin::BI__builtin_clear_padding: { | |||
const Expr *PtrArg = TheCall->getArg(0)->IgnoreParenImpCasts(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why IgnoreParenImpCasts()?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(), | ||
diag::err_typecheck_decl_incomplete_type)) | ||
return ExprError(); | ||
break; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you need to check the number of arguments somewhere?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
yes. added
return ExprError(); | ||
} | ||
if (RequireCompleteType(PtrArg->getBeginLoc(), PtrArgType->getPointeeType(), | ||
diag::err_typecheck_decl_incomplete_type)) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please define an error message that explains what's actually going wrong here, instead of reusing err_typecheck_decl_incomplete_type. (The other errors could also be improved a bit.)
@@ -4315,6 +4453,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, | |||
|
|||
return RValue::get(Ptr); | |||
} | |||
case Builtin::BI__builtin_clear_padding: { | |||
const Expr *Op = E->getArg(0); | |||
Value *Address = EmitScalarExpr(Op); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
EmitPointerWithAlignment()?
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
#include <optional> | ||
#include <sstream> | ||
|
||
|
||
|
||
#include <iostream> |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
iostream is forbidden in LLVM code. If you think the debug prints are useful long-term, use llvm::dbgs(); otherwise, just delete them.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
changing it to llvm::dbgs() now. will remove it once the patch is ready
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
|
||
} | ||
const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R); | ||
for (auto Base : R->bases()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is iterating over bases/fields like this actually guaranteed to return them in order of offset?
Do we need to worry about vtable pointers?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bases can be reordered to the end for virtual base. anyway, I sorted bases by offset now. for fields, IIRC, c++ requires them to be laid out in the same order
added vtable pointers support.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Empty fields that are [[no_unique_address]]
can be out-of-order, if that matters.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
anyway. I changed the approach now.the new approach should work regardless of the orders
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
auto Offset = static_cast<size_t>( | ||
ASTLayout.getBaseClassOffset(BaseRecord).getQuantity()); | ||
// Recursively zero out base classes. | ||
auto Index = SL->getElementContainingOffset(Offset); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'm a bit skeptical it's a good idea to rely on the LLVM StructLayout for this; lowering from a clang type to an LLVM type involves a bunch of transforms. Can we use information from the clang RecordLayout instead?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
thank you for suggestion. I tried to use both AST and LLVM layouts information. But please let me know if there is better way of doing this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I removed all the code using LLVM layouts
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
for (size_t I = 0; I < NumFields; ++I, ++CurrentField) { | ||
// Size needs to be in bytes so we can compare it later. | ||
auto Offset = ASTLayout.getFieldOffset(I) / 8; | ||
auto Index = SL->getElementContainingOffset(Offset); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we need to handle bitfields?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
no not at the moment. I need to figure out how to do it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It is almost working now. I managed to get all the Padding in bits now. the remaining thing is to zeroing them out, instead of current generating store instruction with zeros, i need to basically do this for the bits that don't occupy the entire byte
byte &= ~PaddingBitMask
How can I generate that IR ?
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
std::cerr<< "\n\n null!" << std::endl; | ||
} | ||
auto ElementAlign =ElementRecord? | ||
CGF.getContext().getASTRecordLayout(ElementRecord).getAlignment(): |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why are we doing this different ways depending on whether we have a RecordType?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(), CurrentStartOffset, RunningOffset, WriteZeroAtOffset); | ||
} else { | ||
std::cerr << "\n\n increment running offset from: " << RunningOffset << " to " << RunningOffset + Size << std::endl; | ||
RunningOffset = std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are there scalar types that naturally have padding? (x86 long double comes to mind.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I see, this is querying the typesize from LLVM. I guess that works for most cases...? I don't think it works for _Complex long double. Maybe worth adding a comment explicitly noting how this size computation works.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
well i tested and it actually works for long double
because the LLVM size of long double is 80bits
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_Complex long double
has padding in the middle.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_Complex is handled now
Also, please fix the title so it isn't prefixed with "[libc++]"; the intent is for this to be used in libc++, but it's proposing a clang intrinsic. |
Adds `__builtin_clear_padding` to zero all padding bits of a struct. This builtin should match the behavior of those in NVCC and GCC (and MSVC?). There are some tests in this patch but hopefully we'll also get tests from other compilers (so all builtins can be as similar as possible). I'm planning to add support for unions, bitfields (both as members and members of sub-objects), and booleans as follow up patches. Differential Revision: https://reviews.llvm.org/D87974 overlapping subobjects + opague pointer union, rename, scalar types
We have fast AVX256 scanners that detect data change and generate a hash in a united pass. When this code is inlined, it further optimizes to just a few instructions with constant size structures. There is a lot of optimization potential for these flat data structures. We can we use vector instructions to process them in large chunks. However, padding bytes with random data is a problem. Detecting padding locations, especially with multiple inheritance is not trivial. __builtin_clear_padding() solves this problem. Now we can simply mark a flat structure with;
where Flat<> simply calls __builtin_clear_padding((F1*) this); at construction. LLVM unites any redundant writes from further inheritance. LLVM also combines clear padding and initializated field bytes into larger writes. These flat objects can be detected with a concept, and now have special powers, like fast comparison and/or hashing, implemented generically outside of these objects. This is made possible by this work here. Thank you! |
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// UNSUPPORTED: c++03 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@efriedma-quic There's a question of where this test should live. IIUC LLVM usually tests stuff by looking at the code that gets generated and using FileCheck
. However, this test is different in that it actually runs on the target and ensures that we have the right behavior. This is quite nice and it's definitely how we would want to test that from the library side.
However, there is no user-facing facility exposed by C++ for this builtin, so I'm thinking it doesn't really belong under libcxx/test/
. Is there precedent for these kinds of runtime tests in LLVM codegen?
Otherwise, one thing we could potentially do is define an internal libc++ helper like __libcpp_clear_padding
and keep these tests under libcxx/test/libcxx
. We'd still want to make sure that the tests for the builtin under clang/test
are sufficient, though.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no mechanism for clang/ tests to run on target. Usually, if we think it's important, we add tests to SingleSource/UnitTests in llvm-test-suite. But usually we just rely on clang regression testing.
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
|
||
} | ||
const ASTRecordLayout &ASTLayout = CGF.getContext().getASTRecordLayout(R); | ||
for (auto Base : R->bases()) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Empty fields that are [[no_unique_address]]
can be out-of-order, if that matters.
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
RecursivelyClearPaddingImpl(CGF, Ptr, Ty.getAtomicUnqualifiedType(), CurrentStartOffset, RunningOffset, WriteZeroAtOffset); | ||
} else { | ||
std::cerr << "\n\n increment running offset from: " << RunningOffset << " to " << RunningOffset + Size << std::endl; | ||
RunningOffset = std::max(RunningOffset, CurrentStartOffset + static_cast<size_t>(Size)); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
_Complex long double
has padding in the middle.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
// | ||
//===----------------------------------------------------------------------===// | ||
// UNSUPPORTED: c++03 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
There is no mechanism for clang/ tests to run on target. Usually, if we think it's important, we add tests to SingleSource/UnitTests in llvm-test-suite. But usually we just rely on clang regression testing.
clang/lib/CodeGen/CGBuiltin.cpp
Outdated
for (auto Base : R->bases()) { | ||
auto *BaseRecord = cast<CXXRecordDecl>(Base.getType()->getAsRecordDecl()); | ||
if (!Base.isVirtual()) { | ||
auto Offset = static_cast<size_t>( |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't use size_t for target offsets. Prefer CharUnits, or if you need a raw number for some reason, uint64_t. (We want to make sure cross-compilation works correctly on 32-bit hosts.)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
removed size_t
I refactored the code which does the following
|
If you want to modify part of a bitfield unit, you need to load/store the whole bitfield unit, as computed by the CGRecordLayout. This is true whether you're modifying an actual field, or padding adjacent to a field. Since any padding has to be adjacent to a bitfield, you can get the relevant information out of the CGBitFieldInfo for that bitfield. |
Hello, this is now working with the bitfield. Iterating over the field of a struct also gives me information about where bit fields are |
@efriedma-quic could you please have a look at the updated version? it works for bit field. May I have some help on how to write these IR tests? |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Bitfield load and store operations should be done using the same offset/size we normally use to access the bitfield; unconditionally using byte load/store operations will impair optimizations/performance. I guess this might not be possible when unions are involved, but it shouldn't be that hard for the non-union cases.
The format of builtin-clear-padding-codegen.cpp seems mostly fine, but consider using update_cc_test_checks.py to automate writing the CHECK lines. Please add a couple tests for empty classes and unions.
A few comments in the code outlining how the recursion and the interval representation work would be helpful.
llvm::dbgs() << "visiting base at offset " << StartBitOffset << " + " | ||
<< BaseOffset * CharWidth << '\n'; | ||
Queue.push_back( | ||
Data{StartBitOffset + BaseOffset * CharWidth, Base.getType(), false}); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Data{StartBitOffset + BaseOffset * CharWidth, Base.getType(), false}); | |
Data{StartBitOffset + BaseOffset * CharWidth, Base.getType(), /*VisitVirtualBase*/false}); |
Queue.push_back(Data{0, Ty, true}); | ||
while (!Queue.empty()) { | ||
auto Current = Queue.front(); | ||
Queue.pop_front(); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can you just use back()/pop_back() here (so you don't need an std::deque)?
Add builtin to clear padding bytes. This is the pre-work to implement
std::atomic::compare_exchange_[weak/strong]
that ignores padding bits.PR draft here: #76180
This PR picked up this patch from 3 years ago
https://reviews.llvm.org/D87974
The above patch no longer works as things changed quite a lot. I've made some changes on top of the above patch:
it handles:
long double
and types with__attribute__((ext_vector_type(N)))
TODO: