Skip to content

Commit

Permalink
codegen: replace store of freeze in allocop and in emit new struct wi…
Browse files Browse the repository at this point in the history
…th memset since aggregate stores are bad (#55879)

This fixes the issues found in slack in the reinterprets of
```julia
julia> split128_v2(x::UInt128) = (first(reinterpret(NTuple{2, UInt}, x)), last(reinterpret(NTuple{2, UInt}, x)))
split128_v2 (generic function with 1 method)

julia> split128(x::UInt128) = reinterpret(NTuple{2, UInt}, x)
split128 (generic function with 1 method)

@code_native  split128(UInt128(5))
	push	rbp
	mov	rbp, rsp
	mov	rax, rdi
	mov	qword ptr [rdi + 8], rdx
	mov	qword ptr [rdi], rsi
	pop	rbp
	ret

@code_native  split128_v2(UInt128(5))
	push	rbp
	mov	rbp, rsp
	mov	rax, rdi
	mov	qword ptr [rdi], rsi
	mov	qword ptr [rdi + 8], rdx
	pop	rbp
	ret
```

vs on master where
```julia
julia> @code_native  split128(UInt128(5))

	push	rbp
	mov	rbp, rsp
	mov	eax, esi
	shr	eax, 8
	mov	ecx, esi
	shr	ecx, 16
	mov	r8, rsi
	mov	r9, rsi
	vmovd	xmm0, esi
	vpinsrb	xmm0, xmm0, eax, 1
	mov	rax, rsi
	vpinsrb	xmm0, xmm0, ecx, 2
	mov	rcx, rsi
	shr	esi, 24
	vpinsrb	xmm0, xmm0, esi, 3
	shr	r8, 32
	vpinsrb	xmm0, xmm0, r8d, 4
	shr	r9, 40
	vpinsrb	xmm0, xmm0, r9d, 5
	shr	rax, 48
	vpinsrb	xmm0, xmm0, eax, 6
	shr	rcx, 56
	vpinsrb	xmm0, xmm0, ecx, 7
	vpinsrb	xmm0, xmm0, edx, 8
	mov	eax, edx
	shr	eax, 8
	vpinsrb	xmm0, xmm0, eax, 9
	mov	eax, edx
	shr	eax, 16
	vpinsrb	xmm0, xmm0, eax, 10
	mov	eax, edx
	shr	eax, 24
	vpinsrb	xmm0, xmm0, eax, 11
	mov	rax, rdx
	shr	rax, 32
	vpinsrb	xmm0, xmm0, eax, 12
	mov	rax, rdx
	shr	rax, 40
	vpinsrb	xmm0, xmm0, eax, 13
	mov	rax, rdx
	shr	rax, 48
	vpinsrb	xmm0, xmm0, eax, 14
	mov	rax, rdi
	shr	rdx, 56
	vpinsrb	xmm0, xmm0, edx, 15
	vmovdqu	xmmword ptr [rdi], xmm0
	pop	rbp
	ret
```
  • Loading branch information
gbaraldi authored Oct 18, 2024
1 parent ca3713e commit e5aff12
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 34 deletions.
31 changes: 12 additions & 19 deletions src/cgutils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4213,7 +4213,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
else {
strct = UndefValue::get(lt);
if (nargs < nf)
strct = ctx.builder.CreateFreeze(strct);
strct = ctx.builder.CreateFreeze(strct); // Change this to zero initialize instead?
}
}
else if (tracked.second) {
Expand Down Expand Up @@ -4380,25 +4380,18 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
ctx.builder.restoreIP(savedIP);
}
}
for (size_t i = nargs; i < nf; i++) {
if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
ssize_t offs = jl_field_offset(sty, i);
ssize_t ptrsoffs = -1;
if (!inline_roots.empty())
std::tie(offs, ptrsoffs) = split_value_field(sty, i);
assert(ptrsoffs < 0 && offs >= 0);
int fsz = jl_field_size(sty, i) - 1;
if (init_as_value) {
if (init_as_value) {
for (size_t i = nargs; i < nf; i++) {
if (!jl_field_isptr(sty, i) && jl_is_uniontype(jl_field_type(sty, i))) {
ssize_t offs = jl_field_offset(sty, i);
ssize_t ptrsoffs = -1;
if (!inline_roots.empty())
std::tie(offs, ptrsoffs) = split_value_field(sty, i);
assert(ptrsoffs < 0 && offs >= 0);
int fsz = jl_field_size(sty, i) - 1;
unsigned llvm_idx = convert_struct_offset(ctx, cast<StructType>(lt), offs + fsz);
strct = ctx.builder.CreateInsertValue(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0), ArrayRef<unsigned>(llvm_idx));
}
else {
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_unionselbyte);
Instruction *dest = cast<Instruction>(emit_ptrgep(ctx, strct, offs + fsz));
if (promotion_point == nullptr)
promotion_point = dest;
ai.decorateInst(ctx.builder.CreateAlignedStore(ctx.builder.getInt8(0), dest, Align(1)));
}
}
}
if (nargs < nf) {
Expand All @@ -4407,9 +4400,9 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg
if (promotion_point)
ctx.builder.SetInsertPoint(promotion_point);
if (strct) {
promotion_point = cast<FreezeInst>(ctx.builder.CreateFreeze(UndefValue::get(lt)));
jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack);
ai.decorateInst(ctx.builder.CreateStore(promotion_point, strct));
promotion_point = ai.decorateInst(ctx.builder.CreateMemSet(strct, ConstantInt::get(getInt8Ty(ctx.builder.getContext()), 0),
jl_datatype_size(ty), MaybeAlign(jl_datatype_align(ty))));
}
ctx.builder.restoreIP(savedIP);
}
Expand Down
11 changes: 3 additions & 8 deletions src/llvm-alloc-opt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -646,14 +646,9 @@ void Optimizer::initializeAlloca(IRBuilder<> &prolog_builder, AllocaInst *buff,
return;
assert(!buff->isArrayAllocation());
Type *T = buff->getAllocatedType();
Value *Init = UndefValue::get(T);
if ((allockind & AllocFnKind::Zeroed) != AllocFnKind::Unknown)
Init = Constant::getNullValue(T); // zero, as described
else if (allockind == AllocFnKind::Unknown)
Init = Constant::getNullValue(T); // assume zeroed since we didn't find the attribute
else
Init = prolog_builder.CreateFreeze(UndefValue::get(T)); // assume freeze, since LLVM does not natively support this case
prolog_builder.CreateStore(Init, buff);
const DataLayout &DL = F.getParent()->getDataLayout();
prolog_builder.CreateMemSet(buff, ConstantInt::get(Type::getInt8Ty(prolog_builder.getContext()), 0), DL.getTypeAllocSize(T), buff->getAlign());

}

// This function should not erase any safepoint so that the lifetime marker can find and cache
Expand Down
11 changes: 4 additions & 7 deletions test/llvmpasses/alloc-opt-pass.ll
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ L3: ; preds = %L2, %L1, %0
; CHECK-LABEL: @legal_int_types
; CHECK: alloca [12 x i8]
; CHECK-NOT: alloca i96
; CHECK: store [12 x i8] zeroinitializer,
; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %var1,
; CHECK: ret void
define void @legal_int_types() {
%pgcstack = call ptr @julia.get_pgcstack()
Expand Down Expand Up @@ -140,7 +140,7 @@ L2: ; preds = %0
; CHECK: alloca
; CHECK-NOT: call token(...) @llvm.julia.gc_preserve_begin
; CHECK: call void @llvm.lifetime.start
; CHECK: store [8 x i8] zeroinitializer,
; CHECK: call void @llvm.memset.p0.i64(ptr align 16 %v,
; CHECK-NOT: call void @llvm.lifetime.end
define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret({}) %0) {
%pgcstack = call ptr @julia.get_pgcstack()
Expand All @@ -164,11 +164,8 @@ define void @lifetime_no_preserve_end(ptr noalias nocapture noundef nonnull sret
; CHECK: alloca [1 x i8]
; CHECK-DAG: alloca [2 x i8]
; CHECK-DAG: alloca [3 x i8]
; CHECK-DAG: freeze [1 x i8] undef
; CHECK-DAG: store [1 x i8] %
; CHECK-DAG: store [3 x i8] zeroinitializer,
; CHECK-NOT: store
; CHECK-NOT: zeroinitializer
; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 1 %var1,
; CHECK-DAG: call void @llvm.memset.p0.i64(ptr align 4 %var7,
; CHECK: ret void
define void @initializers() {
%pgcstack = call ptr @julia.get_pgcstack()
Expand Down

0 comments on commit e5aff12

Please sign in to comment.