From 1eabe90fa054abc74f541798ae4dfcc9445bb564 Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 11 Sep 2024 14:04:10 -0400 Subject: [PATCH] codegen: some cleanup of layout computations (#55730) Change Alloca to take an explicit alignment, rather than relying on LLVM to guess our intended alignment from the DataLayout. Eventually we should try to change this code to just get all layout data from julia queries (jl_field_offset, julia_alignment, etc.) instead of relying on creating an LLVM element type for memory and inspecting it (CountTrackedPointers, DataLayout, and so on). --- src/ccall.cpp | 14 +++++--------- src/cgutils.cpp | 36 ++++++++++++++++-------------------- src/codegen.cpp | 34 +++++++++++++++------------------- src/intrinsics.cpp | 14 ++++++++------ 4 files changed, 44 insertions(+), 54 deletions(-) diff --git a/src/ccall.cpp b/src/ccall.cpp index eac130ea43189..e336de8e3574f 100644 --- a/src/ccall.cpp +++ b/src/ccall.cpp @@ -446,15 +446,13 @@ static Value *llvm_type_rewrite( const DataLayout &DL = ctx.builder.GetInsertBlock()->getModule()->getDataLayout(); Align align = std::max(DL.getPrefTypeAlign(target_type), DL.getPrefTypeAlign(from_type)); if (DL.getTypeAllocSize(target_type) >= DL.getTypeAllocSize(from_type)) { - to = emit_static_alloca(ctx, target_type); + to = emit_static_alloca(ctx, target_type, align); setName(ctx.emission_context, to, "type_rewrite_buffer"); - cast(to)->setAlignment(align); from = to; } else { - from = emit_static_alloca(ctx, from_type); + from = emit_static_alloca(ctx, from_type, align); setName(ctx.emission_context, from, "type_rewrite_buffer"); - cast(from)->setAlignment(align); to = from; } ctx.builder.CreateAlignedStore(v, from, align); @@ -555,9 +553,8 @@ static Value *julia_to_native( // pass the address of an alloca'd thing, not a box // since those are immutable. - Value *slot = emit_static_alloca(ctx, to); Align align(julia_alignment(jlto)); - cast(slot)->setAlignment(align); + Value *slot = emit_static_alloca(ctx, to, align); setName(ctx.emission_context, slot, "native_convert_buffer"); if (!jvinfo.ispointer()) { jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, jvinfo.tbaa); @@ -2090,7 +2087,7 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (sret) { assert(!retboxed && jl_is_datatype(rt) && "sret return type invalid"); if (jl_is_pointerfree(rt)) { - result = emit_static_alloca(ctx, lrt); + result = emit_static_alloca(ctx, lrt, Align(julia_alignment(rt))); setName(ctx.emission_context, result, "ccall_sret"); sretty = lrt; argvals[0] = result; @@ -2266,9 +2263,8 @@ jl_cgval_t function_sig_t::emit_a_ccall( if (DL.getTypeStoreSize(resultTy) > rtsz) { // ARM and AArch64 can use a LLVM type larger than the julia type. // When this happens, cast through memory. - auto slot = emit_static_alloca(ctx, resultTy); + auto slot = emit_static_alloca(ctx, resultTy, boxalign); setName(ctx.emission_context, slot, "type_pun_slot"); - slot->setAlignment(boxalign); ctx.builder.CreateAlignedStore(result, slot, boxalign); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); emit_memcpy(ctx, strct, ai, slot, ai, rtsz, boxalign, boxalign); diff --git a/src/cgutils.cpp b/src/cgutils.cpp index 2a234f399f5c1..bf5c67ae9f849 100644 --- a/src/cgutils.cpp +++ b/src/cgutils.cpp @@ -1937,18 +1937,22 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j ctx.builder.CreateFence(Order); return ghostValue(ctx, jltype); } + if (isboxed) + alignment = sizeof(void*); + else if (!alignment) + alignment = julia_alignment(jltype); unsigned nb = isboxed ? sizeof(void*) : jl_datatype_size(jltype); // note that nb == jl_Module->getDataLayout().getTypeAllocSize(elty) or getTypeStoreSize, depending on whether it is a struct or primitive type AllocaInst *intcast = NULL; if (Order == AtomicOrdering::NotAtomic) { if (!isboxed && !aliasscope && elty->isAggregateType() && !CountTrackedPointers(elty).count) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "aggregate_load_box"); } } else { if (!isboxed && !elty->isIntOrPtrTy()) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "atomic_load_box"); elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); } @@ -1963,10 +1967,6 @@ static jl_cgval_t typed_load(jl_codectx_t &ctx, Value *ptr, Value *idx_0based, j if (idx_0based) data = ctx.builder.CreateInBoundsGEP(elty, data, idx_0based); Value *instr = nullptr; - if (isboxed) - alignment = sizeof(void*); - else if (!alignment) - alignment = julia_alignment(jltype); if (intcast && Order == AtomicOrdering::NotAtomic) { emit_memcpy(ctx, intcast, jl_aliasinfo_t::fromTBAA(ctx, ctx.tbaa().tbaa_stack), data, jl_aliasinfo_t::fromTBAA(ctx, tbaa), nb, Align(alignment), intcast->getAlign()); } @@ -2053,6 +2053,10 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, ret = update_julia_type(ctx, ret, jltype); return ret; }; + if (isboxed) + alignment = sizeof(void*); + else if (!alignment) + alignment = julia_alignment(jltype); Type *elty = isboxed ? ctx.types().T_prjlvalue : julia_type_to_llvm(ctx, jltype); if (type_is_ghost(elty) || (issetfieldonce && !maybe_null_if_boxed) || @@ -2095,7 +2099,7 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, intcast_eltyp = elty; elty = Type::getIntNTy(ctx.builder.getContext(), 8 * nb); if (!issetfield) { - intcast = emit_static_alloca(ctx, elty); + intcast = emit_static_alloca(ctx, elty, Align(alignment)); setName(ctx.emission_context, intcast, "atomic_store_box"); } } @@ -2121,10 +2125,6 @@ static jl_cgval_t typed_store(jl_codectx_t &ctx, if (realelty != elty) r = ctx.builder.CreateZExt(r, elty); } - if (isboxed) - alignment = sizeof(void*); - else if (!alignment) - alignment = julia_alignment(jltype); Value *instr = nullptr; Value *Compare = nullptr; Value *Success = nullptr; @@ -2657,10 +2657,8 @@ static jl_cgval_t emit_unionload(jl_codectx_t &ctx, Value *addr, Value *ptindex, if (fsz > 0 && mutabl) { // move value to an immutable stack slot (excluding tindex) Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * al), (fsz + al - 1) / al); - AllocaInst *lv = emit_static_alloca(ctx, AT); + AllocaInst *lv = emit_static_alloca(ctx, AT, Align(al)); setName(ctx.emission_context, lv, "immutable_union"); - if (al > 1) - lv->setAlignment(Align(al)); jl_aliasinfo_t ai = jl_aliasinfo_t::fromTBAA(ctx, tbaa); emit_memcpy(ctx, lv, ai, addr, ai, fsz, Align(al), Align(al)); addr = lv; @@ -2903,7 +2901,7 @@ static jl_cgval_t emit_getfield_knownidx(jl_codectx_t &ctx, const jl_cgval_t &st unsigned st_idx = convert_struct_offset(ctx, T, byte_offset); IntegerType *ET = cast(T->getStructElementType(st_idx)); unsigned align = (ET->getBitWidth() + 7) / 8; - lv = emit_static_alloca(ctx, ET); + lv = emit_static_alloca(ctx, ET, Align(align)); lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz + align - 1) / align)); // emit all of the align-sized words unsigned i = 0; @@ -3324,10 +3322,8 @@ static AllocaInst *try_emit_union_alloca(jl_codectx_t &ctx, jl_uniontype_t *ut, // at least some of the values can live on the stack // try to pick an Integer type size such that SROA will emit reasonable code Type *AT = ArrayType::get(IntegerType::get(ctx.builder.getContext(), 8 * min_align), (nbytes + min_align - 1) / min_align); - AllocaInst *lv = emit_static_alloca(ctx, AT); + AllocaInst *lv = emit_static_alloca(ctx, AT, Align(align)); setName(ctx.emission_context, lv, "unionalloca"); - if (align > 1) - lv->setAlignment(Align(align)); return lv; } return NULL; @@ -3886,7 +3882,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg } } else { - strct = emit_static_alloca(ctx, lt); + strct = emit_static_alloca(ctx, lt, Align(julia_alignment(ty))); setName(ctx.emission_context, strct, arg_typename); if (tracked.count) undef_derived_strct(ctx, strct, sty, ctx.tbaa().tbaa_stack); @@ -3966,7 +3962,7 @@ static jl_cgval_t emit_new_struct(jl_codectx_t &ctx, jl_value_t *ty, size_t narg if (fsz1 > 0 && !fval_info.isghost) { Type *ET = IntegerType::get(ctx.builder.getContext(), 8 * al); assert(lt->getStructElementType(llvm_idx) == ET); - AllocaInst *lv = emit_static_alloca(ctx, ET); + AllocaInst *lv = emit_static_alloca(ctx, ET, Align(al)); setName(ctx.emission_context, lv, "unioninit"); lv->setOperand(0, ConstantInt::get(getInt32Ty(ctx.builder.getContext()), (fsz1 + al - 1) / al)); emit_unionmove(ctx, lv, ctx.tbaa().tbaa_stack, fval_info, nullptr); diff --git a/src/codegen.cpp b/src/codegen.cpp index a82056eb36e21..73a5f844b31da 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -2208,10 +2208,10 @@ static GlobalVariable *get_pointer_to_constant(jl_codegen_params_t &emission_con return gv; } -static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty) +static AllocaInst *emit_static_alloca(jl_codectx_t &ctx, Type *lty, Align align) { ++EmittedAllocas; - return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), "", /*InsertBefore=*/ctx.topalloca); + return new AllocaInst(lty, ctx.topalloca->getModule()->getDataLayout().getAllocaAddrSpace(), nullptr, align, "", /*InsertBefore=*/ctx.topalloca); } static void undef_derived_strct(jl_codectx_t &ctx, Value *ptr, jl_datatype_t *sty, MDNode *tbaa) @@ -2323,7 +2323,7 @@ static inline jl_cgval_t value_to_pointer(jl_codectx_t &ctx, Value *v, jl_value_ loc = get_pointer_to_constant(ctx.emission_context, cast(v), Align(julia_alignment(typ)), "_j_const", *jl_Module); } else { - loc = emit_static_alloca(ctx, v->getType()); + loc = emit_static_alloca(ctx, v->getType(), Align(julia_alignment(typ))); ctx.builder.CreateStore(v, loc); } return mark_julia_slot(loc, typ, tindex, ctx.tbaa().tbaa_stack); @@ -2435,7 +2435,7 @@ static void alloc_def_flag(jl_codectx_t &ctx, jl_varinfo_t& vi) { assert((!vi.boxroot || vi.pTIndex) && "undef check is null pointer for boxed things"); if (vi.usedUndef) { - vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext())); + vi.defFlag = emit_static_alloca(ctx, getInt1Ty(ctx.builder.getContext()), Align(1)); setName(ctx.emission_context, vi.defFlag, "isdefined"); store_def_flag(ctx, vi, false); } @@ -5025,25 +5025,20 @@ static jl_cgval_t emit_call_specfun_other(jl_codectx_t &ctx, bool is_opaque_clos case jl_returninfo_t::Ghosts: break; case jl_returninfo_t::SRet: - result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType()); - #if JL_LLVM_VERSION < 170000 - assert(cast(result->getType())->hasSameElementTypeAs(cast(cft->getParamType(0)))); - #endif + result = emit_static_alloca(ctx, getAttributeAtIndex(returninfo.attrs, 1, Attribute::StructRet).getValueAsType(), Align(julia_alignment(jlretty))); argvals[idx] = result; idx++; break; case jl_returninfo_t::Union: - result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes)); + result = emit_static_alloca(ctx, ArrayType::get(getInt8Ty(ctx.builder.getContext()), returninfo.union_bytes), Align(returninfo.union_align)); setName(ctx.emission_context, result, "sret_box"); - if (returninfo.union_align > 1) - result->setAlignment(Align(returninfo.union_align)); argvals[idx] = result; idx++; break; } if (returninfo.return_roots) { - AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots)); + AllocaInst *return_roots = emit_static_alloca(ctx, ArrayType::get(ctx.types().T_prjlvalue, returninfo.return_roots), Align(alignof(jl_value_t*))); argvals[idx] = return_roots; idx++; } @@ -5922,11 +5917,10 @@ static void emit_phinode_assign(jl_codectx_t &ctx, ssize_t idx, jl_value_t *r) if (vtype->isAggregateType() && CountTrackedPointers(vtype).count == 0) { // the value will be moved into dest in the predecessor critical block. // here it's moved into phi in the successor (from dest) - dest = emit_static_alloca(ctx, vtype); - Value *phi = emit_static_alloca(ctx, vtype); - ctx.builder.CreateMemCpy(phi, Align(julia_alignment(phiType)), - dest, dest->getAlign(), - jl_datatype_size(phiType), false); + Align align(julia_alignment(phiType)); + dest = emit_static_alloca(ctx, vtype, align); + Value *phi = emit_static_alloca(ctx, vtype, align); + ctx.builder.CreateMemCpy(phi, align, dest, align, jl_datatype_size(phiType), false); ctx.builder.CreateLifetimeEnd(dest); slot = mark_julia_slot(phi, phiType, NULL, ctx.tbaa().tbaa_stack); } @@ -7737,6 +7731,8 @@ static jl_returninfo_t get_specsig_function(jl_codectx_t &ctx, Module *M, Value if (tracked.count && !tracked.all) props.return_roots = tracked.count; props.cc = jl_returninfo_t::SRet; + props.union_bytes = jl_datatype_size(jlrettype); + props.union_align = props.union_minalign = jl_datatype_align(jlrettype); // sret is always passed from alloca assert(M); fsig.push_back(rt->getPointerTo(M->getDataLayout().getAllocaAddrSpace())); @@ -8365,13 +8361,13 @@ static jl_llvm_functions_t if (lv) { lv->setName(jl_symbol_name(s)); varinfo.value = mark_julia_slot(lv, jt, NULL, ctx.tbaa().tbaa_stack); - varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + varinfo.pTIndex = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1)); setName(ctx.emission_context, varinfo.pTIndex, "tindex"); // TODO: attach debug metadata to this variable } else if (allunbox) { // all ghost values just need a selector allocated - AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext())); + AllocaInst *lv = emit_static_alloca(ctx, getInt8Ty(ctx.builder.getContext()), Align(1)); lv->setName(jl_symbol_name(s)); varinfo.pTIndex = lv; varinfo.value.tbaa = NULL; diff --git a/src/intrinsics.cpp b/src/intrinsics.cpp index 194b45886bb0d..c747edfeffe5f 100644 --- a/src/intrinsics.cpp +++ b/src/intrinsics.cpp @@ -405,10 +405,11 @@ static Value *emit_unboxed_coercion(jl_codectx_t &ctx, Type *to, Value *unboxed) } else if (!ty->isIntOrPtrTy() && !ty->isFloatingPointTy()) { assert(DL.getTypeSizeInBits(ty) == DL.getTypeSizeInBits(to)); - AllocaInst *cast = emit_static_alloca(ctx, ty); + Align align = std::max(DL.getPrefTypeAlign(ty), DL.getPrefTypeAlign(to)); + AllocaInst *cast = emit_static_alloca(ctx, ty, align); setName(ctx.emission_context, cast, "coercion"); - ctx.builder.CreateStore(unboxed, cast); - unboxed = ctx.builder.CreateLoad(to, cast); + ctx.builder.CreateAlignedStore(unboxed, cast, align); + unboxed = ctx.builder.CreateAlignedLoad(to, cast, align); } else if (frompointer) { Type *INTT_to = INTT(to, DL); @@ -692,10 +693,11 @@ static jl_cgval_t generic_cast( // understood that everything is implicitly rounded to 23 bits, // but if we start looking at more bits we need to actually do the // rounding first instead of carrying around incorrect low bits. - Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType()); + Align align(julia_alignment((jl_value_t*)jlto)); + Value *jlfloattemp_var = emit_static_alloca(ctx, from->getType(), align); setName(ctx.emission_context, jlfloattemp_var, "rounding_slot"); - ctx.builder.CreateStore(from, jlfloattemp_var); - from = ctx.builder.CreateLoad(from->getType(), jlfloattemp_var, /*force this to load from the stack*/true); + ctx.builder.CreateAlignedStore(from, jlfloattemp_var, align); + from = ctx.builder.CreateAlignedLoad(from->getType(), jlfloattemp_var, align, /*force this to load from the stack*/true); setName(ctx.emission_context, from, "rounded"); } }