Commit
address review comments
tschuett committed Sep 29, 2024
1 parent f5b6a7c commit e5e78c1
Showing 3 changed files with 246 additions and 25 deletions.
2 changes: 2 additions & 0 deletions llvm/include/llvm/CodeGen/GlobalISel/IRTranslator.h
@@ -546,8 +546,10 @@ class IRTranslator : public MachineFunctionPass {
bool translateVAArg(const User &U, MachineIRBuilder &MIRBuilder);

bool translateInsertElement(const User &U, MachineIRBuilder &MIRBuilder);
bool translateInsertVector(const User &U, MachineIRBuilder &MIRBuilder);

bool translateExtractElement(const User &U, MachineIRBuilder &MIRBuilder);
bool translateExtractVector(const User &U, MachineIRBuilder &MIRBuilder);

bool translateShuffleVector(const User &U, MachineIRBuilder &MIRBuilder);

123 changes: 107 additions & 16 deletions llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -2588,20 +2588,10 @@ bool IRTranslator::translateKnownIntrinsic(const CallInst &CI, Intrinsic::ID ID,
getOrCreateVReg(*CI.getOperand(0)),
getOrCreateVReg(*CI.getOperand(1)));
return true;
case Intrinsic::vector_extract: {
ConstantInt *Index = cast<ConstantInt>(CI.getOperand(1));
MIRBuilder.buildExtractSubvector(getOrCreateVReg(CI),
getOrCreateVReg(*CI.getOperand(0)),
Index->getZExtValue());
return true;
}
case Intrinsic::vector_insert: {
ConstantInt *Index = cast<ConstantInt>(CI.getOperand(2));
MIRBuilder.buildInsertSubvector(
getOrCreateVReg(CI), getOrCreateVReg(*CI.getOperand(0)),
getOrCreateVReg(*CI.getOperand(1)), Index->getZExtValue());
return true;
}
case Intrinsic::vector_extract:
return translateExtractVector(CI, MIRBuilder);
case Intrinsic::vector_insert:
return translateInsertVector(CI, MIRBuilder);
case Intrinsic::prefetch: {
Value *Addr = CI.getOperand(0);
unsigned RW = cast<ConstantInt>(CI.getOperand(1))->getZExtValue();
@@ -3163,8 +3153,7 @@ bool IRTranslator::translateInsertElement(const User &U,
if (auto *CI = dyn_cast<ConstantInt>(U.getOperand(2))) {
if (CI->getBitWidth() != PreferredVecIdxWidth) {
APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
auto *NewIdxCI = ConstantInt::get(CI->getContext(), NewIdx);
Idx = getOrCreateVReg(*NewIdxCI);
CI = ConstantInt::get(CI->getContext(), NewIdx);
}
}
if (!Idx)
@@ -3177,6 +3166,58 @@ return true;
return true;
}

bool IRTranslator::translateInsertVector(const User &U,
MachineIRBuilder &MIRBuilder) {
Register Dst = getOrCreateVReg(U);
Register Vec = getOrCreateVReg(*U.getOperand(0));
Register Elt = getOrCreateVReg(*U.getOperand(1));

ConstantInt *CI = cast<ConstantInt>(U.getOperand(2));
unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits();

  // Resize the index to the preferred vector index width.
if (CI->getBitWidth() != PreferredVecIdxWidth) {
APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
CI = ConstantInt::get(CI->getContext(), NewIdx);
}

  // If the subvector being inserted is a <1 x Ty> fixed vector, we have to use
  // other means, as <1 x Ty> is not a legal vector type in LLT.
if (auto *ResultType = dyn_cast<FixedVectorType>(U.getOperand(1)->getType());
ResultType && ResultType->getNumElements() == 1) {
if (auto *InputType = dyn_cast<FixedVectorType>(U.getOperand(0)->getType());
InputType && InputType->getNumElements() == 1) {
      // We are inserting a <1 x Ty> fixed vector into another <1 x Ty> fixed
      // vector. Neither is a legal vector type in LLT, so lower this to a
      // scalar copy.
return translateCopy(U, *U.getOperand(0), MIRBuilder);
}
if (auto *InputType =
dyn_cast<FixedVectorType>(U.getOperand(0)->getType())) {
      // We are inserting a <1 x Ty> fixed vector into a legal fixed vector.
      // <1 x Ty> is not a legal vector type in LLT, so insert its single
      // element instead.
Register Idx = getOrCreateVReg(*CI);
MIRBuilder.buildInsertVectorElement(Dst, Vec, Elt, Idx);
return true;
}
if (auto *InputType =
dyn_cast<ScalableVectorType>(U.getOperand(0)->getType())) {
      // We are inserting a <1 x Ty> fixed vector into a scalable vector. Use a
      // scalar element insert with the index scaled by vscale.
LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
Register Idx = getOrCreateVReg(*CI);
auto ScaledIndex = MIRBuilder.buildMul(
VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx);
MIRBuilder.buildInsertVectorElement(Dst, Vec, Elt, ScaledIndex);
return true;
}
}

  MIRBuilder.buildInsertSubvector(Dst, Vec, Elt, CI->getZExtValue());
return true;
}

bool IRTranslator::translateExtractElement(const User &U,
MachineIRBuilder &MIRBuilder) {
// If it is a <1 x Ty> vector, use the scalar as it is
@@ -3205,6 +3246,56 @@ bool IRTranslator::translateExtractElement(const User &U,
return true;
}

bool IRTranslator::translateExtractVector(const User &U,
MachineIRBuilder &MIRBuilder) {
Register Res = getOrCreateVReg(U);
Register Vec = getOrCreateVReg(*U.getOperand(0));
ConstantInt *CI = cast<ConstantInt>(U.getOperand(1));
unsigned PreferredVecIdxWidth = TLI->getVectorIdxTy(*DL).getSizeInBits();

  // Resize the index to the preferred vector index width.
if (CI->getBitWidth() != PreferredVecIdxWidth) {
APInt NewIdx = CI->getValue().zextOrTrunc(PreferredVecIdxWidth);
CI = ConstantInt::get(CI->getContext(), NewIdx);
}

  // If the extracted result is a <1 x Ty> fixed vector, we have to use other
  // means, as <1 x Ty> is not a legal vector type in LLT.
if (auto *ResultType = dyn_cast<FixedVectorType>(U.getType());
ResultType && ResultType->getNumElements() == 1) {
if (auto *InputType = dyn_cast<FixedVectorType>(U.getOperand(0)->getType());
InputType && InputType->getNumElements() == 1) {
      // We are extracting a <1 x Ty> fixed vector from another <1 x Ty> fixed
      // vector. Neither is a legal vector type in LLT, so lower this to a
      // scalar copy.
return translateCopy(U, *U.getOperand(0), MIRBuilder);
}
if (auto *InputType =
dyn_cast<FixedVectorType>(U.getOperand(0)->getType())) {
      // We are extracting a <1 x Ty> fixed vector from a legal fixed vector.
      // <1 x Ty> is not a legal vector type in LLT, so extract the single
      // element instead.
Register Idx = getOrCreateVReg(*CI);
MIRBuilder.buildExtractVectorElement(Res, Vec, Idx);
return true;
}
if (auto *InputType =
dyn_cast<ScalableVectorType>(U.getOperand(0)->getType())) {
      // We are extracting a <1 x Ty> fixed vector from a scalable vector. Use a
      // scalar element extract with the index scaled by vscale.
LLT VecIdxTy = LLT::scalar(PreferredVecIdxWidth);
Register Idx = getOrCreateVReg(*CI);
auto ScaledIndex = MIRBuilder.buildMul(
VecIdxTy, MIRBuilder.buildVScale(VecIdxTy, 1), Idx);
MIRBuilder.buildExtractVectorElement(Res, Vec, ScaledIndex);
return true;
}
}

  MIRBuilder.buildExtractSubvector(Res, Vec, CI->getZExtValue());
return true;
}

bool IRTranslator::translateShuffleVector(const User &U,
MachineIRBuilder &MIRBuilder) {
// A ShuffleVector that operates on scalable vectors is a splat vector where
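For context, the sketch below shows one way the IR exercised by these new paths could be built programmatically. It is not part of this commit; the function name demo and the surrounding driver boilerplate are illustrative assumptions. It uses IRBuilder's CreateExtractVector/CreateInsertVector helpers to emit llvm.vector.extract and llvm.vector.insert with a <1 x i32> subvector, which is the shape that translateExtractVector/translateInsertVector now lower to G_EXTRACT_VECTOR_ELT/G_INSERT_VECTOR_ELT instead of the subvector opcodes.

// subvector-demo.cpp: build IR hitting the new <1 x Ty> translator paths.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  LLVMContext Ctx;
  Module M("subvector-demo", Ctx);
  IRBuilder<> B(Ctx);

  Type *I32 = B.getInt32Ty();
  auto *NxV4I32 = ScalableVectorType::get(I32, 4); // <vscale x 4 x i32>
  auto *V1I32 = FixedVectorType::get(I32, 1);      // <1 x i32>

  // void @demo(<vscale x 4 x i32> %a, <1 x i32> %b, ptr %p)
  auto *FnTy = FunctionType::get(
      B.getVoidTy(), {NxV4I32, V1I32, PointerType::get(Ctx, 0)},
      /*isVarArg=*/false);
  Function *Fn = Function::Create(FnTy, Function::ExternalLinkage, "demo", M);
  BasicBlock *BB = BasicBlock::Create(Ctx, "entry", Fn);
  B.SetInsertPoint(BB);

  Value *A = Fn->getArg(0), *Sub = Fn->getArg(1), *P = Fn->getArg(2);

  // llvm.vector.extract with a <1 x i32> result from a scalable source:
  // handled by translateExtractVector as a scalar element extract.
  Value *Ext = B.CreateExtractVector(V1I32, A, B.getInt64(0), "ext");

  // llvm.vector.insert of a <1 x i32> subvector into a scalable vector:
  // handled by translateInsertVector as a scalar element insert.
  Value *Ins = B.CreateInsertVector(NxV4I32, A, Sub, B.getInt64(0), "ins");

  B.CreateStore(Ext, P);
  B.CreateStore(Ins, P);
  B.CreateRetVoid();

  verifyModule(M, &errs());
  M.print(outs(), nullptr);
  return 0;
}

Feeding the printed module to llc with -global-isel -stop-after=irtranslator (as in the RUN line of the test below) should show the G_EXTRACT_VECTOR_ELT/G_INSERT_VECTOR_ELT forms rather than G_EXTRACT_SUBVECTOR/G_INSERT_SUBVECTOR.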
146 changes: 137 additions & 9 deletions llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-subvector.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -global-isel -stop-after=irtranslator %s -o - | FileCheck %s
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve -global-isel -stop-after=irtranslator -aarch64-enable-gisel-sve=1 %s -o - | FileCheck %s

define i32 @extract_v4i32_vector_insert_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
; CHECK-LABEL: name: extract_v4i32_vector_insert_const
@@ -58,21 +58,149 @@ entry:
ret i32 %d
}

define i32 @extract_v4i32_vector_extract_const(<4 x i32> %a, <2 x i32> %b, i32 %c) {
define i32 @extract_v4i32_vector_extract_const(<vscale x 4 x i32> %a, i32 %c, ptr %p) {
; CHECK-LABEL: name: extract_v4i32_vector_extract_const
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $d1, $q0, $w0
; CHECK-NEXT: liveins: $w0, $x1, $z0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_EXTRACT_SUBVECTOR [[COPY]](<vscale x 4 x s32>), 0
; CHECK-NEXT: G_STORE [[EXTRACT_SUBVECTOR]](<vscale x 4 x s32>), [[COPY2]](p0) :: (store (<vscale x 4 x s32>) into %ir.p)
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%vector = call <vscale x 4 x i32> @llvm.vector.extract(<vscale x 4 x i32> %a, i64 0)
store <vscale x 4 x i32> %vector, ptr %p, align 16
ret i32 1
}

define i32 @extract_v4i32_vector_insert_const_vscale(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i32 %c, ptr %p) {
; CHECK-LABEL: name: extract_v4i32_vector_insert_const_vscale
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $w0, $x1, $z0, $z1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z1
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[INSERT_SUBVECTOR:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_SUBVECTOR [[COPY]], [[COPY1]](<vscale x 4 x s32>), 0
; CHECK-NEXT: G_STORE [[INSERT_SUBVECTOR]](<vscale x 4 x s32>), [[COPY3]](p0) :: (store (<vscale x 4 x s32>) into %ir.p)
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%vector = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, i64 0)
store <vscale x 4 x i32> %vector, ptr %p, align 16
ret i32 1
}

define i32 @extract_v4i32_vector_extract_const_illegal_fixed(<4 x i32> %a, ptr %p) {
; CHECK-LABEL: name: extract_v4i32_vector_extract_const_illegal_fixed
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $q0, $x0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<4 x s32>), [[C]](s64)
; CHECK-NEXT: G_STORE [[EVEC]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p, align 16)
; CHECK-NEXT: $w0 = COPY [[C1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%vector = call <1 x i32> @llvm.vector.extract(<4 x i32> %a, i64 0)
store <1 x i32> %vector, ptr %p, align 16
ret i32 1
}

define i32 @extract_v4i32_vector_extract_const_illegal_scalable(<vscale x 4 x i32> %a, ptr %p) {
; CHECK-LABEL: name: extract_v4i32_vector_extract_const_illegal_scalable
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $x0, $z0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(p0) = COPY $x0
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 1
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[VSCALE]], [[C]]
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[COPY]](<vscale x 4 x s32>), [[MUL]](s64)
; CHECK-NEXT: G_STORE [[EVEC]](s32), [[COPY1]](p0) :: (store (s32) into %ir.p, align 16)
; CHECK-NEXT: $w0 = COPY [[C1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%vector = call <1 x i32> @llvm.vector.extract(<vscale x 4 x i32> %a, i64 0)
store <1 x i32> %vector, ptr %p, align 16
ret i32 1
}

define i32 @extract_v4i32_vector_insert_const_illegal_scalable(<vscale x 4 x i32> %a, <1 x i32> %b, i32 %c, ptr %p) {
; CHECK-LABEL: name: extract_v4i32_vector_insert_const_illegal_scalable
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $d1, $w0, $x1, $z0
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $z0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[VSCALE:%[0-9]+]]:_(s64) = G_VSCALE i64 1
; CHECK-NEXT: [[MUL:%[0-9]+]]:_(s64) = G_MUL [[VSCALE]], [[C]]
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<vscale x 4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[UV]](s32), [[MUL]](s64)
; CHECK-NEXT: G_STORE [[IVEC]](<vscale x 4 x s32>), [[COPY3]](p0) :: (store (<vscale x 4 x s32>) into %ir.p)
; CHECK-NEXT: $w0 = COPY [[C1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%vector = call <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> %a, <1 x i32> %b, i64 0)
store <vscale x 4 x i32> %vector, ptr %p, align 16
ret i32 1
}

define i32 @extract_v4i32_vector_insert_const_fixed(<4 x i32> %a, <1 x i32> %b, i32 %c, ptr %p) {
; CHECK-LABEL: name: extract_v4i32_vector_insert_const_fixed
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $d1, $q0, $w0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0
; CHECK-NEXT: [[EXTRACT_SUBVECTOR:%[0-9]+]]:_(<4 x s32>) = G_EXTRACT_SUBVECTOR [[COPY]](<4 x s32>), 0
; CHECK-NEXT: [[EVEC:%[0-9]+]]:_(s32) = G_EXTRACT_VECTOR_ELT [[EXTRACT_SUBVECTOR]](<4 x s32>), [[C]](s64)
; CHECK-NEXT: $w0 = COPY [[EVEC]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[IVEC:%[0-9]+]]:_(<4 x s32>) = G_INSERT_VECTOR_ELT [[COPY]], [[UV]](s32), [[C]](s64)
; CHECK-NEXT: G_STORE [[IVEC]](<4 x s32>), [[COPY3]](p0) :: (store (<4 x s32>) into %ir.p)
; CHECK-NEXT: $w0 = COPY [[C1]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%vector = call <4 x i32> @llvm.vector.extract.v2i32.v4i32(<4 x i32> %a, i64 0)
%d = extractelement <4 x i32> %vector, i32 0
ret i32 %d
%vector = call <4 x i32> @llvm.vector.insert.v4i32.v4i32(<4 x i32> %a, <1 x i32> %b, i64 0)
store <4 x i32> %vector, ptr %p, align 16
ret i32 1
}

define i32 @extract_v4i32_vector_insert_const_fixed_illegal(<1 x i32> %a, <1 x i32> %b, i32 %c, ptr %p) {
; CHECK-LABEL: name: extract_v4i32_vector_insert_const_fixed_illegal
; CHECK: bb.1.entry:
; CHECK-NEXT: liveins: $d0, $d1, $w0, $x1
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>)
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>)
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $w0
; CHECK-NEXT: [[COPY3:%[0-9]+]]:_(p0) = COPY $x1
; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; CHECK-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY [[UV]](s32)
; CHECK-NEXT: G_STORE [[COPY4]](s32), [[COPY3]](p0) :: (store (s32) into %ir.p, align 16)
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: RET_ReallyLR implicit $w0
entry:
%vector = call <1 x i32> @llvm.vector.insert.v1i32.v4i32(<1 x i32> %a, <1 x i32> %b, i64 0)
store <1 x i32> %vector, ptr %p, align 16
ret i32 1
}
