Skip to content

Commit

Permalink
[RISCV] Lower scalar_to_vector for supported FP types (#114340)
Browse files Browse the repository at this point in the history
In https://reviews.llvm.org/D147608 we added custom lowering of
scalar_to_vector for integer vectors, but inadvertently also marked it as
custom for scalable FP vectors despite not handling that case.

This adds handling for floats and marks it as custom lowered for
fixed-length FP vectors too.

Note that this doesn't handle bf16 or f16 vectors that would need
promotion, but these scalar_to_vector nodes seem to be emitted when
expanding them.
  • Loading branch information
lukel97 authored Oct 31, 2024
1 parent a33fd61 commit 6da5968
Show file tree
Hide file tree
Showing 4 changed files with 146 additions and 43 deletions.
16 changes: 12 additions & 4 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1403,7 +1403,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}

setOperationAction({ISD::BUILD_VECTOR, ISD::VECTOR_SHUFFLE,
ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT},
ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
ISD::SCALAR_TO_VECTOR},
VT, Custom);

setOperationAction(
Expand Down Expand Up @@ -6511,9 +6512,16 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
if (VT.isFixedLengthVector())
ContainerVT = getContainerForFixedLengthVector(VT);
SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
SDValue V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Scalar, VL);

SDValue V;
if (VT.isFloatingPoint()) {
V = DAG.getNode(RISCVISD::VFMV_S_F_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Scalar, VL);
} else {
Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, Subtarget.getXLenVT(), Scalar);
V = DAG.getNode(RISCVISD::VMV_S_X_VL, DL, ContainerVT,
DAG.getUNDEF(ContainerVT), Scalar, VL);
}
if (VT.isFixedLengthVector())
V = convertFromScalableVector(VT, V, DAG, Subtarget);
return V;
Expand Down
92 changes: 92 additions & 0 deletions llvm/test/CodeGen/RISCV/rvv/fixed-vectors-scalarized.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=riscv32 -mattr=+v -verify-machineinstrs | FileCheck %s
; RUN: llc < %s -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s

; Extends <8 x bfloat> to <8 x float> with only +v enabled (see RUN lines).
; The expected output below handles each element separately: it is moved to a
; GPR (fmv.x.w), shifted left by 16, inserted into a vector with vmv.s.x, and
; the pieces are then combined with vslideup.vi.
define <8 x float> @fpext_v8bf16(<8 x bfloat> %x) {
; CHECK-LABEL: fpext_v8bf16:
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: fmv.x.w a1, fa1
; CHECK-NEXT: fmv.x.w a2, fa2
; CHECK-NEXT: fmv.x.w a3, fa3
; CHECK-NEXT: fmv.x.w a4, fa4
; CHECK-NEXT: fmv.x.w a5, fa5
; CHECK-NEXT: fmv.x.w a6, fa6
; CHECK-NEXT: fmv.x.w a7, fa7
; CHECK-NEXT: slli a7, a7, 16
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v8, a7
; CHECK-NEXT: slli a6, a6, 16
; CHECK-NEXT: vmv.s.x v9, a6
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a5, a5, 16
; CHECK-NEXT: vmv.s.x v8, a5
; CHECK-NEXT: slli a4, a4, 16
; CHECK-NEXT: vmv.s.x v10, a4
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 2
; CHECK-NEXT: slli a3, a3, 16
; CHECK-NEXT: vmv.s.x v8, a3
; CHECK-NEXT: slli a2, a2, 16
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a1, a1, 16
; CHECK-NEXT: vmv.s.x v11, a1
; CHECK-NEXT: slli a0, a0, 16
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vslideup.vi v8, v11, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%y = fpext <8 x bfloat> %x to <8 x float>
ret <8 x float> %y
}

; NOTE(review): despite its name, this function takes <8 x bfloat> and its body
; and expected output are identical to @fpext_v8bf16, so it does not exercise
; f16 at all. It was presumably meant to take <8 x half> -- TODO confirm. If the
; type is changed, the CHECK lines below must be regenerated with
; utils/update_llc_test_checks.py rather than edited by hand.
define <8 x float> @fpext_v8f16(<8 x bfloat> %x) {
; CHECK-LABEL: fpext_v8f16:
; CHECK: # %bb.0:
; CHECK-NEXT: fmv.x.w a0, fa0
; CHECK-NEXT: fmv.x.w a1, fa1
; CHECK-NEXT: fmv.x.w a2, fa2
; CHECK-NEXT: fmv.x.w a3, fa3
; CHECK-NEXT: fmv.x.w a4, fa4
; CHECK-NEXT: fmv.x.w a5, fa5
; CHECK-NEXT: fmv.x.w a6, fa6
; CHECK-NEXT: fmv.x.w a7, fa7
; CHECK-NEXT: slli a7, a7, 16
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vmv.s.x v8, a7
; CHECK-NEXT: slli a6, a6, 16
; CHECK-NEXT: vmv.s.x v9, a6
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a5, a5, 16
; CHECK-NEXT: vmv.s.x v8, a5
; CHECK-NEXT: slli a4, a4, 16
; CHECK-NEXT: vmv.s.x v10, a4
; CHECK-NEXT: vslideup.vi v10, v8, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 2
; CHECK-NEXT: slli a3, a3, 16
; CHECK-NEXT: vmv.s.x v8, a3
; CHECK-NEXT: slli a2, a2, 16
; CHECK-NEXT: vmv.s.x v9, a2
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v9, v8, 1
; CHECK-NEXT: slli a1, a1, 16
; CHECK-NEXT: vmv.s.x v11, a1
; CHECK-NEXT: slli a0, a0, 16
; CHECK-NEXT: vmv.s.x v8, a0
; CHECK-NEXT: vslideup.vi v8, v11, 1
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 4
; CHECK-NEXT: ret
%y = fpext <8 x bfloat> %x to <8 x float>
ret <8 x float> %y
}

Original file line number Diff line number Diff line change
Expand Up @@ -412,30 +412,20 @@ declare <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7>, me
define <1 x half> @vsitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
; RV32-LABEL: vsitofp_v1i7_v1f16:
; RV32: # %bb.0:
; RV32-NEXT: addi sp, sp, -16
; RV32-NEXT: .cfi_def_cfa_offset 16
; RV32-NEXT: slli a0, a0, 25
; RV32-NEXT: srai a0, a0, 25
; RV32-NEXT: fcvt.h.w fa5, a0
; RV32-NEXT: fsh fa5, 14(sp)
; RV32-NEXT: addi a0, sp, 14
; RV32-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV32-NEXT: vle16.v v8, (a0)
; RV32-NEXT: addi sp, sp, 16
; RV32-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV32-NEXT: vfmv.s.f v8, fa5
; RV32-NEXT: ret
;
; RV64-LABEL: vsitofp_v1i7_v1f16:
; RV64: # %bb.0:
; RV64-NEXT: addi sp, sp, -16
; RV64-NEXT: .cfi_def_cfa_offset 16
; RV64-NEXT: slli a0, a0, 57
; RV64-NEXT: srai a0, a0, 57
; RV64-NEXT: fcvt.h.w fa5, a0
; RV64-NEXT: fsh fa5, 14(sp)
; RV64-NEXT: addi a0, sp, 14
; RV64-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; RV64-NEXT: vle16.v v8, (a0)
; RV64-NEXT: addi sp, sp, 16
; RV64-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; RV64-NEXT: vfmv.s.f v8, fa5
; RV64-NEXT: ret
%evec = call <1 x half> @llvm.experimental.constrained.sitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <1 x half> %evec
Expand All @@ -445,15 +435,10 @@ declare <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7>, me
define <1 x half> @vuitofp_v1i7_v1f16(<1 x i7> %va) strictfp {
; CHECK-LABEL: vuitofp_v1i7_v1f16:
; CHECK: # %bb.0:
; CHECK-NEXT: addi sp, sp, -16
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: andi a0, a0, 127
; CHECK-NEXT: fcvt.h.wu fa5, a0
; CHECK-NEXT: fsh fa5, 14(sp)
; CHECK-NEXT: addi a0, sp, 14
; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
; CHECK-NEXT: vle16.v v8, (a0)
; CHECK-NEXT: addi sp, sp, 16
; CHECK-NEXT: vsetivli zero, 1, e16, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa5
; CHECK-NEXT: ret
%evec = call <1 x half> @llvm.experimental.constrained.uitofp.v1f16.v1i7(<1 x i7> %va, metadata !"round.dynamic", metadata !"fpexcept.strict")
ret <1 x half> %evec
Expand Down
54 changes: 36 additions & 18 deletions llvm/test/CodeGen/RISCV/rvv/pr63596.ll
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,54 @@ define <4 x float> @foo(ptr %0) nounwind {
; CHECK-NEXT: sd s0, 32(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s1, 24(sp) # 8-byte Folded Spill
; CHECK-NEXT: sd s2, 16(sp) # 8-byte Folded Spill
; CHECK-NEXT: csrr a1, vlenb
; CHECK-NEXT: slli a1, a1, 1
; CHECK-NEXT: sub sp, sp, a1
; CHECK-NEXT: lhu s0, 0(a0)
; CHECK-NEXT: lhu s1, 2(a0)
; CHECK-NEXT: lhu s2, 4(a0)
; CHECK-NEXT: lhu a0, 6(a0)
; CHECK-NEXT: fmv.w.x fa0, a0
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 4(sp)
; CHECK-NEXT: fmv.w.x fa0, s2
; CHECK-NEXT: fmv.w.x fa5, s2
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: fmv.s fa0, fa5
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 12(sp)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: fmv.w.x fa0, s1
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 8(sp)
; CHECK-NEXT: fmv.w.x fa0, s0
; CHECK-NEXT: fmv.w.x fa5, s0
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vs1r.v v8, (a0) # Unknown-size Folded Spill
; CHECK-NEXT: fmv.s fa0, fa5
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: fsw fa0, 0(sp)
; CHECK-NEXT: addi a0, sp, 4
; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
; CHECK-NEXT: vle32.v v9, (a0)
; CHECK-NEXT: addi a0, sp, 12
; CHECK-NEXT: vle32.v v10, (a0)
; CHECK-NEXT: addi a0, sp, 8
; CHECK-NEXT: vle32.v v11, (a0)
; CHECK-NEXT: mv a0, sp
; CHECK-NEXT: vle32.v v8, (a0)
; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
; CHECK-NEXT: vslideup.vi v10, v9, 1
; CHECK-NEXT: vslideup.vi v8, v11, 1
; CHECK-NEXT: vfmv.s.f v8, fa0
; CHECK-NEXT: addi a0, sp, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vslideup.vi v8, v9, 1
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: add a0, sp, a0
; CHECK-NEXT: addi a0, a0, 16
; CHECK-NEXT: vl1r.v v9, (a0) # Unknown-size Folded Reload
; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
; CHECK-NEXT: vslideup.vi v8, v10, 2
; CHECK-NEXT: vslideup.vi v8, v9, 2
; CHECK-NEXT: csrr a0, vlenb
; CHECK-NEXT: slli a0, a0, 1
; CHECK-NEXT: add sp, sp, a0
; CHECK-NEXT: ld ra, 40(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s0, 32(sp) # 8-byte Folded Reload
; CHECK-NEXT: ld s1, 24(sp) # 8-byte Folded Reload
Expand Down

0 comments on commit 6da5968

Please sign in to comment.