Skip to content

Commit

Permalink
[AMDGPU] Constant folding of llvm.amdgcn.trig.preop
Browse files Browse the repository at this point in the history
  If the parameters(the input and segment select) coming in to
amdgcn.trig.preop intrinsic are compile time constants, we
pre-compute the output of amdgcn.trig.preop on the CPU and
replaces the uses with the computed constant.

  This work extends the patch https://reviews.llvm.org/D120150 to
make it a complete coverage.

  For the segment select, only src1[4:0] are used. A segment slect is
invalid if we are selecting the 53-bit segment beyond the [1200:0] range
of the 2/PI table. 0 is returned when a segment select is not valid.
  • Loading branch information
changpeng committed Jul 11, 2024
1 parent a38d5e0 commit 9ac00e8
Show file tree
Hide file tree
Showing 2 changed files with 166 additions and 59 deletions.
91 changes: 91 additions & 0 deletions llvm/lib/Target/AMDGPU/AMDGPUInstCombineIntrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1102,6 +1102,97 @@ GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {

break;
}
case Intrinsic::amdgcn_trig_preop: {
// The intrinsic is declared with name mangling, but currently the
// instruction only exists for f64
if (!II.getType()->isDoubleTy())
break;

Value *Src = II.getArgOperand(0);
Value *Segment = II.getArgOperand(1);
if (isa<PoisonValue>(Src))
return IC.replaceInstUsesWith(II, Src);

if (isa<UndefValue>(Src)) {
auto *QNaN = ConstantFP::get(
II.getType(), APFloat::getQNaN(II.getType()->getFltSemantics()));
return IC.replaceInstUsesWith(II, QNaN);
}

const ConstantFP *Csrc = dyn_cast<ConstantFP>(Src);
if (!Csrc)
break;

if (II.isStrictFP())
break;

const APFloat &Fsrc = Csrc->getValueAPF();
if (Fsrc.isNaN()) {
// FIXME: We just need to make the nan quiet here, but that's unavailable
// on APFloat, only IEEEfloat
auto *Quieted = ConstantFP::get(
II.getType(), scalbn(Fsrc, 0, APFloat::rmNearestTiesToEven));
return IC.replaceInstUsesWith(II, Quieted);
}

const ConstantInt *Cseg = dyn_cast<ConstantInt>(Segment);
if (!Cseg)
break;

// 2.0/PI table.
static const uint32_t TwoByPi[] = {
0xa2f9836e, 0x4e441529, 0xfc2757d1, 0xf534ddc0, 0xdb629599, 0x3c439041,
0xfe5163ab, 0xdebbc561, 0xb7246e3a, 0x424dd2e0, 0x06492eea, 0x09d1921c,
0xfe1deb1c, 0xb129a73e, 0xe88235f5, 0x2ebb4484, 0xe99c7026, 0xb45f7e41,
0x3991d639, 0x835339f4, 0x9c845f8b, 0xbdf9283b, 0x1ff897ff, 0xde05980f,
0xef2f118b, 0x5a0a6d1f, 0x6d367ecf, 0x27cb09b7, 0x4f463f66, 0x9e5fea2d,
0x7527bac7, 0xebe5f17b, 0x3d0739f7, 0x8a5292ea, 0x6bfb5fb1, 0x1f8d5d08,
0x56033046};

const APInt &SegVal = Cseg->getValue();
bool Ovflow = false;
unsigned Numbits = 32;
bool Signed = true;
APInt EClamp(Numbits, 1077, Signed);
APInt E = Fsrc.bitcastToAPInt().ashr(52);
E &= 0x7ff;
E = E.trunc(Numbits);
APInt Shift =
(E.sgt(EClamp) ? E.ssub_ov(EClamp, Ovflow) : APInt(Numbits, 0, Signed))
.sadd_ov(APInt(Numbits, 53, Signed).smul_ov(SegVal & 0x1f, Ovflow),
Ovflow);
uint32_t Idx = Shift.ashr(5).getZExtValue();

// Return 0 for invalid segment select (outbound).
if (static_cast<size_t>(Idx) + 2 >= std::size(TwoByPi)) {
APFloat Zero = APFloat::getZero(II.getType()->getFltSemantics());
return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Zero));
}

APInt Bshift = Shift & 0x1f;
Numbits = 64;
Signed = false;
uint64_t Hi = ((uint64_t)TwoByPi[Idx] << 32) | (uint64_t)TwoByPi[Idx + 1];
APInt Thi = APInt(Numbits, Hi, Signed);
APInt Tlo = APInt(Numbits, (uint64_t)TwoByPi[Idx + 2] << 32, Signed);

if (Bshift.sgt(0)) {
Numbits = 32;
Signed = true;
Thi = Thi.shl(Bshift) |
Tlo.lshr(APInt(Numbits, 64, Signed).ssub_ov(Bshift, Ovflow));
}

Thi = Thi.lshr(11);
APFloat Res = APFloat(Thi.roundToDouble());
int32_t Scale = -53 - Shift.getSExtValue();

if (E.sge(0x7b0))
Scale += 128;

Res = scalbn(Res, Scale, RoundingMode::NearestTiesToEven);
return IC.replaceInstUsesWith(II, ConstantFP::get(Src->getType(), Res));
}
case Intrinsic::amdgcn_fmul_legacy: {
Value *Op0 = II.getArgOperand(0);
Value *Op1 = II.getArgOperand(1);
Expand Down
134 changes: 75 additions & 59 deletions llvm/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
Original file line number Diff line number Diff line change
Expand Up @@ -5608,17 +5608,15 @@ declare float @llvm.amdgcn.trig.preop.f32(float, i32)

define double @trig_preop_constfold_variable_undef_arg(i32 %arg) {
; CHECK-LABEL: @trig_preop_constfold_variable_undef_arg(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 [[ARG:%.*]])
; CHECK-NEXT: ret double [[VAL]]
; CHECK-NEXT: ret double 0x7FF8000000000000
;
%val = call double @llvm.amdgcn.trig.preop.f64(double undef, i32 %arg)
ret double %val
}

define double @trig_preop_constfold_variable_poison_arg(i32 %arg) {
; CHECK-LABEL: @trig_preop_constfold_variable_poison_arg(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double poison, i32 [[ARG:%.*]])
; CHECK-NEXT: ret double [[VAL]]
; CHECK-NEXT: ret double poison
;
%val = call double @llvm.amdgcn.trig.preop.f64(double poison, i32 %arg)
ret double %val
Expand Down Expand Up @@ -5653,35 +5651,31 @@ define double @trig_preop_constfold_variable_int(i32 %arg) {

define double @trig_preop_qnan(i32 %arg) {
; CHECK-LABEL: @trig_preop_qnan(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 [[ARG:%.*]])
; CHECK-NEXT: ret double [[VAL]]
; CHECK-NEXT: ret double 0x7FF8000000000000
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF8000000000000, i32 %arg)
ret double %val
}

define double @trig_preop_snan(i32 %arg) {
; CHECK-LABEL: @trig_preop_snan(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 [[ARG:%.*]])
; CHECK-NEXT: ret double [[VAL]]
; CHECK-NEXT: ret double 0x7FF8000000000001
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000001, i32 %arg)
ret double %val
}

define double @trig_preop_inf_0() {
; CHECK-LABEL: @trig_preop_inf_0(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000000, i32 0)
; CHECK-NEXT: ret double [[VAL]]
; CHECK-NEXT: ret double 0xB43DD63F5F2F8BD
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x7FF0000000000000, i32 0)
ret double %val
}

define double @trig_preop_ninf_0() {
; CHECK-LABEL: @trig_preop_ninf_0(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0xFFF0000000000000, i32 0)
; CHECK-NEXT: ret double [[VAL]]
; CHECK-NEXT: ret double 0xB43DD63F5F2F8BD
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0xFFF0000000000000, i32 0)
ret double %val
Expand All @@ -5707,10 +5701,36 @@ define double @trig_preop_variable_args(double %arg0, i32 %arg1) {

define double @trig_preop_constfold() {
; CHECK-LABEL: @trig_preop_constfold(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5)
; CHECK-NEXT: ret double [[VAL]]
; CHECK-NEXT: ret double 0x394A6EE06DB14ACC
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 5)
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 2)
ret double %val
}

; src1[4:0] <= 21 for segment to be inbound with this exponent of src0.
define double @trig_preop_constfold_outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_outbound_segment(
; CHECK-NEXT: ret double 0.000000e+00
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 22)
ret double %val
}

; Only use src1[4:0], so segment is actually 31 for -1.
define double @trig_preop_constfold_neg1_segment() {
; CHECK-LABEL: @trig_preop_constfold_neg1_segment(
; CHECK-NEXT: ret double 0.000000e+00
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 -1)
ret double %val
}

; Only use src1[4:0], so segment is actually 0 for -32.
define double @trig_preop_constfold_neg32_segment() {
; CHECK-LABEL: @trig_preop_constfold_neg32_segment(
; CHECK-NEXT: ret double 0x3FE45F306DC9C882
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 3.454350e+02, i32 -32)
ret double %val
}

Expand All @@ -5723,84 +5743,80 @@ define double @trig_preop_constfold_strictfp() strictfp {
ret double %val
}

define double @trig_preop_constfold_0.0__0() {
; CHECK-LABEL: @trig_preop_constfold_0.0__0(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 0)
; CHECK-NEXT: ret double [[VAL]]
define double @trig_preop_constfold_exponent0__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent0__segment0(
; CHECK-NEXT: ret double 0x3FE45F306DC9C882
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 0)
ret double %val
}

define double @trig_preop_constfold_0.0__1() {
; CHECK-LABEL: @trig_preop_constfold_0.0__1(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 1)
; CHECK-NEXT: ret double [[VAL]]
define double @trig_preop_constfold_exponent0__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent0__segment2(
; CHECK-NEXT: ret double 0x394A6EE06DB14ACC
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 1)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 2)
ret double %val
}

define double @trig_preop_constfold_0.0__neg1() {
; CHECK-LABEL: @trig_preop_constfold_0.0__neg1(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 -1)
; CHECK-NEXT: ret double [[VAL]]
; src1[4:0] <= 21 for segment to be inbound with this exponent of src0.
define double @trig_preop_constfold_exponent0__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent0__outbound_segment(
; CHECK-NEXT: ret double 0.000000e+00
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 -1)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 22)
ret double %val
}

define double @trig_preop_constfold_0.0__9999999() {
; CHECK-LABEL: @trig_preop_constfold_0.0__9999999(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 9999999)
; CHECK-NEXT: ret double [[VAL]]
; 1607 = 1077 + 10 * 53
define double @trig_preop_constfold_exponent1607__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607__segment0(
; CHECK-NEXT: ret double 0x1EC8135A2FBF209C
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 9999999)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 0)
ret double %val
}

define double @trig_preop_constfold_0.0__neg999999() {
; CHECK-LABEL: @trig_preop_constfold_0.0__neg999999(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0.000000e+00, i32 -999999)
; CHECK-NEXT: ret double [[VAL]]
; 1607 = 1077 + 10 * 53
define double @trig_preop_constfold_exponent1607__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607__segment2(
; CHECK-NEXT: ret double 0x181272117E2EF7E4
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0.0, i32 -999999)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 2)
ret double %val
}

define double @trig_preop_constfold_0x0020000000000000_0() {
; CHECK-LABEL: @trig_preop_constfold_0x0020000000000000_0(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x10000000000000, i32 0)
; CHECK-NEXT: ret double [[VAL]]
; src1[4:0] <= 11 for segment to be inbound with this exponent of src0.
define double @trig_preop_constfold_exponent1607__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent1607__outbound_segment(
; CHECK-NEXT: ret double 0.000000e+00
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x0010000000000000, i32 0)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x6470000000000000, i32 12)
ret double %val
}

define double @trig_preop_constfold_0x001fffffffffffff_0() {
; CHECK-LABEL: @trig_preop_constfold_0x001fffffffffffff_0(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0xFFFFFFFFFFFFF, i32 0)
; CHECK-NEXT: ret double [[VAL]]
define double @trig_preop_constfold_exponent1968__segment0() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968__segment0(
; CHECK-NEXT: ret double 0x10374F463F669E5F
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x000fffffffffffff, i32 0)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 0)
ret double %val
}

define double @trig_preop_constfold_0x8020000000000000_0() {
; CHECK-LABEL: @trig_preop_constfold_0x8020000000000000_0(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x8020000000000000, i32 0)
; CHECK-NEXT: ret double [[VAL]]
define double @trig_preop_constfold_exponent1968__segment2() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968__segment2(
; CHECK-NEXT: ret double 0x98F2F8BD9E839CE
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x8020000000000000, i32 0)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 2)
ret double %val
}

define double @trig_preop_constfold_0x801fffffffffffff_0() {
; CHECK-LABEL: @trig_preop_constfold_0x801fffffffffffff_0(
; CHECK-NEXT: [[VAL:%.*]] = call double @llvm.amdgcn.trig.preop.f64(double 0x801FFFFFFFFFFFFF, i32 0)
; CHECK-NEXT: ret double [[VAL]]
; src1[4:0] <= 4 for segment to be inbound with this exponent of src0.
define double @trig_preop_constfold_exponent1968__outbound_segment() {
; CHECK-LABEL: @trig_preop_constfold_exponent1968__outbound_segment(
; CHECK-NEXT: ret double 0.000000e+00
;
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x801fffffffffffff, i32 0)
%val = call double @llvm.amdgcn.trig.preop.f64(double 0x7B00000000000000, i32 5)
ret double %val
}

Expand Down

0 comments on commit 9ac00e8

Please sign in to comment.