diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 7de813f6032642b..79c0e45e3aa5b50 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -748,22 +748,44 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, // output are the same, or we are using cvt f64->i32 or f32->i64. if ((LT.second == MVT::f32 || LT.second == MVT::f64 || LT.second == MVT::v2f32 || LT.second == MVT::v4f32 || - LT.second == MVT::v2f64) && - (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits() || - (LT.second == MVT::f64 && MTy == MVT::i32) || - (LT.second == MVT::f32 && MTy == MVT::i64))) - return LT.first; - // Similarly for fp16 sizes - if (ST->hasFullFP16() && - ((LT.second == MVT::f16 && MTy == MVT::i32) || - ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) && - (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits())))) + LT.second == MVT::v2f64)) { + if ((LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits() || + (LT.second == MVT::f64 && MTy == MVT::i32) || + (LT.second == MVT::f32 && MTy == MVT::i64))) + return LT.first; + // Extending vector types v2f32->v2i64, fcvtl*2 + fcvt*2 + if (LT.second.getScalarType() == MVT::f32 && MTy.isFixedLengthVector() && + MTy.getScalarSizeInBits() == 64) + return LT.first * (MTy.getVectorNumElements() > 2 ? 4 : 2); + } + // Similarly for fp16 sizes. Without FullFP16 we generally need to fcvt to + // f32. + if (LT.second.getScalarType() == MVT::f16 && !ST->hasFullFP16()) + return LT.first + getIntrinsicInstrCost( + {ICA.getID(), + RetTy, + {ICA.getArgTypes()[0]->getWithNewType( + Type::getFloatTy(RetTy->getContext()))}}, + CostKind); + if ((LT.second == MVT::f16 && MTy == MVT::i32) || + (LT.second == MVT::f16 && MTy == MVT::i64) || + ((LT.second == MVT::v4f16 || LT.second == MVT::v8f16) && + (LT.second.getScalarSizeInBits() == MTy.getScalarSizeInBits()))) return LT.first; - - // Otherwise we use a legal convert followed by a min+max + // Extending vector types v8f16->v8i32, fcvtl*2 + fcvt*2 + if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() && + MTy.getScalarSizeInBits() == 32) + return LT.first * (MTy.getVectorNumElements() > 4 ? 4 : 2); + // Extending vector types v8f16->v8i32. These current scalarize but the + // codegen could be better. + if (LT.second.getScalarType() == MVT::f16 && MTy.isFixedLengthVector() && + MTy.getScalarSizeInBits() == 64) + return MTy.getVectorNumElements() * 3; + + // If we can we use a legal convert followed by a min+max if ((LT.second.getScalarType() == MVT::f32 || LT.second.getScalarType() == MVT::f64 || - (ST->hasFullFP16() && LT.second.getScalarType() == MVT::f16)) && + LT.second.getScalarType() == MVT::f16) && LT.second.getScalarSizeInBits() >= MTy.getScalarSizeInBits()) { Type *LegalTy = Type::getIntNTy(RetTy->getContext(), LT.second.getScalarSizeInBits()); @@ -776,9 +798,33 @@ AArch64TTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, IntrinsicCostAttributes Attrs2(IsSigned ? Intrinsic::smax : Intrinsic::umax, LegalTy, {LegalTy, LegalTy}); Cost += getIntrinsicInstrCost(Attrs2, CostKind); - return LT.first * Cost; + return LT.first * Cost + + ((LT.second.getScalarType() != MVT::f16 || ST->hasFullFP16()) ? 0 + : 1); } - break; + // Otherwise we need to follow the default expansion that clamps the value + // using a float min/max with a fcmp+sel for nan handling when signed. + Type *FPTy = ICA.getArgTypes()[0]->getScalarType(); + RetTy = RetTy->getScalarType(); + if (LT.second.isVector()) { + FPTy = VectorType::get(FPTy, LT.second.getVectorElementCount()); + RetTy = VectorType::get(RetTy, LT.second.getVectorElementCount()); + } + IntrinsicCostAttributes Attrs1(Intrinsic::minnum, FPTy, {FPTy, FPTy}); + InstructionCost Cost = getIntrinsicInstrCost(Attrs1, CostKind); + IntrinsicCostAttributes Attrs2(Intrinsic::maxnum, FPTy, {FPTy, FPTy}); + Cost += getIntrinsicInstrCost(Attrs2, CostKind); + Cost += + getCastInstrCost(IsSigned ? Instruction::FPToSI : Instruction::FPToUI, + RetTy, FPTy, TTI::CastContextHint::None, CostKind); + if (IsSigned) { + Type *CondTy = RetTy->getWithNewBitWidth(1); + Cost += getCmpSelInstrCost(BinaryOperator::FCmp, FPTy, CondTy, + CmpInst::FCMP_UNO, CostKind); + Cost += getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy, + CmpInst::FCMP_UNO, CostKind); + } + return LT.first * Cost; } case Intrinsic::fshl: case Intrinsic::fshr: { diff --git a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll index e4e29143985b2eb..c45b6c3c5dcabf4 100644 --- a/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll +++ b/llvm/test/Analysis/CostModel/AArch64/fptoi_sat.ll @@ -34,8 +34,8 @@ define void @casts() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f32u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f32(<2 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f32(<2 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f64(<2 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2f64s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f64(<2 x double> undef) @@ -54,8 +54,8 @@ define void @casts() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f32u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f32u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f32(<4 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f32(<4 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f64(<4 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v4f64s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f64(<4 x double> undef) @@ -74,8 +74,8 @@ define void @casts() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f32u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f32(<8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f32(<8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v8f32u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f32(<8 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f32(<8 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f64(<8 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f64(<8 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v8f64s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f64(<8 x double> undef) @@ -94,8 +94,8 @@ define void @casts() { ; CHECK-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v16f32u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v16f32u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 360 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) -; CHECK-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f32(<16 x float> undef) +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f32u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f32(<16 x float> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f64(<16 x double> undef) ; CHECK-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v16f64s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f64(<16 x double> undef) @@ -223,56 +223,56 @@ define void @casts() { define void @fp16() { ; CHECK-NOFP16-LABEL: 'fp16' -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 186 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 147 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 171 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 141 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 325 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 143 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 373 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 281 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 373 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 281 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 262 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 202 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 342 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 282 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 650 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 286 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16u1 = call i1 @llvm.fptoui.sat.i1.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s8 = call i8 @llvm.fptosi.sat.i8.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16u8 = call i8 @llvm.fptoui.sat.i8.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16s16 = call i16 @llvm.fptosi.sat.i16.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) ; CHECK-NOFP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; ; CHECK-FP16-LABEL: 'fp16' @@ -284,48 +284,48 @@ define void @fp16() { ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16u16 = call i16 @llvm.fptoui.sat.i16.f16(half undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16s32 = call i32 @llvm.fptosi.sat.i32.f16(half undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u32 = call i32 @llvm.fptoui.sat.i32.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16s64 = call i64 @llvm.fptosi.sat.i64.f16(half undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16u64 = call i64 @llvm.fptoui.sat.i64.f16(half undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s1 = call <2 x i1> @llvm.fptosi.sat.v2i1.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u1 = call <2 x i1> @llvm.fptoui.sat.v2i1.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16s8 = call <2 x i8> @llvm.fptosi.sat.v2i8.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u8 = call <2 x i8> @llvm.fptoui.sat.v2i8.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16s16 = call <2 x i16> @llvm.fptosi.sat.v2i16.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16u16 = call <2 x i16> @llvm.fptoui.sat.v2i16.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16s32 = call <2 x i32> @llvm.fptosi.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16u32 = call <2 x i32> @llvm.fptoui.sat.v2i32.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16s64 = call <2 x i64> @llvm.fptosi.sat.v2i64.v2f16(<2 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v2f16u64 = call <2 x i64> @llvm.fptoui.sat.v2i64.v2f16(<2 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16s1 = call <4 x i1> @llvm.fptosi.sat.v4i1.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16u1 = call <4 x i1> @llvm.fptoui.sat.v4i1.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16s8 = call <4 x i8> @llvm.fptosi.sat.v4i8.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v4f16u8 = call <4 x i8> @llvm.fptoui.sat.v4i8.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16s16 = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4f16u16 = call <4 x i16> @llvm.fptoui.sat.v4i16.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16s32 = call <4 x i32> @llvm.fptosi.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16u32 = call <4 x i32> @llvm.fptoui.sat.v4i32.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16s64 = call <4 x i64> @llvm.fptosi.sat.v4i64.v4f16(<4 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v4f16u64 = call <4 x i64> @llvm.fptoui.sat.v4i64.v4f16(<4 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s1 = call <8 x i1> @llvm.fptosi.sat.v8i1.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u1 = call <8 x i1> @llvm.fptoui.sat.v8i1.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16s8 = call <8 x i8> @llvm.fptosi.sat.v8i8.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v8f16u8 = call <8 x i8> @llvm.fptoui.sat.v8i8.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16s16 = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v8f16u16 = call <8 x i16> @llvm.fptoui.sat.v8i16.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 206 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16s32 = call <8 x i32> @llvm.fptosi.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f16u32 = call <8 x i32> @llvm.fptoui.sat.v8i32.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16s64 = call <8 x i64> @llvm.fptosi.sat.v8i64.v8f16(<8 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v8f16u64 = call <8 x i64> @llvm.fptoui.sat.v8i64.v8f16(<8 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s1 = call <16 x i1> @llvm.fptosi.sat.v16i1.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u1 = call <16 x i1> @llvm.fptoui.sat.v16i1.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16s8 = call <16 x i8> @llvm.fptosi.sat.v16i8.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v16f16u8 = call <16 x i8> @llvm.fptoui.sat.v16i8.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16s16 = call <16 x i16> @llvm.fptosi.sat.v16i16.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v16f16u16 = call <16 x i16> @llvm.fptoui.sat.v16i16.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 416 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) -; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16s32 = call <16 x i32> @llvm.fptosi.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16u32 = call <16 x i32> @llvm.fptoui.sat.v16i32.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16s64 = call <16 x i64> @llvm.fptosi.sat.v16i64.v16f16(<16 x half> undef) +; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v16f16u64 = call <16 x i64> @llvm.fptoui.sat.v16i64.v16f16(<16 x half> undef) ; CHECK-FP16-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; %f16s1 = call i1 @llvm.fptosi.sat.i1.f16(half undef) diff --git a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll index 5fea6f669ead69f..2518a308382428f 100644 --- a/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll +++ b/llvm/test/Transforms/AggressiveInstCombine/AArch64/fptosisat.ll @@ -92,16 +92,10 @@ define i32 @f64_i16(double %in) { } define i64 @f16_i32(half %in) { -; CHECK-FP-LABEL: @f16_i32( -; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi half [[IN:%.*]] to i64 -; CHECK-FP-NEXT: [[MIN:%.*]] = call i64 @llvm.smin.i64(i64 [[CONV]], i64 2147483647) -; CHECK-FP-NEXT: [[MAX:%.*]] = call i64 @llvm.smax.i64(i64 [[MIN]], i64 -2147483648) -; CHECK-FP-NEXT: ret i64 [[MAX]] -; -; CHECK-FP16-LABEL: @f16_i32( -; CHECK-FP16-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) -; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 -; CHECK-FP16-NEXT: ret i64 [[TMP2]] +; CHECK-LABEL: @f16_i32( +; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.fptosi.sat.i32.f16(half [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[TMP1]] to i64 +; CHECK-NEXT: ret i64 [[TMP2]] ; %conv = fptosi half %in to i64 %min = call i64 @llvm.smin.i64(i64 %conv, i64 2147483647) @@ -185,16 +179,10 @@ define <8 x i64> @v8f32_i32(<8 x float> %in) { } define <4 x i32> @v4f16_i16(<4 x half> %in) { -; CHECK-FP-LABEL: @v4f16_i16( -; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <4 x half> [[IN:%.*]] to <4 x i32> -; CHECK-FP-NEXT: [[MIN:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[CONV]], <4 x i32> ) -; CHECK-FP-NEXT: [[MAX:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[MIN]], <4 x i32> ) -; CHECK-FP-NEXT: ret <4 x i32> [[MAX]] -; -; CHECK-FP16-LABEL: @v4f16_i16( -; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) -; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> -; CHECK-FP16-NEXT: ret <4 x i32> [[TMP2]] +; CHECK-LABEL: @v4f16_i16( +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.fptosi.sat.v4i16.v4f16(<4 x half> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i32> +; CHECK-NEXT: ret <4 x i32> [[TMP2]] ; %conv = fptosi <4 x half> %in to <4 x i32> %min = call <4 x i32> @llvm.smin.v4i32(<4 x i32> %conv, <4 x i32> ) @@ -203,16 +191,10 @@ define <4 x i32> @v4f16_i16(<4 x half> %in) { } define <8 x i32> @v8f16_i16(<8 x half> %in) { -; CHECK-FP-LABEL: @v8f16_i16( -; CHECK-FP-NEXT: [[CONV:%.*]] = fptosi <8 x half> [[IN:%.*]] to <8 x i32> -; CHECK-FP-NEXT: [[MIN:%.*]] = call <8 x i32> @llvm.smin.v8i32(<8 x i32> [[CONV]], <8 x i32> ) -; CHECK-FP-NEXT: [[MAX:%.*]] = call <8 x i32> @llvm.smax.v8i32(<8 x i32> [[MIN]], <8 x i32> ) -; CHECK-FP-NEXT: ret <8 x i32> [[MAX]] -; -; CHECK-FP16-LABEL: @v8f16_i16( -; CHECK-FP16-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) -; CHECK-FP16-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> -; CHECK-FP16-NEXT: ret <8 x i32> [[TMP2]] +; CHECK-LABEL: @v8f16_i16( +; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i16> @llvm.fptosi.sat.v8i16.v8f16(<8 x half> [[IN:%.*]]) +; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i16> [[TMP1]] to <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[TMP2]] ; %conv = fptosi <8 x half> %in to <8 x i32> %min = call <8 x i32> @llvm.smin.v8i32(<8 x i32> %conv, <8 x i32> ) @@ -292,3 +274,6 @@ declare <4 x i32> @llvm.smin.v4i32(<4 x i32>, <4 x i32>) declare <4 x i32> @llvm.smax.v4i32(<4 x i32>, <4 x i32>) declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK-FP: {{.*}} +; CHECK-FP16: {{.*}}