author     Craig Topper <craig.topper@intel.com>   2019-05-22 20:04:55 +0000
committer  Craig Topper <craig.topper@intel.com>   2019-05-22 20:04:55 +0000
commit     9816d557768cfdb3b9339269f0295b7f4356883d (patch)
tree       8a16eef4f85925a4bc139c9468e4463ca5d4cf09 /llvm/lib
parent     13bf9892dc2a37a4c4a0fcce8cb79ed8798a236f (diff)
download   bcm5719-llvm-9816d557768cfdb3b9339269f0295b7f4356883d.tar.gz
           bcm5719-llvm-9816d557768cfdb3b9339269f0295b7f4356883d.zip
[X86][InstCombine] Remove InstCombine code that turns X86 round intrinsics into llvm.ceil/floor. Remove some isel patterns that existed because that was happening.
We were turning roundss/sd/ps/pd intrinsics with immediates of 1 or 2 into llvm.floor/ceil. The llvm.ceil/floor intrinsics are supposed to correspond to the libm functions. For the libm functions we need to disable the precision exception, so the llvm.floor/ceil intrinsics should always map to encodings 0x9 and 0xA.

We had a mix of isel patterns where some used 0x9 and 0xA and others used 0x1 and 0x2. We need to be consistent and always use 0x9 and 0xA. Since we have no way in isel of knowing where the llvm.ceil/floor came from, we can't map X86-specific intrinsics with encodings 1 or 2 to it. We could map 0x9 and 0xA to llvm.ceil/floor instead, but I'd really like to see a use case and optimization advantage first.

I've left the backend test cases to show the blend we now emit without the extra isel patterns, but I've removed the InstCombine tests completely.

llvm-svn: 361425
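For readers unfamiliar with the ROUNDSS/ROUNDPS immediate, whose encoding is the crux of the change: bits 1:0 select the rounding mode, bit 2 defers to MXCSR.RC instead, and bit 3 suppresses the precision (inexact) exception. The following minimal C sketch, an illustration using the standard SSE4.1 intrinsics rather than code from this commit, shows why immediate 0x1 is not a faithful lowering of llvm.floor while 0x9 is:

#include <smmintrin.h> // SSE4.1 intrinsics

// ROUNDPS/ROUNDSS 8-bit immediate layout:
//   bits 1:0  rounding mode (00 nearest, 01 toward -inf, 10 toward +inf, 11 truncate)
//   bit  2    if set, use MXCSR.RC and ignore bits 1:0
//   bit  3    if set, suppress the precision (inexact) exception

// imm 0x1: rounds toward -inf but may raise the inexact exception, so it
// does not match libm floorf() and cannot be treated as llvm.floor.
static __m128 round_down_raising(__m128 v) {
  return _mm_round_ps(v, _MM_FROUND_TO_NEG_INF);                     // 0x1
}

// imm 0x9: rounds toward -inf with the inexact exception suppressed,
// which is the behavior llvm.floor has to guarantee.
static __m128 round_down_quiet(__m128 v) {
  return _mm_round_ps(v, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); // 0x9
}

Ceil is analogous: 0x2 may raise the exception, while 0xA (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) is the quiet form that llvm.ceil requires.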
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td                   46
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td                      26
-rw-r--r--  llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp   122
3 files changed, 0 insertions, 194 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 73abd964aa1..7c7c27340cd 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -9392,32 +9392,6 @@ defm : avx512_masked_scalar<fsqrt, "SQRTSDZ", X86Movsd,
(v1i1 (scalar_to_vector (i8 (trunc (i32 GR32:$mask))))), v2f64x_info,
fp64imm0, (COPY_TO_REGCLASS $mask, VK1WM), HasAVX512>;
-multiclass avx512_masked_scalar_imm<SDNode OpNode, string OpcPrefix, SDNode Move,
- X86VectorVTInfo _, PatLeaf ZeroFP,
- bits<8> ImmV, Predicate BasePredicate> {
- let Predicates = [BasePredicate] in {
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
- (OpNode (extractelt _.VT:$src2, (iPTR 0))),
- (extractelt _.VT:$dst, (iPTR 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Intk)
- _.VT:$dst, VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
-
- def : Pat<(Move _.VT:$src1, (scalar_to_vector (X86selects VK1WM:$mask,
- (OpNode (extractelt _.VT:$src2, (iPTR 0))), ZeroFP))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Intkz)
- VK1WM:$mask, _.VT:$src1, _.VT:$src2, (i32 ImmV))>;
- }
-}
-
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESS", X86Movss,
- v4f32x_info, fp32imm0, 0x09, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESS", X86Movss,
- v4f32x_info, fp32imm0, 0x0A, HasAVX512>;
-defm : avx512_masked_scalar_imm<ffloor, "RNDSCALESD", X86Movsd,
- v2f64x_info, fp64imm0, 0x09, HasAVX512>;
-defm : avx512_masked_scalar_imm<fceil, "RNDSCALESD", X86Movsd,
- v2f64x_info, fp64imm0, 0x0A, HasAVX512>;
-
//-------------------------------------------------
// Integer truncate and extend operations
@@ -12293,26 +12267,6 @@ multiclass AVX512_scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix,
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32x_info>;
defm : AVX512_scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64x_info>;
-multiclass AVX512_scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix,
- SDNode Move, X86VectorVTInfo _,
- bits<8> ImmV> {
- let Predicates = [HasAVX512] in {
- def : Pat<(_.VT (Move _.VT:$dst,
- (scalar_to_vector (OpNode (extractelt _.VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#Zr_Int) _.VT:$dst, _.VT:$src,
- (i32 ImmV))>;
- }
-}
-
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESS", X86Movss,
- v4f32x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESS", X86Movss,
- v4f32x_info, 0x02>;
-defm : AVX512_scalar_unary_math_imm_patterns<ffloor, "RNDSCALESD", X86Movsd,
- v2f64x_info, 0x01>;
-defm : AVX512_scalar_unary_math_imm_patterns<fceil, "RNDSCALESD", X86Movsd,
- v2f64x_info, 0x02>;
-
//===----------------------------------------------------------------------===//
// AES instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 737296d9714..18d9af8bdcd 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3099,23 +3099,6 @@ multiclass scalar_unary_math_patterns<SDNode OpNode, string OpcPrefix, SDNode Mo
}
}
-multiclass scalar_unary_math_imm_patterns<SDNode OpNode, string OpcPrefix, SDNode Move,
- ValueType VT, bits<8> ImmV,
- Predicate BasePredicate> {
- let Predicates = [BasePredicate] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>(OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
- }
-
- // Repeat for AVX versions of the instructions.
- let Predicates = [UseAVX] in {
- def : Pat<(VT (Move VT:$dst, (scalar_to_vector
- (OpNode (extractelt VT:$src, 0))))),
- (!cast<Instruction>("V"#OpcPrefix#r_Int) VT:$dst, VT:$src, (i32 ImmV))>;
- }
-}
-
defm : scalar_unary_math_patterns<fsqrt, "SQRTSS", X86Movss, v4f32, UseSSE1>;
defm : scalar_unary_math_patterns<fsqrt, "SQRTSD", X86Movsd, v2f64, UseSSE2>;
@@ -5984,15 +5967,6 @@ let Predicates = [UseSSE41] in {
(ROUNDPDm addr:$src, (i32 0xB))>;
}
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSS", X86Movss,
- v4f32, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSS", X86Movss,
- v4f32, 0x02, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<ffloor, "ROUNDSD", X86Movsd,
- v2f64, 0x01, UseSSE41>;
-defm : scalar_unary_math_imm_patterns<fceil, "ROUNDSD", X86Movsd,
- v2f64, 0x02, UseSSE41>;
-
//===----------------------------------------------------------------------===//
// SSE4.1 - Packed Bit Test
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 2013de065a0..8d022617d85 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -607,105 +607,6 @@ static Value *simplifyX86pack(IntrinsicInst &II,
return Builder.CreateTrunc(Shuffle, ResTy);
}
-// Replace X86-specific intrinsics with generic floor-ceil where applicable.
-static Value *simplifyX86round(IntrinsicInst &II,
- InstCombiner::BuilderTy &Builder) {
- ConstantInt *Arg = nullptr;
- Intrinsic::ID IntrinsicID = II.getIntrinsicID();
-
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(2));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(1));
- if (!Arg)
- return nullptr;
- unsigned RoundControl = Arg->getZExtValue();
-
- Arg = nullptr;
- unsigned SAE = 0;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(4));
- else if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd)
- Arg = dyn_cast<ConstantInt>(II.getArgOperand(5));
- else
- SAE = 4;
- if (!SAE) {
- if (!Arg)
- return nullptr;
- SAE = Arg->getZExtValue();
- }
-
- if (SAE != 4 || (RoundControl != 2 /*ceil*/ && RoundControl != 1 /*floor*/))
- return nullptr;
-
- Value *Src, *Dst, *Mask;
- bool IsScalar = false;
- if (IntrinsicID == Intrinsic::x86_sse41_round_ss ||
- IntrinsicID == Intrinsic::x86_sse41_round_sd ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- IsScalar = true;
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Mask = II.getArgOperand(3);
- Value *Zero = Constant::getNullValue(Mask->getType());
- Mask = Builder.CreateAnd(Mask, 1);
- Mask = Builder.CreateICmp(ICmpInst::ICMP_NE, Mask, Zero);
- Dst = II.getArgOperand(2);
- } else
- Dst = II.getArgOperand(0);
- Src = Builder.CreateExtractElement(II.getArgOperand(1), (uint64_t)0);
- } else {
- Src = II.getArgOperand(0);
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ps_512 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_128 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_256 ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_pd_512) {
- Dst = II.getArgOperand(2);
- Mask = II.getArgOperand(3);
- } else {
- Dst = Src;
- Mask = ConstantInt::getAllOnesValue(
- Builder.getIntNTy(Src->getType()->getVectorNumElements()));
- }
- }
-
- Intrinsic::ID ID = (RoundControl == 2) ? Intrinsic::ceil : Intrinsic::floor;
- Value *Res = Builder.CreateUnaryIntrinsic(ID, Src, &II);
- if (!IsScalar) {
- if (auto *C = dyn_cast<Constant>(Mask))
- if (C->isAllOnesValue())
- return Res;
- auto *MaskTy = VectorType::get(
- Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
- Mask = Builder.CreateBitCast(Mask, MaskTy);
- unsigned Width = Src->getType()->getVectorNumElements();
- if (MaskTy->getVectorNumElements() > Width) {
- uint32_t Indices[4];
- for (unsigned i = 0; i != Width; ++i)
- Indices[i] = i;
- Mask = Builder.CreateShuffleVector(Mask, Mask,
- makeArrayRef(Indices, Width));
- }
- return Builder.CreateSelect(Mask, Res, Dst);
- }
- if (IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_ss ||
- IntrinsicID == Intrinsic::x86_avx512_mask_rndscale_sd) {
- Dst = Builder.CreateExtractElement(Dst, (uint64_t)0);
- Res = Builder.CreateSelect(Mask, Res, Dst);
- Dst = II.getArgOperand(0);
- }
- return Builder.CreateInsertElement(Dst, Res, (uint64_t)0);
-}
-
static Value *simplifyX86movmsk(const IntrinsicInst &II,
InstCombiner::BuilderTy &Builder) {
Value *Arg = II.getArgOperand(0);
@@ -2603,22 +2504,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- case Intrinsic::x86_sse41_round_ps:
- case Intrinsic::x86_sse41_round_pd:
- case Intrinsic::x86_avx_round_ps_256:
- case Intrinsic::x86_avx_round_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_128:
- case Intrinsic::x86_avx512_mask_rndscale_ps_256:
- case Intrinsic::x86_avx512_mask_rndscale_ps_512:
- case Intrinsic::x86_avx512_mask_rndscale_pd_128:
- case Intrinsic::x86_avx512_mask_rndscale_pd_256:
- case Intrinsic::x86_avx512_mask_rndscale_pd_512:
- case Intrinsic::x86_avx512_mask_rndscale_ss:
- case Intrinsic::x86_avx512_mask_rndscale_sd:
- if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
-
case Intrinsic::x86_mmx_pmovmskb:
case Intrinsic::x86_sse_movmsk_ps:
case Intrinsic::x86_sse2_movmsk_pd:
@@ -2812,13 +2697,6 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
}
break;
- case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd: {
- if (Value *V = simplifyX86round(*II, Builder))
- return replaceInstUsesWith(*II, V);
- break;
- }
-
// Constant fold ashr( <A x Bi>, Ci ).
// Constant fold lshr( <A x Bi>, Ci ).
// Constant fold shl( <A x Bi>, Ci ).