diff options
| author | Craig Topper <craig.topper@gmail.com> | 2016-12-13 07:45:45 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-12-13 07:45:45 +0000 |
| commit | ac75bca1eb169c2d13b88e33440dec3df589f964 (patch) | |
| tree | 3cc4f09b130afc39ef3950698e923dba4a1b0f2f | |
| parent | b8ea75a010dac5b14f8d23cb9f7bce0967dee771 (diff) | |
| download | bcm5719-llvm-ac75bca1eb169c2d13b88e33440dec3df589f964.tar.gz bcm5719-llvm-ac75bca1eb169c2d13b88e33440dec3df589f964.zip | |
[X86][InstCombine] Fix SimplifyDemandedVectorElts to handle frcz scalar intrinsics correctly.
Only the lower bits of the input element are used. And only the lower element can be undef since the upper bits are zeroed.
Have InstCombineCalls call SimplifyDemandedVectorElts for these intrinsics to reuse this support.
llvm-svn: 289523
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 13 | ||||
| -rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 5 | ||||
| -rw-r--r-- | llvm/test/Transforms/InstCombine/x86-xop.ll | 22 |
3 files changed, 40 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 73d685ef10a..62e2de7192f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1837,6 +1837,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; } + case Intrinsic::x86_xop_vfrcz_ss: + case Intrinsic::x86_xop_vfrcz_sd: { + unsigned VWidth = II->getType()->getVectorNumElements(); + APInt UndefElts(VWidth, 0); + APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); + if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) { + if (V != II) + return replaceInstUsesWith(*II, V); + return II; + } + break; + } + // Constant fold ashr( <A x Bi>, Ci ). // Constant fold lshr( <A x Bi>, Ci ). // Constant fold shl( <A x Bi>, Ci ). diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index abda6a366e8..979163c6028 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1264,9 +1264,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, if (!DemandedElts[0]) return ConstantAggregateZero::get(II->getType()); + // Only the lower element is used. + DemandedElts = 1; TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + + // Only the lower element is undefined. The high elements are zero. + UndefElts = UndefElts[0]; break; // Unary scalar-as-vector operations that work column-wise. diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/x86-xop.ll index d987c757da5..03a3f921abb 100644 --- a/llvm/test/Transforms/InstCombine/x86-xop.ll +++ b/llvm/test/Transforms/InstCombine/x86-xop.ll @@ -1,6 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -instcombine -S | FileCheck %s +define <2 x double> @test_vfrcz_sd(<2 x double> %a) { +; CHECK-LABEL: @test_vfrcz_sd( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a) +; CHECK-NEXT: ret <2 x double> [[TMP1]] +; + %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1 + %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1) + ret <2 x double> %2 +} + define double @test_vfrcz_sd_0(double %a) { ; CHECK-LABEL: @test_vfrcz_sd_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0 @@ -26,6 +36,18 @@ define double @test_vfrcz_sd_1(double %a) { ret double %4 } +define <4 x float> @test_vfrcz_ss(<4 x float> %a) { +; CHECK-LABEL: @test_vfrcz_ss( +; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a) +; CHECK-NEXT: ret <4 x float> [[TMP1]] +; + %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1 + %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2 + %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3 + %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3) + ret <4 x float> %4 +} + define float @test_vfrcz_ss_0(float %a) { ; CHECK-LABEL: @test_vfrcz_ss_0( ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0 |

