[X86][InstCombine] Fix SimplifyDemandedVectorElts to handle frcz scalar intrinsics correctly.

Only the lower bits of the input element are used. And only the lower element can be undef since the upper bits are zeroed. Have InstCombineCalls call SimplifyDemandedVectorElts for these intrinsics to reuse this support. llvm-svn: 289523
author: Craig Topper <craig.topper@gmail.com> 2016-12-13 07:45:45 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-12-13 07:45:45 +0000
commit: ac75bca1eb169c2d13b88e33440dec3df589f964 (patch)
tree: 3cc4f09b130afc39ef3950698e923dba4a1b0f2f
parent: b8ea75a010dac5b14f8d23cb9f7bce0967dee771 (diff)
download: bcm5719-llvm-ac75bca1eb169c2d13b88e33440dec3df589f964.tar.gz
bcm5719-llvm-ac75bca1eb169c2d13b88e33440dec3df589f964.zip
3 files changed, 40 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 73d685ef10a..62e2de7192f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1837,6 +1837,19 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
+  case Intrinsic::x86_xop_vfrcz_ss:
+  case Intrinsic::x86_xop_vfrcz_sd: {
+   unsigned VWidth = II->getType()->getVectorNumElements();
+   APInt UndefElts(VWidth, 0);
+   APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
+   if (Value *V = SimplifyDemandedVectorElts(II, AllOnesEltMask, UndefElts)) {
+     if (V != II)
+       return replaceInstUsesWith(*II, V);
+     return II;
+   }
+   break;
+  }
+
   // Constant fold ashr( <A x Bi>, Ci ).
   // Constant fold lshr( <A x Bi>, Ci ).
   // Constant fold shl( <A x Bi>, Ci ).
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index abda6a366e8..979163c6028 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1264,9 +1264,14 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
       if (!DemandedElts[0])
         return ConstantAggregateZero::get(II->getType());
 
+      // Only the lower element is used.
+      DemandedElts = 1;
       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                         UndefElts, Depth + 1);
       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+
+      // Only the lower element is undefined. The high elements are zero.
+      UndefElts = UndefElts[0];
       break;
 
     // Unary scalar-as-vector operations that work column-wise.
diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/x86-xop.ll
index d987c757da5..03a3f921abb 100644
--- a/llvm/test/Transforms/InstCombine/x86-xop.ll
+++ b/llvm/test/Transforms/InstCombine/x86-xop.ll
@@ -1,6 +1,16 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 
+define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
+; CHECK-LABEL: @test_vfrcz_sd(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a)
+; CHECK-NEXT:    ret <2 x double> [[TMP1]]
+;
+  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
+  %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
+  ret <2 x double> %2
+}
+
 define double @test_vfrcz_sd_0(double %a) {
 ; CHECK-LABEL: @test_vfrcz_sd_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
@@ -26,6 +36,18 @@ define double @test_vfrcz_sd_1(double %a) {
   ret double %4
 }
 
+define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
+; CHECK-LABEL: @test_vfrcz_ss(
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a)
+; CHECK-NEXT:    ret <4 x float> [[TMP1]]
+;
+  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+  %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
+  ret <4 x float> %4
+}
+
 define float @test_vfrcz_ss_0(float %a) {
 ; CHECK-LABEL: @test_vfrcz_ss_0(
 ; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
author	Craig Topper <craig.topper@gmail.com>	2016-12-13 07:45:45 +0000
committer	Craig Topper <craig.topper@gmail.com>	2016-12-13 07:45:45 +0000
commit	ac75bca1eb169c2d13b88e33440dec3df589f964 (patch)
tree	3cc4f09b130afc39ef3950698e923dba4a1b0f2f
parent	b8ea75a010dac5b14f8d23cb9f7bce0967dee771 (diff)
download	bcm5719-llvm-ac75bca1eb169c2d13b88e33440dec3df589f964.tar.gz bcm5719-llvm-ac75bca1eb169c2d13b88e33440dec3df589f964.zip