[InstCombine][XOP] The instructions for the scalar frcz intrinsics are defined to put 0 in the upper bits, not pass the upper bits through like other scalar intrinsics. So when element 0 is not demanded, we should return a zero vector instead of the first operand.

llvm-svn: 289411
author: Craig Topper <craig.topper@gmail.com> 2016-12-11 22:32:38 +0000
committer: Craig Topper <craig.topper@gmail.com> 2016-12-11 22:32:38 +0000
commit: 7fc6d34ed1fce99505713c5b09b3701aaac9d60e (patch)
tree: e6b521e82fdf3fec5b26e8adf13a8dbcb9c78c03 /llvm
parent: 3f530938f6a1b40437abedf7aa89e0c7e82b827b (diff)
download: bcm5719-llvm-7fc6d34ed1fce99505713c5b09b3701aaac9d60e.tar.gz
bcm5719-llvm-7fc6d34ed1fce99505713c5b09b3701aaac9d60e.zip
2 files changed, 16 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 839eff7cb75..abda6a366e8 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1255,13 +1255,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
     switch (II->getIntrinsicID()) {
     default: break;
 
+    case Intrinsic::x86_xop_vfrcz_ss:
+    case Intrinsic::x86_xop_vfrcz_sd:
+      // The instructions for these intrinsics are speced to zero upper bits not
+      // pass them through like other scalar intrinsics. So we shouldn't just
+      // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics.
+      // Instead we should return a zero vector.
+      if (!DemandedElts[0])
+        return ConstantAggregateZero::get(II->getType());
+
+      TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+                                        UndefElts, Depth + 1);
+      if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
+      break;
+
     // Unary scalar-as-vector operations that work column-wise.
     case Intrinsic::x86_sse_rcp_ss:
     case Intrinsic::x86_sse_rsqrt_ss:
     case Intrinsic::x86_sse_sqrt_ss:
     case Intrinsic::x86_sse2_sqrt_sd:
-    case Intrinsic::x86_xop_vfrcz_ss:
-    case Intrinsic::x86_xop_vfrcz_sd:
       TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
                                         UndefElts, Depth + 1);
       if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/x86-xop.ll
index 015d511ac4d..d987c757da5 100644
--- a/llvm/test/Transforms/InstCombine/x86-xop.ll
+++ b/llvm/test/Transforms/InstCombine/x86-xop.ll
@@ -17,7 +17,7 @@ define double @test_vfrcz_sd_0(double %a) {
 
 define double @test_vfrcz_sd_1(double %a) {
 ; CHECK-LABEL: @test_vfrcz_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
+; CHECK-NEXT:    ret double 0.000000e+00
 ;
   %1 = insertelement <2 x double> undef, double %a, i32 0
   %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
@@ -44,7 +44,7 @@ define float @test_vfrcz_ss_0(float %a) {
 
 define float @test_vfrcz_ss_3(float %a) {
 ; CHECK-LABEL: @test_vfrcz_ss_3(
-; CHECK-NEXT:    ret float 3.000000e+00
+; CHECK-NEXT:    ret float 0.000000e+00
 ;
   %1 = insertelement <4 x float> undef, float %a, i32 0
   %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
author	Craig Topper <craig.topper@gmail.com>	2016-12-11 22:32:38 +0000
committer	Craig Topper <craig.topper@gmail.com>	2016-12-11 22:32:38 +0000
commit	7fc6d34ed1fce99505713c5b09b3701aaac9d60e (patch)
tree	e6b521e82fdf3fec5b26e8adf13a8dbcb9c78c03 /llvm
parent	3f530938f6a1b40437abedf7aa89e0c7e82b827b (diff)
download	bcm5719-llvm-7fc6d34ed1fce99505713c5b09b3701aaac9d60e.tar.gz bcm5719-llvm-7fc6d34ed1fce99505713c5b09b3701aaac9d60e.zip