diff options
author | Craig Topper <craig.topper@gmail.com> | 2016-12-11 22:32:38 +0000 |
---|---|---|
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-11 22:32:38 +0000 |
commit | 7fc6d34ed1fce99505713c5b09b3701aaac9d60e (patch) | |
tree | e6b521e82fdf3fec5b26e8adf13a8dbcb9c78c03 /llvm | |
parent | 3f530938f6a1b40437abedf7aa89e0c7e82b827b (diff) | |
download | bcm5719-llvm-7fc6d34ed1fce99505713c5b09b3701aaac9d60e.tar.gz bcm5719-llvm-7fc6d34ed1fce99505713c5b09b3701aaac9d60e.zip |
[InstCombine][XOP] The instructions for the scalar frcz intrinsics are defined to put 0 in the upper bits, not pass the upper bits of the source operand through like other scalar intrinsics. So when element 0 is not demanded, we should return a zero vector instead of the first operand.
llvm-svn: 289411
Diffstat (limited to 'llvm')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 16 | ||||
-rw-r--r-- | llvm/test/Transforms/InstCombine/x86-xop.ll | 4 |
2 files changed, 16 insertions, 4 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 839eff7cb75..abda6a366e8 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -1255,13 +1255,25 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, switch (II->getIntrinsicID()) { default: break; + case Intrinsic::x86_xop_vfrcz_ss: + case Intrinsic::x86_xop_vfrcz_sd: + // The instructions for these intrinsics are speced to zero upper bits not + // pass them through like other scalar intrinsics. So we shouldn't just + // use Arg0 if DemandedElts[0] is clear like we do for other intrinsics. + // Instead we should return a zero vector. + if (!DemandedElts[0]) + return ConstantAggregateZero::get(II->getType()); + + TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, + UndefElts, Depth + 1); + if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } + break; + // Unary scalar-as-vector operations that work column-wise. 
case Intrinsic::x86_sse_rcp_ss: case Intrinsic::x86_sse_rsqrt_ss: case Intrinsic::x86_sse_sqrt_ss: case Intrinsic::x86_sse2_sqrt_sd: - case Intrinsic::x86_xop_vfrcz_ss: - case Intrinsic::x86_xop_vfrcz_sd: TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts, UndefElts, Depth + 1); if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; } diff --git a/llvm/test/Transforms/InstCombine/x86-xop.ll b/llvm/test/Transforms/InstCombine/x86-xop.ll index 015d511ac4d..d987c757da5 100644 --- a/llvm/test/Transforms/InstCombine/x86-xop.ll +++ b/llvm/test/Transforms/InstCombine/x86-xop.ll @@ -17,7 +17,7 @@ define double @test_vfrcz_sd_0(double %a) { define double @test_vfrcz_sd_1(double %a) { ; CHECK-LABEL: @test_vfrcz_sd_1( -; CHECK-NEXT: ret double 1.000000e+00 +; CHECK-NEXT: ret double 0.000000e+00 ; %1 = insertelement <2 x double> undef, double %a, i32 0 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1 @@ -44,7 +44,7 @@ define float @test_vfrcz_ss_0(float %a) { define float @test_vfrcz_ss_3(float %a) { ; CHECK-LABEL: @test_vfrcz_ss_3( -; CHECK-NEXT: ret float 3.000000e+00 +; CHECK-NEXT: ret float 0.000000e+00 ; %1 = insertelement <4 x float> undef, float %a, i32 0 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1 |