summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-12-14 03:17:30 +0000
committerCraig Topper <craig.topper@gmail.com>2016-12-14 03:17:30 +0000
commiteb6a20e79ed4a5ac8d5f61f7372bb633c7362378 (patch)
treef582a662bab3c3b210043da9047358b16053f51c /llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
parenta0372dec261d137973b29ed14376230c372b4290 (diff)
downloadbcm5719-llvm-eb6a20e79ed4a5ac8d5f61f7372bb633c7362378.tar.gz
bcm5719-llvm-eb6a20e79ed4a5ac8d5f61f7372bb633c7362378.zip
[X86][InstCombine] Teach SimplifyDemandedVectorElts to handle scalar round intrinsics more correctly.
Now we only pass bit 0 of the DemandedElts to optimize operand 1 as we recurse since the upper bits are unused. Similarly we clear bit 0 for optimizing operand 0. Also calculate UndefElts correctly. Simplify InstCombineCalls for these instrinics to just call SimplifyDemandedVectorElts for the call instrution to reuse this support. llvm-svn: 289629
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp30
1 files changed, 19 insertions, 11 deletions
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 3038fd58ad0..3fd18a3b24d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -1321,25 +1321,33 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts,
break;
}
- // Binary scalar-as-vector operations that work column-wise. A dest element
- // is a function of the corresponding input elements from the two inputs.
+ // Binary scalar-as-vector operations that work column-wise. The high
+ // elements come from operand 0 and the low element comes from operand 1.
case Intrinsic::x86_sse41_round_ss:
- case Intrinsic::x86_sse41_round_sd:
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts,
+ case Intrinsic::x86_sse41_round_sd: {
+ // Don't use the low element of operand 0.
+ APInt DemandedElts2 = DemandedElts;
+ DemandedElts2.clearBit(0);
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(0), DemandedElts2,
UndefElts, Depth + 1);
if (TmpV) { II->setArgOperand(0, TmpV); MadeChange = true; }
- TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
- UndefElts2, Depth + 1);
- if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
// If lowest element of a scalar op isn't used then use Arg0.
- if (DemandedElts.getLoBits(1) != 1)
+ if (!DemandedElts[0])
return II->getArgOperand(0);
- // Output elements are undefined if both are undefined. Consider things
- // like undef&0. The result is known zero, not undef.
- UndefElts &= UndefElts2;
+ // Only lower element is used for operand 1.
+ DemandedElts = 1;
+ TmpV = SimplifyDemandedVectorElts(II->getArgOperand(1), DemandedElts,
+ UndefElts2, Depth + 1);
+ if (TmpV) { II->setArgOperand(1, TmpV); MadeChange = true; }
+
+ // Take the high undef elements from operand 0 and take the lower element
+ // from operand 1.
+ UndefElts.clearBit(0);
+ UndefElts |= UndefElts2[0];
break;
+ }
case Intrinsic::x86_fma_vfmadd_ss:
case Intrinsic::x86_fma_vfmsub_ss:
OpenPOWER on IntegriCloud