-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  54
-rw-r--r--  llvm/test/CodeGen/X86/vselect.ll           4
2 files changed, 23 insertions, 35 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 2a63c9a8545..9d0d7f25afd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -31910,31 +31910,29 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
if (VT.is512BitVector())
return SDValue();
- assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
- APInt DemandedMask(APInt::getSignMask(BitWidth));
- KnownBits Known;
- TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
- !DCI.isBeforeLegalizeOps());
- if (TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO)) {
- // If we changed the computation somewhere in the DAG, this change will
- // affect all users of Cond. Make sure it is fine and update all the nodes
- // so that we do not use the generic VSELECT anymore. Otherwise, we may
- // perform wrong optimizations as we messed with the actual expectation
- // for the vector boolean values.
- if (Cond != TLO.Old) {
- // Check all uses of the condition operand to check whether it will be
- // consumed by non-BLEND instructions. Those may require that all bits
- // are set properly.
- for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
- UI != UE; ++UI) {
- // TODO: Add other opcodes eventually lowered into BLEND.
- if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0)
- return SDValue();
- }
+ bool CanShrinkCond = true;
+ for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
+ UI != UE; ++UI) {
+ // TODO: Add other opcodes eventually lowered into BLEND.
+ if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0) {
+ CanShrinkCond = false;
+ break;
+ }
+ }
- // Update all users of the condition before committing the change, so
- // that the VSELECT optimizations that expect the correct vector boolean
- // value will not be triggered.
+ if (CanShrinkCond) {
+ assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
+ APInt DemandedMask(APInt::getSignMask(BitWidth));
+ KnownBits Known;
+ TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
+ !DCI.isBeforeLegalizeOps());
+ if (TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0,
+ /*AssumeSingleUse*/true)) {
+ // If we changed the computation somewhere in the DAG, this change will
+ // affect all users of Cond. Update all the nodes so that we do not use
+ // the generic VSELECT anymore. Otherwise, we may perform wrong
+ // optimizations as we messed with the actual expectation for the vector
+ // boolean values.
for (SDNode *U : Cond->uses()) {
SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
U->getValueType(0), Cond, U->getOperand(1),
@@ -31942,14 +31940,8 @@ static SDValue combineSelect(SDNode *N, SelectionDAG &DAG,
DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
}
DCI.CommitTargetLoweringOpt(TLO);
- return SDValue();
+ return SDValue(N, 0);
}
- // Only Cond (rather than other nodes in the computation chain) was
- // changed. Change the condition just for N to keep the opportunity to
- // optimize all other users their own way.
- SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, DL, VT, TLO.New, LHS, RHS);
- DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), SB);
- return SDValue();
}
}
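
For readers following the interleaved hunks above, the following is roughly how the rewritten block reads once the patch is applied, with the enclosing combineSelect() context omitted. It is a reading aid reconstructed from the hunks, not authoritative source; in particular, the final SHRUNKBLEND operand (U->getOperand(2)) sits on the pre-existing context line that falls between the two hunks and is assumed here.

  bool CanShrinkCond = true;
  for (SDNode::use_iterator UI = Cond->use_begin(), UE = Cond->use_end();
       UI != UE; ++UI) {
    // Every use of Cond must be the mask operand of a VSELECT; the rewrite
    // below only guarantees the sign bit of each mask element.
    // TODO: Add other opcodes eventually lowered into BLEND.
    if (UI->getOpcode() != ISD::VSELECT || UI.getOperandNo() != 0) {
      CanShrinkCond = false;
      break;
    }
  }

  if (CanShrinkCond) {
    assert(BitWidth >= 8 && BitWidth <= 64 && "Invalid mask size");
    APInt DemandedMask(APInt::getSignMask(BitWidth));
    KnownBits Known;
    TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
                                          !DCI.isBeforeLegalizeOps());
    // Only the sign bit of each condition element is demanded; AssumeSingleUse
    // lets SimplifyDemandedBits rewrite Cond even though it has several users,
    // because every one of them was just verified to be a blend mask.
    if (TLI.SimplifyDemandedBits(Cond, DemandedMask, Known, TLO, 0,
                                 /*AssumeSingleUse*/ true)) {
      // Switch every user over to SHRUNKBLEND, which promises to read only
      // the sign bits, so the narrowed Cond is safe for all of them.
      for (SDNode *U : Cond->uses()) {
        SDValue SB = DAG.getNode(X86ISD::SHRUNKBLEND, SDLoc(U),
                                 U->getValueType(0), Cond, U->getOperand(1),
                                 U->getOperand(2));
        DAG.ReplaceAllUsesOfValueWith(SDValue(U, 0), SB);
      }
      DCI.CommitTargetLoweringOpt(TLO);
      return SDValue(N, 0);
    }
  }
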
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
index 2370006329a..899d46bddc1 100644
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -503,8 +503,6 @@ define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b,
; SSE41-LABEL: shrunkblend_2uses:
; SSE41: # %bb.0:
; SSE41-NEXT: psllq $63, %xmm0
-; SSE41-NEXT: psrad $31, %xmm0
-; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2
; SSE41-NEXT: blendvpd %xmm0, %xmm3, %xmm4
; SSE41-NEXT: paddq %xmm2, %xmm4
@@ -514,8 +512,6 @@ define <2 x i64> @shrunkblend_2uses(<2 x i1> %cond, <2 x i64> %a, <2 x i64> %b,
; AVX-LABEL: shrunkblend_2uses:
; AVX: # %bb.0:
; AVX-NEXT: vpsllq $63, %xmm0, %xmm0
-; AVX-NEXT: vpxor %xmm5, %xmm5, %xmm5
-; AVX-NEXT: vpcmpgtq %xmm0, %xmm5, %xmm0
; AVX-NEXT: vblendvpd %xmm0, %xmm1, %xmm2, %xmm1
; AVX-NEXT: vblendvpd %xmm0, %xmm3, %xmm4, %xmm0
; AVX-NEXT: vpaddq %xmm0, %xmm1, %xmm0
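
The deleted check lines show the payoff: the "psrad $31 + pshufd" (SSE41) and "vpxor + vpcmpgtq" (AVX) pairs that used to re-splat the sign bit across each 64-bit lane after the psllq $63 are gone. Since (V)BLENDVPD inspects only bit 63 of each mask element, the shifted mask alone is already sufficient, which is exactly what the sign-bit-only demanded mask in the combine expresses. Below is a hypothetical, standalone scalar model of one BLENDVPD lane (not part of the patch; names are illustrative) showing why a mask with just the sign bit set selects the same lane as a fully splatted all-ones mask:

  #include <cstdint>
  #include <cstdio>

  // One lane of (V)BLENDVPD: the hardware looks only at the sign bit (bit 63)
  // of the mask element when choosing between the two source lanes.
  static uint64_t blendvpd_lane(uint64_t Mask, uint64_t IfClear, uint64_t IfSet) {
    return (Mask >> 63) ? IfSet : IfClear;
  }

  int main() {
    const uint64_t A = 0x1111, B = 0x2222;

    // Mask produced by "psllq $63" from an i1 lane holding 1: sign bit only.
    const uint64_t SignOnly = uint64_t{1} << 63;

    // Mask the removed splatting sequences used to build: the sign bit
    // replicated across the whole 64-bit lane.
    const uint64_t FullySplat = ~uint64_t{0};

    // Both masks select the same lane, so the splatting instructions were
    // redundant once the blend is known to read only the sign bit.
    std::printf("%llx %llx\n",
                (unsigned long long)blendvpd_lane(SignOnly, A, B),
                (unsigned long long)blendvpd_lane(FullySplat, A, B));
    return 0;
  }
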