summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-01 14:46:03 +0000
committer Simon Pilgrim <llvm-dev@redking.me.uk> 2019-08-01 14:46:03 +0000
commit 33f5f863b508c227afe563affaee1253fd975fc0 (patch)
tree 96268755eaf9355ed990d8dd822b319d56680f39
parent 3fa3831830b1e848a6359a1e59184e3e2903f5f8 (diff)
download bcm5719-llvm-33f5f863b508c227afe563affaee1253fd975fc0.tar.gz
download bcm5719-llvm-33f5f863b508c227afe563affaee1253fd975fc0.zip
[X86][SSE] SimplifyMultipleUseDemandedBits - Add PEXTR/PINSR B+W handling
This adds SimplifyMultipleUseDemandedBitsForTargetNode X86 support and uses it to allow us to peek through vector insertions to avoid dependencies on entire insertion chains.

llvm-svn: 367570
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 27
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h 4
-rw-r--r-- llvm/test/CodeGen/X86/promote-vec3.ll 16
3 files changed, 39 insertions, 8 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6fe906de8cf..c2be03f8d90 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -34577,6 +34577,11 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
KnownVec, TLO, Depth + 1))
return true;
+ if (SDValue V = SimplifyMultipleUseDemandedBits(
+ Vec, DemandedVecBits, DemandedVecElts, TLO.DAG, Depth + 1))
+ return TLO.CombineTo(
+ Op, TLO.DAG.getNode(Opc, SDLoc(Op), VT, V, Op.getOperand(1)));
+
Known = KnownVec.zext(BitWidth, true);
return false;
}
@@ -34678,6 +34683,28 @@ bool X86TargetLowering::SimplifyDemandedBitsForTargetNode(
Op, OriginalDemandedBits, OriginalDemandedElts, Known, TLO, Depth);
}
+SDValue X86TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const {
+ unsigned Opc = Op.getOpcode();
+ switch (Opc) {
+ case X86ISD::PINSRB:
+ case X86ISD::PINSRW: {
+ // If we don't demand the inserted element, return the base vector.
+ SDValue Vec = Op.getOperand(0);
+ auto *CIdx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
+ MVT VecVT = Vec.getSimpleValueType();
+ if (CIdx && CIdx->getAPIntValue().ult(VecVT.getVectorNumElements()) &&
+ !DemandedElts[CIdx->getZExtValue()])
+ return Vec;
+ break;
+ }
+ }
+
+ return TargetLowering::SimplifyMultipleUseDemandedBitsForTargetNode(
+ Op, DemandedBits, DemandedElts, DAG, Depth);
+}
+
/// Check if a vector extract from a target-specific shuffle of a load can be
/// folded into a single element load.
/// Similar handling for VECTOR_SHUFFLE is performed by DAGCombiner, but
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index dd11cc4497a..625b42d3515 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -920,6 +920,10 @@ namespace llvm {
TargetLoweringOpt &TLO,
unsigned Depth) const override;
+ SDValue SimplifyMultipleUseDemandedBitsForTargetNode(
+ SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
+ SelectionDAG &DAG, unsigned Depth) const override;
+
const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const override;
SDValue unwrapAddress(SDValue N) const override;
diff --git a/llvm/test/CodeGen/X86/promote-vec3.ll b/llvm/test/CodeGen/X86/promote-vec3.ll
index cb4e99b3b18..b66570e8aaa 100644
--- a/llvm/test/CodeGen/X86/promote-vec3.ll
+++ b/llvm/test/CodeGen/X86/promote-vec3.ll
@@ -8,14 +8,14 @@
define <3 x i16> @zext_i8(<3 x i8>) {
; SSE3-LABEL: zext_i8:
; SSE3: # %bb.0:
-; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %edx
-; SSE3-NEXT: movd %edx, %xmm0
-; SSE3-NEXT: pinsrw $1, %ecx, %xmm0
-; SSE3-NEXT: pinsrw $2, %eax, %xmm0
+; SSE3-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; SSE3-NEXT: movd %eax, %xmm0
; SSE3-NEXT: pextrw $0, %xmm0, %eax
+; SSE3-NEXT: pinsrw $1, %edx, %xmm0
; SSE3-NEXT: pextrw $1, %xmm0, %edx
+; SSE3-NEXT: pinsrw $2, %ecx, %xmm0
; SSE3-NEXT: pextrw $2, %xmm0, %ecx
; SSE3-NEXT: # kill: def $ax killed $ax killed $eax
; SSE3-NEXT: # kill: def $dx killed $dx killed $edx
@@ -27,9 +27,9 @@ define <3 x i16> @zext_i8(<3 x i8>) {
; SSE41-NEXT: pxor %xmm0, %xmm0
; SSE41-NEXT: pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: pinsrb $4, {{[0-9]+}}(%esp), %xmm0
+; SSE41-NEXT: pextrw $2, %xmm0, %edx
; SSE41-NEXT: pinsrb $8, {{[0-9]+}}(%esp), %xmm0
; SSE41-NEXT: movd %xmm0, %eax
-; SSE41-NEXT: pextrw $2, %xmm0, %edx
; SSE41-NEXT: pextrw $4, %xmm0, %ecx
; SSE41-NEXT: # kill: def $ax killed $ax killed $eax
; SSE41-NEXT: # kill: def $dx killed $dx killed $edx
@@ -41,10 +41,10 @@ define <3 x i16> @zext_i8(<3 x i8>) {
; AVX-32-NEXT: vpxor %xmm0, %xmm0, %xmm0
; AVX-32-NEXT: vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
; AVX-32-NEXT: vpinsrb $4, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm0
-; AVX-32-NEXT: vmovd %xmm0, %eax
+; AVX-32-NEXT: vpinsrb $8, {{[0-9]+}}(%esp), %xmm0, %xmm1
; AVX-32-NEXT: vpextrw $2, %xmm0, %edx
-; AVX-32-NEXT: vpextrw $4, %xmm0, %ecx
+; AVX-32-NEXT: vmovd %xmm1, %eax
+; AVX-32-NEXT: vpextrw $4, %xmm1, %ecx
; AVX-32-NEXT: # kill: def $ax killed $ax killed $eax
; AVX-32-NEXT: # kill: def $dx killed $dx killed $edx
; AVX-32-NEXT: # kill: def $cx killed $cx killed $ecx
OpenPOWER on IntegriCloud