| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-26 10:49:13 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2019-04-26 10:49:13 +0000 |
| commit | 5d6ef94c369a3ea804d8377f16ba8878160bd9a6 | |
| tree | c0e2d5896961dabe274f9d136051971f2b553007 (llvm/lib) | |
| parent | 5e161df9f8999c7570fdf9477d51d33a3e288f5a | |
[X86][SSE] Disable shouldFoldConstantShiftPairToMask for btver1/btver2 targets (PR40758)
As detailed in PR40758, Bobcat/Jaguar can perform vector immediate shifts on the same pipes as vector ANDs, with the same latency, so it doesn't make sense to replace a shl+lshr pair with a shift+and pair: the AND requires an additional constant mask, with the attendant constant-pool, load, and register-pressure costs.
Differential Revision: https://reviews.llvm.org/D61068
llvm-svn: 359293
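A rough sketch of the pattern involved (illustrative IR, not taken from the commit or its tests): with equal shift amounts the shift pair degenerates to a plain AND, so folding is always a win; with unequal amounts the fold still leaves one shift behind and merely trades the second shift for a constant mask, which is the case btver1/btver2 now keep as two immediate shifts.

```llvm
; Hypothetical v4i32 examples (function names are made up).

; Equal amounts: (x << 4) >> 4 just clears the top 4 bits of each lane,
; so it folds to a single AND with 0x0FFFFFFF -- profitable on every target.
define <4 x i32> @equal_amounts(<4 x i32> %x) {
  %s = shl <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %r = lshr <4 x i32> %s, <i32 4, i32 4, i32 4, i32 4>
  ret <4 x i32> %r
}

; Unequal amounts: (x >> 4) << 2 would fold to (x >> 2) & 0x3FFFFFFC, i.e.
; one shift *plus* an AND with a constant-pool mask; on btver1/btver2 the
; two immediate shifts are now kept instead.
define <4 x i32> @unequal_amounts(<4 x i32> %x) {
  %s = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %r = shl <4 x i32> %s, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %r
}
```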
Diffstat (limited to 'llvm/lib')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 3 |
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 8 |
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 13 |
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 4 |
4 files changed, 26 insertions, 2 deletions
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6d351cc57ac..e8bad0413b2 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6882,6 +6882,8 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
   // (and (srl x, (sub c1, c2), MASK)
   // Only fold this if the inner shift has no other uses -- if it does, folding
   // this will increase the total number of instructions.
+  // TODO - drop hasOneUse requirement if c1 == c2?
+  // TODO - support non-uniform vector shift amounts.
   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
       TLI.shouldFoldConstantShiftPairToMask(N, Level)) {
     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
@@ -7188,6 +7190,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
   }
 
   // fold (srl (shl x, c), c) -> (and x, cst2)
+  // TODO - (srl (shl x, c1), c2).
   if (N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 &&
       isConstantOrConstantVector(N1, /* NoOpaques */ true)) {
     SDLoc DL(N);
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index c054379acf7..fe23a2900d5 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -424,6 +424,11 @@ def FeatureFastHorizontalOps
       "Prefer horizontal vector math instructions (haddp, phsub, etc.) over "
       "normal vector instructions with shuffles", [FeatureSSE3]>;
 
+def FeatureFastVectorShiftMasks
+    : SubtargetFeature<
+        "fast-vector-shift-masks", "HasFastVectorShiftMasks", "true",
+        "Prefer a left/right vector logical shift pair over a shift+and pair">;
+
 // Merge branches using three-way conditional code.
 def FeatureMergeToThreeWayBranch : SubtargetFeature<"merge-to-threeway-branch",
                                        "ThreewayBranchProfitable", "true",
@@ -775,7 +780,8 @@ def ProcessorFeatures {
                                         FeaturePOPCNT,
                                         FeatureSlowSHLD,
                                         FeatureLAHFSAHF,
-                                        FeatureFast15ByteNOP];
+                                        FeatureFast15ByteNOP,
+                                        FeatureFastVectorShiftMasks];
   list<SubtargetFeature> BtVer1Features = BtVer1InheritableFeatures;
 
   // Jaguar
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index e91724c3f32..889a0111b87 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -5013,7 +5013,18 @@ bool X86TargetLowering::hasAndNot(SDValue Y) const {
 
 bool X86TargetLowering::shouldFoldConstantShiftPairToMask(
     const SDNode *N, CombineLevel Level) const {
-  // TODO - some targets prefer immediate vector shifts to shift+mask.
+  assert((N->getOpcode() == ISD::SHL &&
+          N->getOperand(0).getOpcode() == ISD::SRL) ||
+         (N->getOpcode() == ISD::SRL &&
+          N->getOperand(0).getOpcode() == ISD::SHL) &&
+         "Expected shift-shift mask");
+
+  if (Subtarget.hasFastVectorShiftMasks() && N->getValueType(0).isVector()) {
+    // Only fold if the shift values are equal - so it folds to AND.
+    // TODO - we should fold if either is non-uniform but we don't do the
+    // fold for non-splats yet.
+    return N->getOperand(1) == N->getOperand(0).getOperand(1);
+  }
   return TargetLoweringBase::shouldFoldConstantShiftPairToMask(N, Level);
 }
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index fe04f022070..0ff9d544d82 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -393,6 +393,9 @@ protected:
   /// Try harder to combine to horizontal vector ops if they are fast.
   bool HasFastHorizontalOps = false;
 
+  /// Prefer a left/right vector logical shifts pair over a shift+and pair.
+  bool HasFastVectorShiftMasks = false;
+
   /// Use a retpoline thunk rather than indirect calls to block speculative
   /// execution.
   bool UseRetpolineIndirectCalls = false;
@@ -644,6 +647,7 @@ public:
   bool hasFastSHLDRotate() const { return HasFastSHLDRotate; }
   bool hasFastBEXTR() const { return HasFastBEXTR; }
   bool hasFastHorizontalOps() const { return HasFastHorizontalOps; }
+  bool hasFastVectorShiftMasks() const { return HasFastVectorShiftMasks; }
   bool hasMacroFusion() const { return HasMacroFusion; }
   bool hasBranchFusion() const { return HasBranchFusion; }
   bool hasERMSB() const { return HasERMSB; }
```
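The new behavior can be exercised with `-mattr=+fast-vector-shift-masks` (the feature string defined in X86.td above) or via `-mcpu=btver1`/`-mcpu=btver2`. A hypothetical FileCheck-style sketch of what a regression test for the unequal-amount case might look like (this is not the commit's actual test file):

```llvm
; RUN: llc < %s -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s
; Hypothetical sketch: on btver2 the unequal shift pair should survive as
; two immediate shifts, with no constant mask load or AND emitted.
define <4 x i32> @shift_pair(<4 x i32> %x) {
; CHECK-LABEL: shift_pair:
; CHECK:     vpsrld $4
; CHECK:     vpslld $2
; CHECK-NOT: vpand
  %s = lshr <4 x i32> %x, <i32 4, i32 4, i32 4, i32 4>
  %r = shl <4 x i32> %s, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %r
}
```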

