diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-07-09 19:55:28 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-07-09 19:55:28 +0000 |
| commit | 84a1f07363405c25304e3fd3df55a216676e9984 (patch) | |
| tree | da18767d1cb035014d1b7ae007b82ca8a62e9521 /llvm/lib | |
| parent | c236eeaf7d8a95a3457c4658600ee5b79052b7a2 (diff) | |
| download | bcm5719-llvm-84a1f07363405c25304e3fd3df55a216676e9984.tar.gz bcm5719-llvm-84a1f07363405c25304e3fd3df55a216676e9984.zip | |
[X86][AMDGPU][DAGCombiner] Move call to allowsMemoryAccess into isLoadBitCastBeneficial/isStoreBitCastBeneficial to allow X86 to bypass it
The problem is that X86 doesn't set the Fast flag from
allowsMemoryAccess on certain CPUs because of their slow-unaligned-memory
subtarget features. This prevents bitcasts from being folded into
loads and stores. But all vector loads and stores of the same width
have the same cost on X86.
This patch merges the allowsMemoryAccess call into isLoadBitCastBeneficial/isStoreBitCastBeneficial so that X86 can skip it.
Differential Revision: https://reviews.llvm.org/D64295
llvm-svn: 365549
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 23 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 13 | ||||
| -rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 4 |
5 files changed, 31 insertions, 24 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index cd5de4c1400..09e1195e1fd 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11040,14 +11040,11 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // as we assume software couldn't rely on the number of accesses of an // illegal type. ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || - TLI.isOperationLegal(ISD::LOAD, VT)) && - TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { + TLI.isOperationLegal(ISD::LOAD, VT))) { LoadSDNode *LN0 = cast<LoadSDNode>(N0); - bool Fast = false; - if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT, - *LN0->getMemOperand(), &Fast) && - Fast) { + if (TLI.isLoadBitCastBeneficial(N0.getValueType(), VT, DAG, + *LN0->getMemOperand())) { SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->getAlignment(), @@ -16174,15 +16171,11 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // illegal type. 
if (((!LegalOperations && !ST->isVolatile()) || TLI.isOperationLegal(ISD::STORE, SVT)) && - TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) { - bool Fast = false; - if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT, - *ST->getMemOperand(), &Fast) && - Fast) { - return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, - ST->getPointerInfo(), ST->getAlignment(), - ST->getMemOperand()->getFlags(), ST->getAAInfo()); - } + TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT, + DAG, *ST->getMemOperand())) { + return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0), Ptr, + ST->getPointerInfo(), ST->getAlignment(), + ST->getMemOperand()->getFlags(), ST->getAAInfo()); } } diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 766294dee23..0ccd58d44aa 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -719,8 +719,9 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N, return (OldSize < 32); } -bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, - EVT CastTy) const { +bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy, + const SelectionDAG &DAG, + const MachineMemOperand &MMO) const { assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits()); @@ -730,8 +731,12 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, unsigned LScalarSize = LoadTy.getScalarSizeInBits(); unsigned CastScalarSize = CastTy.getScalarSizeInBits(); - return (LScalarSize < CastScalarSize) || - (CastScalarSize >= 32); + if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32)) + return false; + + bool Fast = false; + return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy, + MMO, &Fast) && Fast; } // SI+ has instructions for cttz / ctlz for 32-bit values. 
This is probably also diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 9723fc3ec66..40ff24f0754 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -182,7 +182,8 @@ public: ISD::LoadExtType ExtType, EVT ExtVT) const override; - bool isLoadBitCastBeneficial(EVT, EVT) const final; + bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG, + const MachineMemOperand &MMO) const final; bool storeOfVectorConstantIsCheap(EVT MemVT, unsigned NumElem, diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 370ecefd273..3cab44b0ac1 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -4941,8 +4941,9 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const { return Subtarget.hasLZCNT(); } -bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, - EVT BitcastVT) const { +bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, + const SelectionDAG &DAG, + const MachineMemOperand &MMO) const { if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() && BitcastVT.getVectorElementType() == MVT::i1) return false; @@ -4950,7 +4951,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8) return false; - return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT); + // If both types are legal vectors, it's always ok to convert them. 
+ if (LoadVT.isVector() && BitcastVT.isVector() && + isTypeLegal(LoadVT) && isTypeLegal(BitcastVT)) + return true; + + return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO); } bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT, diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 0631fb7dfe8..e0be03bc3f9 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1127,7 +1127,9 @@ namespace llvm { return NumElem > 2; } - bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override; + bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT, + const SelectionDAG &DAG, + const MachineMemOperand &MMO) const override; /// Intel processors have a unified instruction and data cache const char * getClearCacheBuiltinName() const override { |

