Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp   13
-rw-r--r--   llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h      3
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.cpp          12
-rw-r--r--   llvm/lib/Target/X86/X86ISelLowering.h             4
4 files changed, 23 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 766294dee23..0ccd58d44aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -719,8 +719,9 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
   return (OldSize < 32);
 }
 
-bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
-                                                   EVT CastTy) const {
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
+                                                   const SelectionDAG &DAG,
+                                                   const MachineMemOperand &MMO) const {
 
   assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
 
@@ -730,8 +731,12 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
   unsigned LScalarSize = LoadTy.getScalarSizeInBits();
   unsigned CastScalarSize = CastTy.getScalarSizeInBits();
 
-  return (LScalarSize < CastScalarSize) ||
-         (CastScalarSize >= 32);
+  if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
+    return false;
+
+  bool Fast = false;
+  return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy,
+                            MMO, &Fast) && Fast;
 }
 
 // SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 9723fc3ec66..40ff24f0754 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -182,7 +182,8 @@ public:
                            ISD::LoadExtType ExtType,
                            EVT ExtVT) const override;
 
-  bool isLoadBitCastBeneficial(EVT, EVT) const final;
+  bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
+                               const MachineMemOperand &MMO) const final;
 
   bool storeOfVectorConstantIsCheap(EVT MemVT,
                                     unsigned NumElem,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 370ecefd273..3cab44b0ac1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4941,8 +4941,9 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
   return Subtarget.hasLZCNT();
 }
 
-bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
-                                                EVT BitcastVT) const {
+bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+                                                const SelectionDAG &DAG,
+                                                const MachineMemOperand &MMO) const {
   if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
       BitcastVT.getVectorElementType() == MVT::i1)
     return false;
@@ -4950,7 +4951,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
   if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
     return false;
 
-  return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
+  // If both types are legal vectors, it's always ok to convert them.
+  if (LoadVT.isVector() && BitcastVT.isVector() &&
+      isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
+    return true;
+
+  return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
 }
 
 bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 0631fb7dfe8..e0be03bc3f9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1127,7 +1127,9 @@ namespace llvm {
       return NumElem > 2;
     }
 
-    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
+    bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+                                 const SelectionDAG &DAG,
+                                 const MachineMemOperand &MMO) const override;
 
     /// Intel processors have a unified instruction and data cache
     const char * getClearCacheBuiltinName() const override {
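
For readers tracking the API change: isLoadBitCastBeneficial now receives the SelectionDAG and the load's MachineMemOperand, so a target can base the (bitcast (load)) folding decision on the concrete memory access (address space, alignment, flags) rather than on the types alone, as AMDGPU now does via allowsMemoryAccess. Below is a minimal sketch of an out-of-tree target adopting the new signature; MyTargetLowering and its policy are assumptions for illustration, not part of this commit.

// Sketch only: a hypothetical target hook using the new parameters.
// Assumes a MyTargetLowering class deriving from TargetLowering is
// declared elsewhere; the policy mirrors the AMDGPU change above.
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLowering.h"

using namespace llvm;

bool MyTargetLowering::isLoadBitCastBeneficial(
    EVT LoadVT, EVT BitcastVT, const SelectionDAG &DAG,
    const MachineMemOperand &MMO) const {
  // Reject the fold unless an ordinary load of the cast type with this
  // operand's alignment and flags would be fast on the subtarget.
  bool Fast = false;
  if (!allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
                          MMO, &Fast) ||
      !Fast)
    return false;

  // Defer to the generic heuristic, forwarding the new context arguments.
  return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
}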

