summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp13
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h3
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.cpp12
-rw-r--r--llvm/lib/Target/X86/X86ISelLowering.h4
4 files changed, 23 insertions, 9 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 766294dee23..0ccd58d44aa 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -719,8 +719,9 @@ bool AMDGPUTargetLowering::shouldReduceLoadWidth(SDNode *N,
return (OldSize < 32);
}
-bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
- EVT CastTy) const {
+bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy, EVT CastTy,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
assert(LoadTy.getSizeInBits() == CastTy.getSizeInBits());
@@ -730,8 +731,12 @@ bool AMDGPUTargetLowering::isLoadBitCastBeneficial(EVT LoadTy,
unsigned LScalarSize = LoadTy.getScalarSizeInBits();
unsigned CastScalarSize = CastTy.getScalarSizeInBits();
- return (LScalarSize < CastScalarSize) ||
- (CastScalarSize >= 32);
+ if ((LScalarSize >= CastScalarSize) && (CastScalarSize < 32))
+ return false;
+
+ bool Fast = false;
+ return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), CastTy,
+ MMO, &Fast) && Fast;
}
// SI+ has instructions for cttz / ctlz for 32-bit values. This is probably also
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 9723fc3ec66..40ff24f0754 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -182,7 +182,8 @@ public:
ISD::LoadExtType ExtType,
EVT ExtVT) const override;
- bool isLoadBitCastBeneficial(EVT, EVT) const final;
+ bool isLoadBitCastBeneficial(EVT, EVT, const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const final;
bool storeOfVectorConstantIsCheap(EVT MemVT,
unsigned NumElem,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 370ecefd273..3cab44b0ac1 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4941,8 +4941,9 @@ bool X86TargetLowering::isCheapToSpeculateCtlz() const {
return Subtarget.hasLZCNT();
}
-bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
- EVT BitcastVT) const {
+bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const {
if (!Subtarget.hasAVX512() && !LoadVT.isVector() && BitcastVT.isVector() &&
BitcastVT.getVectorElementType() == MVT::i1)
return false;
@@ -4950,7 +4951,12 @@ bool X86TargetLowering::isLoadBitCastBeneficial(EVT LoadVT,
if (!Subtarget.hasDQI() && BitcastVT == MVT::v8i1 && LoadVT == MVT::i8)
return false;
- return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT);
+ // If both types are legal vectors, it's always ok to convert them.
+ if (LoadVT.isVector() && BitcastVT.isVector() &&
+ isTypeLegal(LoadVT) && isTypeLegal(BitcastVT))
+ return true;
+
+ return TargetLowering::isLoadBitCastBeneficial(LoadVT, BitcastVT, DAG, MMO);
}
bool X86TargetLowering::canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 0631fb7dfe8..e0be03bc3f9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1127,7 +1127,9 @@ namespace llvm {
return NumElem > 2;
}
- bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT) const override;
+ bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
+ const SelectionDAG &DAG,
+ const MachineMemOperand &MMO) const override;
/// Intel processors have a unified instruction and data cache
const char * getClearCacheBuiltinName() const override {
OpenPOWER on IntegriCloud