summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-09 18:37:39 +0000
committerMatt Arsenault <Matthew.Arsenault@amd.com>2018-05-09 18:37:39 +0000
commit762d49880876eca14aa75a1bc893eba23ed50788 (patch)
tree2d8e059a73d5b7c2f2e60458e71e2503e58dadad /llvm/lib/Target
parent73634e40378fdba2b39c777a871961af2651b4e0 (diff)
downloadbcm5719-llvm-762d49880876eca14aa75a1bc893eba23ed50788.tar.gz
bcm5719-llvm-762d49880876eca14aa75a1bc893eba23ed50788.zip
AMDGPU: Add combine for trunc of bitcast from build_vector
If the truncate only accesses the first element of the vector, we can use the original source value directly. This helps with some combine ordering issues after operations are lowered to integer operations between bitcasts of build_vector. In particular, it stops unnecessarily materializing the unused top half of a vector in some cases.
llvm-svn: 331909
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp30
-rw-r--r--llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h1
2 files changed, 31 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fffcb2fb566..bfd28b93569 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -574,6 +574,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::TRUNCATE);
setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::MULHU);
setTargetDAGCombine(ISD::MULHS);
@@ -3119,6 +3120,33 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
}
+/// Target combine for ISD::TRUNCATE nodes.
+///
+/// Folds a scalar truncate of a bitcast build_vector into a truncate of the
+/// vector's first operand:
+///   vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (truncate x)
+/// A floating-point first operand is first bitcast to the integer type of the
+/// same width so that it can be truncated.
+///
+/// \param N   The node being combined; its result type and operand 0 are
+///            inspected.
+/// \param DCI Combiner state; only DCI.DAG is used, to build the new nodes.
+/// \returns The replacement value, or an empty SDValue when the pattern does
+///          not match.
+SDValue AMDGPUTargetLowering::performTruncateCombine(
+  SDNode *N, DAGCombinerInfo &DCI) const {
+  SDLoc SL(N);
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+
+  // The fold rebuilds the result from a single scalar operand, so it is only
+  // valid for a scalar result type. Without this check a vector VT whose total
+  // width fits in the first element would yield a scalar-to-vector TRUNCATE.
+  if (VT.isVector() || Src.getOpcode() != ISD::BITCAST)
+    return SDValue();
+
+  SDValue Vec = Src.getOperand(0);
+  if (Vec.getOpcode() != ISD::BUILD_VECTOR)
+    return SDValue();
+
+  SDValue Elt0 = Vec.getOperand(0);
+  EVT EltVT = Elt0.getValueType();
+
+  // The truncated value must come entirely from the first operand.
+  // NOTE(review): this compares against the operand's own type; it assumes the
+  // build_vector operand is not wider than the vector element type - confirm.
+  if (VT.getSizeInBits() > EltVT.getSizeInBits())
+    return SDValue();
+
+  // TRUNCATE operates on integers, so reinterpret a floating-point element as
+  // the same-sized integer first.
+  if (EltVT.isFloatingPoint()) {
+    Elt0 = DAG.getNode(ISD::BITCAST, SL,
+                       EltVT.changeTypeToInteger(), Elt0);
+  }
+
+  return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
+}
+
// We need to specifically handle i64 mul here to avoid unnecessary conversion
// instructions. If we only match on the legalized i64 mul expansion,
// SimplifyDemandedBits will be unable to remove them because there will be
@@ -3758,6 +3786,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
return performSraCombine(N, DCI);
}
+ case ISD::TRUNCATE:
+ return performTruncateCombine(N, DCI);
case ISD::MUL:
return performMulCombine(N, DCI);
case ISD::MULHS:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 857a69a1951..6db83395dc5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -87,6 +87,7 @@ protected:
SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
OpenPOWER on IntegriCloud