diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-09 18:37:39 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-09 18:37:39 +0000 |
commit | 762d49880876eca14aa75a1bc893eba23ed50788 (patch) | |
tree | 2d8e059a73d5b7c2f2e60458e71e2503e58dadad /llvm/lib/Target | |
parent | 73634e40378fdba2b39c777a871961af2651b4e0 (diff) | |
download | bcm5719-llvm-762d49880876eca14aa75a1bc893eba23ed50788.tar.gz bcm5719-llvm-762d49880876eca14aa75a1bc893eba23ed50788.zip |
AMDGPU: Add combine for trunc of bitcast from build_vector
If the truncate is only accessing the first element of the vector,
we can use the original source value.
This helps with some combine ordering issues after operations are
lowered to integer operations between bitcasts of build_vector.
In particular it stops unnecessarily materializing the unused
top half of a vector in some cases.
llvm-svn: 331909
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 30 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 1 |
2 files changed, 31 insertions, 0 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index fffcb2fb566..bfd28b93569 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -574,6 +574,7 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::SHL);
   setTargetDAGCombine(ISD::SRA);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::TRUNCATE);
   setTargetDAGCombine(ISD::MUL);
   setTargetDAGCombine(ISD::MULHU);
   setTargetDAGCombine(ISD::MULHS);
@@ -3119,6 +3120,33 @@ SDValue AMDGPUTargetLowering::performSrlCombine(SDNode *N,
   return DAG.getNode(ISD::BITCAST, SL, MVT::i64, BuildPair);
 }
 
+SDValue AMDGPUTargetLowering::performTruncateCombine(
+  SDNode *N, DAGCombinerInfo &DCI) const {
+  SDLoc SL(N);
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+  SDValue Src = N->getOperand(0);
+
+  // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
+  if (Src.getOpcode() == ISD::BITCAST) {
+    SDValue Vec = Src.getOperand(0);
+    if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
+      SDValue Elt0 = Vec.getOperand(0);
+      EVT EltVT = Elt0.getValueType();
+      if (VT.getSizeInBits() <= EltVT.getSizeInBits()) {
+        if (EltVT.isFloatingPoint()) {
+          Elt0 = DAG.getNode(ISD::BITCAST, SL,
+                             EltVT.changeTypeToInteger(), Elt0);
+        }
+
+        return DAG.getNode(ISD::TRUNCATE, SL, VT, Elt0);
+      }
+    }
+  }
+
+  return SDValue();
+}
+
 // We need to specifically handle i64 mul here to avoid unnecessary conversion
 // instructions. If we only match on the legalized i64 mul expansion,
 // SimplifyDemandedBits will be unable to remove them because there will be
@@ -3758,6 +3786,8 @@ SDValue AMDGPUTargetLowering::PerformDAGCombine(SDNode *N,
 
     return performSraCombine(N, DCI);
   }
+  case ISD::TRUNCATE:
+    return performTruncateCombine(N, DCI);
   case ISD::MUL:
     return performMulCombine(N, DCI);
   case ISD::MULHS:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 857a69a1951..6db83395dc5 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -87,6 +87,7 @@ protected:
   SDValue performShlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSraCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performSrlCombine(SDNode *N, DAGCombinerInfo &DCI) const;
+  SDValue performTruncateCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulhsCombine(SDNode *N, DAGCombinerInfo &DCI) const;
   SDValue performMulhuCombine(SDNode *N, DAGCombinerInfo &DCI) const;
```