diff options
author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-16 11:47:30 +0000 |
---|---|---|
committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2018-05-16 11:47:30 +0000 |
commit | 67a9815a5c9d0355020e17c0a494f7eaa1df21fe (patch) | |
tree | 3422ea805c0a69f22bcdec55e8c684b282498b59 /llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | |
parent | ca22d427b94de946d4ef32b8acbdfb7e62e7cfa4 (diff) | |
download | bcm5719-llvm-67a9815a5c9d0355020e17c0a494f7eaa1df21fe.tar.gz bcm5719-llvm-67a9815a5c9d0355020e17c0a494f7eaa1df21fe.zip |
AMDGPU: Custom lower v4i16/v4f16 vector operations
Avoids stack access.
Also handle extract hi elt pattern from truncate + shift
to avoid a couple test regressions.
llvm-svn: 332453
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 22 |
1 files changed, 22 insertions, 0 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index d00727bf314..988554621c5 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3144,6 +3144,28 @@ SDValue AMDGPUTargetLowering::performTruncateCombine( } } + // Equivalent of above for accessing the high element of a vector as an + // integer operation. + // trunc (srl (bitcast (build_vector x, y))), 16 -> trunc (bitcast y) + if (Src.getOpcode() == ISD::SRL) { + if (auto K = isConstOrConstSplat(Src.getOperand(1))) { + if (2 * K->getZExtValue() == Src.getValueType().getScalarSizeInBits()) { + SDValue BV = stripBitcast(Src.getOperand(0)); + if (BV.getOpcode() == ISD::BUILD_VECTOR && + BV.getValueType().getVectorNumElements() == 2) { + SDValue SrcElt = BV.getOperand(1); + EVT SrcEltVT = SrcElt.getValueType(); + if (SrcEltVT.isFloatingPoint()) { + SrcElt = DAG.getNode(ISD::BITCAST, SL, + SrcEltVT.changeTypeToInteger(), SrcElt); + } + + return DAG.getNode(ISD::TRUNCATE, SL, VT, SrcElt); + } + } + } + } + // Partially shrink 64-bit shifts to 32-bit if reduced to 16-bit. // // i16 (trunc (srl i64:x, K)), K <= 16 -> |