diff options
Diffstat (limited to 'llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp')
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 27 |
1 files changed, 24 insertions, 3 deletions
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index 2e43d427e47..5a871489acd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -587,19 +587,32 @@ bool AMDGPUTargetLowering::aggressivelyPreferBuildVectorSources(EVT VecVT) const bool AMDGPUTargetLowering::isTruncateFree(EVT Source, EVT Dest) const { // Truncate is just accessing a subregister. - return Dest.bitsLT(Source) && (Dest.getSizeInBits() % 32 == 0); + + unsigned SrcSize = Source.getSizeInBits(); + unsigned DestSize = Dest.getSizeInBits(); + + return DestSize < SrcSize && DestSize % 32 == 0 ; } bool AMDGPUTargetLowering::isTruncateFree(Type *Source, Type *Dest) const { // Truncate is just accessing a subregister. - return Dest->getPrimitiveSizeInBits() < Source->getPrimitiveSizeInBits() && - (Dest->getPrimitiveSizeInBits() % 32 == 0); + + unsigned SrcSize = Source->getScalarSizeInBits(); + unsigned DestSize = Dest->getScalarSizeInBits(); + + if (DestSize== 16 && Subtarget->has16BitInsts()) + return SrcSize >= 32; + + return DestSize < SrcSize && DestSize % 32 == 0; } bool AMDGPUTargetLowering::isZExtFree(Type *Src, Type *Dest) const { unsigned SrcSize = Src->getScalarSizeInBits(); unsigned DestSize = Dest->getScalarSizeInBits(); + if (SrcSize == 16 && Subtarget->has16BitInsts()) + return DestSize >= 32; + return SrcSize == 32 && DestSize == 64; } @@ -608,6 +621,10 @@ bool AMDGPUTargetLowering::isZExtFree(EVT Src, EVT Dest) const { // practical purposes, the extra mov 0 to load a 64-bit is free. As used, // this will enable reducing 64-bit operations the 32-bit, which is always // good. + + if (Src == MVT::i16) + return Dest == MVT::i32 ||Dest == MVT::i64 ; + return Src == MVT::i32 && Dest == MVT::i64; } @@ -2447,6 +2464,10 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N, if (VT.isVector() || Size > 64) return SDValue(); + // There are i16 integer mul/mad. + if (Subtarget->has16BitInsts() && VT.getScalarType().bitsLE(MVT::i16)) + return SDValue(); + SelectionDAG &DAG = DCI.DAG; SDLoc DL(N); |