diff options
| author | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-06 20:58:30 +0000 |
|---|---|---|
| committer | Matt Arsenault <Matthew.Arsenault@amd.com> | 2017-04-06 20:58:30 +0000 |
| commit | 5cf4271883e18e729ddc7cac74aaa4cc59406dff (patch) | |
| tree | f16dae4a44c759a74684eaecc5ef20c91b8daa6a /llvm/lib/Target/AMDGPU/SIISelLowering.cpp | |
| parent | 2bf4c59025777d9d7b705e23dc3ed4346e67108c (diff) | |
| download | bcm5719-llvm-5cf4271883e18e729ddc7cac74aaa4cc59406dff.tar.gz bcm5719-llvm-5cf4271883e18e729ddc7cac74aaa4cc59406dff.zip | |
AMDGPU: Replace fp16SrcZerosHighBits with a whitelist
FCOPYSIGN is lowered to 32-bit bit operations which don't clear the high
bits of the result, so it must not be treated as zeroing them.
llvm-svn: 299708
Diffstat (limited to 'llvm/lib/Target/AMDGPU/SIISelLowering.cpp')
| -rw-r--r-- | llvm/lib/Target/AMDGPU/SIISelLowering.cpp | 54 |
1 files changed, 50 insertions, 4 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
index 87354ccbc70..e8b0626bccf 100644
--- a/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -4035,13 +4035,59 @@ SDValue SITargetLowering::performXorCombine(SDNode *N,
   return SDValue();
 }
 
+// Instructions that will be lowered with a final instruction that zeros the
+// high result bits.
+// XXX - probably only need to list legal operations.
 static bool fp16SrcZerosHighBits(unsigned Opc) {
   switch (Opc) {
-  case ISD::SELECT:
-  case ISD::EXTRACT_VECTOR_ELT:
-    return false;
-  default:
+  case ISD::FADD:
+  case ISD::FSUB:
+  case ISD::FMUL:
+  case ISD::FDIV:
+  case ISD::FREM:
+  case ISD::FMA:
+  case ISD::FMAD:
+  case ISD::FCANONICALIZE:
+  case ISD::FP_ROUND:
+  case ISD::UINT_TO_FP:
+  case ISD::SINT_TO_FP:
+  case ISD::FABS:
+    // Fabs is lowered to a bit operation, but it's an and which will clear the
+    // high bits anyway.
+  case ISD::FSQRT:
+  case ISD::FSIN:
+  case ISD::FCOS:
+  case ISD::FPOWI:
+  case ISD::FPOW:
+  case ISD::FLOG:
+  case ISD::FLOG2:
+  case ISD::FLOG10:
+  case ISD::FEXP:
+  case ISD::FEXP2:
+  case ISD::FCEIL:
+  case ISD::FTRUNC:
+  case ISD::FRINT:
+  case ISD::FNEARBYINT:
+  case ISD::FROUND:
+  case ISD::FFLOOR:
+  case ISD::FMINNUM:
+  case ISD::FMAXNUM:
+  case AMDGPUISD::FRACT:
+  case AMDGPUISD::CLAMP:
+  case AMDGPUISD::COS_HW:
+  case AMDGPUISD::SIN_HW:
+  case AMDGPUISD::FMIN3:
+  case AMDGPUISD::FMAX3:
+  case AMDGPUISD::FMED3:
+  case AMDGPUISD::FMAD_FTZ:
+  case AMDGPUISD::RCP:
+  case AMDGPUISD::RSQ:
+  case AMDGPUISD::LDEXP:
     return true;
+  default:
+    // fcopysign, select and others may be lowered to 32-bit bit operations
+    // which don't zero the high bits.
+    return false;
   }
 }
 
```

