diff options
author | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-04-13 23:17:00 +0000 |
---|---|---|
committer | Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> | 2017-04-13 23:17:00 +0000 |
commit | d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2 (patch) | |
tree | fb06f927219d033affee12be8f4da32198dd9289 /llvm/lib | |
parent | f93c58b81bfbe1e4502d8dd0465797fdd8fb400b (diff) | |
download | bcm5719-llvm-d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2.tar.gz bcm5719-llvm-d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2.zip |
AMDGPU/GFX9: Do not use v_pack_b32_f16 when packing
Differential Revision: https://reviews.llvm.org/D31819
llvm-svn: 300275
Diffstat (limited to 'llvm/lib')
-rw-r--r-- | llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 44 |
1 file changed, 15 insertions, 29 deletions
```diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b1e4c9a7aaa..b29cfe116c8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3560,36 +3560,22 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
 
   switch (Inst.getOpcode()) {
   case AMDGPU::S_PACK_LL_B32_B16: {
-    // v_pack_b32_f16 flushes denormals if not enabled. Use it if the default
-    // is to leave them untouched.
-    // XXX: Does this do anything to NaNs?
-    if (ST.hasFP16Denormals()) {
-      BuildMI(*MBB, Inst, DL, get(AMDGPU::V_PACK_B32_F16), ResultReg)
-        .addImm(0) // src0_modifiers
-        .add(Src0) // src0
-        .addImm(0) // src1_modifiers
-        .add(Src1) // src2
-        .addImm(0) // clamp
-        .addImm(0); // omod
-    } else {
-      unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-      unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
-      // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
-      // 0.
-      BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
-        .addImm(0xffff);
-
-      BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
-        .addReg(ImmReg, RegState::Kill)
-        .add(Src0);
-
-      BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
-        .add(Src1)
-        .addImm(16)
-        .addReg(TmpReg, RegState::Kill);
-    }
+    unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+    unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+    // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
+    // 0.
+    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+      .addImm(0xffff);
 
+    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
+      .addReg(ImmReg, RegState::Kill)
+      .add(Src0);
+
+    BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
+      .add(Src1)
+      .addImm(16)
+      .addReg(TmpReg, RegState::Kill);
     break;
   }
   case AMDGPU::S_PACK_LH_B32_B16: {
```