summary | refs | log | tree | commit | diff | stats
path: root/llvm/lib
diff options
context:
space:
mode:
author: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2017-04-13 23:17:00 +0000
committer: Konstantin Zhuravlyov <kzhuravl_dev@outlook.com> 2017-04-13 23:17:00 +0000
commit: d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2 (patch)
tree: fb06f927219d033affee12be8f4da32198dd9289 /llvm/lib
parent: f93c58b81bfbe1e4502d8dd0465797fdd8fb400b (diff)
download: bcm5719-llvm-d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2.tar.gz
          bcm5719-llvm-d24aeb20fc3ac57a2180380edbeb3c0fa0cd03b2.zip
AMDGPU/GFX9: Do not use v_pack_b32_f16 when packing
Differential Revision: https://reviews.llvm.org/D31819
llvm-svn: 300275
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/AMDGPU/SIInstrInfo.cpp 44
1 files changed, 15 insertions, 29 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index b1e4c9a7aaa..b29cfe116c8 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3560,36 +3560,22 @@ void SIInstrInfo::movePackToVALU(SmallVectorImpl<MachineInstr *> &Worklist,
switch (Inst.getOpcode()) {
case AMDGPU::S_PACK_LL_B32_B16: {
- // v_pack_b32_f16 flushes denormals if not enabled. Use it if the default
- // is to leave them untouched.
- // XXX: Does this do anything to NaNs?
- if (ST.hasFP16Denormals()) {
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_PACK_B32_F16), ResultReg)
- .addImm(0) // src0_modifiers
- .add(Src0) // src0
- .addImm(0) // src1_modifiers
- .add(Src1) // src2
- .addImm(0) // clamp
- .addImm(0); // omod
- } else {
- unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
- unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
-
- // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
- // 0.
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
- .addImm(0xffff);
-
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
- .addReg(ImmReg, RegState::Kill)
- .add(Src0);
-
- BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
- .add(Src1)
- .addImm(16)
- .addReg(TmpReg, RegState::Kill);
- }
+ unsigned ImmReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ unsigned TmpReg = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+
+ // FIXME: Can do a lot better if we know the high bits of src0 or src1 are
+ // 0.
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_MOV_B32_e32), ImmReg)
+ .addImm(0xffff);
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_AND_B32_e64), TmpReg)
+ .addReg(ImmReg, RegState::Kill)
+ .add(Src0);
+
+ BuildMI(*MBB, Inst, DL, get(AMDGPU::V_LSHL_OR_B32), ResultReg)
+ .add(Src1)
+ .addImm(16)
+ .addReg(TmpReg, RegState::Kill);
break;
}
case AMDGPU::S_PACK_LH_B32_B16: {
OpenPOWER on IntegriCloud