-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstrInfo.cpp                 | 22
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstrInfo.td                  | 16
-rw-r--r--   llvm/lib/Target/AMDGPU/SIInstructions.td               | 12
-rw-r--r--   llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll  | 20
-rw-r--r--   llvm/test/CodeGen/AMDGPU/inline-constraints.ll         | 37
-rw-r--r--   llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll          |  2
6 files changed, 86 insertions, 23 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 905ee467319..108995a463f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -1943,6 +1943,28 @@ bool SIInstrInfo::verifyInstruction(const MachineInstr &MI,
     return false;
   }
 
+  if (MI.isInlineAsm()) {
+    // Verify register classes for inlineasm constraints.
+    for (unsigned I = InlineAsm::MIOp_FirstOperand, E = MI.getNumOperands();
+         I != E; ++I) {
+      const TargetRegisterClass *RC = MI.getRegClassConstraint(I, this, &RI);
+      if (!RC)
+        continue;
+
+      const MachineOperand &Op = MI.getOperand(I);
+      if (!Op.isReg())
+        continue;
+
+      unsigned Reg = Op.getReg();
+      if (!TargetRegisterInfo::isVirtualRegister(Reg) && !RC->contains(Reg)) {
+        ErrInfo = "inlineasm operand has incorrect register class.";
+        return false;
+      }
+    }
+
+    return true;
+  }
+
   // Make sure the register classes are correct.
   for (int i = 0, e = Desc.getNumOperands(); i != e; ++i) {
     if (MI.getOperand(i).isFPImm()) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index a56d909624f..f19e99e7cd1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -265,19 +265,25 @@ class InlineFPImm <ValueType vt> : PatLeaf <(vt fpimm), [{
   return isInlineImmediate(N);
 }]>;
 
-class SGPRImm <dag frag> : PatLeaf<frag, [{
+class VGPRImm <dag frag> : PatLeaf<frag, [{
   if (Subtarget->getGeneration() < SISubtarget::SOUTHERN_ISLANDS) {
     return false;
   }
   const SIRegisterInfo *SIRI =
      static_cast<const SIRegisterInfo *>(Subtarget->getRegisterInfo());
+  unsigned Limit = 0;
   for (SDNode::use_iterator U = N->use_begin(), E = SDNode::use_end();
-                                                U != E; ++U) {
+                                                Limit < 10 && U != E; ++U, ++Limit) {
     const TargetRegisterClass *RC = getOperandRegClass(*U, U.getOperandNo());
-    if (RC && SIRI->isSGPRClass(RC))
-      return true;
+
+    // If the register class is unknown, it could be an unknown
+    // register class that needs to be an SGPR, e.g. an inline asm
+    // constraint
+    if (!RC || SIRI->isSGPRClass(RC))
+      return false;
   }
-  return false;
+
+  return Limit < 10;
 }]>;
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index f3ccee288fd..4122eb915f3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -615,23 +615,23 @@ def : Pat <
 /********** ================== **********/
 
 def : Pat <
-  (SGPRImm<(i32 imm)>:$imm),
-  (S_MOV_B32 imm:$imm)
+  (VGPRImm<(i32 imm)>:$imm),
+  (V_MOV_B32_e32 imm:$imm)
 >;
 
 def : Pat <
-  (SGPRImm<(f32 fpimm)>:$imm),
-  (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
+  (VGPRImm<(f32 fpimm)>:$imm),
+  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : Pat <
   (i32 imm:$imm),
-  (V_MOV_B32_e32 imm:$imm)
+  (S_MOV_B32 imm:$imm)
 >;
 
 def : Pat <
   (f32 fpimm:$imm),
-  (V_MOV_B32_e32 (f32 (bitcast_fpimm_to_i32 $imm)))
+  (S_MOV_B32 (f32 (bitcast_fpimm_to_i32 $imm)))
 >;
 
 def : Pat <
diff --git a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
index ad5e8908340..27b3cc0e435 100644
--- a/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/control-flow-fastregalloc.ll
@@ -15,7 +15,7 @@
 ; GCN: s_mov_b32 m0, -1
 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
 
 ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
 ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, [[CMP0]]
 ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
@@ -93,24 +93,24 @@ endif:
 ; GCN: s_mov_b32 m0, -1
 ; GCN: ds_read_b32 [[LOAD0:v[0-9]+]]
-; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], v0,
+; GCN: v_cmp_eq_u32_e64 [[CMP0:s\[[0-9]+:[0-9]\]]], s{{[0-9]+}}, v0
 
 ; GCN: s_mov_b64 s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, exec
 ; GCN: s_and_b64 s{{\[}}[[ANDEXEC_LO:[0-9]+]]:[[ANDEXEC_HI:[0-9]+]]{{\]}}, s{{\[}}[[SAVEEXEC_LO:[0-9]+]]:[[SAVEEXEC_HI:[0-9]+]]{{\]}}, [[CMP0]]
 ; GCN: s_xor_b64 s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}, s{{\[}}[[SAVEEXEC_LO]]:[[SAVEEXEC_HI]]{{\]}}
 
+; Spill load
+; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 ; 4-byte Folded Spill
+
 ; Spill saved exec
 ; VGPR: v_writelane_b32 [[SPILL_VGPR:v[0-9]+]], s[[SAVEEXEC_LO]], [[SAVEEXEC_LO_LANE:[0-9]+]]
 ; VGPR: v_writelane_b32 [[SPILL_VGPR]], s[[SAVEEXEC_HI]], [[SAVEEXEC_HI_LANE:[0-9]+]]
 
 ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_LO:[0-9]+]], s[[SAVEEXEC_LO]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 ; 8-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_LO]], off, s[0:3], s7 offset:12 ; 8-byte Folded Spill
 
 ; VMEM: v_mov_b32_e32 v[[V_SAVEEXEC_HI:[0-9]+]], s[[SAVEEXEC_HI]]
-; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:4 ; 8-byte Folded Spill
-
-; Spill load
-; GCN: buffer_store_dword [[LOAD0]], off, s[0:3], s7 offset:[[VAL_OFFSET:[0-9]+]] ; 4-byte Folded Spill
+; VMEM: buffer_store_dword v[[V_SAVEEXEC_HI]], off, s[0:3], s7 offset:16 ; 8-byte Folded Spill
 
 ; GCN: s_mov_b64 exec, s{{\[}}[[ANDEXEC_LO]]:[[ANDEXEC_HI]]{{\]}}
@@ -120,7 +120,7 @@ endif:
 
 ; GCN: [[LOOP:BB[0-9]+_[0-9]+]]:
-; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 offset:[[VAL_OFFSET]] ; 4-byte Folded Reload
+; GCN: buffer_load_dword v[[VAL_LOOP_RELOAD:[0-9]+]], off, s[0:3], s7 ; 4-byte Folded Reload
 ; GCN: v_subrev_i32_e32 [[VAL_LOOP:v[0-9]+]], vcc, v{{[0-9]+}}, v[[VAL_LOOP_RELOAD]]
 ; GCN: v_cmp_ne_u32_e32 vcc,
 ; GCN: s_and_b64 vcc, exec, vcc
@@ -133,11 +133,11 @@ endif:
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_LO_LANE]]
 ; VGPR: v_readlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], [[SPILL_VGPR]], [[SAVEEXEC_HI_LANE]]
 
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_LO:[0-9]+]], off, s[0:3], s7 offset:12 ; 8-byte Folded Reload
 ; VMEM: s_waitcnt vmcnt(0)
 ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_LO:[0-9]+]], v[[V_RELOAD_SAVEEXEC_LO]]
 
-; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:4 ; 8-byte Folded Reload
+; VMEM: buffer_load_dword v[[V_RELOAD_SAVEEXEC_HI:[0-9]+]], off, s[0:3], s7 offset:16 ; 8-byte Folded Reload
 ; VMEM: s_waitcnt vmcnt(0)
 ; VMEM: v_readfirstlane_b32 s[[S_RELOAD_SAVEEXEC_HI:[0-9]+]], v[[V_RELOAD_SAVEEXEC_HI]]
diff --git a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
index 7282e89e85b..3c0bb75a607 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-constraints.ll
@@ -26,8 +26,43 @@ entry:
 ; GCN: s_mov_b32 m0, -1
 ; GCN-NOT: s_mov_b32 s{{[0-9]+}}, m0
 ; GCN: ; use m0
-define void @inline_sreg_constraint_m0(i32 addrspace(1)* %ptr) {
+define void @inline_sreg_constraint_m0() {
   %m0 = tail call i32 asm sideeffect "s_mov_b32 m0, -1", "={M0}"()
   tail call void asm sideeffect "; use $0", "s"(i32 %m0)
   ret void
 }
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i32:
+; GCN: s_mov_b32 [[REG:s[0-9]+]], 32
+; GCN: ; use [[REG]]
+define void @inline_sreg_constraint_imm_i32() {
+  tail call void asm sideeffect "; use $0", "s"(i32 32)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f32:
+; GCN: s_mov_b32 [[REG:s[0-9]+]], 1.0
+; GCN: ; use [[REG]]
+define void @inline_sreg_constraint_imm_f32() {
+  tail call void asm sideeffect "; use $0", "s"(float 1.0)
+  ret void
+}
+
+; FIXME: Should be able to use s_mov_b64
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_i64:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], -4{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], -1{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define void @inline_sreg_constraint_imm_i64() {
+  tail call void asm sideeffect "; use $0", "s"(i64 -4)
+  ret void
+}
+
+; GCN-LABEL: {{^}}inline_sreg_constraint_imm_f64:
+; GCN-DAG: s_mov_b32 s[[REG_LO:[0-9]+]], 0{{$}}
+; GCN-DAG: s_mov_b32 s[[REG_HI:[0-9]+]], 0x3ff00000{{$}}
+; GCN: ; use s{{\[}}[[REG_LO]]:[[REG_HI]]{{\]}}
+define void @inline_sreg_constraint_imm_f64() {
+  tail call void asm sideeffect "; use $0", "s"(double 1.0)
+  ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index 0cdb1c9fb3a..37da9c5d5ad 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -15,7 +15,7 @@
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
 ; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
-; GCN-DAG: v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 0x40a00000
+; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
 ; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
 ; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
 define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
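
Illustrative sketch, not part of the commit: with the inverted patterns, an immediate handed to an "s" inline-asm constraint should now be materialized into an SGPR with s_mov_b32 rather than into a VGPR, which is what the new inline-constraints.ll tests verify. A minimal standalone IR reproducer (the function name @sgpr_imm_example is hypothetical; compile for an amdgcn target, as the test file does):

; expected codegen: s_mov_b32 s<N>, 32 feeding the "; use" inline asm
define void @sgpr_imm_example() {
  tail call void asm sideeffect "; use $0", "s"(i32 32)
  ret void
}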