diff options
-rw-r--r-- | llvm/lib/Target/R600/SIInstrInfo.td | 4 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SIInstructions.td | 53 | ||||
-rw-r--r-- | llvm/lib/Target/R600/SILowerI1Copies.cpp | 76 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/fceil64.ll | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/ffloor.ll | 17 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/setcc.ll | 42 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/setcc64.ll | 35 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/sgpr-control-flow.ll | 41 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/valu-i1.ll | 154 | ||||
-rw-r--r-- | llvm/test/CodeGen/R600/xor.ll | 24 |
10 files changed, 339 insertions, 124 deletions
diff --git a/llvm/lib/Target/R600/SIInstrInfo.td b/llvm/lib/Target/R600/SIInstrInfo.td index cdbc22e0ead..4b3be5be578 100644 --- a/llvm/lib/Target/R600/SIInstrInfo.td +++ b/llvm/lib/Target/R600/SIInstrInfo.td @@ -131,6 +131,10 @@ def as_i32imm: SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i32); }]>; +def as_i64imm: SDNodeXForm<imm, [{ + return CurDAG->getTargetConstant(N->getSExtValue(), MVT::i64); +}]>; + def IMM8bit : PatLeaf <(imm), [{return isUInt<8>(N->getZExtValue());}] >; diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td index 00ce9bfcc26..cfe6c81ced9 100644 --- a/llvm/lib/Target/R600/SIInstructions.td +++ b/llvm/lib/Target/R600/SIInstructions.td @@ -1686,30 +1686,8 @@ defm V_TRIG_PREOP_F64 : VOP3Inst < //===----------------------------------------------------------------------===// // Pseudo Instructions //===----------------------------------------------------------------------===// - let isCodeGenOnly = 1, isPseudo = 1 in { -def V_MOV_I1 : InstSI < - (outs VReg_1:$dst), - (ins i1imm:$src), - "", [(set i1:$dst, (imm:$src))] ->; - -def V_AND_I1 : InstSI < - (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "", - [(set i1:$dst, (and i1:$src0, i1:$src1))] ->; - -def V_OR_I1 : InstSI < - (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "", - [(set i1:$dst, (or i1:$src0, i1:$src1))] ->; - -def V_XOR_I1 : InstSI < - (outs VReg_1:$dst), (ins VReg_1:$src0, VReg_1:$src1), "", - [(set i1:$dst, (xor i1:$src0, i1:$src1))] ->; - let hasSideEffects = 1 in { def SGPR_USE : InstSI <(outs),(ins), "", []>; } @@ -2495,6 +2473,14 @@ def : Pat < (S_MOV_B64 InlineImm<i64>:$imm) >; +// XXX - Should this use a s_cmp to set SCC? + +// Set to sign-extended 64-bit value (true = -1, false = 0) +def : Pat < + (i1 imm:$imm), + (S_MOV_B64 (i64 (as_i64imm $imm))) +>; + /********** ===================== **********/ /********** Interpolation Paterns **********/ /********** ===================== **********/ @@ -3045,6 +3031,27 @@ def : Pat < (V_CNDMASK_B32_e64 0, -1, $src), sub1) >; +// If we need to perform a logical operation on i1 values, we need to +// use vector comparisons since there is only one SCC register. Vector +// comparisions still write to a pair of SGPRs, so treat these as +// 64-bit comparisons. When legalizing SGPR copies, instructions +// resulting in the copies from SCC to these instructions will be +// moved to the VALU. +def : Pat < + (i1 (and i1:$src0, i1:$src1)), + (S_AND_B64 $src0, $src1) +>; + +def : Pat < + (i1 (or i1:$src0, i1:$src1)), + (S_OR_B64 $src0, $src1) +>; + +def : Pat < + (i1 (xor i1:$src0, i1:$src1)), + (S_XOR_B64 $src0, $src1) +>; + def : Pat < (f32 (sint_to_fp i1:$src)), (V_CNDMASK_B32_e64 (i32 0), CONST.FP32_NEG_ONE, $src) @@ -3057,7 +3064,7 @@ def : Pat < def : Pat < (f64 (sint_to_fp i1:$src)), - (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)) + (V_CVT_F64_I32_e32 (V_CNDMASK_B32_e64 (i32 0), (i32 -1), $src)) >; def : Pat < diff --git a/llvm/lib/Target/R600/SILowerI1Copies.cpp b/llvm/lib/Target/R600/SILowerI1Copies.cpp index 226a672b343..7767c4c0671 100644 --- a/llvm/lib/Target/R600/SILowerI1Copies.cpp +++ b/llvm/lib/Target/R600/SILowerI1Copies.cpp @@ -85,30 +85,6 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { Next = std::next(I); MachineInstr &MI = *I; - if (MI.getOpcode() == AMDGPU::V_MOV_I1) { - I1Defs.push_back(MI.getOperand(0).getReg()); - MI.setDesc(TII->get(AMDGPU::V_MOV_B32_e32)); - continue; - } - - if (MI.getOpcode() == AMDGPU::V_AND_I1) { - I1Defs.push_back(MI.getOperand(0).getReg()); - MI.setDesc(TII->get(AMDGPU::V_AND_B32_e64)); - continue; - } - - if (MI.getOpcode() == AMDGPU::V_OR_I1) { - I1Defs.push_back(MI.getOperand(0).getReg()); - MI.setDesc(TII->get(AMDGPU::V_OR_B32_e64)); - continue; - } - - if (MI.getOpcode() == AMDGPU::V_XOR_I1) { - I1Defs.push_back(MI.getOperand(0).getReg()); - MI.setDesc(TII->get(AMDGPU::V_XOR_B32_e64)); - continue; - } - if (MI.getOpcode() == AMDGPU::IMPLICIT_DEF) { unsigned Reg = MI.getOperand(0).getReg(); const TargetRegisterClass *RC = MRI.getRegClass(Reg); @@ -117,32 +93,52 @@ bool SILowerI1Copies::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI.getOpcode() != AMDGPU::COPY || - !TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()) || - !TargetRegisterInfo::isVirtualRegister(MI.getOperand(1).getReg())) + if (MI.getOpcode() != AMDGPU::COPY) continue; + const MachineOperand &Dst = MI.getOperand(0); + const MachineOperand &Src = MI.getOperand(1); + + if (!TargetRegisterInfo::isVirtualRegister(Src.getReg()) || + !TargetRegisterInfo::isVirtualRegister(Dst.getReg())) + continue; - const TargetRegisterClass *DstRC = - MRI.getRegClass(MI.getOperand(0).getReg()); - const TargetRegisterClass *SrcRC = - MRI.getRegClass(MI.getOperand(1).getReg()); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst.getReg()); + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src.getReg()); if (DstRC == &AMDGPU::VReg_1RegClass && TRI->getCommonSubClass(SrcRC, &AMDGPU::SGPR_64RegClass)) { - I1Defs.push_back(MI.getOperand(0).getReg()); - BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CNDMASK_B32_e64)) - .addOperand(MI.getOperand(0)) - .addImm(0) - .addImm(-1) - .addOperand(MI.getOperand(1)); + I1Defs.push_back(Dst.getReg()); + DebugLoc DL = MI.getDebugLoc(); + + MachineInstr *DefInst = MRI.getUniqueVRegDef(Src.getReg()); + if (DefInst->getOpcode() == AMDGPU::S_MOV_B64) { + if (DefInst->getOperand(1).isImm()) { + I1Defs.push_back(Dst.getReg()); + + int64_t Val = DefInst->getOperand(1).getImm(); + assert(Val == 0 || Val == -1); + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_MOV_B32_e32)) + .addOperand(Dst) + .addImm(Val); + MI.eraseFromParent(); + continue; + } + } + + BuildMI(MBB, &MI, DL, TII->get(AMDGPU::V_CNDMASK_B32_e64)) + .addOperand(Dst) + .addImm(0) + .addImm(-1) + .addOperand(Src); MI.eraseFromParent(); } else if (TRI->getCommonSubClass(DstRC, &AMDGPU::SGPR_64RegClass) && SrcRC == &AMDGPU::VReg_1RegClass) { BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(AMDGPU::V_CMP_NE_I32_e64)) - .addOperand(MI.getOperand(0)) - .addOperand(MI.getOperand(1)) - .addImm(0); + .addOperand(Dst) + .addOperand(Src) + .addImm(0); MI.eraseFromParent(); } } diff --git a/llvm/test/CodeGen/R600/fceil64.ll b/llvm/test/CodeGen/R600/fceil64.ll index 029f41dc7ed..c459a6a63eb 100644 --- a/llvm/test/CodeGen/R600/fceil64.ll +++ b/llvm/test/CodeGen/R600/fceil64.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.ceil.f64(double) nounwind readnone declare <2 x double> @llvm.ceil.v2f64(<2 x double>) nounwind readnone @@ -22,12 +22,15 @@ declare <16 x double> @llvm.ceil.v16f64(<16 x double>) nounwind readnone ; SI: cmp_gt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 -; SI: cmp_gt_f64 -; SI: cndmask_b32 -; SI: cmp_ne_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 +; SI: v_cmp_o_f64 +; SI: v_cmp_neq_f64 +; SI: s_and_b64 +; SI: v_cmp_gt_f64 +; SI: s_and_b64 +; SI: v_cndmask_b32 +; SI: v_cndmask_b32 ; SI: v_add_f64 +; SI: s_endpgm define void @fceil_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.ceil.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out diff --git a/llvm/test/CodeGen/R600/ffloor.ll b/llvm/test/CodeGen/R600/ffloor.ll index 166f7055fb1..77b7997b909 100644 --- a/llvm/test/CodeGen/R600/ffloor.ll +++ b/llvm/test/CodeGen/R600/ffloor.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=r600 -mcpu=bonaire < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s -; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=CI -check-prefix=FUNC %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s declare double @llvm.floor.f64(double) nounwind readnone declare <2 x double> @llvm.floor.v2f64(<2 x double>) nounwind readnone @@ -23,12 +23,15 @@ declare <16 x double> @llvm.floor.v16f64(<16 x double>) nounwind readnone ; SI: cmp_gt_i32 ; SI: cndmask_b32 ; SI: cndmask_b32 -; SI: cmp_lt_f64 -; SI: cndmask_b32 -; SI: cmp_ne_i32 -; SI: cndmask_b32 -; SI: cndmask_b32 +; SI: v_cmp_o_f64 +; SI: v_cmp_neq_f64 +; SI: s_and_b64 +; SI: v_cmp_lt_f64 +; SI: s_and_b64 +; SI: v_cndmask_b32 +; SI: v_cndmask_b32 ; SI: v_add_f64 +; SI: s_endpgm define void @ffloor_f64(double addrspace(1)* %out, double %x) { %y = call double @llvm.floor.f64(double %x) nounwind readnone store double %y, double addrspace(1)* %out diff --git a/llvm/test/CodeGen/R600/setcc.ll b/llvm/test/CodeGen/R600/setcc.ll index 371ebbedf18..1cca2bc21e2 100644 --- a/llvm/test/CodeGen/R600/setcc.ll +++ b/llvm/test/CodeGen/R600/setcc.ll @@ -96,11 +96,12 @@ entry: ; R600-DAG: SETNE_DX10 ; R600-DAG: AND_INT ; R600-DAG: SETNE_INT -; SI: v_cmp_o_f32 -; SI: v_cmp_neq_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_and_b32_e32 + +; SI-DAG: v_cmp_o_f32_e32 vcc +; SI-DAG: v_cmp_neq_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]] +; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP1]], vcc +; SI: v_cndmask_b32_e64 [[VRESULT:v[0-9]+]], 0, -1, [[AND]] +; SI: buffer_store_dword [[VRESULT]] define void @f32_one(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp one float %a, %b @@ -130,11 +131,12 @@ entry: ; R600-DAG: SETE_DX10 ; R600-DAG: OR_INT ; R600-DAG: SETNE_INT -; SI: v_cmp_u_f32 -; SI: v_cmp_eq_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 + +; SI-DAG: v_cmp_u_f32_e32 vcc +; SI-DAG: v_cmp_eq_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]] +; SI: s_or_b64 [[OR:s\[[0-9]+:[0-9]+\]]], [[CMP1]], vcc +; SI: v_cndmask_b32_e64 [[VRESULT:v[0-9]+]], 0, -1, [[OR]] +; SI: buffer_store_dword [[VRESULT]] define void @f32_ueq(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ueq float %a, %b @@ -148,9 +150,8 @@ entry: ; R600: SETE_DX10 ; SI: v_cmp_u_f32 ; SI: v_cmp_gt_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f32_ugt(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ugt float %a, %b @@ -164,9 +165,8 @@ entry: ; R600: SETE_DX10 ; SI: v_cmp_u_f32 ; SI: v_cmp_ge_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f32_uge(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp uge float %a, %b @@ -180,9 +180,8 @@ entry: ; R600: SETE_DX10 ; SI: v_cmp_u_f32 ; SI: v_cmp_lt_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f32_ult(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ult float %a, %b @@ -196,9 +195,8 @@ entry: ; R600: SETE_DX10 ; SI: v_cmp_u_f32 ; SI: v_cmp_le_f32 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f32_ule(i32 addrspace(1)* %out, float %a, float %b) { entry: %0 = fcmp ule float %a, %b diff --git a/llvm/test/CodeGen/R600/setcc64.ll b/llvm/test/CodeGen/R600/setcc64.ll index 6e43172b1cb..282a5dea976 100644 --- a/llvm/test/CodeGen/R600/setcc64.ll +++ b/llvm/test/CodeGen/R600/setcc64.ll @@ -57,11 +57,11 @@ entry: } ; FUNC-LABEL: {{^}}f64_one: -; SI: v_cmp_o_f64 -; SI: v_cmp_neq_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_and_b32_e32 +; SI-DAG: v_cmp_o_f64_e32 vcc +; SI-DAG: v_cmp_neq_f64_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]] +; SI: s_and_b64 [[AND:s\[[0-9]+:[0-9]+\]]], [[CMP1]], vcc +; SI: v_cndmask_b32_e64 [[VRESULT:v[0-9]+]], 0, -1, [[AND]] +; SI: buffer_store_dword [[VRESULT]] define void @f64_one(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp one double %a, %b @@ -83,9 +83,8 @@ entry: ; FUNC-LABEL: {{^}}f64_ueq: ; SI: v_cmp_u_f64 ; SI: v_cmp_eq_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f64_ueq(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ueq double %a, %b @@ -97,9 +96,8 @@ entry: ; FUNC-LABEL: {{^}}f64_ugt: ; SI: v_cmp_u_f64 ; SI: v_cmp_gt_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f64_ugt(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ugt double %a, %b @@ -111,9 +109,8 @@ entry: ; FUNC-LABEL: {{^}}f64_uge: ; SI: v_cmp_u_f64 ; SI: v_cmp_ge_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f64_uge(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp uge double %a, %b @@ -125,9 +122,8 @@ entry: ; FUNC-LABEL: {{^}}f64_ult: ; SI: v_cmp_u_f64 ; SI: v_cmp_lt_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f64_ult(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ult double %a, %b @@ -139,9 +135,8 @@ entry: ; FUNC-LABEL: {{^}}f64_ule: ; SI: v_cmp_u_f64 ; SI: v_cmp_le_f64 -; SI: v_cndmask_b32_e64 -; SI: v_cndmask_b32_e64 -; SI: v_or_b32_e32 +; SI: s_or_b64 +; SI: v_cndmask_b32 define void @f64_ule(i32 addrspace(1)* %out, double %a, double %b) { entry: %0 = fcmp ule double %a, %b diff --git a/llvm/test/CodeGen/R600/sgpr-control-flow.ll b/llvm/test/CodeGen/R600/sgpr-control-flow.ll index d8b8dffa7fa..667c4ea3a4e 100644 --- a/llvm/test/CodeGen/R600/sgpr-control-flow.ll +++ b/llvm/test/CodeGen/R600/sgpr-control-flow.ll @@ -59,6 +59,47 @@ endif: ret void } +; FIXME: Should write to different SGPR pairs instead of copying to +; VALU for i1 phi. + +; SI-LABEL: {{^}}sgpr_if_else_valu_cmp_phi_br: +; SI: buffer_load_dword [[AVAL:v[0-9]+]] +; SI: v_cmp_lt_i32_e64 [[CMP_IF:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0 +; SI: v_cndmask_b32_e64 [[V_CMP:v[0-9]+]], 0, -1, [[CMP_IF]] + +; SI: BB2_1: +; SI: buffer_load_dword [[AVAL:v[0-9]+]] +; SI: v_cmp_eq_i32_e64 [[CMP_ELSE:s\[[0-9]+:[0-9]+\]]], [[AVAL]], 0 +; SI: v_cndmask_b32_e64 [[V_CMP]], 0, -1, [[CMP_ELSE]] + +; SI: v_cmp_ne_i32_e64 [[CMP_CMP:s\[[0-9]+:[0-9]+\]]], [[V_CMP]], 0 +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, [[CMP_CMP]] +; SI: buffer_store_dword [[RESULT]] +define void @sgpr_if_else_valu_cmp_phi_br(i32 addrspace(1)* %out, i32 addrspace(1)* %a, i32 addrspace(1)* %b) { +entry: + %tid = call i32 @llvm.r600.read.tidig.x() #0 + %tmp1 = icmp eq i32 %tid, 0 + br i1 %tmp1, label %if, label %else + +if: + %gep.if = getelementptr i32 addrspace(1)* %a, i32 %tid + %a.val = load i32 addrspace(1)* %gep.if + %cmp.if = icmp eq i32 %a.val, 0 + br label %endif + +else: + %gep.else = getelementptr i32 addrspace(1)* %b, i32 %tid + %b.val = load i32 addrspace(1)* %gep.else + %cmp.else = icmp slt i32 %b.val, 0 + br label %endif + +endif: + %tmp4 = phi i1 [%cmp.if, %if], [%cmp.else, %else] + %ext = sext i1 %tmp4 to i32 + store i32 %ext, i32 addrspace(1)* %out + ret void +} + declare i32 @llvm.r600.read.tidig.x() #0 attributes #0 = { readnone } diff --git a/llvm/test/CodeGen/R600/valu-i1.ll b/llvm/test/CodeGen/R600/valu-i1.ll index a193077067e..7b9f3343980 100644 --- a/llvm/test/CodeGen/R600/valu-i1.ll +++ b/llvm/test/CodeGen/R600/valu-i1.ll @@ -1,10 +1,13 @@ -; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s +; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs -enable-misched -asm-verbose < %s | FileCheck -check-prefix=SI %s +declare i32 @llvm.r600.read.tidig.x() nounwind readnone + +; SI-LABEL: @test_if ; Make sure the i1 values created by the cfg structurizer pass are ; moved using VALU instructions ; SI-NOT: s_mov_b64 s[{{[0-9]:[0-9]}}], -1 ; SI: v_mov_b32_e32 v{{[0-9]}}, -1 -define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) { +define void @test_if(i32 %a, i32 %b, i32 addrspace(1)* %src, i32 addrspace(1)* %dst) #1 { entry: switch i32 %a, label %default [ i32 0, label %case0 @@ -37,3 +40,150 @@ else: end: ret void } + +; SI-LABEL: @simple_test_v_if +; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0 +; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]] +; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] + +; SI: ; BB#1 +; SI: buffer_store_dword +; SI: s_endpgm + +; SI: BB1_2: +; SI: s_or_b64 exec, exec, [[BR_SREG]] +; SI: s_endpgm +define void @simple_test_v_if(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %is.0 = icmp ne i32 %tid, 0 + br i1 %is.0, label %store, label %exit + +store: + %gep = getelementptr i32 addrspace(1)* %dst, i32 %tid + store i32 999, i32 addrspace(1)* %gep + ret void + +exit: + ret void +} + +; SI-LABEL: @simple_test_v_loop +; SI: v_cmp_ne_i32_e64 [[BR_SREG:s\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}, 0 +; SI: s_and_saveexec_b64 [[BR_SREG]], [[BR_SREG]] +; SI: s_xor_b64 [[BR_SREG]], exec, [[BR_SREG]] +; SI: s_cbranch_execz BB2_2 + +; SI: ; BB#1: +; SI: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, 0{{$}} + +; SI: BB2_3: +; SI: buffer_load_dword +; SI: buffer_store_dword +; SI: v_cmp_eq_i32_e32 vcc, +; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]] +; SI: v_add_i32_e32 {{v[0-9]+}}, 1, {{v[0-9]+}} +; SI: s_andn2_b64 exec, exec, [[OR_SREG]] +; SI: s_cbranch_execnz BB2_3 + +define void @simple_test_v_loop(i32 addrspace(1)* %dst, i32 addrspace(1)* %src) #1 { +entry: + %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone + %is.0 = icmp ne i32 %tid, 0 + %limit = add i32 %tid, 64 + br i1 %is.0, label %loop, label %exit + +loop: + %i = phi i32 [%tid, %entry], [%i.inc, %loop] + %gep.src = getelementptr i32 addrspace(1)* %src, i32 %i + %gep.dst = getelementptr i32 addrspace(1)* %dst, i32 %i + %load = load i32 addrspace(1)* %src + store i32 %load, i32 addrspace(1)* %gep.dst + %i.inc = add nsw i32 %i, 1 + %cmp = icmp eq i32 %limit, %i.inc + br i1 %cmp, label %exit, label %loop + +exit: + ret void +} + +; SI-LABEL: @multi_vcond_loop + +; Load loop limit from buffer +; Branch to exit if uniformly not taken +; SI: ; BB#0: +; SI: buffer_load_dword [[VBOUND:v[0-9]+]] +; SI: v_cmp_gt_i32_e64 [[OUTER_CMP_SREG:s\[[0-9]+:[0-9]+\]]] +; SI: s_and_saveexec_b64 [[OUTER_CMP_SREG]], [[OUTER_CMP_SREG]] +; SI: s_xor_b64 [[OUTER_CMP_SREG]], exec, [[OUTER_CMP_SREG]] +; SI: s_cbranch_execz BB3_2 + +; Initialize inner condition to false +; SI: ; BB#1: +; SI: s_mov_b64 [[ZERO:s\[[0-9]+:[0-9]+\]]], 0{{$}} +; SI: s_mov_b64 [[COND_STATE:s\[[0-9]+:[0-9]+\]]], [[ZERO]] + +; Clear exec bits for workitems that load -1s +; SI: BB3_3: +; SI: buffer_load_dword [[A:v[0-9]+]] +; SI: buffer_load_dword [[B:v[0-9]+]] +; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], [[A]], -1 +; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_1:s\[[0-9]+:[0-9]+\]]], [[B]], -1 +; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]] +; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]] +; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]] +; SI: s_cbranch_execz BB3_5 + +; SI: BB#4: +; SI: buffer_store_dword +; SI: v_cmp_ge_i64_e32 vcc +; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]] + +; SI: BB3_5: +; SI: s_or_b64 exec, exec, [[ORNEG1]] +; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]] +; SI: s_andn2_b64 exec, exec, [[COND_STATE]] +; SI: s_cbranch_execnz BB3_3 + +; SI: BB#6 +; SI: s_or_b64 exec, exec, [[COND_STATE]] + +; SI: BB3_2: +; SI-NOT: [[COND_STATE]] +; SI: s_endpgm + +define void @multi_vcond_loop(i32 addrspace(1)* noalias nocapture %arg, i32 addrspace(1)* noalias nocapture readonly %arg1, i32 addrspace(1)* noalias nocapture readonly %arg2, i32 addrspace(1)* noalias nocapture readonly %arg3) #1 { +bb: + %tmp = tail call i32 @llvm.r600.read.tidig.x() #0 + %tmp4 = sext i32 %tmp to i64 + %tmp5 = getelementptr inbounds i32 addrspace(1)* %arg3, i64 %tmp4 + %tmp6 = load i32 addrspace(1)* %tmp5, align 4 + %tmp7 = icmp sgt i32 %tmp6, 0 + %tmp8 = sext i32 %tmp6 to i64 + br i1 %tmp7, label %bb10, label %bb26 + +bb10: ; preds = %bb, %bb20 + %tmp11 = phi i64 [ %tmp23, %bb20 ], [ 0, %bb ] + %tmp12 = add nsw i64 %tmp11, %tmp4 + %tmp13 = getelementptr inbounds i32 addrspace(1)* %arg1, i64 %tmp12 + %tmp14 = load i32 addrspace(1)* %tmp13, align 4 + %tmp15 = getelementptr inbounds i32 addrspace(1)* %arg2, i64 %tmp12 + %tmp16 = load i32 addrspace(1)* %tmp15, align 4 + %tmp17 = icmp ne i32 %tmp14, -1 + %tmp18 = icmp ne i32 %tmp16, -1 + %tmp19 = and i1 %tmp17, %tmp18 + br i1 %tmp19, label %bb20, label %bb26 + +bb20: ; preds = %bb10 + %tmp21 = add nsw i32 %tmp16, %tmp14 + %tmp22 = getelementptr inbounds i32 addrspace(1)* %arg, i64 %tmp12 + store i32 %tmp21, i32 addrspace(1)* %tmp22, align 4 + %tmp23 = add nuw nsw i64 %tmp11, 1 + %tmp24 = icmp slt i64 %tmp23, %tmp8 + br i1 %tmp24, label %bb10, label %bb26 + +bb26: ; preds = %bb10, %bb20, %bb + ret void +} + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } diff --git a/llvm/test/CodeGen/R600/xor.ll b/llvm/test/CodeGen/R600/xor.ll index be47f8c0598..bf98e7df86a 100644 --- a/llvm/test/CodeGen/R600/xor.ll +++ b/llvm/test/CodeGen/R600/xor.ll @@ -39,19 +39,37 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ; FUNC-LABEL: {{^}}xor_i1: ; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} -; SI: v_xor_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} - +; SI-DAG: v_cmp_ge_f32_e64 [[CMP0:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 0.0 +; SI-DAG: v_cmp_ge_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], {{v[0-9]+}}, 1.0 +; SI: s_xor_b64 [[XOR:s\[[0-9]+:[0-9]+\]]], [[CMP0]], [[CMP1]] +; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}, [[XOR]] +; SI: buffer_store_dword [[RESULT]] +; SI: s_endpgm define void @xor_i1(float addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { %a = load float addrspace(1) * %in0 %b = load float addrspace(1) * %in1 %acmp = fcmp oge float %a, 0.000000e+00 - %bcmp = fcmp oge float %b, 0.000000e+00 + %bcmp = fcmp oge float %b, 1.000000e+00 %xor = xor i1 %acmp, %bcmp %result = select i1 %xor, float %a, float %b store float %result, float addrspace(1)* %out ret void } +; FUNC-LABEL: {{^}}v_xor_i1: +; SI: buffer_load_ubyte [[A:v[0-9]+]] +; SI: buffer_load_ubyte [[B:v[0-9]+]] +; SI: v_xor_b32_e32 [[XOR:v[0-9]+]], [[B]], [[A]] +; SI: v_and_b32_e32 [[RESULT:v[0-9]+]], 1, [[XOR]] +; SI: buffer_store_byte [[RESULT]] +define void @v_xor_i1(i1 addrspace(1)* %out, i1 addrspace(1)* %in0, i1 addrspace(1)* %in1) { + %a = load i1 addrspace(1)* %in0 + %b = load i1 addrspace(1)* %in1 + %xor = xor i1 %a, %b + store i1 %xor, i1 addrspace(1)* %out + ret void +} + ; FUNC-LABEL: {{^}}vector_xor_i32: ; SI: v_xor_b32_e32 define void @vector_xor_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in0, i32 addrspace(1)* %in1) { |