-rw-r--r--  llvm/lib/Target/AMDGPU/SIDefines.h                 3
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrFormats.td           5
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.h              11
-rw-r--r--  llvm/lib/Target/AMDGPU/SIInstrInfo.td             45
-rw-r--r--  llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp   62
-rw-r--r--  llvm/lib/Target/AMDGPU/SOPInstructions.td         60
-rw-r--r--  llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll         2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/sopk-compares.ll        555
8 files changed, 691 insertions, 52 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIDefines.h b/llvm/lib/Target/AMDGPU/SIDefines.h
index 5c29f82fa23..643b8722d91 100644
--- a/llvm/lib/Target/AMDGPU/SIDefines.h
+++ b/llvm/lib/Target/AMDGPU/SIDefines.h
@@ -43,7 +43,8 @@ enum {
SGPRSpill = 1 << 24,
VOPAsmPrefer32Bit = 1 << 25,
Gather4 = 1 << 26,
- DisableWQM = 1 << 27
+ DisableWQM = 1 << 27,
+ SOPK_ZEXT = 1 << 28
};
}
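
The new SOPK_ZEXT flag follows the existing TSFlags convention: every instruction property occupies one bit of a 64-bit mask, so a property test is a single bitwise AND. Below is a minimal self-contained C++ sketch of that pattern; the struct is an illustrative stand-in for llvm::MCInstrDesc, not the real type.

#include <cstdint>

namespace SketchFlags {
enum : uint64_t {
  DisableWQM = UINT64_C(1) << 27,
  SOPK_ZEXT  = UINT64_C(1) << 28, // imm16 is zero-extended rather than sign-extended
};
} // namespace SketchFlags

struct MCInstrDescSketch { uint64_t TSFlags; }; // stand-in for llvm::MCInstrDesc

static bool sopkIsZextSketch(const MCInstrDescSketch &Desc) {
  return Desc.TSFlags & SketchFlags::SOPK_ZEXT; // one AND per property query
}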
diff --git a/llvm/lib/Target/AMDGPU/SIInstrFormats.td b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
index aa15e09085a..33b9b122e2e 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrFormats.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrFormats.td
@@ -56,6 +56,10 @@ class InstSI <dag outs, dag ins, string asm = "",
// Whether WQM _must_ be disabled for this instruction.
field bits<1> DisableWQM = 0;
+ // Most SOPK instructions treat the immediate as a signed 16-bit value;
+ // however, some treat it as unsigned.
+ field bits<1> SOPKZext = 0;
+
// These need to be kept in sync with the enum in SIInstrFlags.
let TSFlags{0} = VM_CNT;
let TSFlags{1} = EXP_CNT;
@@ -89,6 +93,7 @@ class InstSI <dag outs, dag ins, string asm = "",
let TSFlags{25} = VOPAsmPrefer32Bit;
let TSFlags{26} = Gather4;
let TSFlags{27} = DisableWQM;
+ let TSFlags{28} = SOPKZext;
let SchedRW = [Write32Bit];
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.h b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
index aced35a852f..2740f2f5412 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.h
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.h
@@ -391,6 +391,14 @@ public:
return MI.getDesc().TSFlags & SIInstrFlags::VM_CNT;
}
+ static bool sopkIsZext(const MachineInstr &MI) {
+ return MI.getDesc().TSFlags & SIInstrFlags::SOPK_ZEXT;
+ }
+
+ bool sopkIsZext(uint16_t Opcode) const {
+ return get(Opcode).TSFlags & SIInstrFlags::SOPK_ZEXT;
+ }
+
bool isVGPRCopy(const MachineInstr &MI) const {
assert(MI.isCopy());
unsigned Dest = MI.getOperand(0).getReg();
@@ -603,6 +611,9 @@ namespace AMDGPU {
LLVM_READONLY
int getAtomicNoRetOp(uint16_t Opcode);
+ LLVM_READONLY
+ int getSOPKOp(uint16_t Opcode);
+
const uint64_t RSRC_DATA_FORMAT = 0xf00000000000LL;
const uint64_t RSRC_ELEMENT_SIZE_SHIFT = (32 + 19);
const uint64_t RSRC_INDEX_STRIDE_SHIFT = (32 + 21);
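
A usage sketch tying the two new hooks together, under the assumption that the AMDGPU backend headers are on the include path; the helper name and shape are hypothetical, but it mirrors how the shrinking pass below consumes getSOPKOp and sopkIsZext.

#include "SIInstrInfo.h"             // llvm::SIInstrInfo, llvm::AMDGPU::getSOPKOp
#include "llvm/Support/MathExtras.h" // llvm::isInt, llvm::isUInt
#include <cstdint>

static bool sopkImmIsLegal(const llvm::SIInstrInfo *TII, uint16_t SOPCOpc,
                           int64_t Imm) {
  int SOPKOpc = llvm::AMDGPU::getSOPKOp(SOPCOpc); // -1 if no SOPK equivalent
  if (SOPKOpc == -1)
    return false;
  // SOPKZext opcodes zero-extend imm16; the others sign-extend it.
  return TII->sopkIsZext(SOPKOpc) ? llvm::isUInt<16>(Imm)
                                  : llvm::isInt<16>(Imm);
}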
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 27b53c9e151..37f8f17bff3 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1193,7 +1193,7 @@ class VOP <string opName> {
string OpName = opName;
}
-class VOP2_REV <string revOp, bit isOrig> {
+class Commutable_REV <string revOp, bit isOrig> {
string RevOp = revOp;
bit IsOrig = isOrig;
}
@@ -1325,7 +1325,7 @@ multiclass VOP2SI_m <vop2 op, string opName, VOPProfile p, list<dag> pattern,
string revOp> {
def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
}
@@ -1334,7 +1334,7 @@ multiclass VOP2_m <vop2 op, string opName, VOPProfile p, list <dag> pattern,
string revOp> {
def "" : VOP2_Pseudo <p.Outs32, p.Ins32, pattern, opName>,
- VOP2_REV<revOp#"_e32", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e32", !eq(revOp, opName)>;
def _si : VOP2_Real_si <opName, op, p.Outs32, p.Ins32, p.Asm32>;
@@ -1523,7 +1523,7 @@ multiclass VOP3_2_m <vop op, dag outs, dag ins, string asm,
bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
VOP3DisableFields<1, 0, HasMods>;
@@ -1537,7 +1537,7 @@ multiclass VOP3SI_2_m <vop op, dag outs, dag ins, string asm,
bit HasMods = 1> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)>;
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName, HasMods>,
VOP3DisableFields<1, 0, HasMods>;
@@ -1578,7 +1578,7 @@ multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
string revOp, list<SchedReadWrite> sched> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName, HasMods>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ Commutable_REV<revOp#"_e64", !eq(revOp, opName)> {
let Defs = !if(defExec, [EXEC], []);
let SchedRW = sched;
}
@@ -1829,7 +1829,7 @@ multiclass VOPC_m <vopc op, dag ins, string op_asm, list<dag> pattern,
string revOpName = "", string asm = opName#"_e32 "#op_asm,
string alias_asm = opName#" "#op_asm> {
def "" : VOPC_Pseudo <ins, pattern, opName>,
- VOP2_REV<revOpName#"_e32", !eq(revOpName, opName)> {
+ Commutable_REV<revOpName#"_e32", !eq(revOpName, opName)> {
let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
let SchedRW = sched;
let isConvergent = DefExec;
@@ -2106,7 +2106,7 @@ def getMaskedMIMGOp : InstrMapping {
// Maps a commuted opcode to its original version
def getCommuteOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
+ let FilterClass = "Commutable_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["0"];
@@ -2115,31 +2115,13 @@ def getCommuteOrig : InstrMapping {
// Maps an original opcode to its commuted version
def getCommuteRev : InstrMapping {
- let FilterClass = "VOP2_REV";
+ let FilterClass = "Commutable_REV";
let RowFields = ["RevOp"];
let ColFields = ["IsOrig"];
let KeyCol = ["1"];
let ValueCols = [["0"]];
}
-def getCommuteCmpOrig : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["0"];
- let ValueCols = [["1"]];
-}
-
-// Maps an original opcode to its commuted version
-def getCommuteCmpRev : InstrMapping {
- let FilterClass = "VOP2_REV";
- let RowFields = ["RevOp"];
- let ColFields = ["IsOrig"];
- let KeyCol = ["1"];
- let ValueCols = [["0"]];
-}
-
-
def getMCOpcodeGen : InstrMapping {
let FilterClass = "SIMCInstr";
let RowFields = ["PseudoInstr"];
@@ -2149,6 +2131,15 @@ def getMCOpcodeGen : InstrMapping {
[!cast<string>(SIEncodingFamily.VI)]];
}
+// Get equivalent SOPK instruction.
+def getSOPKOp : InstrMapping {
+ let FilterClass = "SOPKInstTable";
+ let RowFields = ["BaseCmpOp"];
+ let ColFields = ["IsSOPK"];
+ let KeyCol = ["0"];
+ let ValueCols = [["1"]];
+}
+
def getAddr64Inst : InstrMapping {
let FilterClass = "MUBUFAddr64Table";
let RowFields = ["OpName"];
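
TableGen expands the getSOPKOp InstrMapping into a generated lookup: rows are keyed by the shared BaseCmpOp string, the IsSOPK=0 column holds the SOPC opcode, and the IsSOPK=1 column holds its SOPK twin. A conceptual C++ sketch of that table follows; the opcode constants are hypothetical stand-ins, not the generated AMDGPU enum.

#include <cstdint>

// Hypothetical stand-ins for the generated opcode enum.
enum : uint16_t { S_CMP_EQ_I32_OP, S_CMPK_EQ_I32_OP, S_CMP_LG_I32_OP, S_CMPK_LG_I32_OP };

struct SOPKMapEntry { uint16_t SOPCOpc, SOPKOpc; }; // one row per BaseCmpOp

static const SOPKMapEntry SOPKMap[] = {
    {S_CMP_EQ_I32_OP, S_CMPK_EQ_I32_OP},
    {S_CMP_LG_I32_OP, S_CMPK_LG_I32_OP},
    // ... one entry per SOPC/SOPK pair defined in SOPInstructions.td.
};

static int getSOPKOpSketch(uint16_t Opcode) {
  for (const SOPKMapEntry &E : SOPKMap)
    if (E.SOPCOpc == Opcode)
      return E.SOPKOpc;
  return -1; // sentinel the shrinking pass checks for
}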
diff --git a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
index e72b7d496ab..2d444edd664 100644
--- a/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
+++ b/llvm/lib/Target/AMDGPU/SIShrinkInstructions.cpp
@@ -188,6 +188,26 @@ static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
return isInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
}
+static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
+ return isUInt<16>(Src.getImm()) && !TII->isInlineConstant(Src, 4);
+}
+
+static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
+ const MachineOperand &Src,
+ bool &IsUnsigned) {
+ if (isInt<16>(Src.getImm())) {
+ IsUnsigned = false;
+ return !TII->isInlineConstant(Src, 4);
+ }
+
+ if (isUInt<16>(Src.getImm())) {
+ IsUnsigned = true;
+ return !TII->isInlineConstant(Src, 4);
+ }
+
+ return false;
+}
+
/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
@@ -202,6 +222,42 @@ static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
}
}
+static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
+ // cmpk instructions compute scc = dst <cc op> imm16, so commute the
+ // instruction to get constants on the RHS.
+ if (!MI.getOperand(0).isReg())
+ TII->commuteInstruction(MI, false, 0, 1);
+
+ const MachineOperand &Src1 = MI.getOperand(1);
+ if (!Src1.isImm())
+ return;
+
+ int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
+ if (SOPKOpc == -1)
+ return;
+
+ // eq/ne is special because the imm16 can be treated as signed or unsigned.
+ if (SOPKOpc == AMDGPU::S_CMPK_EQ_I32 || SOPKOpc == AMDGPU::S_CMPK_LG_I32) {
+ bool HasUImm;
+ if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
+ if (HasUImm) {
+ SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_I32) ?
+ AMDGPU::S_CMPK_EQ_U32 : AMDGPU::S_CMPK_LG_U32;
+ }
+
+ MI.setDesc(TII->get(SOPKOpc));
+ }
+
+ return;
+ }
+
+ const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
+ if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
+ isKImmOperand(TII, Src1)) {
+ MI.setDesc(NewDesc);
+ }
+}
+
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
if (skipFunction(*MF.getFunction()))
return false;
@@ -310,6 +366,12 @@ bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
}
}
+ // Try to use s_cmpk_*
+ if (MI.isCompare() && TII->isSOPC(MI)) {
+ shrinkScalarCompare(TII, MI);
+ continue;
+ }
+
// Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
const MachineOperand &Src = MI.getOperand(1);
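
The signed/unsigned split in shrinkScalarCompare reduces to 16-bit range checks. Here is a standalone sketch of those boundaries using llvm::isInt and llvm::isUInt from MathExtras.h; the constants match the boundary cases exercised by sopk-compares.ll below.

#include "llvm/Support/MathExtras.h"
#include <cassert>
#include <cstdint>

int main() {
  // simm16 covers [-32768, 32767]; uimm16 covers [0, 65535].
  assert(llvm::isInt<16>(INT64_C(32767)));    // s_cmpk_*_i32 applies
  assert(!llvm::isInt<16>(INT64_C(32768)));   // out of simm16 range...
  assert(llvm::isUInt<16>(UINT64_C(32768)));  // ...but eq/ne can switch to s_cmpk_*_u32
  assert(llvm::isInt<16>(INT64_C(-32768)));   // simm16 minimum still shrinks
  assert(llvm::isUInt<16>(UINT64_C(65535)));  // uimm16 maximum
  assert(!llvm::isUInt<16>(UINT64_C(65536))); // no SOPK form; keep the 32-bit literal
  return 0;
}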
diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td
index 7226f2002c4..4451d1642f5 100644
--- a/llvm/lib/Target/AMDGPU/SOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td
@@ -473,6 +473,11 @@ class SOPK_Real64<bits<5> op, SOPK_Pseudo ps> :
let Inst{63-32} = imm;
}
+class SOPKInstTable <bit is_sopk, string cmpOp = ""> {
+ bit IsSOPK = is_sopk;
+ string BaseCmpOp = cmpOp;
+}
+
class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
opName,
(outs SReg_32:$sdst),
@@ -480,12 +485,12 @@ class SOPK_32 <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
"$sdst, $simm16",
pattern>;
-class SOPK_SCC <string opName, list<dag> pattern=[]> : SOPK_Pseudo <
+class SOPK_SCC <string opName, string base_op = ""> : SOPK_Pseudo <
opName,
(outs),
(ins SReg_32:$sdst, u16imm:$simm16),
- "$sdst, $simm16",
- pattern> {
+ "$sdst, $simm16", []>,
+ SOPKInstTable<1, base_op> {
let Defs = [SCC];
}
@@ -521,18 +526,21 @@ let isCompare = 1 in {
// [(set i1:$dst, (setcc i32:$src0, imm:$src1, SETEQ))]
// >;
-def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32">;
-def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32">;
-def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32">;
-def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32">;
-def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32">;
-def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32">;
-def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32">;
-def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32">;
-def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32">;
-def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32">;
-def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32">;
-def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32">;
+def S_CMPK_EQ_I32 : SOPK_SCC <"s_cmpk_eq_i32", "s_cmp_eq_i32">;
+def S_CMPK_LG_I32 : SOPK_SCC <"s_cmpk_lg_i32", "s_cmp_lg_i32">;
+def S_CMPK_GT_I32 : SOPK_SCC <"s_cmpk_gt_i32", "s_cmp_gt_i32">;
+def S_CMPK_GE_I32 : SOPK_SCC <"s_cmpk_ge_i32", "s_cmp_ge_i32">;
+def S_CMPK_LT_I32 : SOPK_SCC <"s_cmpk_lt_i32", "s_cmp_lt_i32">;
+def S_CMPK_LE_I32 : SOPK_SCC <"s_cmpk_le_i32", "s_cmp_le_i32">;
+
+let SOPKZext = 1 in {
+def S_CMPK_EQ_U32 : SOPK_SCC <"s_cmpk_eq_u32", "s_cmp_eq_u32">;
+def S_CMPK_LG_U32 : SOPK_SCC <"s_cmpk_lg_u32", "s_cmp_lg_u32">;
+def S_CMPK_GT_U32 : SOPK_SCC <"s_cmpk_gt_u32", "s_cmp_gt_u32">;
+def S_CMPK_GE_U32 : SOPK_SCC <"s_cmpk_ge_u32", "s_cmp_ge_u32">;
+def S_CMPK_LT_U32 : SOPK_SCC <"s_cmpk_lt_u32", "s_cmp_lt_u32">;
+def S_CMPK_LE_U32 : SOPK_SCC <"s_cmpk_le_u32", "s_cmp_le_u32">;
+} // End SOPKZext = 1
} // End isCompare = 1
let Defs = [SCC], isCommutable = 1, DisableEncoding = "$src0",
@@ -613,8 +621,14 @@ class SOPC_Helper <bits<7> op, RegisterOperand rc, ValueType vt,
[(set SCC, (si_setcc_uniform vt:$src0, vt:$src1, cond))] > {
}
-class SOPC_CMP_32<bits<7> op, string opName, PatLeaf cond = COND_NULL>
- : SOPC_Helper<op, SSrc_b32, i32, opName, cond>;
+class SOPC_CMP_32<bits<7> op, string opName,
+ PatLeaf cond = COND_NULL, string revOp = opName>
+ : SOPC_Helper<op, SSrc_b32, i32, opName, cond>,
+ Commutable_REV<revOp, !eq(revOp, opName)>,
+ SOPKInstTable<0, opName> {
+ let isCompare = 1;
+ let isCommutable = 1;
+}
class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
: SOPC_Base<op, SSrc_b32, SSrc_b32, opName, pattern>;
@@ -622,19 +636,19 @@ class SOPC_32<bits<7> op, string opName, list<dag> pattern = []>
class SOPC_64_32<bits<7> op, string opName, list<dag> pattern = []>
: SOPC_Base<op, SSrc_b64, SSrc_b32, opName, pattern>;
-
def S_CMP_EQ_I32 : SOPC_CMP_32 <0x00, "s_cmp_eq_i32", COND_EQ>;
def S_CMP_LG_I32 : SOPC_CMP_32 <0x01, "s_cmp_lg_i32", COND_NE>;
def S_CMP_GT_I32 : SOPC_CMP_32 <0x02, "s_cmp_gt_i32", COND_SGT>;
def S_CMP_GE_I32 : SOPC_CMP_32 <0x03, "s_cmp_ge_i32", COND_SGE>;
-def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT>;
-def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE>;
+def S_CMP_LT_I32 : SOPC_CMP_32 <0x04, "s_cmp_lt_i32", COND_SLT, "s_cmp_gt_i32">;
+def S_CMP_LE_I32 : SOPC_CMP_32 <0x05, "s_cmp_le_i32", COND_SLE, "s_cmp_ge_i32">;
def S_CMP_EQ_U32 : SOPC_CMP_32 <0x06, "s_cmp_eq_u32", COND_EQ>;
-def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE >;
+def S_CMP_LG_U32 : SOPC_CMP_32 <0x07, "s_cmp_lg_u32", COND_NE>;
def S_CMP_GT_U32 : SOPC_CMP_32 <0x08, "s_cmp_gt_u32", COND_UGT>;
def S_CMP_GE_U32 : SOPC_CMP_32 <0x09, "s_cmp_ge_u32", COND_UGE>;
-def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT>;
-def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE>;
+def S_CMP_LT_U32 : SOPC_CMP_32 <0x0a, "s_cmp_lt_u32", COND_ULT, "s_cmp_gt_u32">;
+def S_CMP_LE_U32 : SOPC_CMP_32 <0x0b, "s_cmp_le_u32", COND_ULE, "s_cmp_ge_u32">;
+
def S_BITCMP0_B32 : SOPC_32 <0x0c, "s_bitcmp0_b32">;
def S_BITCMP1_B32 : SOPC_32 <0x0d, "s_bitcmp1_b32">;
def S_BITCMP0_B64 : SOPC_64_32 <0x0e, "s_bitcmp0_b64">;
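
The Commutable_REV pairs given to the ordered SOPC compares encode the identity the pass uses when it commutes a compare to move the constant to the RHS: (C < x) is (x > C) and (C <= x) is (x >= C), while eq and lg are their own reverse. An illustrative C++ sketch with hypothetical stand-in opcodes:

#include <cstdint>

enum Opc : uint16_t { CMP_LT, CMP_GT, CMP_LE, CMP_GE, CMP_EQ, CMP_LG };

static Opc commutedCompare(Opc O) {
  switch (O) {
  case CMP_LT: return CMP_GT; // (C < x)  <=>  (x > C)
  case CMP_GT: return CMP_LT;
  case CMP_LE: return CMP_GE; // (C <= x) <=>  (x >= C)
  case CMP_GE: return CMP_LE;
  case CMP_EQ:                // eq/lg are symmetric, so they commute
  case CMP_LG: return O;      // to themselves.
  }
  return O;
}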
diff --git a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
index 133fd480e59..1a4c2259559 100644
--- a/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
+++ b/llvm/test/CodeGen/AMDGPU/si-annotate-cf.ll
@@ -92,7 +92,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; SI: s_cmp_gt_i32
; SI-NEXT: s_cbranch_scc0 [[ENDPGM:BB[0-9]+_[0-9]+]]
-; SI: s_cmp_gt_i32
+; SI: s_cmpk_gt_i32
; SI-NEXT: s_cbranch_scc1 [[ENDPGM]]
; SI: [[INFLOOP:BB[0-9]+_[0-9]+]]
diff --git a/llvm/test/CodeGen/AMDGPU/sopk-compares.ll b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
new file mode 100644
index 00000000000..d62f3fbbcd5
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/sopk-compares.ll
@@ -0,0 +1,555 @@
+; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
+
+; Since this intrinsic is exposed as a constant after isel, use it to
+; defeat the DAG's compare-with-constant canonicalizations.
+declare i32 @llvm.amdgcn.groupstaticsize() #1
+
+@lds = addrspace(3) global [512 x i32] undef, align 4
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_inline_imm:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 4{{$}}
+define void @br_scc_eq_i32_inline_imm(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 4
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_eq_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 32767
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_max_p1:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32_simm16_max_p1:
+; GCN: s_cmpk_lg_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_ne_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ne i32 %cond, 32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_eq_i32_simm16_min(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, -32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_simm16_min_m1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_eq_i32_simm16_min_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, -32769
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm15_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm15_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65535
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max:
+; GCN: s_cmpk_eq_u32 s{{[0-9]+}}, 0xffff{{$}}
+define void @br_scc_eq_i32_uimm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65535
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_eq_i32_uimm16_max_p1:
+; GCN: s_cmp_eq_i32 s{{[0-9]+}}, 0x10000{{$}}
+define void @br_scc_eq_i32_uimm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65536
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+
+; GCN-LABEL: {{^}}br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp eq i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ne i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp sgt i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x7fff{{$}}
+define void @br_scc_sgt_i32_simm16_max(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp sgt i32 %cond, 32767
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sgt_i32_simm16_max_p1:
+; GCN: s_cmp_gt_i32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_sgt_i32_simm16_max_p1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp sgt i32 %cond, 32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sge_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sge i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_slt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp slt i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_sle_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sle i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ugt_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ugt i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_uge_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp uge i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x41{{$}}
+define void @br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ult i32 %cond, 65
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x8000{{$}}
+define void @br_scc_ult_i32_min_simm16(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ult i32 %cond, -32768
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ult_i32_min_simm16_m1:
+; GCN: s_cmp_lt_u32 s{{[0-9]+}}, 0xffff7fff{{$}}
+define void @br_scc_ult_i32_min_simm16_m1(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %cmp0 = icmp ult i32 %cond, -32769
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "", ""()
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}br_scc_ule_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ule i32 %cond, %size
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_eq_i32:
+; GCN: s_cmpk_eq_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_eq_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp eq i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ne_i32:
+; GCN: s_cmpk_lg_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ne_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ne i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sgt_i32:
+; GCN: s_cmpk_lt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sgt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sgt i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sge_i32:
+; GCN: s_cmpk_le_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sge i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_slt_i32:
+; GCN: s_cmpk_gt_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_slt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp slt i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_sle_i32:
+; GCN: s_cmpk_ge_i32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_sle_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp sle i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ugt_i32:
+; GCN: s_cmpk_lt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ugt_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ugt i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_uge_i32:
+; GCN: s_cmpk_le_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_uge_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp uge i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ult_i32:
+; GCN: s_cmpk_gt_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ult_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ult i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+; GCN-LABEL: {{^}}commute_br_scc_ule_i32:
+; GCN: s_cmpk_ge_u32 s{{[0-9]+}}, 0x800{{$}}
+define void @commute_br_scc_ule_i32(i32 %cond, i32 addrspace(1)* %out) #0 {
+entry:
+ %size = call i32 @llvm.amdgcn.groupstaticsize()
+ %cmp0 = icmp ule i32 %size, %cond
+ br i1 %cmp0, label %endif, label %if
+
+if:
+ call void asm sideeffect "; $0", "v"([512 x i32] addrspace(3)* @lds)
+ br label %endif
+
+endif:
+ store volatile i32 1, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }