summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/AMDGPU/SIInstrInfo.td105
-rw-r--r--llvm/lib/Target/AMDGPU/SISchedule.td16
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/valu-i1.ll12
4 files changed, 83 insertions, 56 deletions
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 674f5b70836..7eb6ab319bb 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -1490,19 +1490,24 @@ multiclass VOP3b_2_3_m <vop op, dag outs, dag ins, string asm,
multiclass VOP3_C_m <vop op, dag outs, dag ins, string asm,
list<dag> pattern, string opName,
- bit HasMods, bit defExec, string revOp> {
+ bit HasMods, bit defExec,
+ string revOp, list<SchedReadWrite> sched> {
def "" : VOP3_Pseudo <outs, ins, pattern, opName>,
- VOP2_REV<revOp#"_e64", !eq(revOp, opName)>;
+ VOP2_REV<revOp#"_e64", !eq(revOp, opName)> {
+ let SchedRW = sched;
+ }
def _si : VOP3_Real_si <op.SI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
}
def _vi : VOP3_Real_vi <op.VI3, outs, ins, asm, opName>,
VOP3DisableFields<1, 0, HasMods> {
let Defs = !if(defExec, [EXEC], []);
+ let SchedRW = sched;
}
}
@@ -1690,39 +1695,40 @@ class VOPC_Pseudo <dag ins, list<dag> pattern, string opName> :
multiclass VOPC_m <vopc op, dag ins, string op_asm, list<dag> pattern,
string opName, bit DefExec, VOPProfile p,
+ list<SchedReadWrite> sched,
string revOpName = "", string asm = opName#"_e32 "#op_asm,
string alias_asm = opName#" "#op_asm> {
- def "" : VOPC_Pseudo <ins, pattern, opName>;
+ def "" : VOPC_Pseudo <ins, pattern, opName> {
+ let SchedRW = sched;
+ }
let AssemblerPredicates = [isSICI] in {
+ def _si : VOPC<op.SI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.SI> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let hasSideEffects = DefExec;
+ let SchedRW = sched;
+ }
- def _si : VOPC<op.SI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.SI> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let hasSideEffects = DefExec;
- }
-
- def : SIInstAlias <
- alias_asm,
- (!cast<Instruction>(NAME#"_e32_si") p.Src0RC32:$src0, p.Src1RC32:$src1)
- >;
+ def : SIInstAlias <
+ alias_asm,
+ (!cast<Instruction>(NAME#"_e32_si") p.Src0RC32:$src0, p.Src1RC32:$src1)
+ >;
} // End AssemblerPredicates = [isSICI]
-
let AssemblerPredicates = [isVI] in {
+ def _vi : VOPC<op.VI, ins, asm, []>,
+ SIMCInstr <opName#"_e32", SISubtarget.VI> {
+ let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
+ let hasSideEffects = DefExec;
+ let SchedRW = sched;
+ }
- def _vi : VOPC<op.VI, ins, asm, []>,
- SIMCInstr <opName#"_e32", SISubtarget.VI> {
- let Defs = !if(DefExec, [VCC, EXEC], [VCC]);
- let hasSideEffects = DefExec;
- }
-
- def : SIInstAlias <
- alias_asm,
- (!cast<Instruction>(NAME#"_e32_vi") p.Src0RC32:$src0, p.Src1RC32:$src1)
- >;
-
+ def : SIInstAlias <
+ alias_asm,
+ (!cast<Instruction>(NAME#"_e32_vi") p.Src0RC32:$src0, p.Src1RC32:$src1)
+ >;
} // End AssemblerPredicates = [isVI]
}
@@ -1730,11 +1736,13 @@ multiclass VOPC_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
bit HasMods, bit DefExec, string revOp,
- VOPProfile p> {
- defm _e32 : VOPC_m <op, ins32, asm32, pat32, opName, DefExec, p>;
+ VOPProfile p,
+ list<SchedReadWrite> sched> {
+ defm _e32 : VOPC_m <op, ins32, asm32, pat32, opName, DefExec, p, sched>;
defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>;
+ opName, HasMods, DefExec, revOp,
+ sched>;
}
// Special case for class instructions which only have modifiers on
@@ -1743,18 +1751,21 @@ multiclass VOPC_Class_Helper <vopc op, string opName,
dag ins32, string asm32, list<dag> pat32,
dag out64, dag ins64, string asm64, list<dag> pat64,
bit HasMods, bit DefExec, string revOp,
- VOPProfile p> {
- defm _e32 : VOPC_m <op, ins32, asm32, pat32, opName, DefExec, p>;
+ VOPProfile p,
+ list<SchedReadWrite> sched> {
+ defm _e32 : VOPC_m <op, ins32, asm32, pat32, opName, DefExec, p, sched>;
defm _e64 : VOP3_C_m <op, out64, ins64, opName#asm64, pat64,
- opName, HasMods, DefExec, revOp>,
+ opName, HasMods, DefExec, revOp, sched>,
VOP3DisableModFields<1, 0, 0>;
}
multiclass VOPCInst <vopc op, string opName,
VOPProfile P, PatLeaf cond = COND_NULL,
string revOp = opName,
- bit DefExec = 0> : VOPC_Helper <
+ bit DefExec = 0,
+ list<SchedReadWrite> sched = [Write32Bit]> :
+ VOPC_Helper <
op, opName,
P.Ins32, P.Asm32, [],
(outs VOPDstS64:$dst), P.Ins64, P.Asm64,
@@ -1765,11 +1776,12 @@ multiclass VOPCInst <vopc op, string opName,
(P.Src1VT (VOP3Mods P.Src1VT:$src1, i32:$src1_modifiers)),
cond))],
[(set i1:$dst, (setcc P.Src0VT:$src0, P.Src1VT:$src1, cond))]),
- P.HasModifiers, DefExec, revOp, P
+ P.HasModifiers, DefExec, revOp, P, sched
>;
multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
- bit DefExec = 0> : VOPC_Class_Helper <
+ bit DefExec = 0,
+ list<SchedReadWrite> sched> : VOPC_Class_Helper <
op, opName,
P.Ins32, P.Asm32, [],
(outs VOPDstS64:$dst), P.Ins64, P.Asm64,
@@ -1777,7 +1789,7 @@ multiclass VOPCClassInst <vopc op, string opName, VOPProfile P,
[(set i1:$dst,
(AMDGPUfp_class (P.Src0VT (VOP3Mods0Clamp0OMod P.Src0VT:$src0, i32:$src0_modifiers)), P.Src1VT:$src1))],
[(set i1:$dst, (AMDGPUfp_class P.Src0VT:$src0, P.Src1VT:$src1))]),
- P.HasModifiers, DefExec, opName, P
+ P.HasModifiers, DefExec, opName, P, sched
>;
@@ -1785,31 +1797,32 @@ multiclass VOPC_F32 <vopc op, string opName, PatLeaf cond = COND_NULL, string re
VOPCInst <op, opName, VOPC_I1_F32_F32, cond, revOp>;
multiclass VOPC_F64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOPC_I1_F64_F64, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_F64_F64, cond, revOp, 0, [WriteDoubleAdd]>;
multiclass VOPC_I32 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
VOPCInst <op, opName, VOPC_I1_I32_I32, cond, revOp>;
multiclass VOPC_I64 <vopc op, string opName, PatLeaf cond = COND_NULL, string revOp = opName> :
- VOPCInst <op, opName, VOPC_I1_I64_I64, cond, revOp>;
+ VOPCInst <op, opName, VOPC_I1_I64_I64, cond, revOp, 0, [Write64Bit]>;
multiclass VOPCX <vopc op, string opName, VOPProfile P,
PatLeaf cond = COND_NULL,
+ list<SchedReadWrite> sched,
string revOp = "">
- : VOPCInst <op, opName, P, cond, revOp, 1>;
+ : VOPCInst <op, opName, P, cond, revOp, 1, sched>;
multiclass VOPCX_F32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_F32_F32, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_F32_F32, COND_NULL, [Write32Bit], revOp>;
multiclass VOPCX_F64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_F64_F64, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_F64_F64, COND_NULL, [WriteDoubleAdd], revOp>;
multiclass VOPCX_I32 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_I32_I32, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_I32_I32, COND_NULL, [Write32Bit], revOp>;
multiclass VOPCX_I64 <vopc op, string opName, string revOp = opName> :
- VOPCX <op, opName, VOPC_I1_I64_I64, COND_NULL, revOp>;
+ VOPCX <op, opName, VOPC_I1_I64_I64, COND_NULL, [Write64Bit], revOp>;
multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
list<dag> pat, int NumSrcArgs, bit HasMods> : VOP3_m <
@@ -1817,16 +1830,16 @@ multiclass VOP3_Helper <vop3 op, string opName, dag outs, dag ins, string asm,
>;
multiclass VOPC_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F32_I32, 0>;
+ VOPCClassInst <op, opName, VOPC_I1_F32_I32, 0, [Write32Bit]>;
multiclass VOPCX_CLASS_F32 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F32_I32, 1>;
+ VOPCClassInst <op, opName, VOPC_I1_F32_I32, 1, [Write32Bit]>;
multiclass VOPC_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F64_I32, 0>;
+ VOPCClassInst <op, opName, VOPC_I1_F64_I32, 0, [WriteDoubleAdd]>;
multiclass VOPCX_CLASS_F64 <vopc op, string opName> :
- VOPCClassInst <op, opName, VOPC_I1_F64_I32, 1>;
+ VOPCClassInst <op, opName, VOPC_I1_F64_I32, 1, [WriteDoubleAdd]>;
multiclass VOP3Inst <vop3 op, string opName, VOPProfile P,
SDPatternOperator node = null_frag> : VOP3_Helper <
diff --git a/llvm/lib/Target/AMDGPU/SISchedule.td b/llvm/lib/Target/AMDGPU/SISchedule.td
index da7601492f9..cd77e519abb 100644
--- a/llvm/lib/Target/AMDGPU/SISchedule.td
+++ b/llvm/lib/Target/AMDGPU/SISchedule.td
@@ -22,12 +22,23 @@ def WriteBarrier : SchedWrite;
// Vector ALU instructions
def Write32Bit : SchedWrite;
def WriteQuarterRate32 : SchedWrite;
+def WriteFullOrQuarterRate32 : SchedWrite;
def WriteFloatFMA : SchedWrite;
-def WriteDouble : SchedWrite;
+// Slow quarter rate f64 instruction.
+def WriteDouble : SchedWrite;
+
+// half rate f64 instruction (same as v_add_f64)
def WriteDoubleAdd : SchedWrite;
+// Half rate 64-bit instructions.
+def Write64Bit : SchedWrite;
+
+// FIXME: Should there be a class for instructions which are VALU
+// instructions and have VALU rates, but write to the SALU (i.e. VOPC
+// instructions)
+
def SIFullSpeedModel : SchedMachineModel;
def SIQuarterSpeedModel : SchedMachineModel;
@@ -54,7 +65,7 @@ class HWVALUWriteRes<SchedWrite write, int latency> :
// The latency numbers are taken from AMD Accelerated Parallel Processing
-// guide. They may not be acurate.
+// guide. They may not be accurate.
// The latency values are 1 / (operations / cycle) / 4.
multiclass SICommonWriteRes {
@@ -68,6 +79,7 @@ multiclass SICommonWriteRes {
def : HWWriteRes<WriteBarrier, [HWBranch], 500>; // XXX: Guessed ???
def : HWVALUWriteRes<Write32Bit, 1>;
+ def : HWVALUWriteRes<Write64Bit, 2>;
def : HWVALUWriteRes<WriteQuarterRate32, 4>;
}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
index 805a88b59c7..80eb3b93f8e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.AMDGPU.class.ll
@@ -271,7 +271,8 @@ define void @test_class_64_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI: s_load_dwordx2 [[SA:s\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}, 0xb
; SI: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
; SI: v_cmp_class_f64_e32 vcc, [[SA]], [[MASK]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NOT: vcc
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI-NEXT: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
@@ -285,7 +286,8 @@ define void @test_class_full_mask_f64(i32 addrspace(1)* %out, double %a) #0 {
; SI-DAG: buffer_load_dwordx2 [[VA:v\[[0-9]+:[0-9]+\]]]
; SI-DAG: v_mov_b32_e32 [[MASK:v[0-9]+]], 0x1ff{{$}}
; SI: v_cmp_class_f64_e32 vcc, [[VA]], [[MASK]]
-; SI-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
+; SI-NOT: vcc
+; SI: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, -1, vcc
; SI: buffer_store_dword [[RESULT]]
; SI: s_endpgm
define void @v_test_class_full_mask_f64(i32 addrspace(1)* %out, double addrspace(1)* %in) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/valu-i1.ll b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
index 7d0ebd139f5..c27702813a8 100644
--- a/llvm/test/CodeGen/AMDGPU/valu-i1.ll
+++ b/llvm/test/CodeGen/AMDGPU/valu-i1.ll
@@ -128,18 +128,18 @@ exit:
; SI-DAG: v_cmp_ne_i32_e64 [[NEG1_CHECK_0:s\[[0-9]+:[0-9]+\]]], -1, [[A]]
; SI-DAG: v_cmp_ne_i32_e32 [[NEG1_CHECK_1:vcc]], -1, [[B]]
; SI: s_and_b64 [[ORNEG1:s\[[0-9]+:[0-9]+\]]], [[NEG1_CHECK_1]], [[NEG1_CHECK_0]]
-; SI: s_and_saveexec_b64 [[ORNEG1]], [[ORNEG1]]
-; SI: s_xor_b64 [[ORNEG1]], exec, [[ORNEG1]]
+; SI: s_and_saveexec_b64 [[ORNEG2:s\[[0-9]+:[0-9]+\]]], [[ORNEG1]]
+; SI: s_xor_b64 [[ORNEG2]], exec, [[ORNEG2]]
; SI: s_cbranch_execz BB3_5
; SI: BB#4:
; SI: buffer_store_dword
-; SI: v_cmp_ge_i64_e32 vcc
-; SI: s_or_b64 [[COND_STATE]], vcc, [[COND_STATE]]
+; SI: v_cmp_ge_i64_e64 [[CMP:s\[[0-9]+:[0-9]+\]]]
+; SI: s_or_b64 [[COND_STATE]], [[CMP]], [[COND_STATE]]
; SI: BB3_5:
-; SI: s_or_b64 exec, exec, [[ORNEG1]]
-; SI: s_or_b64 [[COND_STATE]], [[ORNEG1]], [[COND_STATE]]
+; SI: s_or_b64 exec, exec, [[ORNEG2]]
+; SI: s_or_b64 [[COND_STATE]], [[ORNEG2]], [[COND_STATE]]
; SI: s_andn2_b64 exec, exec, [[COND_STATE]]
; SI: s_cbranch_execnz BB3_3
OpenPOWER on IntegriCloud