-rw-r--r--  llvm/lib/Target/R600/SIInstructions.td    | 14
-rw-r--r--  llvm/test/CodeGen/R600/and.ll             | 23
-rw-r--r--  llvm/test/CodeGen/R600/bfi_int.ll         |  4
-rw-r--r--  llvm/test/CodeGen/R600/ctpop.ll           | 17
-rw-r--r--  llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll | 21
-rw-r--r--  llvm/test/CodeGen/R600/rsq.ll             | 10
-rw-r--r--  llvm/test/CodeGen/R600/trunc.ll           | 15
7 files changed, 88 insertions, 16 deletions
diff --git a/llvm/lib/Target/R600/SIInstructions.td b/llvm/lib/Target/R600/SIInstructions.td
index 8082d3254ef..e4d7dc79be1 100644
--- a/llvm/lib/Target/R600/SIInstructions.td
+++ b/llvm/lib/Target/R600/SIInstructions.td
@@ -1976,9 +1976,9 @@ class BinOp64Pat <SDNode node, Instruction inst> : Pat <
(EXTRACT_SUBREG i64:$src1, sub1)), sub1)
>;
-def : BinOp64Pat <and, V_AND_B32_e32>;
-def : BinOp64Pat <or, V_OR_B32_e32>;
-def : BinOp64Pat <xor, V_XOR_B32_e32>;
+def : BinOp64Pat <and, V_AND_B32_e64>;
+def : BinOp64Pat <or, V_OR_B32_e64>;
+def : BinOp64Pat <xor, V_XOR_B32_e64>;
class SextInReg <ValueType vt, int ShiftAmt> : Pat <
(sext_inreg i32:$src0, vt),
@@ -1990,7 +1990,7 @@ def : SextInReg <i16, 16>;
def : Pat <
(i32 (add (i32 (ctpop i32:$popcnt)), i32:$val)),
- (V_BCNT_U32_B32_e32 $popcnt, $val)
+ (V_BCNT_U32_B32_e64 $popcnt, $val)
>;
def : Pat <
@@ -2010,7 +2010,7 @@ def : Pat <
def : Pat <
(addc i32:$src0, i32:$src1),
- (V_ADD_I32_e32 $src0, $src1)
+ (V_ADD_I32_e64 $src0, $src1)
>;
/********** ======================= **********/
@@ -3070,13 +3070,13 @@ def : Pat <
def : Pat <
(i1 (trunc i32:$a)),
- (V_CMP_EQ_I32_e64 (V_AND_B32_e32 (i32 1), $a), 1)
+ (V_CMP_EQ_I32_e64 (V_AND_B32_e64 (i32 1), $a), 1)
>;
//============================================================================//
// Miscellaneous Optimization Patterns
//============================================================================//
-def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e32>;
+def : SHA256MaPattern <V_BFI_B32, V_XOR_B32_e64>;
} // End isSI predicate
diff --git a/llvm/test/CodeGen/R600/and.ll b/llvm/test/CodeGen/R600/and.ll
index 7bbbec2d969..ccb97e38f3b 100644
--- a/llvm/test/CodeGen/R600/and.ll
+++ b/llvm/test/CodeGen/R600/and.ll
@@ -129,11 +129,30 @@ endif:
}
; FUNC-LABEL: @v_and_constant_i64
-; SI: V_AND_B32
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
define void @v_and_constant_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
%a = load i64 addrspace(1)* %aptr, align 8
%and = and i64 %a, 1234567
store i64 %and, i64 addrspace(1)* %out, align 8
ret void
}
+
+; FIXME: Replace and 0 with mov 0
+; FUNC-LABEL: @v_and_inline_imm_i64
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 64, {{v[0-9]+}}
+; SI: V_AND_B32_e32 {{v[0-9]+}}, 0, {{v[0-9]+}}
+define void @v_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr) {
+ %a = load i64 addrspace(1)* %aptr, align 8
+ %and = and i64 %a, 64
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
+
+; FUNC-LABEL: @s_and_inline_imm_i64
+; SI: S_AND_B64 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 64
+define void @s_and_inline_imm_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %aptr, i64 %a) {
+ %and = and i64 %a, 64
+ store i64 %and, i64 addrspace(1)* %out, align 8
+ ret void
+}
diff --git a/llvm/test/CodeGen/R600/bfi_int.ll b/llvm/test/CodeGen/R600/bfi_int.ll
index d18702a1de9..107158de54d 100644
--- a/llvm/test/CodeGen/R600/bfi_int.ll
+++ b/llvm/test/CodeGen/R600/bfi_int.ll
@@ -38,8 +38,8 @@ entry:
; R600-CHECK: @bfi_sha256_ma
; R600-CHECK: XOR_INT * [[DST:T[0-9]+\.[XYZW]]], KC0[2].Z, KC0[2].W
; R600-CHECK: BFI_INT * {{T[0-9]+\.[XYZW]}}, {{[[DST]]|PV\.[XYZW]}}, KC0[3].X, KC0[2].W
-; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{[sv][0-9]+, v[0-9]+}}
-; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{[sv][0-9]+, [sv][0-9]+}}
+; SI-CHECK: V_XOR_B32_e32 [[DST:v[0-9]+]], {{s[0-9]+, v[0-9]+}}
+; SI-CHECK: V_BFI_B32 {{v[0-9]+}}, [[DST]], {{s[0-9]+, v[0-9]+}}
define void @bfi_sha256_ma(i32 addrspace(1)* %out, i32 %x, i32 %y, i32 %z) {
entry:
diff --git a/llvm/test/CodeGen/R600/ctpop.ll b/llvm/test/CodeGen/R600/ctpop.ll
index c7c406a57e6..fd128672a17 100644
--- a/llvm/test/CodeGen/R600/ctpop.ll
+++ b/llvm/test/CodeGen/R600/ctpop.ll
@@ -42,8 +42,7 @@ define void @v_ctpop_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noali
; SI: BUFFER_LOAD_DWORD [[VAL1:v[0-9]+]],
; SI: V_MOV_B32_e32 [[VZERO:v[0-9]+]], 0
; SI: V_BCNT_U32_B32_e32 [[MIDRESULT:v[0-9]+]], [[VAL1]], [[VZERO]]
-; SI-NOT: ADD
-; SI: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
+; SI-NEXT: V_BCNT_U32_B32_e32 [[RESULT:v[0-9]+]], [[VAL0]], [[MIDRESULT]]
; SI: BUFFER_STORE_DWORD [[RESULT]],
; SI: S_ENDPGM
@@ -59,6 +58,20 @@ define void @v_ctpop_add_chain_i32(i32 addrspace(1)* noalias %out, i32 addrspace
ret void
}
+; FUNC-LABEL: @v_ctpop_add_sgpr_i32
+; SI: BUFFER_LOAD_DWORD [[VAL0:v[0-9]+]],
+; SI-NEXT: S_WAITCNT
+; SI-NEXT: V_BCNT_U32_B32_e64 [[RESULT:v[0-9]+]], [[VAL0]], s{{[0-9]+}}
+; SI-NEXT: BUFFER_STORE_DWORD [[RESULT]],
+; SI: S_ENDPGM
+define void @v_ctpop_add_sgpr_i32(i32 addrspace(1)* noalias %out, i32 addrspace(1)* noalias %in0, i32 addrspace(1)* noalias %in1, i32 %sval) nounwind {
+ %val0 = load i32 addrspace(1)* %in0, align 4
+ %ctpop0 = call i32 @llvm.ctpop.i32(i32 %val0) nounwind readnone
+ %add = add i32 %ctpop0, %sval
+ store i32 %add, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
; FUNC-LABEL: @v_ctpop_v2i32:
; SI: V_BCNT_U32_B32_e32
; SI: V_BCNT_U32_B32_e32
diff --git a/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll b/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
index 27cf6b28fd6..449b3afc381 100644
--- a/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
+++ b/llvm/test/CodeGen/R600/llvm.AMDGPU.rsq.ll
@@ -4,10 +4,29 @@
declare float @llvm.AMDGPU.rsq.f32(float) nounwind readnone
; FUNC-LABEL: @rsq_f32
-; SI: V_RSQ_F32_e32
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
; EG: RECIPSQRT_IEEE
define void @rsq_f32(float addrspace(1)* %out, float %src) nounwind {
%rsq = call float @llvm.AMDGPU.rsq.f32(float %src) nounwind readnone
store float %rsq, float addrspace(1)* %out, align 4
ret void
}
+
+; TODO: Really these should be constant folded
+; FUNC-LABEL: @rsq_f32_constant_4.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 4.0
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_4.0(float addrspace(1)* %out) nounwind {
+ %rsq = call float @llvm.AMDGPU.rsq.f32(float 4.0) nounwind readnone
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
+
+; FUNC-LABEL: @rsq_f32_constant_100.0
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, 0x42c80000
+; EG: RECIPSQRT_IEEE
+define void @rsq_f32_constant_100.0(float addrspace(1)* %out) nounwind {
+ %rsq = call float @llvm.AMDGPU.rsq.f32(float 100.0) nounwind readnone
+ store float %rsq, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/R600/rsq.ll b/llvm/test/CodeGen/R600/rsq.ll
index 3069f62724b..a9f3013d3e4 100644
--- a/llvm/test/CodeGen/R600/rsq.ll
+++ b/llvm/test/CodeGen/R600/rsq.ll
@@ -26,3 +26,13 @@ define void @rsq_f64(double addrspace(1)* noalias %out, double addrspace(1)* noa
store double %div, double addrspace(1)* %out, align 4
ret void
}
+
+; SI-LABEL: @rsq_f32_sgpr
+; SI: V_RSQ_F32_e32 {{v[0-9]+}}, {{s[0-9]+}}
+; SI: S_ENDPGM
+define void @rsq_f32_sgpr(float addrspace(1)* noalias %out, float %val) nounwind {
+ %sqrt = call float @llvm.sqrt.f32(float %val) nounwind readnone
+ %div = fdiv float 1.0, %sqrt
+ store float %div, float addrspace(1)* %out, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/R600/trunc.ll b/llvm/test/CodeGen/R600/trunc.ll
index 2f4b48236d4..d3b191db282 100644
--- a/llvm/test/CodeGen/R600/trunc.ll
+++ b/llvm/test/CodeGen/R600/trunc.ll
@@ -46,9 +46,20 @@ define void @trunc_shl_i64(i64 addrspace(1)* %out2, i32 addrspace(1)* %out, i64
}
; SI-LABEL: @trunc_i32_to_i1:
-; SI: V_AND_B32
+; SI: V_AND_B32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; SI: V_CMP_EQ_I32
-define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
+define void @trunc_i32_to_i1(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) {
+ %a = load i32 addrspace(1)* %ptr, align 4
+ %trunc = trunc i32 %a to i1
+ %result = select i1 %trunc, i32 1, i32 0
+ store i32 %result, i32 addrspace(1)* %out, align 4
+ ret void
+}
+
+; SI-LABEL: @sgpr_trunc_i32_to_i1:
+; SI: V_AND_B32_e64 v{{[0-9]+}}, 1, s{{[0-9]+}}
+; SI: V_CMP_EQ_I32
+define void @sgpr_trunc_i32_to_i1(i32 addrspace(1)* %out, i32 %a) {
%trunc = trunc i32 %a to i1
%result = select i1 %trunc, i32 1, i32 0
store i32 %result, i32 addrspace(1)* %out, align 4
ret void
}