Diffstat (limited to 'llvm/test/CodeGen/AMDGPU')
-rw-r--r--  llvm/test/CodeGen/AMDGPU/cayman-loop-bug.ll   10
-rw-r--r--  llvm/test/CodeGen/AMDGPU/commute-shifts.ll     6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll        3
-rw-r--r--  llvm/test/CodeGen/AMDGPU/mubuf.ll              6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/or.ll                 2
-rw-r--r--  llvm/test/CodeGen/AMDGPU/udivrem.ll            6
-rw-r--r--  llvm/test/CodeGen/AMDGPU/uniform-crash.ll      7
-rw-r--r--  llvm/test/CodeGen/AMDGPU/vselect.ll           52
8 files changed, 49 insertions, 43 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/cayman-loop-bug.ll b/llvm/test/CodeGen/AMDGPU/cayman-loop-bug.ll
index 694d922f6d8..34e6669434f 100644
--- a/llvm/test/CodeGen/AMDGPU/cayman-loop-bug.ll
+++ b/llvm/test/CodeGen/AMDGPU/cayman-loop-bug.ll
@@ -11,20 +11,26 @@
define amdgpu_ps void @main (<4 x float> inreg %reg0) {
entry:
br label %outer_loop
+
outer_loop:
%cnt = phi i32 [0, %entry], [%cnt_incr, %inner_loop]
%cond = icmp eq i32 %cnt, 16
br i1 %cond, label %outer_loop_body, label %exit
+
outer_loop_body:
%cnt_incr = add i32 %cnt, 1
br label %inner_loop
+
inner_loop:
%cnt2 = phi i32 [0, %outer_loop_body], [%cnt2_incr, %inner_loop_body]
- %cond2 = icmp eq i32 %cnt2, 16
- br i1 %cond, label %inner_loop_body, label %outer_loop
+ %n = load volatile i32, i32 addrspace(1)* undef
+ %cond2 = icmp slt i32 %cnt2, %n
+ br i1 %cond2, label %inner_loop_body, label %outer_loop
+
inner_loop_body:
%cnt2_incr = add i32 %cnt2, 1
br label %inner_loop
+
exit:
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
index 629219e7cd1..7e0396a4fe6 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
+++ b/llvm/test/CodeGen/AMDGPU/commute-shifts.ll
@@ -4,9 +4,9 @@
; GCN-LABEL: {{^}}main:
; SI: v_lshl_b32_e32 v{{[0-9]+}}, 1, v{{[0-9]+}}
; VI: v_lshlrev_b32_e64 v{{[0-9]+}}, v{{[0-9]+}}, 1
-define amdgpu_ps void @main() #0 {
+define amdgpu_ps void @main(float %arg0, float %arg1) #0 {
bb:
- %tmp = fptosi float undef to i32
+ %tmp = fptosi float %arg0 to i32
%tmp1 = call <4 x float> @llvm.SI.image.load.v4i32(<4 x i32> undef, <8 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%tmp2.f = extractelement <4 x float> %tmp1, i32 0
%tmp2 = bitcast float %tmp2.f to i32
@@ -14,7 +14,7 @@ bb:
%tmp4 = shl i32 1, %tmp3
%tmp5 = and i32 %tmp2, %tmp4
%tmp6 = icmp eq i32 %tmp5, 0
- %tmp7 = select i1 %tmp6, float 0.000000e+00, float undef
+ %tmp7 = select i1 %tmp6, float 0.000000e+00, float %arg1
%tmp8 = call i32 @llvm.SI.packf16(float undef, float %tmp7)
%tmp9 = bitcast i32 %tmp8 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float undef, float %tmp9, float undef, float %tmp9)
diff --git a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
index e6129e62e34..4d50dc2f402 100644
--- a/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
+++ b/llvm/test/CodeGen/AMDGPU/i1-copy-phi.ll
@@ -24,7 +24,8 @@ bb3: ; preds = %bb2, %bb
br i1 %tmp, label %bb4, label %bb6
bb4: ; preds = %bb3
- %tmp5 = mul i32 undef, %arg
+ %val = load volatile i32, i32 addrspace(1)* undef
+ %tmp5 = mul i32 %val, %arg
br label %bb6
bb6: ; preds = %bb4, %bb3
diff --git a/llvm/test/CodeGen/AMDGPU/mubuf.ll b/llvm/test/CodeGen/AMDGPU/mubuf.ll
index 277b5882eb2..f5bf09f15d9 100644
--- a/llvm/test/CodeGen/AMDGPU/mubuf.ll
+++ b/llvm/test/CodeGen/AMDGPU/mubuf.ll
@@ -62,8 +62,7 @@ main_body:
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 64, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
- %tmp5 = bitcast float 0.0 to i32
- call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+ call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
ret void
}
@@ -81,8 +80,7 @@ main_body:
%tmp2 = shl i32 %6, 2
%tmp3 = call i32 @llvm.SI.buffer.load.dword.i32.i32(<16 x i8> %tmp1, i32 %tmp2, i32 65, i32 0, i32 1, i32 0, i32 1, i32 0, i32 0)
%tmp4 = add i32 %6, 16
- %tmp5 = bitcast float 0.0 to i32
- call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp5, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
+ call void @llvm.SI.tbuffer.store.i32(<16 x i8> %tmp1, i32 %tmp3, i32 1, i32 %tmp4, i32 %4, i32 0, i32 4, i32 4, i32 1, i32 0, i32 1, i32 1, i32 0)
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/or.ll b/llvm/test/CodeGen/AMDGPU/or.ll
index e40f18f040b..9b90ff798ca 100644
--- a/llvm/test/CodeGen/AMDGPU/or.ll
+++ b/llvm/test/CodeGen/AMDGPU/or.ll
@@ -96,7 +96,7 @@ define void @scalar_or_i64(i64 addrspace(1)* %out, i64 %a, i64 %b) {
; SI: v_or_b32_e32 v{{[0-9]}}
define void @vector_or_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %a, i64 addrspace(1)* %b) {
%loada = load i64, i64 addrspace(1)* %a, align 8
- %loadb = load i64, i64 addrspace(1)* %a, align 8
+ %loadb = load i64, i64 addrspace(1)* %b, align 8
%or = or i64 %loada, %loadb
store i64 %or, i64 addrspace(1)* %out
ret void
diff --git a/llvm/test/CodeGen/AMDGPU/udivrem.ll b/llvm/test/CodeGen/AMDGPU/udivrem.ll
index db94df632a8..55a64da0a2f 100644
--- a/llvm/test/CodeGen/AMDGPU/udivrem.ll
+++ b/llvm/test/CodeGen/AMDGPU/udivrem.ll
@@ -51,11 +51,11 @@
; SI-DAG: v_cndmask_b32_e64
; SI-DAG: v_cndmask_b32_e64
; SI: s_endpgm
-define void @test_udivrem(i32 addrspace(1)* %out, i32 %x, i32 %y) {
+define void @test_udivrem(i32 addrspace(1)* %out0, i32 addrspace(1)* %out1, i32 %x, i32 %y) {
%result0 = udiv i32 %x, %y
- store i32 %result0, i32 addrspace(1)* %out
+ store i32 %result0, i32 addrspace(1)* %out0
%result1 = urem i32 %x, %y
- store i32 %result1, i32 addrspace(1)* %out
+ store i32 %result1, i32 addrspace(1)* %out1
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
index b994adda1bf..cfbb2af5867 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-crash.ll
@@ -35,11 +35,12 @@ bb2: ; preds = %bb
br label %bb3
bb3: ; preds = %bb3, %bb2
- %tmp4 = icmp eq i32 undef, %arg1
+ %val = load volatile i32, i32 addrspace(2)* undef
+ %tmp4 = icmp eq i32 %val, %arg1
br i1 %tmp4, label %bb5, label %bb3
bb5: ; preds = %bb3, %bb
- %tmp6 = tail call i32 @llvm.r600.read.tidig.y() #1
+ %tmp6 = tail call i32 @llvm.amdgcn.workitem.id.y() #1
%tmp10 = icmp ult i32 %tmp6, %arg
br i1 %tmp10, label %bb11, label %bb12
@@ -51,6 +52,6 @@ bb12: ; preds = %bb11, %bb5
}
; Function Attrs: nounwind readnone
-declare i32 @llvm.r600.read.tidig.y() #1
+declare i32 @llvm.amdgcn.workitem.id.y() #1
attributes #1 = { nounwind readnone }
diff --git a/llvm/test/CodeGen/AMDGPU/vselect.ll b/llvm/test/CodeGen/AMDGPU/vselect.ll
index 9d068a8ff77..0cd706b642d 100644
--- a/llvm/test/CodeGen/AMDGPU/vselect.ll
+++ b/llvm/test/CodeGen/AMDGPU/vselect.ll
@@ -2,28 +2,28 @@
;RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=VI --check-prefix=FUNC %s
;RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG --check-prefix=FUNC %s
-;FUNC-LABEL: {{^}}test_select_v2i32:
+; FUNC-LABEL: {{^}}test_select_v2i32:
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
+; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
-;SI: v_cndmask_b32_e64
-;SI: v_cndmask_b32_e32
+; SI: v_cndmask_b32_e64
+; SI: v_cndmask_b32_e32
-define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1) {
+define void @test_select_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in0, <2 x i32> addrspace(1)* %in1, <2 x i32> %val) {
entry:
- %0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
- %1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
- %cmp = icmp ne <2 x i32> %0, %1
- %result = select <2 x i1> %cmp, <2 x i32> %0, <2 x i32> %1
+ %load0 = load <2 x i32>, <2 x i32> addrspace(1)* %in0
+ %load1 = load <2 x i32>, <2 x i32> addrspace(1)* %in1
+ %cmp = icmp sgt <2 x i32> %load0, %load1
+ %result = select <2 x i1> %cmp, <2 x i32> %val, <2 x i32> %load0
store <2 x i32> %result, <2 x i32> addrspace(1)* %out
ret void
}
-;FUNC-LABEL: {{^}}test_select_v2f32:
+; FUNC-LABEL: {{^}}test_select_v2f32:
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
;SI: v_cndmask_b32_e64
;SI: v_cndmask_b32_e32
@@ -40,24 +40,24 @@ entry:
;FUNC-LABEL: {{^}}test_select_v4i32:
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
-;EG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
+; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[4].X
+; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].W
+; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Z
+; EG-DAG: CNDE_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], T[0-9]+\.[XYZW]}}, KC0[3].Y
; FIXME: The shrinking does not happen on tonga
-;SI: v_cndmask_b32
-;SI: v_cndmask_b32
-;SI: v_cndmask_b32
-;SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
+; SI: v_cndmask_b32
-define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1) {
+define void @test_select_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in0, <4 x i32> addrspace(1)* %in1, <4 x i32> %val) {
entry:
- %0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
- %1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
- %cmp = icmp ne <4 x i32> %0, %1
- %result = select <4 x i1> %cmp, <4 x i32> %0, <4 x i32> %1
+ %load0 = load <4 x i32>, <4 x i32> addrspace(1)* %in0
+ %load1 = load <4 x i32>, <4 x i32> addrspace(1)* %in1
+ %cmp = icmp sgt <4 x i32> %load0, %load1
+ %result = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %load0
store <4 x i32> %result, <4 x i32> addrspace(1)* %out
ret void
}