summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
author: Matthias Braun <matze@braunis.de> 2016-05-06 22:43:50 +0000
committer: Matthias Braun <matze@braunis.de> 2016-05-06 22:43:50 +0000
commit: 22152acf7b5d05bea25672cf5eb287893f85fa41 (patch)
tree: b63055c92cb79b4cc8fda611ccd4988f6112a62a /llvm/test/CodeGen
parent: 8f429ead58999e584d6857ebc334564f88489de0 (diff)
downloadbcm5719-llvm-22152acf7b5d05bea25672cf5eb287893f85fa41.tar.gz
bcm5719-llvm-22152acf7b5d05bea25672cf5eb287893f85fa41.zip
DetectDeadLanes: Increase precision when detecting undef inputs
In the case of a COPY-like instruction, we may be able to deduce that a certain input is unused, based on the used lanes of the register defined by the instruction. This even works across otherwise incompatible copies (there is no need to have compatible lanemasks; completely unused operands are still completely unused). It even makes sense to redo the analysis in this case, since we gained information for a case where we previously stopped because of the incompatible masks. llvm-svn: 268815
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir | 22
-rw-r--r-- llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll | 6
2 files changed, 24 insertions, 4 deletions
diff --git a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
index d04b3f13e3f..8c761298cd2 100644
--- a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
+++ b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir
@@ -5,6 +5,7 @@
define void @test2() { ret void }
define void @test3() { ret void }
define void @test4() { ret void }
+ define void @test5() { ret void }
define void @loop0() { ret void }
define void @loop1() { ret void }
define void @loop2() { ret void }
@@ -20,7 +21,7 @@
# CHECK: S_NOP 0, implicit %3:sub1
# CHECK: S_NOP 0, implicit undef %3:sub2
# CHECK: %4 = COPY %3:sub0_sub1
-# CHECK: %5 = COPY %3:sub2_sub3
+# CHECK: %5 = COPY undef %3:sub2_sub3
# CHECK: S_NOP 0, implicit %4:sub0
# CHECK: S_NOP 0, implicit %4:sub1
# CHECK: S_NOP 0, implicit undef %5:sub0
@@ -255,6 +256,25 @@ body: |
S_NOP 0, implicit %1
...
---
+# Check that unused inputs are marked as undef, even if the vreg itself is
+# used.
+# CHECK-LABEL: name: test5
+# CHECK: S_NOP 0, implicit-def %0
+# CHECK: %1 = REG_SEQUENCE undef %0, {{[0-9]+}}, %0, {{[0-9]+}}
+# CHECK: S_NOP 0, implicit %1:sub1
+name: test5
+isSSA: true
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: sreg_32 }
+ - { id: 1, class: sreg_64 }
+body: |
+ bb.0:
+ S_NOP 0, implicit-def %0
+ %1 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1
+ S_NOP 0, implicit %1:sub1
+...
+---
# Check "optimistic" dataflow fixpoint in phi-loops.
# CHECK-LABEL: name: loop0
# CHECK: bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
index 1b386ff7d58..4b6f65a77b9 100644
--- a/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/subreg-coalescer-undef-use.ll
@@ -6,10 +6,10 @@ target triple="amdgcn--"
; CHECK-LABEL: foobar:
; CHECK: s_load_dword s2, s[0:1], 0x9
; CHECK-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0xb
-; CHECK-NEXT: v_mbcnt_lo_u32_b32_e64
-; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0
; CHECK-NEXT: s_waitcnt lgkmcnt(0)
-; CHECK: s_and_saveexec_b64 s[2:3], vcc
+; CHECK: v_mbcnt_lo_u32_b32_e64
+; CHECK-NEXT: v_cmp_eq_i32_e32 vcc, 0, v0
+; CHECK-NEXT: s_and_saveexec_b64 s[2:3], vcc
; CHECK-NEXT: s_xor_b64 s[2:3], exec, s[2:3]
; BB0_1:
; CHECK: s_load_dword s0, s[0:1], 0xa
OpenPOWER on IntegriCloud