From 2c7d52a5407ad7145629a8d089b3aa59547a158f Mon Sep 17 00:00:00 2001 From: Matthias Braun Date: Fri, 9 Dec 2016 19:08:15 +0000 Subject: Move .mir tests to appropriate directories test/CodeGen/MIR should contain tests that intent to test the MIR printing or parsing. Tests that test something else should be in test/CodeGen/TargetName even when they are written in .mir. As a rule of thumb, only tests using "llc -run-pass none" should be in test/CodeGen/MIR. llvm-svn: 289254 --- llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir | 27 + llvm/test/CodeGen/AArch64/machine-dead-copy.mir | 67 ++ llvm/test/CodeGen/AArch64/machine-scheduler.mir | 34 + llvm/test/CodeGen/AArch64/machine-sink-zr.mir | 48 ++ llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir | 75 ++ llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir | 419 ++++++++++++ llvm/test/CodeGen/AMDGPU/insert-waits-exp.mir | 63 ++ llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir | 333 +++++++++ llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir | 89 +++ llvm/test/CodeGen/AMDGPU/liveness.mir | 32 + llvm/test/CodeGen/AMDGPU/movrels-bug.mir | 31 + .../CodeGen/AMDGPU/optimize-if-exec-masking.mir | 755 +++++++++++++++++++++ .../CodeGen/AMDGPU/rename-independent-subregs.mir | 70 ++ .../CodeGen/AMDGPU/scalar-store-cache-flush.mir | 173 +++++ llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir | 43 ++ llvm/test/CodeGen/AMDGPU/subreg-intervals.mir | 51 ++ .../CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir | 177 +++++ llvm/test/CodeGen/AMDGPU/waitcnt.mir | 59 ++ llvm/test/CodeGen/ARM/imm-peephole-arm.mir | 60 ++ llvm/test/CodeGen/ARM/imm-peephole-thumb.mir | 59 ++ llvm/test/CodeGen/Hexagon/anti-dep-partial.mir | 34 + llvm/test/CodeGen/Lanai/peephole-compare.mir | 678 ++++++++++++++++++ .../CodeGen/MIR/AArch64/ldst-opt-zr-clobber.mir | 27 - .../test/CodeGen/MIR/AArch64/machine-dead-copy.mir | 67 -- .../test/CodeGen/MIR/AArch64/machine-scheduler.mir | 34 - llvm/test/CodeGen/MIR/AArch64/machine-sink-zr.mir | 48 -- 
.../CodeGen/MIR/AMDGPU/coalescer-subreg-join.mir | 75 -- llvm/test/CodeGen/MIR/AMDGPU/detect-dead-lanes.mir | 419 ------------ llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir | 63 -- .../CodeGen/MIR/AMDGPU/inserted-wait-states.mir | 333 --------- .../CodeGen/MIR/AMDGPU/invert-br-undef-vcc.mir | 89 --- llvm/test/CodeGen/MIR/AMDGPU/liveness.mir | 32 - llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir | 31 - .../MIR/AMDGPU/optimize-if-exec-masking.mir | 755 --------------------- .../MIR/AMDGPU/rename-independent-subregs.mir | 70 -- .../MIR/AMDGPU/scalar-store-cache-flush.mir | 173 ----- .../test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir | 43 -- llvm/test/CodeGen/MIR/AMDGPU/subreg-intervals.mir | 51 -- .../MIR/AMDGPU/vccz-corrupt-bug-workaround.mir | 177 ----- llvm/test/CodeGen/MIR/AMDGPU/waitcnt.mir | 59 -- llvm/test/CodeGen/MIR/ARM/imm-peephole-arm.mir | 60 -- llvm/test/CodeGen/MIR/ARM/imm-peephole-thumb.mir | 59 -- llvm/test/CodeGen/MIR/Hexagon/anti-dep-partial.mir | 34 - llvm/test/CodeGen/MIR/Lanai/lit.local.cfg | 2 - llvm/test/CodeGen/MIR/Lanai/peephole-compare.mir | 678 ------------------ 45 files changed, 3377 insertions(+), 3379 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir create mode 100644 llvm/test/CodeGen/AArch64/machine-dead-copy.mir create mode 100644 llvm/test/CodeGen/AArch64/machine-scheduler.mir create mode 100644 llvm/test/CodeGen/AArch64/machine-sink-zr.mir create mode 100644 llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir create mode 100644 llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir create mode 100644 llvm/test/CodeGen/AMDGPU/insert-waits-exp.mir create mode 100644 llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir create mode 100644 llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir create mode 100644 llvm/test/CodeGen/AMDGPU/liveness.mir create mode 100644 llvm/test/CodeGen/AMDGPU/movrels-bug.mir create mode 100644 llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir create mode 100644 
llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir create mode 100644 llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir create mode 100644 llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir create mode 100644 llvm/test/CodeGen/AMDGPU/subreg-intervals.mir create mode 100644 llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir create mode 100644 llvm/test/CodeGen/AMDGPU/waitcnt.mir create mode 100644 llvm/test/CodeGen/ARM/imm-peephole-arm.mir create mode 100644 llvm/test/CodeGen/ARM/imm-peephole-thumb.mir create mode 100644 llvm/test/CodeGen/Hexagon/anti-dep-partial.mir create mode 100644 llvm/test/CodeGen/Lanai/peephole-compare.mir delete mode 100644 llvm/test/CodeGen/MIR/AArch64/ldst-opt-zr-clobber.mir delete mode 100644 llvm/test/CodeGen/MIR/AArch64/machine-dead-copy.mir delete mode 100644 llvm/test/CodeGen/MIR/AArch64/machine-scheduler.mir delete mode 100644 llvm/test/CodeGen/MIR/AArch64/machine-sink-zr.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/coalescer-subreg-join.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/detect-dead-lanes.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/invert-br-undef-vcc.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/liveness.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/optimize-if-exec-masking.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/rename-independent-subregs.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/scalar-store-cache-flush.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/subreg-intervals.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/vccz-corrupt-bug-workaround.mir delete mode 100644 llvm/test/CodeGen/MIR/AMDGPU/waitcnt.mir delete mode 100644 llvm/test/CodeGen/MIR/ARM/imm-peephole-arm.mir 
delete mode 100644 llvm/test/CodeGen/MIR/ARM/imm-peephole-thumb.mir delete mode 100644 llvm/test/CodeGen/MIR/Hexagon/anti-dep-partial.mir delete mode 100644 llvm/test/CodeGen/MIR/Lanai/lit.local.cfg delete mode 100644 llvm/test/CodeGen/MIR/Lanai/peephole-compare.mir (limited to 'llvm/test') diff --git a/llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir b/llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir new file mode 100644 index 00000000000..75ad849e4f3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/ldst-opt-zr-clobber.mir @@ -0,0 +1,27 @@ + +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s + +--- | + define i1 @no-clobber-zr(i64* %p, i64 %x) { ret i1 0 } +... +--- +# Check that write of xzr doesn't inhibit pairing of xzr stores since +# it isn't actually clobbered. Written as a MIR test to avoid +# schedulers reordering instructions such that SUBS doesn't appear +# between stores. +# CHECK-LABEL: name: no-clobber-zr +# CHECK: STPXi %xzr, %xzr, %x0, 0 +name: no-clobber-zr +body: | + bb.0: + liveins: %x0, %x1 + STRXui %xzr, %x0, 0 :: (store 8 into %ir.p) + dead %xzr = SUBSXri killed %x1, 0, 0, implicit-def %nzcv + %w8 = CSINCWr %wzr, %wzr, 1, implicit killed %nzcv + STRXui %xzr, killed %x0, 1 :: (store 8 into %ir.p) + %w0 = ORRWrs %wzr, killed %w8, 0 + RET %lr, implicit %w0 +... + + + diff --git a/llvm/test/CodeGen/AArch64/machine-dead-copy.mir b/llvm/test/CodeGen/AArch64/machine-dead-copy.mir new file mode 100644 index 00000000000..cb552e5cab3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-dead-copy.mir @@ -0,0 +1,67 @@ + +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-cp -verify-machineinstrs -o - %s | FileCheck %s + +--- | + define i32 @copyprop1(i32 %a, i32 %b) { ret i32 %a } + define i32 @copyprop2(i32 %a, i32 %b) { ret i32 %a } + define i32 @copyprop3(i32 %a, i32 %b) { ret i32 %a } + define i32 @copyprop4(i32 %a, i32 %b) { ret i32 %a } + declare i32 @foo(i32) +... 
+--- +# The first copy is a dead copy which is not used. +# CHECK-LABEL: name: copyprop1 +# CHECK: bb.0: +# CHECK-NOT: %w20 = COPY +name: copyprop1 +body: | + bb.0: + liveins: %w0, %w1 + %w20 = COPY %w1 + BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 + RET_ReallyLR implicit %w0 +... +--- +# The first copy is not a dead copy; it is used in the second copy after the +# call. +# CHECK-LABEL: name: copyprop2 +# CHECK: bb.0: +# CHECK: %w20 = COPY +name: copyprop2 +body: | + bb.0: + liveins: %w0, %w1 + %w20 = COPY %w1 + BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 + %w0 = COPY %w20 + RET_ReallyLR implicit %w0 +... +--- +# Both the first and second copy are dead copies which are not used. +# CHECK-LABEL: name: copyprop3 +# CHECK: bb.0: +# CHECK-NOT: COPY +name: copyprop3 +body: | + bb.0: + liveins: %w0, %w1 + %w20 = COPY %w1 + BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 + %w20 = COPY %w0 + RET_ReallyLR implicit %w0 +... +# The second copy is removed as a NOP copy, after which the first copy becomes +# dead and should be removed as well. +# CHECK-LABEL: name: copyprop4 +# CHECK: bb.0: +# CHECK-NOT: COPY +name: copyprop4 +body: | + bb.0: + liveins: %w0, %w1 + %w20 = COPY %w0 + %w0 = COPY %w20 + BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 + RET_ReallyLR implicit %w0 +... 
+ diff --git a/llvm/test/CodeGen/AArch64/machine-scheduler.mir b/llvm/test/CodeGen/AArch64/machine-scheduler.mir new file mode 100644 index 00000000000..e7e0dda53c5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-scheduler.mir @@ -0,0 +1,34 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-scheduler -verify-machineinstrs -o - %s | FileCheck %s + +--- | + define i64 @load_imp-def(i64* nocapture %P, i32 %v) { + entry: + %0 = bitcast i64* %P to i32* + %1 = load i32, i32* %0 + %conv = zext i32 %1 to i64 + %arrayidx19 = getelementptr inbounds i64, i64* %P, i64 1 + %arrayidx1 = bitcast i64* %arrayidx19 to i32* + store i32 %v, i32* %arrayidx1 + %2 = load i64, i64* %arrayidx19 + %and = and i64 %2, 4294967295 + %add = add nuw nsw i64 %and, %conv + ret i64 %add + } +... +--- +# CHECK-LABEL: name: load_imp-def +# CHECK: bb.0.entry: +# CHECK: LDRWui %x0, 0 +# CHECK: LDRWui %x0, 1 +# CHECK: STRWui %w1, %x0, 2 +name: load_imp-def +body: | + bb.0.entry: + liveins: %w1, %x0 + %w8 = LDRWui %x0, 1, implicit-def %x8 :: (load 4 from %ir.0) + STRWui killed %w1, %x0, 2 :: (store 4 into %ir.arrayidx1) + %w9 = LDRWui killed %x0, 0, implicit-def %x9 :: (load 4 from %ir.arrayidx19, align 8) + %x0 = ADDXrr killed %x9, killed %x8 + RET_ReallyLR implicit %x0 +... + diff --git a/llvm/test/CodeGen/AArch64/machine-sink-zr.mir b/llvm/test/CodeGen/AArch64/machine-sink-zr.mir new file mode 100644 index 00000000000..535fba0dc63 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/machine-sink-zr.mir @@ -0,0 +1,48 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-sink -o - %s | FileCheck %s +--- | + define void @sinkwzr() { ret void } +... +--- +name: sinkwzr +tracksRegLiveness: true +registers: + - { id: 0, class: gpr32 } + - { id: 1, class: gpr32 } + - { id: 2, class: gpr32sp } + - { id: 3, class: gpr32 } + - { id: 4, class: gpr32 } +body: | + ; Check that WZR copy is sunk into the loop preheader. 
+ ; CHECK-LABEL: name: sinkwzr + ; CHECK-LABEL: bb.0: + ; CHECK-NOT: COPY %wzr + bb.0: + successors: %bb.3, %bb.1 + liveins: %w0 + + %0 = COPY %w0 + %1 = COPY %wzr + CBZW %0, %bb.3 + + ; CHECK-LABEL: bb.1: + ; CHECK: COPY %wzr + + bb.1: + successors: %bb.2 + + B %bb.2 + + bb.2: + successors: %bb.3, %bb.2 + + %2 = PHI %0, %bb.1, %4, %bb.2 + %w0 = COPY %1 + %3 = SUBSWri %2, 1, 0, implicit-def dead %nzcv + %4 = COPY %3 + CBZW %3, %bb.3 + B %bb.2 + + bb.3: + RET_ReallyLR + +... diff --git a/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir new file mode 100644 index 00000000000..234fe57b513 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/coalescer-subreg-join.mir @@ -0,0 +1,75 @@ +# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s | FileCheck %s +# Check that %11 and %20 have been coalesced. +# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG:[0-9]+]] +# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG]] + +--- +name: main +alignment: 0 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: vgpr_32 } + - { id: 2, class: vgpr_32 } + - { id: 3, class: sreg_256 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_256 } + - { id: 6, class: sreg_128 } + - { id: 7, class: sreg_512 } + - { id: 9, class: vreg_512 } + - { id: 11, class: vreg_512 } + - { id: 18, class: vgpr_32 } + - { id: 20, class: vreg_512 } + - { id: 27, class: vgpr_32 } +liveins: + - { reg: '%sgpr2_sgpr3', virtual-reg: '%0' } + - { reg: '%vgpr2', virtual-reg: '%1' } + - { reg: '%vgpr3', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0: + liveins: %sgpr2_sgpr3, %vgpr2, %vgpr3 + + %0 = COPY %sgpr2_sgpr3 + %1 = COPY %vgpr2 + 
%2 = COPY %vgpr3 + %3 = S_LOAD_DWORDX8_IMM %0, 0, 0 + %4 = S_LOAD_DWORDX4_IMM %0, 12, 0 + %5 = S_LOAD_DWORDX8_IMM %0, 16, 0 + %6 = S_LOAD_DWORDX4_IMM %0, 28, 0 + undef %7.sub0 = S_MOV_B32 212739 + %20 = COPY %7 + %11 = COPY %20 + %11.sub1 = COPY %1 + %11.sub2 = COPY %1 + %11.sub3 = COPY %1 + %11.sub4 = COPY %1 + %11.sub5 = COPY %1 + %11.sub6 = COPY %1 + %11.sub7 = COPY %1 + %11.sub8 = COPY %1 + dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit %exec + %20.sub1 = COPY %2 + %20.sub2 = COPY %2 + %20.sub3 = COPY %2 + %20.sub4 = COPY %2 + %20.sub5 = COPY %2 + %20.sub6 = COPY %2 + %20.sub7 = COPY %2 + %20.sub8 = COPY %2 + dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit %exec + +... diff --git a/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir new file mode 100644 index 00000000000..9d70f67ef49 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/detect-dead-lanes.mir @@ -0,0 +1,419 @@ +# RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o - %s | FileCheck %s +--- | + define void @test0() { ret void } + define void @test1() { ret void } + define void @test2() { ret void } + define void @test3() { ret void } + define void @test4() { ret void } + define void @test5() { ret void } + define void @loop0() { ret void } + define void @loop1() { ret void } + define void @loop2() { ret void } +... +--- +# Combined use/def transfer check, the basics. 
+# CHECK-LABEL: name: test0 +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %3.sub0 +# CHECK: S_NOP 0, implicit %3.sub1 +# CHECK: S_NOP 0, implicit undef %3.sub2 +# CHECK: %4 = COPY %3.sub0_sub1 +# CHECK: %5 = COPY undef %3.sub2_sub3 +# CHECK: S_NOP 0, implicit %4.sub0 +# CHECK: S_NOP 0, implicit %4.sub1 +# CHECK: S_NOP 0, implicit undef %5.sub0 +name: test0 +registers: + - { id: 0, class: sreg_32_xm0 } + - { id: 1, class: sreg_32_xm0 } + - { id: 2, class: sreg_32_xm0 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_64 } + - { id: 5, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub3 + S_NOP 0, implicit %3.sub0 + S_NOP 0, implicit %3.sub1 + S_NOP 0, implicit %3.sub2 + %4 = COPY %3.sub0_sub1 + %5 = COPY %3.sub2_sub3 + S_NOP 0, implicit %4.sub0 + S_NOP 0, implicit %4.sub1 + S_NOP 0, implicit %5.sub0 +... +--- +# Check defined lanes transfer; Includes checking for some special cases like +# undef operands or IMPLICIT_DEF definitions. 
+# CHECK-LABEL: name: test1 +# CHECK: %0 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}} +# CHECK: %1 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}} +# CHECK: %2 = INSERT_SUBREG %0.sub2_sub3, %sgpr42, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %1.sub0 +# CHECK: S_NOP 0, implicit undef %1.sub1 +# CHECK: S_NOP 0, implicit %1.sub2 +# CHECK: S_NOP 0, implicit %1.sub3 +# CHECK: S_NOP 0, implicit %2.sub0 +# CHECK: S_NOP 0, implicit undef %2.sub1 + +# CHECK: %3 = IMPLICIT_DEF +# CHECK: %4 = INSERT_SUBREG %0, undef %3, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %4.sub0 +# CHECK: S_NOP 0, implicit undef %4.sub1 +# CHECK: S_NOP 0, implicit %4.sub2 +# CHECK: S_NOP 0, implicit undef %4.sub3 + +# CHECK: %5 = EXTRACT_SUBREG %0, {{[0-9]+}} +# CHECK: %6 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: %7 = EXTRACT_SUBREG %5, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %5 +# CHECK: S_NOP 0, implicit %6 +# CHECK: S_NOP 0, implicit undef %7 + +# CHECK: %8 = IMPLICIT_DEF +# CHECK: %9 = EXTRACT_SUBREG undef %8, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %9 + +# CHECK: %10 = EXTRACT_SUBREG undef %0, {{[0-9]+}} +# CHECK: S_NOP 0, implicit undef %10 +name: test1 +registers: + - { id: 0, class: sreg_128 } + - { id: 1, class: sreg_128 } + - { id: 2, class: sreg_64 } + - { id: 3, class: sreg_32_xm0 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_64 } + - { id: 6, class: sreg_32_xm0 } + - { id: 7, class: sreg_32_xm0 } + - { id: 8, class: sreg_64 } + - { id: 9, class: sreg_32_xm0 } + - { id: 10, class: sreg_128 } +body: | + bb.0: + %0 = REG_SEQUENCE %sgpr0, %subreg.sub0, %sgpr0, %subreg.sub2 + %1 = INSERT_SUBREG %0, %sgpr1, %subreg.sub3 + %2 = INSERT_SUBREG %0.sub2_sub3, %sgpr42, %subreg.sub0 + S_NOP 0, implicit %1.sub0 + S_NOP 0, implicit %1.sub1 + S_NOP 0, implicit %1.sub2 + S_NOP 0, implicit %1.sub3 + S_NOP 0, implicit %2.sub0 + S_NOP 0, implicit %2.sub1 + + %3 = IMPLICIT_DEF + %4 = INSERT_SUBREG %0, %3, %subreg.sub0 + S_NOP 0, implicit %4.sub0 + S_NOP 0, implicit %4.sub1 + S_NOP 0, implicit 
%4.sub2 + S_NOP 0, implicit %4.sub3 + + %5 = EXTRACT_SUBREG %0, %subreg.sub0_sub1 + %6 = EXTRACT_SUBREG %5, %subreg.sub0 + %7 = EXTRACT_SUBREG %5, %subreg.sub1 + S_NOP 0, implicit %5 + S_NOP 0, implicit %6 + S_NOP 0, implicit %7 + + %8 = IMPLICIT_DEF + %9 = EXTRACT_SUBREG %8, %subreg.sub1 + S_NOP 0, implicit %9 + + %10 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3 + S_NOP 0, implicit %10 +... +--- +# Check used lanes transfer; Includes checking for some special cases like +# undef operands. +# CHECK-LABEL: name: test2 +# CHECK: S_NOP 0, implicit-def dead %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def %2 +# CHECK: %3 = REG_SEQUENCE undef %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %3.sub1 +# CHECK: S_NOP 0, implicit %3.sub3 + +# CHECK: S_NOP 0, implicit-def %4 +# CHECK: S_NOP 0, implicit-def dead %5 +# CHECK: %6 = REG_SEQUENCE %4, {{[0-9]+}}, undef %5, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %6 + +# CHECK: S_NOP 0, implicit-def dead %7 +# CHECK: S_NOP 0, implicit-def %8 +# CHECK: %9 = INSERT_SUBREG undef %7, %8, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %9.sub2 + +# CHECK: S_NOP 0, implicit-def %10 +# CHECK: S_NOP 0, implicit-def dead %11 +# CHECK: %12 = INSERT_SUBREG %10, undef %11, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %12.sub3 + +# CHECK: S_NOP 0, implicit-def %13 +# CHECK: S_NOP 0, implicit-def dead %14 +# CHECK: %15 = REG_SEQUENCE %13, {{[0-9]+}}, undef %14, {{[0-9]+}} +# CHECK: %16 = EXTRACT_SUBREG %15, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %16.sub1 + +name: test2 +registers: + - { id: 0, class: sreg_32_xm0 } + - { id: 1, class: sreg_32_xm0 } + - { id: 2, class: sreg_64 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_32_xm0 } + - { id: 5, class: sreg_32_xm0 } + - { id: 6, class: sreg_64 } + - { id: 7, class: sreg_128 } + - { id: 8, class: sreg_64 } + - { id: 9, class: sreg_128 } + - { id: 10, class: sreg_128 } + - { id: 11, class: sreg_64 } + - { id: 12, class: sreg_128 } + - { id: 13, class: sreg_64 
} + - { id: 14, class: sreg_64 } + - { id: 15, class: sreg_128 } + - { id: 16, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3 + S_NOP 0, implicit %3.sub1 + S_NOP 0, implicit %3.sub3 + + S_NOP 0, implicit-def %4 + S_NOP 0, implicit-def %5 + %6 = REG_SEQUENCE %4, %subreg.sub0, undef %5, %subreg.sub1 + S_NOP 0, implicit %6 + + S_NOP 0, implicit-def %7 + S_NOP 0, implicit-def %8 + %9 = INSERT_SUBREG %7, %8, %subreg.sub2_sub3 + S_NOP 0, implicit %9.sub2 + + S_NOP 0, implicit-def %10 + S_NOP 0, implicit-def %11 + %12 = INSERT_SUBREG %10, %11, %subreg.sub0_sub1 + S_NOP 0, implicit %12.sub3 + + S_NOP 0, implicit-def %13 + S_NOP 0, implicit-def %14 + %15 = REG_SEQUENCE %13, %subreg.sub0_sub1, %14, %subreg.sub2_sub3 + %16 = EXTRACT_SUBREG %15, %subreg.sub0_sub1 + S_NOP 0, implicit %16.sub1 +... +--- +# Check that copies to physregs use all lanes, copies from physregs define all +# lanes. So we should not get a dead/undef flag here. +# CHECK-LABEL: name: test3 +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: %vcc = COPY %0 +# CHECK: %1 = COPY %vcc +# CHECK: S_NOP 0, implicit %1 +name: test3 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + %vcc = COPY %0 + + %1 = COPY %vcc + S_NOP 0, implicit %1 +... +--- +# Check that implicit-def/kill do not count as def/uses. +# CHECK-LABEL: name: test4 +# CHECK: S_NOP 0, implicit-def dead %0 +# CHECK: KILL undef %0 +# CHECK: %1 = IMPLICIT_DEF +# CHECK: S_NOP 0, implicit undef %1 +name: test4 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + KILL %0 + + %1 = IMPLICIT_DEF + S_NOP 0, implicit %1 +... +--- +# Check that unused inputs are marked as undef, even if the vreg itself is +# used. 
+# CHECK-LABEL: name: test5 +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: %1 = REG_SEQUENCE undef %0, {{[0-9]+}}, %0, {{[0-9]+}} +# CHECK: S_NOP 0, implicit %1.sub1 +name: test5 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32_xm0 } + - { id: 1, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + %1 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1 + S_NOP 0, implicit %1.sub1 +... +--- +# Check "optimistic" dataflow fixpoint in phi-loops. +# CHECK-LABEL: name: loop0 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def dead %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, undef %1, {{[0-9]+}}, undef %2, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %4 = PHI %3, %bb.0, %5, %bb.1 + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %4.sub0 +# CHECK: S_NOP 0, implicit undef %4.sub3 +name: loop0 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32_xm0 } + - { id: 1, class: sreg_32_xm0 } + - { id: 2, class: sreg_32_xm0 } + - { id: 3, class: sreg_128 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def %2 + %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %4 = PHI %3, %bb.0, %5, %bb.1 + + ; let's swiffle some lanes around for fun... + %5 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub2, %subreg.sub1, %4.sub1, %subreg.sub2, %4.sub3, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %4.sub0 + S_NOP 0, implicit %4.sub3 +... +--- +# Check a loop that needs to be traversed multiple times to reach the fixpoint +# for the used lanes. The example reads the sub3 lane at the end; however, with each +# loop iteration we should get 1 more lane marked as we cycle the sublanes +# along. 
Sublanes sub0, sub1 and sub3 are rotated in the loop so only sub2 +# should be dead. +# CHECK-LABEL: name: loop1 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: S_NOP 0, implicit-def %1 +# CHECK: S_NOP 0, implicit-def dead %2 +# CHECK: S_NOP 0, implicit-def %3 +# CHECK: %4 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}, %3, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %5 = PHI %4, %bb.0, %6, %bb.1 + +# CHECK: %6 = REG_SEQUENCE %5.sub1, {{[0-9]+}}, %5.sub3, {{[0-9]+}}, undef %5.sub2, {{[0-9]+}}, %5.sub0, {{[0-9]+}} + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %6.sub3 +name: loop1 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32_xm0 } + - { id: 1, class: sreg_32_xm0 } + - { id: 2, class: sreg_32_xm0 } + - { id: 3, class: sreg_32_xm0 } + - { id: 4, class: sreg_128 } + - { id: 5, class: sreg_128 } + - { id: 6, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + S_NOP 0, implicit-def %1 + S_NOP 0, implicit-def dead %2 + S_NOP 0, implicit-def %3 + %4 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %5 = PHI %4, %bb.0, %6, %bb.1 + + ; rotate lanes, but skip sub2 lane... + %6 = REG_SEQUENCE %5.sub1, %subreg.sub0, %5.sub3, %subreg.sub1, %5.sub2, %subreg.sub2, %5.sub0, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %6.sub3 +... +--- +# Similar to loop1 test, but check for fixpoint of defined lanes. +# Lanes are rotated between sub0, sub2, sub3 so only sub1 should be dead/undef. 
+# CHECK-LABEL: name: loop2 +# CHECK: bb.0: +# CHECK: S_NOP 0, implicit-def %0 +# CHECK: %1 = REG_SEQUENCE %0, {{[0-9]+}} + +# CHECK: bb.1: +# CHECK: %2 = PHI %1, %bb.0, %3, %bb.1 + +# CHECK: %3 = REG_SEQUENCE %2.sub3, {{[0-9]+}}, undef %2.sub1, {{[0-9]+}}, %2.sub0, {{[0-9]+}}, %2.sub2, {{[0-9]+}} + +# CHECK: bb.2: +# CHECK: S_NOP 0, implicit %2.sub0 +# CHECK: S_NOP 0, implicit undef %2.sub1 +# CHECK: S_NOP 0, implicit %2.sub2 +# CHECK: S_NOP 0, implicit %2.sub3 +name: loop2 +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32_xm0 } + - { id: 1, class: sreg_128 } + - { id: 2, class: sreg_128 } + - { id: 3, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + S_NOP 0, implicit-def %0 + %1 = REG_SEQUENCE %0, %subreg.sub0 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.1, %bb.2 + %2 = PHI %1, %bb.0, %3, %bb.1 + + ; rotate subreg lanes, skipping sub1 + %3 = REG_SEQUENCE %2.sub3, %subreg.sub0, %2.sub1, %subreg.sub1, %2.sub0, %subreg.sub2, %2.sub2, %subreg.sub3 + + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %2.sub0 + S_NOP 0, implicit undef %2.sub1 + S_NOP 0, implicit %2.sub2 + S_NOP 0, implicit %2.sub3 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waits-exp.mir b/llvm/test/CodeGen/AMDGPU/insert-waits-exp.mir new file mode 100644 index 00000000000..9aaa374ed28 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/insert-waits-exp.mir @@ -0,0 +1,63 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s +--- | + define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) { + %a = load volatile float, float addrspace(1)* undef + %b = load volatile float, float addrspace(1)* undef + %c = load volatile float, float addrspace(1)* undef + %d = load volatile float, float addrspace(1)* undef + call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d) + ret <4 x float> + } + + declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) + + attributes #0 = { readnone } + attributes #1 = { nounwind } + +... +--- + +# CHECK-LABEL: name: exp_done_waitcnt{{$}} +# CHECK: EXP_DONE +# CHECK-NEXT: S_WAITCNT 3855 +# CHECK: %vgpr0 = V_MOV_B32 +# CHECK: %vgpr1 = V_MOV_B32 +# CHECK: %vgpr2 = V_MOV_B32 +# CHECK: %vgpr3 = V_MOV_B32 +name: exp_done_waitcnt +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0 (%ir-block.2): + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float 
addrspace(1)* undef`) + %vgpr2 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + %vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) + EXP_DONE 0, killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3, -1, -1, 15, implicit %exec + %vgpr0 = V_MOV_B32_e32 1056964608, implicit %exec + %vgpr1 = V_MOV_B32_e32 1065353216, implicit %exec + %vgpr2 = V_MOV_B32_e32 1073741824, implicit %exec + %vgpr3 = V_MOV_B32_e32 1082130432, implicit %exec + SI_RETURN killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3 + +... diff --git a/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir new file mode 100644 index 00000000000..7cc9c7c1d92 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/inserted-wait-states.mir @@ -0,0 +1,333 @@ +# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN +# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI + +--- | + define void @div_fmas() { ret void } + define void @s_getreg() { ret void } + define void @s_setreg() { ret void } + define void @vmem_gt_8dw_store() { ret void } + define void @readwrite_lane() { ret void } + define void @rfe() { ret void } +... 
+--- +# GCN-LABEL: name: div_fmas + +# GCN-LABEL: bb.0: +# GCN: S_MOV_B64 +# GCN-NOT: S_NOP +# GCN: V_DIV_FMAS + +# GCN-LABEL: bb.1: +# GCN: V_CMP_EQ_I32 +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: V_DIV_FMAS_F32 + +# GCN-LABEL: bb.2: +# GCN: V_CMP_EQ_I32 +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: V_DIV_FMAS_F32 + +# GCN-LABEL: bb.3: +# GCN: V_DIV_SCALE_F32 +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: V_DIV_FMAS_F32 +name: div_fmas + +body: | + bb.0: + successors: %bb.1 + %vcc = S_MOV_B64 0 + %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec + %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + %vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec + %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec + S_BRANCH %bb.3 + + bb.3: + %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec + %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec + S_ENDPGM + +... + +... 
+--- +# GCN-LABEL: name: s_getreg + +# GCN-LABEL: bb.0: +# GCN: S_SETREG +# GCN: S_NOP 0 +# GCN: S_NOP 0 +# GCN: S_GETREG + +# GCN-LABEL: bb.1: +# GCN: S_SETREG_IMM32 +# GCN: S_NOP 0 +# GCN: S_NOP 0 +# GCN: S_GETREG + +# GCN-LABEL: bb.2: +# GCN: S_SETREG +# GCN: S_NOP 0 +# GCN: S_GETREG + +# GCN-LABEL: bb.3: +# GCN: S_SETREG +# GCN-NEXT: S_GETREG + +name: s_getreg + +body: | + bb.0: + successors: %bb.1 + S_SETREG_B32 %sgpr0, 1 + %sgpr1 = S_GETREG_B32 1 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + S_SETREG_IMM32_B32 0, 1 + %sgpr1 = S_GETREG_B32 1 + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + S_SETREG_B32 %sgpr0, 1 + %sgpr1 = S_MOV_B32 0 + %sgpr2 = S_GETREG_B32 1 + S_BRANCH %bb.3 + + bb.3: + S_SETREG_B32 %sgpr0, 0 + %sgpr1 = S_GETREG_B32 1 + S_ENDPGM +... + +... +--- +# GCN-LABEL: name: s_setreg + +# GCN-LABEL: bb.0: +# GCN: S_SETREG +# GCN: S_NOP 0 +# VI: S_NOP 0 +# GCN-NEXT: S_SETREG + +# GCN-LABEL: bb.1: +# GCN: S_SETREG +# GCN: S_NOP 0 +# VI: S_NOP 0 +# GCN-NEXT: S_SETREG + +# GCN-LABEL: bb.2: +# GCN: S_SETREG +# GCN-NEXT: S_SETREG + +name: s_setreg + +body: | + bb.0: + successors: %bb.1 + S_SETREG_B32 %sgpr0, 1 + S_SETREG_B32 %sgpr1, 1 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + S_SETREG_B32 %sgpr0, 64 + S_SETREG_B32 %sgpr1, 128 + S_BRANCH %bb.2 + + bb.2: + S_SETREG_B32 %sgpr0, 1 + S_SETREG_B32 %sgpr1, 0 + S_ENDPGM +... + +... 
+--- +# GCN-LABEL: name: vmem_gt_8dw_store + +# GCN-LABEL: bb.0: +# GCN: BUFFER_STORE_DWORD_OFFSET +# GCN-NEXT: V_MOV_B32 +# GCN: BUFFER_STORE_DWORDX3_OFFSET +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 +# GCN: BUFFER_STORE_DWORDX4_OFFSET +# GCN-NEXT: V_MOV_B32 +# GCN: BUFFER_STORE_DWORDX4_OFFSET +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 +# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 +# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 + +# GCN-LABEL: bb.1: +# GCN: FLAT_STORE_DWORDX2 +# GCN-NEXT: V_MOV_B32 +# GCN: FLAT_STORE_DWORDX3 +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 +# GCN: FLAT_STORE_DWORDX4 +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 +# GCN: FLAT_ATOMIC_CMPSWAP_X2 +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 +# GCN: FLAT_ATOMIC_FCMPSWAP_X2 +# CIVI: S_NOP +# GCN-NEXT: V_MOV_B32 + +name: vmem_gt_8dw_store + +body: | + bb.0: + successors: %bb.1 + BUFFER_STORE_DWORD_OFFSET %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + BUFFER_STORE_DWORDX3_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + BUFFER_STORE_FORMAT_XYZ_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + BUFFER_STORE_FORMAT_XYZW_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + BUFFER_ATOMIC_CMPSWAP_X2_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit %exec + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + S_BRANCH %bb.1 + + bb.1: +
FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3 = V_MOV_B32_e32 0, implicit %exec + S_ENDPGM + +... + +... +--- + +# GCN-LABEL: name: readwrite_lane + +# GCN-LABEL: bb.0: +# GCN: V_ADD_I32 +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: V_READLANE_B32 + +# GCN-LABEL: bb.1: +# GCN: V_ADD_I32 +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: V_WRITELANE_B32 + +# GCN-LABEL: bb.2: +# GCN: V_ADD_I32 +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: V_READLANE_B32 + +# GCN-LABEL: bb.3: +# GCN: V_ADD_I32 +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: S_NOP +# GCN: V_WRITELANE_B32 + +name: readwrite_lane + +body: | + bb.0: + successors: %bb.1 + %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec + %sgpr4 = V_READLANE_B32 %vgpr4, %sgpr0 + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec + %vgpr4 = V_WRITELANE_B32 %sgpr0, %sgpr0 + S_BRANCH %bb.2 + + bb.2: + successors: %bb.3 + %vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec + %sgpr4 = V_READLANE_B32 %vgpr4, %vcc_lo + S_BRANCH %bb.3 + + bb.3: + %vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec + %vgpr4 = V_WRITELANE_B32 %sgpr4, %vcc_lo + S_ENDPGM + +... + +...
+--- + +# GCN-LABEL: name: rfe + +# GCN-LABEL: bb.0: +# GCN: S_SETREG +# VI: S_NOP +# GCN-NEXT: S_RFE_B64 + +# GCN-LABEL: bb.1: +# GCN: S_SETREG +# GCN-NEXT: S_RFE_B64 + +name: rfe + +body: | + bb.0: + successors: %bb.1 + S_SETREG_B32 %sgpr0, 3 + S_RFE_B64 %sgpr2_sgpr3 + S_BRANCH %bb.1 + + bb.1: + S_SETREG_B32 %sgpr0, 0 + S_RFE_B64 %sgpr2_sgpr3 + S_ENDPGM + +... diff --git a/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir new file mode 100644 index 00000000000..66182d09289 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/invert-br-undef-vcc.mir @@ -0,0 +1,89 @@ +# RUN: llc -run-pass block-placement -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s +--- | + + define void @invert_br_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 { + entry: + br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 + + else: ; preds = %entry + store volatile i32 100, i32 addrspace(1)* undef + br label %done, !structurizecfg.uniform !0 + + if: ; preds = %entry + store volatile i32 9, i32 addrspace(1)* undef + br label %done, !structurizecfg.uniform !0 + + done: ; preds = %if, %else + %value = phi i32 [ 0, %if ], [ 1, %else ] + store i32 %value, i32 addrspace(1)* %out + ret void + } + + attributes #0 = { nounwind } + + !0 = !{} + +... 
+--- +# CHECK-LABEL: name: invert_br_undef_vcc +# CHECK: S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc + +name: invert_br_undef_vcc +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%sgpr0_sgpr1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.2.if, %bb.1.else + liveins: %sgpr0_sgpr1 + + %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + S_CBRANCH_VCCNZ %bb.2.if, implicit undef %vcc + + bb.1.else: + successors: %bb.3.done + liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %vgpr0 = V_MOV_B32_e32 100, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + %vgpr0 = V_MOV_B32_e32 1, implicit %exec + S_BRANCH %bb.3.done + + bb.2.if: + successors: %bb.3.done + liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %vgpr0 = V_MOV_B32_e32 9, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + %vgpr0 = V_MOV_B32_e32 0, implicit %exec + + bb.3.done: + liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) + S_ENDPGM + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/liveness.mir b/llvm/test/CodeGen/AMDGPU/liveness.mir new file mode 100644 index 00000000000..112c3f8e69a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/liveness.mir @@ -0,0 +1,32 @@ +# RUN: llc -march=amdgcn -run-pass liveintervals -verify-machineinstrs -o /dev/null -debug-only=regalloc %s 2>&1 | FileCheck %s +# REQUIRES: asserts +# We currently maintain a main liveness range which operates like a superset of +# all subregister liveranges. We may need to create additional SSA values at +# merge points in this main liverange even though none of the subregister +# liveranges needs them. +# +# Should see three distinct value numbers, the third one being a PHI-def: +# CHECK: %vreg0 [{{.*}}:0)[{{.*}}:1)[{{.*}}:2) 0@{{[0-9]+[Berd]}} 1@{{[0-9]+[Berd]}} 2@{{[0-9]+B-phi}} +--- | + define void @test0() { ret void } +... +--- +name: test0 +registers: + - { id: 0, class: sreg_64 } +body: | + bb.0: + successors: %bb.1, %bb.2 + S_NOP 0, implicit-def undef %0.sub0 + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.1: + successors: %bb.2 + S_NOP 0, implicit-def %0.sub1 + S_NOP 0, implicit %0.sub1 + S_BRANCH %bb.2 + + bb.2: + S_NOP 0, implicit %0.sub0 +... diff --git a/llvm/test/CodeGen/AMDGPU/movrels-bug.mir b/llvm/test/CodeGen/AMDGPU/movrels-bug.mir new file mode 100644 index 00000000000..9c330bc8a6b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/movrels-bug.mir @@ -0,0 +1,31 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass post-RA-sched %s -o - | FileCheck %s + +# This tests a situation where a sub-register of a killed super-register operand +# of V_MOVRELS happens to have an undef use later on. This leads to the post-RA +# scheduler adding additional implicit operands to the V_MOVRELS, which used +# to fail machine instruction verification. + +--- | + + define amdgpu_vs void @main(i32 %arg) { ret void } + +...
+--- +# CHECK-LABEL: name: main +# CHECK-LABEL: bb.0: +# CHECK: V_MOVRELS_B32_e32 +# CHECK: V_MAC_F32_e32 + +name: main +tracksRegLiveness: true +body: | + bb.0: + %m0 = S_MOV_B32 undef %sgpr0 + V_MOVRELD_B32_e32 undef %vgpr2, 0, implicit %m0, implicit %exec, implicit-def %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit undef %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8(tied-def 4) + %m0 = S_MOV_B32 undef %sgpr0 + %vgpr1 = V_MOVRELS_B32_e32 undef %vgpr1, implicit %m0, implicit %exec, implicit killed %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 + %vgpr4 = V_MAC_F32_e32 undef %vgpr0, undef %vgpr0, undef %vgpr4, implicit %exec + EXP_DONE 15, undef %vgpr0, killed %vgpr1, killed %vgpr4, undef %vgpr0, 0, 0, 12, implicit %exec + S_ENDPGM + +... diff --git a/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir new file mode 100644 index 00000000000..4584802ad5a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/optimize-if-exec-masking.mir @@ -0,0 +1,755 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s + +--- | + target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" + + define void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 { + main_body: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %cc = icmp eq i32 %id, 0 + %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc) + %1 = extractvalue { i1, i64 } %0, 0 + %2 = extractvalue { i1, i64 } %0, 1 + br i1 %1, label %if, label %end + + if: ; preds = %main_body + %v.if = load volatile i32, i32 addrspace(1)* undef + br label %end + + end: ; preds = %if, %main_body + %r = phi i32 [ 4, %main_body ], [ %v.if, %if ] + call void @llvm.amdgcn.end.cf(i64 %2) + store i32 %r, i32 addrspace(1)* undef + ret void + } + + define void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 { + main_body: + br i1 
undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + define void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 { + main_body: + br i1 undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + + define void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 { + main_body: + %id = call i32 @llvm.amdgcn.workitem.id.x() + %cc = icmp eq i32 %id, 0 + %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc) + %1 = extractvalue { i1, i64 } %0, 0 + %2 = extractvalue { i1, i64 } %0, 1 + store i32 %id, i32 addrspace(1)* undef + br i1 %1, label %if, label %end + + if: ; preds = %main_body + %v.if = load volatile i32, i32 addrspace(1)* undef + br label %end + + end: ; preds = %if, %main_body + %r = phi i32 [ 4, %main_body ], [ %v.if, %if ] + call void @llvm.amdgcn.end.cf(i64 %2) + store i32 %r, i32 addrspace(1)* undef + ret void + } + + define void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 { + main_body: + br i1 undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + define void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 { + main_body: + br i1 undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + define void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 { + main_body: + br i1 undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + define void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 { + main_body: + br i1 undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + define void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 { + main_body: + br i1 undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + define void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 { + main_body: + br i1 undef, label %if, label %end + + if: + br label %end + + end: + ret void + } + + ; Function Attrs: nounwind readnone + declare i32 
@llvm.amdgcn.workitem.id.x() #1 + + declare { i1, i64 } @llvm.amdgcn.if(i1) + + declare void @llvm.amdgcn.end.cf(i64) + + + attributes #0 = { nounwind } + attributes #1 = { nounwind readnone } + +... +--- +# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}} +# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec +# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc +# CHECK-NEXT: SI_MASK_BRANCH + +name: optimize_if_and_saveexec_xor +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 
into `i32 addrspace(1)* undef`) + S_ENDPGM + +... +--- +# CHECK-LABEL: name: optimize_if_and_saveexec{{$}} +# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec +# CHECK-NEXT: SI_MASK_BRANCH + +name: optimize_if_and_saveexec +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... 
+--- +# CHECK-LABEL: name: optimize_if_or_saveexec{{$}} +# CHECK: %sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec +# CHECK-NEXT: SI_MASK_BRANCH + +name: optimize_if_or_saveexec +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_OR_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... 
+--- +# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle +# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc +# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) +# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc +# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 +# CHECK-NEXT: SI_MASK_BRANCH +name: optimize_if_and_saveexec_xor_valu_middle +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = 
S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... +--- +# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}} +# CHECK: %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc +# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc +# CHECK-NEXT: %exec = COPY %sgpr0_sgpr1 +# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec +name: optimize_if_and_saveexec_xor_wrong_reg +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr6 = S_MOV_B32 -1 + %sgpr7 = S_MOV_B32 61440 + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc + %exec = S_MOV_B64_term %sgpr0_sgpr1 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 , %sgpr4_sgpr5_sgpr6_sgpr7 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1, %sgpr4_sgpr5_sgpr6_sgpr7 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = 
S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... +--- +# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}} +# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc +# CHECK-NEXT: %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc +# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc +# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 +# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec + +name: optimize_if_and_saveexec_xor_modify_copy_to_exec +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc + %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = 
S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr0 = S_MOV_B32 0 + %sgpr1 = S_MOV_B32 1 + %sgpr2 = S_MOV_B32 -1 + %sgpr3 = S_MOV_B32 61440 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... +--- +# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}} +# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc +# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc +# CHECK-NEXT: %exec = COPY %sgpr2_sgpr3 +# CHECK-NEXT: SI_MASK_BRANCH +name: optimize_if_and_saveexec_xor_live_out_setexec +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc + %exec = S_MOV_B64_term %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1, %sgpr2_sgpr3 + S_SLEEP 0, implicit %sgpr2_sgpr3 + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 
%exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... + +# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}} +# CHECK: %sgpr0_sgpr1 = COPY %exec +# CHECK: %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc +# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 +# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec + +name: optimize_if_unknown_saveexec +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 
addrspace(1)* undef`) + S_ENDPGM + +... +--- +# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}} +# CHECK: %sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec +# CHECK-NEXT: SI_MASK_BRANCH + +name: optimize_if_andn2_saveexec +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_ANDN2_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... 
+--- +# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}} +# CHECK: %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc +# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 +# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec +name: optimize_if_andn2_saveexec_no_commute +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%vgpr0' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.main_body: + successors: %bb.1.if, %bb.2.end + liveins: %vgpr0 + + %sgpr0_sgpr1 = COPY %exec + %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec + %vgpr0 = V_MOV_B32_e32 4, implicit %exec + %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc + %exec = S_MOV_B64_term killed %sgpr2_sgpr3 + SI_MASK_BRANCH %bb.2.end, implicit %exec + S_BRANCH %bb.1.if + + bb.1.if: + successors: %bb.2.end + liveins: %sgpr0_sgpr1 + + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) + + bb.2.end: + liveins: %vgpr0, %sgpr0_sgpr1 + + %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) + S_ENDPGM + +... 
diff --git a/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir new file mode 100644 index 00000000000..b928bc7086b --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/rename-independent-subregs.mir @@ -0,0 +1,70 @@ +# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck %s +--- | + define void @test0() { ret void } + define void @test1() { ret void } +... +--- +# In the test below we have two independent def+use pairs of sub1 which +# can be moved to a new virtual register. The third def of sub1 however is used +# in combination with sub0 and needs to stay with the original vreg. +# CHECK-LABEL: name: test0 +# CHECK: S_NOP 0, implicit-def undef %0.sub0 +# CHECK: S_NOP 0, implicit-def undef %2.sub1 +# CHECK: S_NOP 0, implicit %2.sub1 +# CHECK: S_NOP 0, implicit-def undef %1.sub1 +# CHECK: S_NOP 0, implicit %1.sub1 +# CHECK: S_NOP 0, implicit-def %0.sub1 +# CHECK: S_NOP 0, implicit %0 +name: test0 +registers: + - { id: 0, class: sreg_128 } +body: | + bb.0: + S_NOP 0, implicit-def undef %0.sub0 + S_NOP 0, implicit-def %0.sub1 + S_NOP 0, implicit %0.sub1 + S_NOP 0, implicit-def %0.sub1 + S_NOP 0, implicit %0.sub1 + S_NOP 0, implicit-def %0.sub1 + S_NOP 0, implicit %0 +... +--- +# Test for a bug where we would incorrectly query liveness at the instruction +# index in rewriteOperands(). This should pass the verifier afterwards.
+# CHECK-LABEL: name: test1 +# CHECK: bb.0 +# CHECK: S_NOP 0, implicit-def undef %2.sub2 +# CHECK: bb.1 +# CHECK: S_NOP 0, implicit-def %2.sub1 +# CHECK-NEXT: S_NOP 0, implicit-def %2.sub3 +# CHECK-NEXT: S_NOP 0, implicit %2 +# CHECK-NEXT: S_NOP 0, implicit-def undef %0.sub0 +# CHECK-NEXT: S_NOP 0, implicit %2.sub1 +# CHECK-NEXT: S_NOP 0, implicit %0.sub0 +# CHECK: bb.2 +# CHECK: S_NOP 0, implicit %2.sub2 +name: test1 +registers: + - { id: 0, class: sreg_128 } + - { id: 1, class: sreg_128 } +body: | + bb.0: + successors: %bb.1, %bb.2 + S_NOP 0, implicit-def undef %0.sub2 + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.1: + S_NOP 0, implicit-def %0.sub1 + S_NOP 0, implicit-def %0.sub3 + %1 = COPY %0 + S_NOP 0, implicit %1 + + S_NOP 0, implicit-def %1.sub0 + S_NOP 0, implicit %1.sub1 + S_NOP 0, implicit %1.sub0 + + bb.2: + S_NOP 0, implicit %0.sub2 + +... diff --git a/llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir b/llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir new file mode 100644 index 00000000000..af71086e542 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/scalar-store-cache-flush.mir @@ -0,0 +1,173 @@ +# RUN: llc -march=amdgcn -run-pass si-insert-waits %s -o - | FileCheck %s + +--- | + define void @basic_insert_dcache_wb() { + ret void + } + + define void @explicit_flush_after() { + ret void + } + + define void @explicit_flush_before() { + ret void + } + + define void @no_scalar_store() { + ret void + } + + define void @multi_block_store() { + bb0: + br i1 undef, label %bb1, label %bb2 + + bb1: + ret void + + bb2: + ret void + } + + define void @one_block_store() { + bb0: + br i1 undef, label %bb1, label %bb2 + + bb1: + ret void + + bb2: + ret void + } + + define amdgpu_ps float @si_return() { + ret float undef + } + +...
+--- +# CHECK-LABEL: name: basic_insert_dcache_wb +# CHECK: bb.0: +# CHECK-NEXT: S_STORE_DWORD +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_ENDPGM + +name: basic_insert_dcache_wb +tracksRegLiveness: false + +body: | + bb.0: + S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 + S_ENDPGM +... +--- +# Already has an explicitly requested flush after the last store. +# CHECK-LABEL: name: explicit_flush_after +# CHECK: bb.0: +# CHECK-NEXT: S_STORE_DWORD +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_ENDPGM + +name: explicit_flush_after +tracksRegLiveness: false + +body: | + bb.0: + S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 + S_DCACHE_WB + S_ENDPGM +... +--- +# Already has an explicitly requested flush before the last store. +# CHECK-LABEL: name: explicit_flush_before +# CHECK: bb.0: +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_STORE_DWORD +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_ENDPGM + +name: explicit_flush_before +tracksRegLiveness: false + +body: | + bb.0: + S_DCACHE_WB + S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 + S_ENDPGM +... +--- +# CHECK-LABEL: no_scalar_store +# CHECK: bb.0 +# CHECK-NEXT: S_ENDPGM +name: no_scalar_store +tracksRegLiveness: false + +body: | + bb.0: + S_ENDPGM +... + +# CHECK-LABEL: name: multi_block_store +# CHECK: bb.0: +# CHECK-NEXT: S_STORE_DWORD +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_ENDPGM + +# CHECK: bb.1: +# CHECK-NEXT: S_STORE_DWORD +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_ENDPGM + +name: multi_block_store +tracksRegLiveness: false + +body: | + bb.0: + S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 + S_ENDPGM + + bb.1: + S_STORE_DWORD_SGPR undef %sgpr4, undef %sgpr6_sgpr7, undef %m0, 0 + S_ENDPGM +... +... + +# This one should be able to omit the flush in the storeless block but +# this isn't handled now. 
+ +# CHECK-LABEL: name: one_block_store +# CHECK: bb.0: +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_ENDPGM + +# CHECK: bb.1: +# CHECK-NEXT: S_STORE_DWORD +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: S_ENDPGM + +name: one_block_store +tracksRegLiveness: false + +body: | + bb.0: + S_ENDPGM + + bb.1: + S_STORE_DWORD_SGPR undef %sgpr4, undef %sgpr6_sgpr7, undef %m0, 0 + S_ENDPGM +... +--- +# CHECK-LABEL: name: si_return +# CHECK: bb.0: +# CHECK-NEXT: S_STORE_DWORD +# CHECK-NEXT: S_WAITCNT +# CHECK-NEXT: S_DCACHE_WB +# CHECK-NEXT: SI_RETURN + +name: si_return +tracksRegLiveness: false + +body: | + bb.0: + S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 + SI_RETURN undef %vgpr0 +... diff --git a/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir new file mode 100644 index 00000000000..0c08deb13a8 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/si-fix-sgpr-copies.mir @@ -0,0 +1,43 @@ +# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN + +--- | + define void @phi_visit_order() { ret void } + +name: phi_visit_order +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_32_xm0 } + - { id: 1, class: sreg_64 } + - { id: 2, class: sreg_32_xm0 } + - { id: 7, class: vgpr_32 } + - { id: 8, class: sreg_32_xm0 } + - { id: 9, class: vgpr_32 } + - { id: 10, class: sreg_64 } + - { id: 11, class: sreg_32_xm0 } + +body: | + ; GCN-LABEL: name: phi_visit_order + ; GCN: V_ADD_I32 + bb.0: + liveins: %vgpr0 + successors: %bb.1 + %7 = COPY %vgpr0 + %8 = S_MOV_B32 0 + + bb.1: + successors: %bb.1, %bb.2 + %0 = PHI %8, %bb.0, %0, %bb.1, %2, %bb.2 + %9 = V_MOV_B32_e32 9, implicit %exec + %10 = V_CMP_EQ_U32_e64 %7, %9, implicit %exec + %1 = SI_IF %10, %bb.2, implicit-def %exec, implicit-def %scc, implicit %exec + S_BRANCH %bb.1 + + bb.2: + successors: %bb.1 + SI_END_CF %1, implicit-def %exec, implicit-def %scc, implicit %exec + %11 = S_MOV_B32 1 + %2 = S_ADD_I32 %0, %11, implicit-def %scc 
+ S_BRANCH %bb.1 + +... +--- diff --git a/llvm/test/CodeGen/AMDGPU/subreg-intervals.mir b/llvm/test/CodeGen/AMDGPU/subreg-intervals.mir new file mode 100644 index 00000000000..c4e00215159 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/subreg-intervals.mir @@ -0,0 +1,51 @@ +# RUN: llc -march=amdgcn -run-pass liveintervals -debug-only=regalloc -o /dev/null %s 2>&1 | FileCheck %s +# REQUIRES: asserts + +# CHECK: INTERVALS +# CHECK: vreg0 +# CHECK-LABEL: Machine code for function test0: + +# CHECK: INTERVALS +# CHECK: vreg0 +# CHECK-LABEL: Machine code for function test1: + +--- | + define void @test0() { ret void } + define void @test1() { ret void } +... +--- +name: test0 +registers: + - { id: 0, class: sreg_64 } +body: | + bb.0: + S_NOP 0, implicit-def %0 + S_NOP 0, implicit %0 + + S_NOP 0, implicit-def undef %0.sub0 + S_NOP 0, implicit %0 +... +--- +name: test1 +registers: + - { id: 0, class: sreg_64 } +body: | + bb.0: + successors: %bb.1, %bb.2 + S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc + S_BRANCH %bb.2 + + bb.1: + successors: %bb.3 + S_NOP 0, implicit-def undef %0.sub0 + S_BRANCH %bb.3 + + bb.2: + successors: %bb.3 + S_NOP 0, implicit-def %0 + S_BRANCH %bb.3 + + bb.3: + S_NOP 0 + S_NOP 0, implicit %0 +... 
diff --git a/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir new file mode 100644 index 00000000000..03e473e3a0c --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/vccz-corrupt-bug-workaround.mir @@ -0,0 +1,177 @@ +# RUN: llc -run-pass si-insert-waits -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s +--- | + + define void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 { + entry: + %cmp0 = fcmp oeq float %cond, 0.000000e+00 + br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 + + else: ; preds = %entry + store volatile i32 100, i32 addrspace(1)* undef + br label %done, !structurizecfg.uniform !0 + + if: ; preds = %entry + store volatile i32 9, i32 addrspace(1)* undef + br label %done, !structurizecfg.uniform !0 + + done: ; preds = %if, %else + %value = phi i32 [ 0, %if ], [ 1, %else ] + store i32 %value, i32 addrspace(1)* %out + ret void + } + + define void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 { + entry: + br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 + + else: ; preds = %entry + store volatile i32 100, i32 addrspace(1)* undef + br label %done, !structurizecfg.uniform !0 + + if: ; preds = %entry + store volatile i32 9, i32 addrspace(1)* undef + br label %done, !structurizecfg.uniform !0 + + done: ; preds = %if, %else + %value = phi i32 [ 0, %if ], [ 1, %else ] + store i32 %value, i32 addrspace(1)* %out + ret void + } + + attributes #0 = { nounwind } + attributes #1 = { readnone } + + !0 = !{} + +... 
+--- +# CHECK-LABEL: name: vccz_corrupt_workaround +# CHECK: %vcc = V_CMP_EQ_F32 +# CHECK-NEXT: %vcc = S_MOV_B64 %vcc +# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit killed %vcc + +name: vccz_corrupt_workaround +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%sgpr0_sgpr1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.2.if, %bb.1.else + liveins: %sgpr0_sgpr1 + + %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`) + %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, 0, implicit %exec + S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc + + bb.2.if: + successors: %bb.3.done + liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %vgpr0 = V_MOV_B32_e32 9, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + %vgpr0 = V_MOV_B32_e32 0, implicit %exec + S_BRANCH %bb.3.done + + bb.1.else: + successors: %bb.3.done + liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %vgpr0 = V_MOV_B32_e32 100, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + %vgpr0 = V_MOV_B32_e32 1, implicit %exec + + bb.3.done: + liveins: %vgpr0, 
%sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) + S_ENDPGM + +... +--- +# CHECK-LABEL: name: vccz_corrupt_undef_vcc +# CHECK: S_WAITCNT +# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit undef %vcc + +name: vccz_corrupt_undef_vcc +alignment: 0 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: + - { reg: '%sgpr0_sgpr1' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.2.if, %bb.1.else + liveins: %sgpr0_sgpr1 + + %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) + %sgpr7 = S_MOV_B32 61440 + %sgpr6 = S_MOV_B32 -1 + S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc + + bb.2.if: + successors: %bb.3.done + liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %vgpr0 = V_MOV_B32_e32 9, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + %vgpr0 = V_MOV_B32_e32 0, implicit %exec + S_BRANCH %bb.3.done + + bb.1.else: + successors: %bb.3.done + liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %vgpr0 = V_MOV_B32_e32 100, implicit %exec + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) + %vgpr0 = V_MOV_B32_e32 1, implicit %exec + + bb.3.done: + liveins: %vgpr0, 
%sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 + + %sgpr3 = S_MOV_B32 61440 + %sgpr2 = S_MOV_B32 -1 + BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) + S_ENDPGM + +... diff --git a/llvm/test/CodeGen/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/AMDGPU/waitcnt.mir new file mode 100644 index 00000000000..cb5de6a2419 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/waitcnt.mir @@ -0,0 +1,59 @@ +# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waits %s -o - | FileCheck %s + +--- | + define void @flat_zero_waitcnt(i32 addrspace(1)* %global4, + <4 x i32> addrspace(1)* %global16, + i32 addrspace(4)* %flat4, + <4 x i32> addrspace(4)* %flat16) { + ret void + } +... +--- + +# CHECK-LABEL: name: flat_zero_waitcnt + +# CHECK-LABEL: bb.0: +# CHECK: FLAT_LOAD_DWORD +# CHECK: FLAT_LOAD_DWORDX4 +# Global loads will return in order so we should: +# s_waitcnt vmcnt(1) lgkmcnt(0) +# CHECK-NEXT: S_WAITCNT 113 + +# CHECK-LABEL: bb.1: +# CHECK: FLAT_LOAD_DWORD +# CHECK: FLAT_LOAD_DWORDX4 +# The first load has no mem operand, so we should assume it accesses the flat +# address space. +# s_waitcnt vmcnt(0) lgkmcnt(0) +# CHECK-NEXT: S_WAITCNT 112 + +# CHECK-LABEL: bb.2: +# CHECK: FLAT_LOAD_DWORD +# CHECK: FLAT_LOAD_DWORDX4 +# At least one outstanding load accesses the flat address space.
+# s_waitcnt vmcnt(0) lgkmcnt(0) +# CHECK-NEXT: S_WAITCNT 112 + +name: flat_zero_waitcnt + +body: | + bb.0: + successors: %bb.1 + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4) + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) + %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec + S_BRANCH %bb.1 + + bb.1: + successors: %bb.2 + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) + %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec + S_BRANCH %bb.2 + + bb.2: + %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4) + %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16) + %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec + S_ENDPGM +... 
diff --git a/llvm/test/CodeGen/ARM/imm-peephole-arm.mir b/llvm/test/CodeGen/ARM/imm-peephole-arm.mir new file mode 100644 index 00000000000..cd30bdb74d5 --- /dev/null +++ b/llvm/test/CodeGen/ARM/imm-peephole-arm.mir @@ -0,0 +1,60 @@ +# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s + +# CHECK: [[IN:%.*]] = COPY %r0 +# CHECK: [[SUM1TMP:%.*]] = ADDri [[IN]], 133 +# CHECK: [[SUM1:%.*]] = ADDri killed [[SUM1TMP]], 25600 + +# CHECK: [[SUM2TMP:%.*]] = SUBri [[IN]], 133 +# CHECK: [[SUM2:%.*]] = SUBri killed [[SUM2TMP]], 25600 + +# CHECK: [[SUM3TMP:%.*]] = SUBri [[IN]], 133 +# CHECK: [[SUM3:%.*]] = SUBri killed [[SUM3TMP]], 25600 + +# CHECK: [[SUM4TMP:%.*]] = ADDri killed [[IN]], 133 +# CHECK: [[SUM4:%.*]] = ADDri killed [[SUM4TMP]], 25600 + + +--- | + target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" + target triple = "armv7-apple-ios" + + define i32 @foo(i32 %in) { + ret i32 undef + } +... +--- +name: foo +registers: + - { id: 0, class: gprnopc } + - { id: 1, class: rgpr } + - { id: 2, class: rgpr } + - { id: 3, class: rgpr } + - { id: 4, class: rgpr } + - { id: 5, class: rgpr } + - { id: 6, class: rgpr } + - { id: 7, class: rgpr } + - { id: 8, class: rgpr } +liveins: + - { reg: '%r0', virtual-reg: '%0' } +body: | + bb.0 (%ir-block.0): + liveins: %r0 + + %0 = COPY %r0 + %1 = MOVi32imm -25733 + %2 = SUBrr %0, killed %1, 14, _, _ + + %3 = MOVi32imm 25733 + %4 = SUBrr %0, killed %3, 14, _, _ + + %5 = MOVi32imm -25733 + %6 = ADDrr %0, killed %5, 14, _, _ + + %7 = MOVi32imm 25733 + %8 = ADDrr killed %0, killed %7, 14, _, _ + + %r0 = COPY killed %8 + BX_RET 14, _, implicit %r0 + +... 
+ diff --git a/llvm/test/CodeGen/ARM/imm-peephole-thumb.mir b/llvm/test/CodeGen/ARM/imm-peephole-thumb.mir new file mode 100644 index 00000000000..3d342902d80 --- /dev/null +++ b/llvm/test/CodeGen/ARM/imm-peephole-thumb.mir @@ -0,0 +1,59 @@ +# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s + +# CHECK: [[IN:%.*]] = COPY %r0 +# CHECK: [[SUM1TMP:%.*]] = t2ADDri [[IN]], 25600 +# CHECK: [[SUM1:%.*]] = t2ADDri killed [[SUM1TMP]], 133 + +# CHECK: [[SUM2TMP:%.*]] = t2SUBri [[IN]], 25600 +# CHECK: [[SUM2:%.*]] = t2SUBri killed [[SUM2TMP]], 133 + +# CHECK: [[SUM3TMP:%.*]] = t2SUBri [[IN]], 25600 +# CHECK: [[SUM3:%.*]] = t2SUBri killed [[SUM3TMP]], 133 + +# CHECK: [[SUM4TMP:%.*]] = t2ADDri killed [[IN]], 25600 +# CHECK: [[SUM4:%.*]] = t2ADDri killed [[SUM4TMP]], 133 + + +--- | + target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" + target triple = "thumbv7-apple-ios" + + define i32 @foo(i32 %in) { + ret i32 undef + } +... +--- +name: foo +registers: + - { id: 0, class: gprnopc } + - { id: 1, class: rgpr } + - { id: 2, class: rgpr } + - { id: 3, class: rgpr } + - { id: 4, class: rgpr } + - { id: 5, class: rgpr } + - { id: 6, class: rgpr } + - { id: 7, class: rgpr } + - { id: 8, class: rgpr } +liveins: + - { reg: '%r0', virtual-reg: '%0' } +body: | + bb.0 (%ir-block.0): + liveins: %r0 + %0 = COPY %r0 + %1 = t2MOVi32imm -25733 + %2 = t2SUBrr %0, killed %1, 14, _, _ + + %3 = t2MOVi32imm 25733 + %4 = t2SUBrr %0, killed %3, 14, _, _ + + %5 = t2MOVi32imm -25733 + %6= t2ADDrr %0, killed %5, 14, _, _ + + %7 = t2MOVi32imm 25733 + %8 = t2ADDrr killed %0, killed %7, 14, _, _ + + %r0 = COPY killed %8 + tBX_RET 14, _, implicit %r0 + +... 
+ diff --git a/llvm/test/CodeGen/Hexagon/anti-dep-partial.mir b/llvm/test/CodeGen/Hexagon/anti-dep-partial.mir new file mode 100644 index 00000000000..09bc49c508a --- /dev/null +++ b/llvm/test/CodeGen/Hexagon/anti-dep-partial.mir @@ -0,0 +1,34 @@ +# RUN: llc -march=hexagon -post-RA-scheduler -run-pass post-RA-sched %s -o - | FileCheck %s + +--- | + declare void @check(i64, i32, i32, i64) + define void @foo() { + ret void + } +... + +--- +name: foo +tracksRegLiveness: true +body: | + bb.0: + successors: + liveins: %r0, %r1, %d1, %d2, %r16, %r17, %r19, %r22, %r23 + %r2 = A2_add %r23, killed %r17 + %r6 = M2_mpyi %r16, %r16 + %r22 = M2_accii %r22, killed %r2, 2 + %r7 = A2_tfrsi 12345678 + %r3 = A2_tfr killed %r16 + %d2 = A2_tfrp killed %d0 + %r2 = L2_loadri_io %r29, 28 + %r2 = M2_mpyi killed %r6, killed %r2 + %r23 = S2_asr_i_r %r22, 31 + S2_storeri_io killed %r29, 0, killed %r7 + ; The anti-dependency on r23 between the first A2_add and the + ; S2_asr_i_r was causing d11 to be renamed, while r22 remained + ; unchanged. Check that the renaming of d11 does not happen. + ; CHECK: d11 + %d0 = A2_tfrp killed %d11 + J2_call @check, implicit-def %d0, implicit-def %d1, implicit-def %d2, implicit %d0, implicit %d1, implicit %d2 +... + diff --git a/llvm/test/CodeGen/Lanai/peephole-compare.mir b/llvm/test/CodeGen/Lanai/peephole-compare.mir new file mode 100644 index 00000000000..5056a05ed1f --- /dev/null +++ b/llvm/test/CodeGen/Lanai/peephole-compare.mir @@ -0,0 +1,678 @@ +# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s + +# Test the compare fold peephole. + +# CHECK-LABEL: name: test0a +# TODO: Enhance combiner to handle this case. This expands into: +# sub %r7, %r6, %r3 +# sub.f %r7, %r6, %r0 +# sel.eq %r18, %r3, %rv +# This is different from the pattern currently matched. If the lowered form had +# been sub.f %r3, 0, %r0 then it would have matched. 
+ +# CHECK-LABEL: name: test1a +# CHECK: [[IN1:%.*]] = COPY %r7 +# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr + +# CHECK-LABEL: name: test1b +# CHECK: [[IN1:%.*]] = COPY %r7 +# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr + +# CHECK-LABEL: name: test2a +# CHECK: [[IN1:%.*]] = COPY %r7 +# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr + +# CHECK-LABEL: name: test2b +# CHECK: [[IN1:%.*]] = COPY %r7 +# CHECK: [[IN2:%.*]] = COPY %r6 +# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr + +# CHECK-LABEL: name: test3 +# CHECK: AND_F_R +# CHECK: AND_F_R +# CHECK: AND_F_R + +--- | + target datalayout = "E-m:e-p:32:32-i64:64-a:0:32-n32-S64" + target triple = "lanai-unknown-unknown" + + @a = global i32 -1, align 4 + @b = global i32 0, align 4 + + define i32 @test0a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %sub = sub i32 %b, %a + %cmp = icmp eq i32 %sub, 0 + %cond = select i1 %cmp, i32 %c, i32 %sub + ret i32 %cond + } + + define i32 @test0b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %cmp = icmp eq i32 %b, %a + %cond = select i1 %cmp, i32 %c, i32 %b + ret i32 %cond + } + + define i32 @test1a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %sub = sub i32 %b, %a + %cmp = icmp slt i32 %sub, 0 + %cond = select i1 %cmp, i32 %c, i32 %d + ret i32 %cond + } + + define i32 @test1b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %sub = sub i32 %b, %a + %cmp = icmp slt i32 %sub, 0 + %cond = select i1 %cmp, i32 %c, i32 %d + ret i32 %cond + } + + define i32 @test2a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %sub = sub i32 %b, %a + %cmp = icmp sgt i32 %sub, -1 + %cond = select i1 %cmp, i32 %c, i32 %d + ret i32 %cond + } + + define i32 @test2b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %sub = sub i32 %b, %a + %cmp = icmp 
sgt i32 %sub, -1 + %cond = select i1 %cmp, i32 %c, i32 %d + ret i32 %cond + } + + define i32 @test3(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %sub = sub i32 %b, %a + %cmp = icmp slt i32 %sub, 1 + %cond = select i1 %cmp, i32 %c, i32 %d + ret i32 %cond + } + + define i32 @test4(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { + entry: + %cmp = icmp ne i32 %a, 0 + %cmp1 = icmp ult i32 %a, %b + %or.cond = and i1 %cmp, %cmp1 + br i1 %or.cond, label %return, label %if.end + + if.end: ; preds = %entry + %cmp2 = icmp ne i32 %b, 0 + %cmp4 = icmp ult i32 %b, %c + %or.cond29 = and i1 %cmp2, %cmp4 + br i1 %or.cond29, label %return, label %if.end6 + + if.end6: ; preds = %if.end + %cmp7 = icmp ne i32 %c, 0 + %cmp9 = icmp ult i32 %c, %d + %or.cond30 = and i1 %cmp7, %cmp9 + br i1 %or.cond30, label %return, label %if.end11 + + if.end11: ; preds = %if.end6 + %cmp12 = icmp ne i32 %d, 0 + %cmp14 = icmp ult i32 %d, %a + %or.cond31 = and i1 %cmp12, %cmp14 + %b. = select i1 %or.cond31, i32 %b, i32 21 + ret i32 %b. + + return: ; preds = %if.end6, %if.end, %entry + %retval.0 = phi i32 [ %c, %entry ], [ %d, %if.end ], [ %a, %if.end6 ] + ret i32 %retval.0 + } + + define void @testBB() { + entry: + %0 = load i32, i32* @a, align 4, !tbaa !0 + %1 = load i32, i32* @b, align 4, !tbaa !0 + %sub.i = sub i32 %1, %0 + %tobool = icmp sgt i32 %sub.i, -1 + br i1 %tobool, label %if.end, label %if.then + + if.then: ; preds = %entry + %call1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() + br label %while.body + + while.body: ; preds = %while.body, %if.then + br label %while.body + + if.end: ; preds = %entry + %cmp.i = icmp slt i32 %sub.i, 1 + br i1 %cmp.i, label %if.then4, label %if.end7 + + if.then4: ; preds = %if.end + %call5 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() + br label %while.body6 + + while.body6: ; preds = %while.body6, %if.then4 + br label %while.body6 + + if.end7: ; preds = %if.end + ret void + } + + declare i32 @g(...) 
+ + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #0 + + attributes #0 = { nounwind } + + !0 = !{!1, !1, i64 0} + !1 = !{!"int", !2, i64 0} + !2 = !{!"omnipotent char", !3, i64 0} + !3 = !{!"Simple C/C++ TBAA"} + +... +--- +name: test0a +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%0' } + - { reg: '%r7', virtual-reg: '%1' } + - { reg: '%r18', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r6, %r7, %r18 + + %2 = COPY %r18 + %1 = COPY %r7 + %0 = COPY %r6 + %4 = SUB_R %1, %0, 0 + SFSUB_F_RI_LO %4, 0, implicit-def %sr + %5 = SELECT %2, %4, 7, implicit %sr + %rv = COPY %5 + RET implicit %rca, implicit %rv + +... 
+--- +name: test0b +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%0' } + - { reg: '%r7', virtual-reg: '%1' } + - { reg: '%r18', virtual-reg: '%2' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r6, %r7, %r18 + + %2 = COPY %r18 + %1 = COPY %r7 + %0 = COPY %r6 + SFSUB_F_RR %1, %0, implicit-def %sr + %4 = SELECT %2, %1, 7, implicit %sr + %rv = COPY %4 + RET implicit %rca, implicit %rv + +... +--- +name: test1a +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%0' } + - { reg: '%r7', virtual-reg: '%1' } + - { reg: '%r18', virtual-reg: '%2' } + - { reg: '%r19', virtual-reg: '%3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r6, %r7, %r18, %r19 + + %3 = COPY %r19 + %2 = COPY %r18 + %1 = COPY %r7 + %0 = COPY %r6 + %4 = SUB_R %1, %0, 0 + SFSUB_F_RI_LO killed %4, 0, implicit-def %sr + %5 = SELECT %2, %3, 11, implicit %sr + %rv = COPY %5 + RET implicit %rca, implicit %rv + +... 
+--- +name: test1b +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%0' } + - { reg: '%r7', virtual-reg: '%1' } + - { reg: '%r18', virtual-reg: '%2' } + - { reg: '%r19', virtual-reg: '%3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r6, %r7, %r18, %r19 + + %3 = COPY %r19 + %2 = COPY %r18 + %1 = COPY %r7 + %0 = COPY %r6 + %4 = SUB_R %1, %0, 0 + SFSUB_F_RI_LO killed %4, 0, implicit-def %sr + %5 = SELECT %2, %3, 11, implicit %sr + %rv = COPY %5 + RET implicit %rca, implicit %rv + +... 
+--- +name: test2a +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%0' } + - { reg: '%r7', virtual-reg: '%1' } + - { reg: '%r18', virtual-reg: '%2' } + - { reg: '%r19', virtual-reg: '%3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r6, %r7, %r18, %r19 + + %3 = COPY %r19 + %2 = COPY %r18 + %1 = COPY %r7 + %0 = COPY %r6 + %4 = SUB_R %1, %0, 0 + SFSUB_F_RI_LO killed %4, 0, implicit-def %sr + %5 = SELECT %2, %3, 10, implicit %sr + %rv = COPY %5 + RET implicit %rca, implicit %rv + +... 
+--- +name: test2b +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%0' } + - { reg: '%r7', virtual-reg: '%1' } + - { reg: '%r18', virtual-reg: '%2' } + - { reg: '%r19', virtual-reg: '%3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r6, %r7, %r18, %r19 + + %3 = COPY %r19 + %2 = COPY %r18 + %1 = COPY %r7 + %0 = COPY %r6 + %4 = SUB_R %1, %0, 0 + SFSUB_F_RI_LO killed %4, 0, implicit-def %sr + %5 = SELECT %2, %3, 10, implicit %sr + %rv = COPY %5 + RET implicit %rca, implicit %rv + +... 
+--- +name: test3 +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%0' } + - { reg: '%r7', virtual-reg: '%1' } + - { reg: '%r18', virtual-reg: '%2' } + - { reg: '%r19', virtual-reg: '%3' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + liveins: %r6, %r7, %r18, %r19 + + %3 = COPY %r19 + %2 = COPY %r18 + %1 = COPY %r7 + %0 = COPY %r6 + %4 = SUB_R %1, %0, 0 + SFSUB_F_RI_LO killed %4, 1, implicit-def %sr + %5 = SELECT %2, %3, 13, implicit %sr + %rv = COPY %5 + RET implicit %rca, implicit %rv + +... 
+--- +name: test4 +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } + - { id: 9, class: gpr } + - { id: 10, class: gpr } + - { id: 11, class: gpr } + - { id: 12, class: gpr } + - { id: 13, class: gpr } + - { id: 14, class: gpr } + - { id: 15, class: gpr } + - { id: 16, class: gpr } + - { id: 17, class: gpr } + - { id: 18, class: gpr } + - { id: 19, class: gpr } + - { id: 20, class: gpr } + - { id: 21, class: gpr } + - { id: 22, class: gpr } +liveins: + - { reg: '%r6', virtual-reg: '%1' } + - { reg: '%r7', virtual-reg: '%2' } + - { reg: '%r18', virtual-reg: '%3' } + - { reg: '%r19', virtual-reg: '%4' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: false + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.4.return, %bb.1.if.end + liveins: %r6, %r7, %r18, %r19 + + %4 = COPY %r19 + %3 = COPY %r18 + %2 = COPY %r7 + %1 = COPY %r6 + SFSUB_F_RI_LO %1, 0, implicit-def %sr + %5 = SCC 6, implicit %sr + SFSUB_F_RR %1, %2, implicit-def %sr + %6 = SCC 4, implicit %sr + %7 = AND_R killed %5, killed %6, 0 + %8 = SLI 1 + %9 = AND_R killed %7, %8, 0 + SFSUB_F_RI_LO killed %9, 0, implicit-def %sr + BRCC %bb.4.return, 6, implicit %sr + BT %bb.1.if.end + + bb.1.if.end: + successors: %bb.4.return, %bb.2.if.end6 + + SFSUB_F_RI_LO %2, 0, implicit-def %sr + %10 = SCC 6, implicit %sr + SFSUB_F_RR %2, %3, implicit-def %sr + %11 = SCC 4, implicit %sr + %12 = AND_R killed %10, killed %11, 0 + %14 = AND_R killed %12, %8, 0 + SFSUB_F_RI_LO killed %14, 0, implicit-def %sr + BRCC 
%bb.4.return, 6, implicit %sr + BT %bb.2.if.end6 + + bb.2.if.end6: + successors: %bb.4.return, %bb.3.if.end11 + + SFSUB_F_RI_LO %3, 0, implicit-def %sr + %15 = SCC 6, implicit %sr + SFSUB_F_RR %3, %4, implicit-def %sr + %16 = SCC 4, implicit %sr + %17 = AND_R killed %15, killed %16, 0 + %18 = SLI 1 + %19 = AND_R killed %17, killed %18, 0 + SFSUB_F_RI_LO killed %19, 0, implicit-def %sr + BRCC %bb.4.return, 6, implicit %sr + BT %bb.3.if.end11 + + bb.3.if.end11: + %20 = SLI 21 + SFSUB_F_RR %4, %1, implicit-def %sr + %21 = SELECT %2, %20, 4, implicit %sr + SFSUB_F_RI_LO %4, 0, implicit-def %sr + %22 = SELECT killed %21, %20, 6, implicit %sr + %rv = COPY %22 + RET implicit %rca, implicit %rv + + bb.4.return: + %0 = PHI %3, %bb.0.entry, %4, %bb.1.if.end, %1, %bb.2.if.end6 + %rv = COPY %0 + RET implicit %rca, implicit %rv + +... +--- +name: testBB +alignment: 2 +exposesReturnsTwice: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr } + - { id: 1, class: gpr } + - { id: 2, class: gpr } + - { id: 3, class: gpr } + - { id: 4, class: gpr } + - { id: 5, class: gpr } + - { id: 6, class: gpr } + - { id: 7, class: gpr } + - { id: 8, class: gpr } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 0 + adjustsStack: false + hasCalls: true + maxCallFrameSize: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false +body: | + bb.0.entry: + successors: %bb.3.if.end, %bb.1.if.then + + %1 = MOVHI target-flags(lanai-hi) @a + %2 = OR_I_LO killed %1, target-flags(lanai-lo) @a + %3 = LDW_RI killed %2, 0, 0 :: (load 4 from @a, !tbaa !0) + %4 = MOVHI target-flags(lanai-hi) @b + %5 = OR_I_LO killed %4, target-flags(lanai-lo) @b + %6 = LDW_RI killed %5, 0, 0 :: (load 4 from @b, !tbaa !0) + %0 = SUB_R killed %6, killed %3, 0 + SFSUB_F_RI_LO %0, 0, implicit-def %sr + BRCC %bb.3.if.end, 10, implicit %sr + BT %bb.1.if.then + + 
bb.1.if.then: + successors: %bb.2.while.body + + ADJCALLSTACKDOWN 0, implicit-def dead %sp, implicit %sp + CALL @g, csr, implicit-def dead %rca, implicit %sp, implicit-def %sp, implicit-def %rv + ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp + + bb.2.while.body: + successors: %bb.2.while.body + + BT %bb.2.while.body + + bb.3.if.end: + successors: %bb.4.if.then4, %bb.6.if.end7 + liveins: %sr + + BRCC %bb.6.if.end7, 14, implicit %sr + BT %bb.4.if.then4 + + bb.4.if.then4: + successors: %bb.5.while.body6 + + ADJCALLSTACKDOWN 0, implicit-def dead %sp, implicit %sp + CALL @g, csr, implicit-def dead %rca, implicit %sp, implicit-def %sp, implicit-def %rv + ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp + + bb.5.while.body6: + successors: %bb.5.while.body6 + + BT %bb.5.while.body6 + + bb.6.if.end7: + RET implicit %rca + +... diff --git a/llvm/test/CodeGen/MIR/AArch64/ldst-opt-zr-clobber.mir b/llvm/test/CodeGen/MIR/AArch64/ldst-opt-zr-clobber.mir deleted file mode 100644 index 75ad849e4f3..00000000000 --- a/llvm/test/CodeGen/MIR/AArch64/ldst-opt-zr-clobber.mir +++ /dev/null @@ -1,27 +0,0 @@ - -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass aarch64-ldst-opt -verify-machineinstrs -o - %s | FileCheck %s - ---- | - define i1 @no-clobber-zr(i64* %p, i64 %x) { ret i1 0 } -... ---- -# Check that write of xzr doesn't inhibit pairing of xzr stores since -# it isn't actually clobbered. Written as a MIR test to avoid -# schedulers reordering instructions such that SUBS doesn't appear -# between stores. -# CHECK-LABEL: name: no-clobber-zr -# CHECK: STPXi %xzr, %xzr, %x0, 0 -name: no-clobber-zr -body: | - bb.0: - liveins: %x0, %x1 - STRXui %xzr, %x0, 0 :: (store 8 into %ir.p) - dead %xzr = SUBSXri killed %x1, 0, 0, implicit-def %nzcv - %w8 = CSINCWr %wzr, %wzr, 1, implicit killed %nzcv - STRXui %xzr, killed %x0, 1 :: (store 8 into %ir.p) - %w0 = ORRWrs %wzr, killed %w8, 0 - RET %lr, implicit %w0 -... 
- - - diff --git a/llvm/test/CodeGen/MIR/AArch64/machine-dead-copy.mir b/llvm/test/CodeGen/MIR/AArch64/machine-dead-copy.mir deleted file mode 100644 index cb552e5cab3..00000000000 --- a/llvm/test/CodeGen/MIR/AArch64/machine-dead-copy.mir +++ /dev/null @@ -1,67 +0,0 @@ - -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-cp -verify-machineinstrs -o - %s | FileCheck %s - ---- | - define i32 @copyprop1(i32 %a, i32 %b) { ret i32 %a } - define i32 @copyprop2(i32 %a, i32 %b) { ret i32 %a } - define i32 @copyprop3(i32 %a, i32 %b) { ret i32 %a } - define i32 @copyprop4(i32 %a, i32 %b) { ret i32 %a } - declare i32 @foo(i32) -... ---- -# The first copy is dead copy which is not used. -# CHECK-LABEL: name: copyprop1 -# CHECK: bb.0: -# CHECK-NOT: %w20 = COPY -name: copyprop1 -body: | - bb.0: - liveins: %w0, %w1 - %w20 = COPY %w1 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - RET_ReallyLR implicit %w0 -... ---- -# The first copy is not a dead copy which is used in the second copy after the -# call. -# CHECK-LABEL: name: copyprop2 -# CHECK: bb.0: -# CHECK: %w20 = COPY -name: copyprop2 -body: | - bb.0: - liveins: %w0, %w1 - %w20 = COPY %w1 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - %w0 = COPY %w20 - RET_ReallyLR implicit %w0 -... ---- -# Both the first and second copy are dead copies which are not used. -# CHECK-LABEL: name: copyprop3 -# CHECK: bb.0: -# CHECK-NOT: COPY -name: copyprop3 -body: | - bb.0: - liveins: %w0, %w1 - %w20 = COPY %w1 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - %w20 = COPY %w0 - RET_ReallyLR implicit %w0 -... -# The second copy is removed as a NOP copy, after then the first copy become -# dead which should be removed as well. -# CHECK-LABEL: name: copyprop4 -# CHECK: bb.0: -# CHECK-NOT: COPY -name: copyprop4 -body: | - bb.0: - liveins: %w0, %w1 - %w20 = COPY %w0 - %w0 = COPY %w20 - BL @foo, csr_aarch64_aapcs, implicit %w0, implicit-def %w0 - RET_ReallyLR implicit %w0 -... 
- diff --git a/llvm/test/CodeGen/MIR/AArch64/machine-scheduler.mir b/llvm/test/CodeGen/MIR/AArch64/machine-scheduler.mir deleted file mode 100644 index e7e0dda53c5..00000000000 --- a/llvm/test/CodeGen/MIR/AArch64/machine-scheduler.mir +++ /dev/null @@ -1,34 +0,0 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-scheduler -verify-machineinstrs -o - %s | FileCheck %s - ---- | - define i64 @load_imp-def(i64* nocapture %P, i32 %v) { - entry: - %0 = bitcast i64* %P to i32* - %1 = load i32, i32* %0 - %conv = zext i32 %1 to i64 - %arrayidx19 = getelementptr inbounds i64, i64* %P, i64 1 - %arrayidx1 = bitcast i64* %arrayidx19 to i32* - store i32 %v, i32* %arrayidx1 - %2 = load i64, i64* %arrayidx19 - %and = and i64 %2, 4294967295 - %add = add nuw nsw i64 %and, %conv - ret i64 %add - } -... ---- -# CHECK-LABEL: name: load_imp-def -# CHECK: bb.0.entry: -# CHECK: LDRWui %x0, 0 -# CHECK: LDRWui %x0, 1 -# CHECK: STRWui %w1, %x0, 2 -name: load_imp-def -body: | - bb.0.entry: - liveins: %w1, %x0 - %w8 = LDRWui %x0, 1, implicit-def %x8 :: (load 4 from %ir.0) - STRWui killed %w1, %x0, 2 :: (store 4 into %ir.arrayidx1) - %w9 = LDRWui killed %x0, 0, implicit-def %x9 :: (load 4 from %ir.arrayidx19, align 8) - %x0 = ADDXrr killed %x9, killed %x8 - RET_ReallyLR implicit %x0 -... - diff --git a/llvm/test/CodeGen/MIR/AArch64/machine-sink-zr.mir b/llvm/test/CodeGen/MIR/AArch64/machine-sink-zr.mir deleted file mode 100644 index 535fba0dc63..00000000000 --- a/llvm/test/CodeGen/MIR/AArch64/machine-sink-zr.mir +++ /dev/null @@ -1,48 +0,0 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -run-pass machine-sink -o - %s | FileCheck %s ---- | - define void @sinkwzr() { ret void } -... ---- -name: sinkwzr -tracksRegLiveness: true -registers: - - { id: 0, class: gpr32 } - - { id: 1, class: gpr32 } - - { id: 2, class: gpr32sp } - - { id: 3, class: gpr32 } - - { id: 4, class: gpr32 } -body: | - ; Check that WZR copy is sunk into the loop preheader. 
- ; CHECK-LABEL: name: sinkwzr - ; CHECK-LABEL: bb.0: - ; CHECK-NOT: COPY %wzr - bb.0: - successors: %bb.3, %bb.1 - liveins: %w0 - - %0 = COPY %w0 - %1 = COPY %wzr - CBZW %0, %bb.3 - - ; CHECK-LABEL: bb.1: - ; CHECK: COPY %wzr - - bb.1: - successors: %bb.2 - - B %bb.2 - - bb.2: - successors: %bb.3, %bb.2 - - %2 = PHI %0, %bb.1, %4, %bb.2 - %w0 = COPY %1 - %3 = SUBSWri %2, 1, 0, implicit-def dead %nzcv - %4 = COPY %3 - CBZW %3, %bb.3 - B %bb.2 - - bb.3: - RET_ReallyLR - -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/coalescer-subreg-join.mir b/llvm/test/CodeGen/MIR/AMDGPU/coalescer-subreg-join.mir deleted file mode 100644 index 234fe57b513..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/coalescer-subreg-join.mir +++ /dev/null @@ -1,75 +0,0 @@ -# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -o - %s | FileCheck %s -# Check that %11 and %20 have been coalesced. -# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG:[0-9]+]] -# CHECK: IMAGE_SAMPLE_C_D_O_V1_V16 %[[REG]] - ---- -name: main -alignment: 0 -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_64 } - - { id: 1, class: vgpr_32 } - - { id: 2, class: vgpr_32 } - - { id: 3, class: sreg_256 } - - { id: 4, class: sreg_128 } - - { id: 5, class: sreg_256 } - - { id: 6, class: sreg_128 } - - { id: 7, class: sreg_512 } - - { id: 9, class: vreg_512 } - - { id: 11, class: vreg_512 } - - { id: 18, class: vgpr_32 } - - { id: 20, class: vreg_512 } - - { id: 27, class: vgpr_32 } -liveins: - - { reg: '%sgpr2_sgpr3', virtual-reg: '%0' } - - { reg: '%vgpr2', virtual-reg: '%1' } - - { reg: '%vgpr3', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0: - liveins: %sgpr2_sgpr3, %vgpr2, %vgpr3 - - %0 = COPY %sgpr2_sgpr3 - %1 = 
COPY %vgpr2 - %2 = COPY %vgpr3 - %3 = S_LOAD_DWORDX8_IMM %0, 0, 0 - %4 = S_LOAD_DWORDX4_IMM %0, 12, 0 - %5 = S_LOAD_DWORDX8_IMM %0, 16, 0 - %6 = S_LOAD_DWORDX4_IMM %0, 28, 0 - undef %7.sub0 = S_MOV_B32 212739 - %20 = COPY %7 - %11 = COPY %20 - %11.sub1 = COPY %1 - %11.sub2 = COPY %1 - %11.sub3 = COPY %1 - %11.sub4 = COPY %1 - %11.sub5 = COPY %1 - %11.sub6 = COPY %1 - %11.sub7 = COPY %1 - %11.sub8 = COPY %1 - dead %18 = IMAGE_SAMPLE_C_D_O_V1_V16 %11, %3, %4, 1, 0, 0, 0, 0, 0, 0, -1, implicit %exec - %20.sub1 = COPY %2 - %20.sub2 = COPY %2 - %20.sub3 = COPY %2 - %20.sub4 = COPY %2 - %20.sub5 = COPY %2 - %20.sub6 = COPY %2 - %20.sub7 = COPY %2 - %20.sub8 = COPY %2 - dead %27 = IMAGE_SAMPLE_C_D_O_V1_V16 %20, %5, %6, 1, 0, 0, 0, 0, 0, 0, -1, implicit %exec - -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/detect-dead-lanes.mir b/llvm/test/CodeGen/MIR/AMDGPU/detect-dead-lanes.mir deleted file mode 100644 index 9d70f67ef49..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/detect-dead-lanes.mir +++ /dev/null @@ -1,419 +0,0 @@ -# RUN: llc -march=amdgcn -run-pass detect-dead-lanes -o - %s | FileCheck %s ---- | - define void @test0() { ret void } - define void @test1() { ret void } - define void @test2() { ret void } - define void @test3() { ret void } - define void @test4() { ret void } - define void @test5() { ret void } - define void @loop0() { ret void } - define void @loop1() { ret void } - define void @loop2() { ret void } -... ---- -# Combined use/def transfer check, the basics. 
-# CHECK-LABEL: name: test0 -# CHECK: S_NOP 0, implicit-def %0 -# CHECK: S_NOP 0, implicit-def %1 -# CHECK: S_NOP 0, implicit-def dead %2 -# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %3.sub0 -# CHECK: S_NOP 0, implicit %3.sub1 -# CHECK: S_NOP 0, implicit undef %3.sub2 -# CHECK: %4 = COPY %3.sub0_sub1 -# CHECK: %5 = COPY undef %3.sub2_sub3 -# CHECK: S_NOP 0, implicit %4.sub0 -# CHECK: S_NOP 0, implicit %4.sub1 -# CHECK: S_NOP 0, implicit undef %5.sub0 -name: test0 -registers: - - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_32_xm0 } - - { id: 2, class: sreg_32_xm0 } - - { id: 3, class: sreg_128 } - - { id: 4, class: sreg_64 } - - { id: 5, class: sreg_64 } -body: | - bb.0: - S_NOP 0, implicit-def %0 - S_NOP 0, implicit-def %1 - S_NOP 0, implicit-def %2 - %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub3 - S_NOP 0, implicit %3.sub0 - S_NOP 0, implicit %3.sub1 - S_NOP 0, implicit %3.sub2 - %4 = COPY %3.sub0_sub1 - %5 = COPY %3.sub2_sub3 - S_NOP 0, implicit %4.sub0 - S_NOP 0, implicit %4.sub1 - S_NOP 0, implicit %5.sub0 -... ---- -# Check defined lanes transfer; Includes checking for some special cases like -# undef operands or IMPLICIT_DEF definitions. 
-# CHECK-LABEL: name: test1 -# CHECK: %0 = REG_SEQUENCE %sgpr0, {{[0-9]+}}, %sgpr0, {{[0-9]+}} -# CHECK: %1 = INSERT_SUBREG %0, %sgpr1, {{[0-9]+}} -# CHECK: %2 = INSERT_SUBREG %0.sub2_sub3, %sgpr42, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %1.sub0 -# CHECK: S_NOP 0, implicit undef %1.sub1 -# CHECK: S_NOP 0, implicit %1.sub2 -# CHECK: S_NOP 0, implicit %1.sub3 -# CHECK: S_NOP 0, implicit %2.sub0 -# CHECK: S_NOP 0, implicit undef %2.sub1 - -# CHECK: %3 = IMPLICIT_DEF -# CHECK: %4 = INSERT_SUBREG %0, undef %3, {{[0-9]+}} -# CHECK: S_NOP 0, implicit undef %4.sub0 -# CHECK: S_NOP 0, implicit undef %4.sub1 -# CHECK: S_NOP 0, implicit %4.sub2 -# CHECK: S_NOP 0, implicit undef %4.sub3 - -# CHECK: %5 = EXTRACT_SUBREG %0, {{[0-9]+}} -# CHECK: %6 = EXTRACT_SUBREG %5, {{[0-9]+}} -# CHECK: %7 = EXTRACT_SUBREG %5, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %5 -# CHECK: S_NOP 0, implicit %6 -# CHECK: S_NOP 0, implicit undef %7 - -# CHECK: %8 = IMPLICIT_DEF -# CHECK: %9 = EXTRACT_SUBREG undef %8, {{[0-9]+}} -# CHECK: S_NOP 0, implicit undef %9 - -# CHECK: %10 = EXTRACT_SUBREG undef %0, {{[0-9]+}} -# CHECK: S_NOP 0, implicit undef %10 -name: test1 -registers: - - { id: 0, class: sreg_128 } - - { id: 1, class: sreg_128 } - - { id: 2, class: sreg_64 } - - { id: 3, class: sreg_32_xm0 } - - { id: 4, class: sreg_128 } - - { id: 5, class: sreg_64 } - - { id: 6, class: sreg_32_xm0 } - - { id: 7, class: sreg_32_xm0 } - - { id: 8, class: sreg_64 } - - { id: 9, class: sreg_32_xm0 } - - { id: 10, class: sreg_128 } -body: | - bb.0: - %0 = REG_SEQUENCE %sgpr0, %subreg.sub0, %sgpr0, %subreg.sub2 - %1 = INSERT_SUBREG %0, %sgpr1, %subreg.sub3 - %2 = INSERT_SUBREG %0.sub2_sub3, %sgpr42, %subreg.sub0 - S_NOP 0, implicit %1.sub0 - S_NOP 0, implicit %1.sub1 - S_NOP 0, implicit %1.sub2 - S_NOP 0, implicit %1.sub3 - S_NOP 0, implicit %2.sub0 - S_NOP 0, implicit %2.sub1 - - %3 = IMPLICIT_DEF - %4 = INSERT_SUBREG %0, %3, %subreg.sub0 - S_NOP 0, implicit %4.sub0 - S_NOP 0, implicit %4.sub1 - S_NOP 0, implicit 
%4.sub2 - S_NOP 0, implicit %4.sub3 - - %5 = EXTRACT_SUBREG %0, %subreg.sub0_sub1 - %6 = EXTRACT_SUBREG %5, %subreg.sub0 - %7 = EXTRACT_SUBREG %5, %subreg.sub1 - S_NOP 0, implicit %5 - S_NOP 0, implicit %6 - S_NOP 0, implicit %7 - - %8 = IMPLICIT_DEF - %9 = EXTRACT_SUBREG %8, %subreg.sub1 - S_NOP 0, implicit %9 - - %10 = EXTRACT_SUBREG undef %0, %subreg.sub2_sub3 - S_NOP 0, implicit %10 -... ---- -# Check used lanes transfer; Includes checking for some special cases like -# undef operands. -# CHECK-LABEL: name: test2 -# CHECK: S_NOP 0, implicit-def dead %0 -# CHECK: S_NOP 0, implicit-def %1 -# CHECK: S_NOP 0, implicit-def %2 -# CHECK: %3 = REG_SEQUENCE undef %0, {{[0-9]+}}, %1, {{[0-9]+}}, %2, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %3.sub1 -# CHECK: S_NOP 0, implicit %3.sub3 - -# CHECK: S_NOP 0, implicit-def %4 -# CHECK: S_NOP 0, implicit-def dead %5 -# CHECK: %6 = REG_SEQUENCE %4, {{[0-9]+}}, undef %5, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %6 - -# CHECK: S_NOP 0, implicit-def dead %7 -# CHECK: S_NOP 0, implicit-def %8 -# CHECK: %9 = INSERT_SUBREG undef %7, %8, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %9.sub2 - -# CHECK: S_NOP 0, implicit-def %10 -# CHECK: S_NOP 0, implicit-def dead %11 -# CHECK: %12 = INSERT_SUBREG %10, undef %11, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %12.sub3 - -# CHECK: S_NOP 0, implicit-def %13 -# CHECK: S_NOP 0, implicit-def dead %14 -# CHECK: %15 = REG_SEQUENCE %13, {{[0-9]+}}, undef %14, {{[0-9]+}} -# CHECK: %16 = EXTRACT_SUBREG %15, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %16.sub1 - -name: test2 -registers: - - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_32_xm0 } - - { id: 2, class: sreg_64 } - - { id: 3, class: sreg_128 } - - { id: 4, class: sreg_32_xm0 } - - { id: 5, class: sreg_32_xm0 } - - { id: 6, class: sreg_64 } - - { id: 7, class: sreg_128 } - - { id: 8, class: sreg_64 } - - { id: 9, class: sreg_128 } - - { id: 10, class: sreg_128 } - - { id: 11, class: sreg_64 } - - { id: 12, class: sreg_128 } - - { id: 13, class: sreg_64 
} - - { id: 14, class: sreg_64 } - - { id: 15, class: sreg_128 } - - { id: 16, class: sreg_64 } -body: | - bb.0: - S_NOP 0, implicit-def %0 - S_NOP 0, implicit-def %1 - S_NOP 0, implicit-def %2 - %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2_sub3 - S_NOP 0, implicit %3.sub1 - S_NOP 0, implicit %3.sub3 - - S_NOP 0, implicit-def %4 - S_NOP 0, implicit-def %5 - %6 = REG_SEQUENCE %4, %subreg.sub0, undef %5, %subreg.sub1 - S_NOP 0, implicit %6 - - S_NOP 0, implicit-def %7 - S_NOP 0, implicit-def %8 - %9 = INSERT_SUBREG %7, %8, %subreg.sub2_sub3 - S_NOP 0, implicit %9.sub2 - - S_NOP 0, implicit-def %10 - S_NOP 0, implicit-def %11 - %12 = INSERT_SUBREG %10, %11, %subreg.sub0_sub1 - S_NOP 0, implicit %12.sub3 - - S_NOP 0, implicit-def %13 - S_NOP 0, implicit-def %14 - %15 = REG_SEQUENCE %13, %subreg.sub0_sub1, %14, %subreg.sub2_sub3 - %16 = EXTRACT_SUBREG %15, %subreg.sub0_sub1 - S_NOP 0, implicit %16.sub1 -... ---- -# Check that copies to physregs use all lanes, copies from physregs define all -# lanes. So we should not get a dead/undef flag here. -# CHECK-LABEL: name: test3 -# CHECK: S_NOP 0, implicit-def %0 -# CHECK: %vcc = COPY %0 -# CHECK: %1 = COPY %vcc -# CHECK: S_NOP 0, implicit %1 -name: test3 -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_64 } - - { id: 1, class: sreg_64 } -body: | - bb.0: - S_NOP 0, implicit-def %0 - %vcc = COPY %0 - - %1 = COPY %vcc - S_NOP 0, implicit %1 -... ---- -# Check that implicit-def/kill do not count as def/uses. -# CHECK-LABEL: name: test4 -# CHECK: S_NOP 0, implicit-def dead %0 -# CHECK: KILL undef %0 -# CHECK: %1 = IMPLICIT_DEF -# CHECK: S_NOP 0, implicit undef %1 -name: test4 -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_64 } - - { id: 1, class: sreg_64 } -body: | - bb.0: - S_NOP 0, implicit-def %0 - KILL %0 - - %1 = IMPLICIT_DEF - S_NOP 0, implicit %1 -... ---- -# Check that unused inputs are marked as undef, even if the vreg itself is -# used. 
-# CHECK-LABEL: name: test5 -# CHECK: S_NOP 0, implicit-def %0 -# CHECK: %1 = REG_SEQUENCE undef %0, {{[0-9]+}}, %0, {{[0-9]+}} -# CHECK: S_NOP 0, implicit %1.sub1 -name: test5 -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_64 } -body: | - bb.0: - S_NOP 0, implicit-def %0 - %1 = REG_SEQUENCE %0, %subreg.sub0, %0, %subreg.sub1 - S_NOP 0, implicit %1.sub1 -... ---- -# Check "optimistic" dataflow fixpoint in phi-loops. -# CHECK-LABEL: name: loop0 -# CHECK: bb.0: -# CHECK: S_NOP 0, implicit-def %0 -# CHECK: S_NOP 0, implicit-def dead %1 -# CHECK: S_NOP 0, implicit-def dead %2 -# CHECK: %3 = REG_SEQUENCE %0, {{[0-9]+}}, undef %1, {{[0-9]+}}, undef %2, {{[0-9]+}} - -# CHECK: bb.1: -# CHECK: %4 = PHI %3, %bb.0, %5, %bb.1 - -# CHECK: bb.2: -# CHECK: S_NOP 0, implicit %4.sub0 -# CHECK: S_NOP 0, implicit undef %4.sub3 -name: loop0 -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_32_xm0 } - - { id: 2, class: sreg_32_xm0 } - - { id: 3, class: sreg_128 } - - { id: 4, class: sreg_128 } - - { id: 5, class: sreg_128 } -body: | - bb.0: - successors: %bb.1 - S_NOP 0, implicit-def %0 - S_NOP 0, implicit-def %1 - S_NOP 0, implicit-def %2 - %3 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2 - S_BRANCH %bb.1 - - bb.1: - successors: %bb.1, %bb.2 - %4 = PHI %3, %bb.0, %5, %bb.1 - - ; let's swiffle some lanes around for fun... - %5 = REG_SEQUENCE %4.sub0, %subreg.sub0, %4.sub2, %subreg.sub1, %4.sub1, %subreg.sub2, %4.sub3, %subreg.sub3 - - S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc - S_BRANCH %bb.2 - - bb.2: - S_NOP 0, implicit %4.sub0 - S_NOP 0, implicit %4.sub3 -... ---- -# Check a loop that needs to be traversed multiple times to reach the fixpoint -# for the used lanes. The example reads sub3 lane at the end, however with each -# loop iteration we should get 1 more lane marked as we cycles the sublanes -# along. 
Sublanes sub0, sub1 and sub3 are rotate in the loop so only sub2 -# should be dead. -# CHECK-LABEL: name: loop1 -# CHECK: bb.0: -# CHECK: S_NOP 0, implicit-def %0 -# CHECK: S_NOP 0, implicit-def %1 -# CHECK: S_NOP 0, implicit-def dead %2 -# CHECK: S_NOP 0, implicit-def %3 -# CHECK: %4 = REG_SEQUENCE %0, {{[0-9]+}}, %1, {{[0-9]+}}, undef %2, {{[0-9]+}}, %3, {{[0-9]+}} - -# CHECK: bb.1: -# CHECK: %5 = PHI %4, %bb.0, %6, %bb.1 - -# CHECK: %6 = REG_SEQUENCE %5.sub1, {{[0-9]+}}, %5.sub3, {{[0-9]+}}, undef %5.sub2, {{[0-9]+}}, %5.sub0, {{[0-9]+}} - -# CHECK: bb.2: -# CHECK: S_NOP 0, implicit %6.sub3 -name: loop1 -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_32_xm0 } - - { id: 2, class: sreg_32_xm0 } - - { id: 3, class: sreg_32_xm0 } - - { id: 4, class: sreg_128 } - - { id: 5, class: sreg_128 } - - { id: 6, class: sreg_128 } -body: | - bb.0: - successors: %bb.1 - S_NOP 0, implicit-def %0 - S_NOP 0, implicit-def %1 - S_NOP 0, implicit-def dead %2 - S_NOP 0, implicit-def %3 - %4 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3 - S_BRANCH %bb.1 - - bb.1: - successors: %bb.1, %bb.2 - %5 = PHI %4, %bb.0, %6, %bb.1 - - ; rotate lanes, but skip sub2 lane... - %6 = REG_SEQUENCE %5.sub1, %subreg.sub0, %5.sub3, %subreg.sub1, %5.sub2, %subreg.sub2, %5.sub0, %subreg.sub3 - - S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc - S_BRANCH %bb.2 - - bb.2: - S_NOP 0, implicit %6.sub3 -... ---- -# Similar to loop1 test, but check for fixpoint of defined lanes. -# Lanes are rotate between sub0, sub2, sub3 so only sub1 should be dead/undef. 
-# CHECK-LABEL: name: loop2 -# CHECK: bb.0: -# CHECK: S_NOP 0, implicit-def %0 -# CHECK: %1 = REG_SEQUENCE %0, {{[0-9]+}} - -# CHECK: bb.1: -# CHECK: %2 = PHI %1, %bb.0, %3, %bb.1 - -# CHECK: %3 = REG_SEQUENCE %2.sub3, {{[0-9]+}}, undef %2.sub1, {{[0-9]+}}, %2.sub0, {{[0-9]+}}, %2.sub2, {{[0-9]+}} - -# CHECK: bb.2: -# CHECK: S_NOP 0, implicit %2.sub0 -# CHECK: S_NOP 0, implicit undef %2.sub1 -# CHECK: S_NOP 0, implicit %2.sub2 -# CHECK: S_NOP 0, implicit %2.sub3 -name: loop2 -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_128 } - - { id: 2, class: sreg_128 } - - { id: 3, class: sreg_128 } -body: | - bb.0: - successors: %bb.1 - S_NOP 0, implicit-def %0 - %1 = REG_SEQUENCE %0, %subreg.sub0 - S_BRANCH %bb.1 - - bb.1: - successors: %bb.1, %bb.2 - %2 = PHI %1, %bb.0, %3, %bb.1 - - ; rotate subreg lanes, skipping sub1 - %3 = REG_SEQUENCE %2.sub3, %subreg.sub0, %2.sub1, %subreg.sub1, %2.sub0, %subreg.sub2, %2.sub2, %subreg.sub3 - - S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc - S_BRANCH %bb.2 - - bb.2: - S_NOP 0, implicit %2.sub0 - S_NOP 0, implicit undef %2.sub1 - S_NOP 0, implicit %2.sub2 - S_NOP 0, implicit %2.sub3 -... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir b/llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir deleted file mode 100644 index 9aaa374ed28..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/insert-waits-exp.mir +++ /dev/null @@ -1,63 +0,0 @@ -# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-insert-waits -o - %s | FileCheck %s ---- | - define amdgpu_ps <4 x float> @exp_done_waitcnt(<4 x i32> inreg, <4 x i32> inreg, i32 inreg %w, float %v) { - %a = load volatile float, float addrspace(1)* undef - %b = load volatile float, float addrspace(1)* undef - %c = load volatile float, float addrspace(1)* undef - %d = load volatile float, float addrspace(1)* undef - call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %a, float %b, float %c, float %d) - ret <4 x float> - } - - declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) - - attributes #0 = { readnone } - attributes #1 = { nounwind } - -... ---- - -# CHECK-LABEL: name: exp_done_waitcnt{{$}} -# CHECK: EXP_DONE -# CHECK-NEXT: S_WAITCNT 3855 -# CHECK: %vgpr0 = V_MOV_B32 -# CHECK: %vgpr1 = V_MOV_B32 -# CHECK: %vgpr2 = V_MOV_B32 -# CHECK: %vgpr3 = V_MOV_B32 -name: exp_done_waitcnt -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0 (%ir-block.2): - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) - %vgpr1 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from 
`float addrspace(1)* undef`) - %vgpr2 = BUFFER_LOAD_DWORD_OFFSET %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) - %vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `float addrspace(1)* undef`) - EXP_DONE 0, killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3, -1, -1, 15, implicit %exec - %vgpr0 = V_MOV_B32_e32 1056964608, implicit %exec - %vgpr1 = V_MOV_B32_e32 1065353216, implicit %exec - %vgpr2 = V_MOV_B32_e32 1073741824, implicit %exec - %vgpr3 = V_MOV_B32_e32 1082130432, implicit %exec - SI_RETURN killed %vgpr0, killed %vgpr1, killed %vgpr2, killed %vgpr3 - -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir b/llvm/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir deleted file mode 100644 index 7cc9c7c1d92..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/inserted-wait-states.mir +++ /dev/null @@ -1,333 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=tahiti -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN -# RUN: llc -march=amdgcn -mcpu=hawaii -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass post-RA-hazard-rec %s -o - | FileCheck %s -check-prefixes=GCN,CIVI,VI - ---- | - define void @div_fmas() { ret void } - define void @s_getreg() { ret void } - define void @s_setreg() { ret void } - define void @vmem_gt_8dw_store() { ret void } - define void @readwrite_lane() { ret void } - define void @rfe() { ret void } -... 
---- -# GCN-LABEL: name: div_fmas - -# GCN-LABEL: bb.0: -# GCN: S_MOV_B64 -# GCN-NOT: S_NOP -# GCN: V_DIV_FMAS - -# GCN-LABEL: bb.1: -# GCN: V_CMP_EQ_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: V_DIV_FMAS_F32 - -# GCN-LABEL: bb.2: -# GCN: V_CMP_EQ_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: V_DIV_FMAS_F32 - -# GCN-LABEL: bb.3: -# GCN: V_DIV_SCALE_F32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: V_DIV_FMAS_F32 -name: div_fmas - -body: | - bb.0: - successors: %bb.1 - %vcc = S_MOV_B64 0 - %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec - S_BRANCH %bb.1 - - bb.1: - successors: %bb.2 - implicit %vcc = V_CMP_EQ_I32_e32 %vgpr1, %vgpr2, implicit %exec - %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec - S_BRANCH %bb.2 - - bb.2: - successors: %bb.3 - %vcc = V_CMP_EQ_I32_e64 %vgpr1, %vgpr2, implicit %exec - %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec - S_BRANCH %bb.3 - - bb.3: - %vgpr4, %vcc = V_DIV_SCALE_F32 0, %vgpr1, 0, %vgpr1, 0, %vgpr3, 0, 0, implicit %exec - %vgpr0 = V_DIV_FMAS_F32 0, %vgpr1, 0, %vgpr2, 0, %vgpr3, 0, 0, implicit %vcc, implicit %exec - S_ENDPGM - -... - -... 
---- -# GCN-LABEL: name: s_getreg - -# GCN-LABEL: bb.0: -# GCN: S_SETREG -# GCN: S_NOP 0 -# GCN: S_NOP 0 -# GCN: S_GETREG - -# GCN-LABEL: bb.1: -# GCN: S_SETREG_IMM32 -# GCN: S_NOP 0 -# GCN: S_NOP 0 -# GCN: S_GETREG - -# GCN-LABEL: bb.2: -# GCN: S_SETREG -# GCN: S_NOP 0 -# GCN: S_GETREG - -# GCN-LABEL: bb.3: -# GCN: S_SETREG -# GCN-NEXT: S_GETREG - -name: s_getreg - -body: | - bb.0: - successors: %bb.1 - S_SETREG_B32 %sgpr0, 1 - %sgpr1 = S_GETREG_B32 1 - S_BRANCH %bb.1 - - bb.1: - successors: %bb.2 - S_SETREG_IMM32_B32 0, 1 - %sgpr1 = S_GETREG_B32 1 - S_BRANCH %bb.2 - - bb.2: - successors: %bb.3 - S_SETREG_B32 %sgpr0, 1 - %sgpr1 = S_MOV_B32 0 - %sgpr2 = S_GETREG_B32 1 - S_BRANCH %bb.3 - - bb.3: - S_SETREG_B32 %sgpr0, 0 - %sgpr1 = S_GETREG_B32 1 - S_ENDPGM -... - -... ---- -# GCN-LABEL: name: s_setreg - -# GCN-LABEL: bb.0: -# GCN: S_SETREG -# GCN: S_NOP 0 -# VI: S_NOP 0 -# GCN-NEXT: S_SETREG - -# GCN-LABEL: bb.1: -# GCN: S_SETREG -# GCN: S_NOP 0 -# VI: S_NOP 0 -# GCN-NEXT: S_SETREG - -# GCN-LABEL: bb.2: -# GCN: S_SETREG -# GCN-NEXT: S_SETREG - -name: s_setreg - -body: | - bb.0: - successors: %bb.1 - S_SETREG_B32 %sgpr0, 1 - S_SETREG_B32 %sgpr1, 1 - S_BRANCH %bb.1 - - bb.1: - successors: %bb.2 - S_SETREG_B32 %sgpr0, 64 - S_SETREG_B32 %sgpr1, 128 - S_BRANCH %bb.2 - - bb.2: - S_SETREG_B32 %sgpr0, 1 - S_SETREG_B32 %sgpr1, 0 - S_ENDPGM -... - -... 
---- -# GCN-LABEL: name: vmem_gt_8dw_store - -# GCN-LABEL: bb.0: -# GCN: BUFFER_STORE_DWORD_OFFSET -# GCN-NEXT: V_MOV_B32 -# GCN: BUFFER_STORE_DWORDX3_OFFSET -# CIVI: S_NOP -# GCN-NEXT: V_MOV_B32 -# GCN: BUFFER_STORE_DWORDX4_OFFSET -# GCN-NEXT: V_MOV_B32 -# GCN: BUFFER_STORE_DWORDX4_OFFSET -# CIVI: S_NOP -# GCN-NEXT: V_MOV_B32 -# GCN: BUFFER_STORE_FORMAT_XYZ_OFFSET -# CIVI: S_NOP -# GCN-NEXT: V_MOV_B32 -# GCN: BUFFER_STORE_FORMAT_XYZW_OFFSET -# CIVI: S_NOP -# GCN-NEXT: V_MOV_B32 - -# GCN-LABEL: bb.1: -# GCN: FLAT_STORE_DWORDX2 -# GCN-NEXT: V_MOV_B32 -# GCN: FLAT_STORE_DWORDX3 -# CIVI: S_NOP -# GCN-NEXT: V_MOV_B32 -# GCN: FLAT_STORE_DWORDX4 -# CIVI: S_NOP -# GCN-NEXT: V_MOV_B32 -# GCN: FLAT_ATOMIC_CMPSWAP_X2 -# CIVI: S_NOP -# GCN-NEXT: V_MOV_B32 -# GCN: FLAT_ATOMIC_FCMPSWAP_X2 -# CIVI: S_NOP -# GCN: V_MOV_B32 - -name: vmem_gt_8dw_store - -body: | - bb.0: - successors: %bb.1 - BUFFER_STORE_DWORD_OFFSET %vgpr3, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - BUFFER_STORE_DWORDX3_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, %sgpr4, 0, 0, 0, 0, implicit %exec - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - BUFFER_STORE_DWORDX4_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - BUFFER_STORE_FORMAT_XYZ_OFFSET %vgpr2_vgpr3_vgpr4, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - BUFFER_STORE_FORMAT_XYZW_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - BUFFER_ATOMIC_CMPSWAP_X2_OFFSET %vgpr2_vgpr3_vgpr4_vgpr5, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit %exec - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - S_BRANCH %bb.1 - - bb.1: - 
FLAT_STORE_DWORDX2 %vgpr0_vgpr1, %vgpr2_vgpr3, 0, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_STORE_DWORDX3 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4, 0, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_STORE_DWORDX4 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_ATOMIC_CMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - FLAT_ATOMIC_FCMPSWAP_X2 %vgpr0_vgpr1, %vgpr2_vgpr3_vgpr4_vgpr5, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3 = V_MOV_B32_e32 0, implicit %exec - S_ENDPGM - -... - -... ---- - -# GCN-LABEL: name: readwrite_lane - -# GCN-LABEL: bb.0: -# GCN: V_ADD_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: V_READLANE_B32 - -# GCN-LABEL: bb.1: -# GCN: V_ADD_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: V_WRITELANE_B32 - -# GCN-LABEL: bb.2: -# GCN: V_ADD_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: V_READLANE_B32 - -# GCN-LABEL: bb.3: -# GCN: V_ADD_I32 -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: S_NOP -# GCN: V_WRITELANE_B32 - -name: readwrite_lane - -body: | - bb.0: - successors: %bb.1 - %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec - %sgpr4 = V_READLANE_B32 %vgpr4, %sgpr0 - S_BRANCH %bb.1 - - bb.1: - successors: %bb.2 - %vgpr0,%sgpr0_sgpr1 = V_ADD_I32_e64 %vgpr1, %vgpr2, implicit %vcc, implicit %exec - %vgpr4 = V_WRITELANE_B32 %sgpr0, %sgpr0 - S_BRANCH %bb.2 - - bb.2: - successors: %bb.3 - %vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec - %sgpr4 = V_READLANE_B32 %vgpr4, %vcc_lo - S_BRANCH %bb.3 - - bb.3: - %vgpr0,implicit %vcc = V_ADD_I32_e32 %vgpr1, %vgpr2, implicit %vcc, implicit %exec - %vgpr4 = V_WRITELANE_B32 %sgpr4, %vcc_lo - S_ENDPGM - -... - -... 
---- - -# GCN-LABEL: name: rfe - -# GCN-LABEL: bb.0: -# GCN: S_SETREG -# VI: S_NOP -# GCN-NEXT: S_RFE_B64 - -# GCN-LABEL: bb.1: -# GCN: S_SETREG -# GCN-NEXT: S_RFE_B64 - -name: rfe - -body: | - bb.0: - successors: %bb.1 - S_SETREG_B32 %sgpr0, 3 - S_RFE_B64 %sgpr2_sgpr3 - S_BRANCH %bb.1 - - bb.1: - S_SETREG_B32 %sgpr0, 0 - S_RFE_B64 %sgpr2_sgpr3 - S_ENDPGM - -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/invert-br-undef-vcc.mir b/llvm/test/CodeGen/MIR/AMDGPU/invert-br-undef-vcc.mir deleted file mode 100644 index 66182d09289..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/invert-br-undef-vcc.mir +++ /dev/null @@ -1,89 +0,0 @@ -# RUN: llc -run-pass block-placement -march=amdgcn -verify-machineinstrs -o - %s | FileCheck %s ---- | - - define void @invert_br_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 { - entry: - br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 - - else: ; preds = %entry - store volatile i32 100, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - if: ; preds = %entry - store volatile i32 9, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - done: ; preds = %if, %else - %value = phi i32 [ 0, %if ], [ 1, %else ] - store i32 %value, i32 addrspace(1)* %out - ret void - } - - attributes #0 = { nounwind } - - !0 = !{} - -... 
---- -# CHECK-LABEL: name: invert_br_undef_vcc -# CHECK: S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc - -name: invert_br_undef_vcc -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%sgpr0_sgpr1' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - successors: %bb.2.if, %bb.1.else - liveins: %sgpr0_sgpr1 - - %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - S_CBRANCH_VCCNZ %bb.2.if, implicit undef %vcc - - bb.1.else: - successors: %bb.3.done - liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %vgpr0 = V_MOV_B32_e32 100, implicit %exec - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) - %vgpr0 = V_MOV_B32_e32 1, implicit %exec - S_BRANCH %bb.3.done - - bb.2.if: - successors: %bb.3.done - liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %vgpr0 = V_MOV_B32_e32 9, implicit %exec - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) - %vgpr0 = V_MOV_B32_e32 0, implicit %exec - - bb.3.done: - liveins: %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) - S_ENDPGM - -... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/liveness.mir b/llvm/test/CodeGen/MIR/AMDGPU/liveness.mir deleted file mode 100644 index 112c3f8e69a..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/liveness.mir +++ /dev/null @@ -1,32 +0,0 @@ -# RUN: llc -march=amdgcn -run-pass liveintervals -verify-machineinstrs -o /dev/null -debug-only=regalloc %s 2>&1 | FileCheck %s -# REQUIRES: asserts -# We currently maintain a main liveness range which operates like a superset of -# all subregister liveranges. We may need to create additional SSA values at -# merge point in this main liverange even though none of the subregister -# liveranges needed it. -# -# Should see three distinct value numbers: -# CHECK: %vreg0 [{{.*}}:0)[{{.*}}:1)[{{.*}}:2) 0@{{[0-9]+[Berd]}} 1@{{[0-9]+[Berd]}} 2@{{[0-9]+B-phi}} ---- | - define void @test0() { ret void } -... ---- -name: test0 -registers: - - { id: 0, class: sreg_64 } -body: | - bb.0: - successors: %bb.1, %bb.2 - S_NOP 0, implicit-def undef %0.sub0 - S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc - S_BRANCH %bb.2 - - bb.1: - successors: %bb.2 - S_NOP 0, implicit-def %0.sub1 - S_NOP 0, implicit %0.sub1 - S_BRANCH %bb.2 - - bb.2: - S_NOP 0, implicit %0.sub0 -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir b/llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir deleted file mode 100644 index 9c330bc8a6b..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/movrels-bug.mir +++ /dev/null @@ -1,31 +0,0 @@ -# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass post-RA-sched %s -o - | FileCheck %s - -# This tests a situation where a sub-register of a killed super-register operand -# of V_MOVRELS happens to have an undef use later on. This leads to the post RA -# scheduler adding additional implicit operands to the V_MOVRELS, which used -# to fail machine instruction verification. - ---- | - - define amdgpu_vs void @main(i32 %arg) { ret void } - -... 
---- -# CHECK-LABEL: name: main -# CHECK-LABEL: bb.0: -# CHECK: V_MOVRELS_B32_e32 -# CHECK: V_MAC_F32_e32 - -name: main -tracksRegLiveness: true -body: | - bb.0: - %m0 = S_MOV_B32 undef %sgpr0 - V_MOVRELD_B32_e32 undef %vgpr2, 0, implicit %m0, implicit %exec, implicit-def %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8, implicit undef %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8(tied-def 4) - %m0 = S_MOV_B32 undef %sgpr0 - %vgpr1 = V_MOVRELS_B32_e32 undef %vgpr1, implicit %m0, implicit %exec, implicit killed %vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8 - %vgpr4 = V_MAC_F32_e32 undef %vgpr0, undef %vgpr0, undef %vgpr4, implicit %exec - EXP_DONE 15, undef %vgpr0, killed %vgpr1, killed %vgpr4, undef %vgpr0, 0, 0, 12, implicit %exec - S_ENDPGM - -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/optimize-if-exec-masking.mir b/llvm/test/CodeGen/MIR/AMDGPU/optimize-if-exec-masking.mir deleted file mode 100644 index 4584802ad5a..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/optimize-if-exec-masking.mir +++ /dev/null @@ -1,755 +0,0 @@ -# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-optimize-exec-masking -o - %s | FileCheck %s - ---- | - target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64" - - define void @optimize_if_and_saveexec_xor(i32 %z, i32 %v) #0 { - main_body: - %id = call i32 @llvm.amdgcn.workitem.id.x() - %cc = icmp eq i32 %id, 0 - %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc) - %1 = extractvalue { i1, i64 } %0, 0 - %2 = extractvalue { i1, i64 } %0, 1 - br i1 %1, label %if, label %end - - if: ; preds = %main_body - %v.if = load volatile i32, i32 addrspace(1)* undef - br label %end - - end: ; preds = %if, %main_body - %r = phi i32 [ 4, %main_body ], [ %v.if, %if ] - call void @llvm.amdgcn.end.cf(i64 %2) - store i32 %r, i32 addrspace(1)* undef - ret void - } - - define void @optimize_if_and_saveexec(i32 %z, i32 %v) #0 { - 
main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - define void @optimize_if_or_saveexec(i32 %z, i32 %v) #0 { - main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - - define void @optimize_if_and_saveexec_xor_valu_middle(i32 %z, i32 %v) #0 { - main_body: - %id = call i32 @llvm.amdgcn.workitem.id.x() - %cc = icmp eq i32 %id, 0 - %0 = call { i1, i64 } @llvm.amdgcn.if(i1 %cc) - %1 = extractvalue { i1, i64 } %0, 0 - %2 = extractvalue { i1, i64 } %0, 1 - store i32 %id, i32 addrspace(1)* undef - br i1 %1, label %if, label %end - - if: ; preds = %main_body - %v.if = load volatile i32, i32 addrspace(1)* undef - br label %end - - end: ; preds = %if, %main_body - %r = phi i32 [ 4, %main_body ], [ %v.if, %if ] - call void @llvm.amdgcn.end.cf(i64 %2) - store i32 %r, i32 addrspace(1)* undef - ret void - } - - define void @optimize_if_and_saveexec_xor_wrong_reg(i32 %z, i32 %v) #0 { - main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - define void @optimize_if_and_saveexec_xor_modify_copy_to_exec(i32 %z, i32 %v) #0 { - main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - define void @optimize_if_and_saveexec_xor_live_out_setexec(i32 %z, i32 %v) #0 { - main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - define void @optimize_if_unknown_saveexec(i32 %z, i32 %v) #0 { - main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - define void @optimize_if_andn2_saveexec(i32 %z, i32 %v) #0 { - main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - define void @optimize_if_andn2_saveexec_no_commute(i32 %z, i32 %v) #0 { - main_body: - br i1 undef, label %if, label %end - - if: - br label %end - - end: - ret void - } - - ; Function Attrs: nounwind readnone - declare i32 
@llvm.amdgcn.workitem.id.x() #1 - - declare { i1, i64 } @llvm.amdgcn.if(i1) - - declare void @llvm.amdgcn.end.cf(i64) - - - attributes #0 = { nounwind } - attributes #1 = { nounwind readnone } - -... ---- -# CHECK-LABEL: name: optimize_if_and_saveexec_xor{{$}} -# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec -# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc -# CHECK-NEXT: SI_MASK_BRANCH - -name: optimize_if_and_saveexec_xor -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 
into `i32 addrspace(1)* undef`) - S_ENDPGM - -... ---- -# CHECK-LABEL: name: optimize_if_and_saveexec{{$}} -# CHECK: %sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec -# CHECK-NEXT: SI_MASK_BRANCH - -name: optimize_if_and_saveexec -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... 
---- -# CHECK-LABEL: name: optimize_if_or_saveexec{{$}} -# CHECK: %sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec -# CHECK-NEXT: SI_MASK_BRANCH - -name: optimize_if_or_saveexec -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_OR_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... 
---- -# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle -# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc -# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) -# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc -# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH -name: optimize_if_and_saveexec_xor_valu_middle -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - BUFFER_STORE_DWORD_OFFSET %vgpr0, undef %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = 
S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... ---- -# CHECK-LABEL: name: optimize_if_and_saveexec_xor_wrong_reg{{$}} -# CHECK: %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc -# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc -# CHECK-NEXT: %exec = COPY %sgpr0_sgpr1 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec -name: optimize_if_and_saveexec_xor_wrong_reg -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr6 = S_MOV_B32 -1 - %sgpr7 = S_MOV_B32 61440 - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr0_sgpr1 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - %sgpr0_sgpr1 = S_XOR_B64 undef %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc - %exec = S_MOV_B64_term %sgpr0_sgpr1 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 , %sgpr4_sgpr5_sgpr6_sgpr7 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1, %sgpr4_sgpr5_sgpr6_sgpr7 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = 
S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... ---- -# CHECK-LABEL: name: optimize_if_and_saveexec_xor_modify_copy_to_exec{{$}} -# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc -# CHECK-NEXT: %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc -# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc -# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec - -name: optimize_if_and_saveexec_xor_modify_copy_to_exec -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - %sgpr2_sgpr3 = S_OR_B64 killed %sgpr2_sgpr3, 1, implicit-def %scc - %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = 
S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr0 = S_MOV_B32 0 - %sgpr1 = S_MOV_B32 1 - %sgpr2 = S_MOV_B32 -1 - %sgpr3 = S_MOV_B32 61440 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... ---- -# CHECK-LABEL: name: optimize_if_and_saveexec_xor_live_out_setexec{{$}} -# CHECK: %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc -# CHECK-NEXT: %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc -# CHECK-NEXT: %exec = COPY %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH -name: optimize_if_and_saveexec_xor_live_out_setexec -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_AND_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - %sgpr0_sgpr1 = S_XOR_B64 %sgpr2_sgpr3, killed %sgpr0_sgpr1, implicit-def %scc - %exec = S_MOV_B64_term %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1, %sgpr2_sgpr3 - S_SLEEP 0, implicit %sgpr2_sgpr3 - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 
%exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... - -# CHECK-LABEL: name: optimize_if_unknown_saveexec{{$}} -# CHECK: %sgpr0_sgpr1 = COPY %exec -# CHECK: %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc -# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec - -name: optimize_if_unknown_saveexec -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_LSHR_B64 %sgpr0_sgpr1, killed %vcc_lo, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 
addrspace(1)* undef`) - S_ENDPGM - -... ---- -# CHECK-LABEL: name: optimize_if_andn2_saveexec{{$}} -# CHECK: %sgpr0_sgpr1 = S_ANDN2_SAVEEXEC_B64 %vcc, implicit-def %exec, implicit-def %scc, implicit %exec -# CHECK-NEXT: SI_MASK_BRANCH - -name: optimize_if_andn2_saveexec -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_ANDN2_B64 %sgpr0_sgpr1, killed %vcc, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... 
---- -# CHECK-LABEL: name: optimize_if_andn2_saveexec_no_commute{{$}} -# CHECK: %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc -# CHECK-NEXT: %exec = COPY killed %sgpr2_sgpr3 -# CHECK-NEXT: SI_MASK_BRANCH %bb.2.end, implicit %exec -name: optimize_if_andn2_saveexec_no_commute -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%vgpr0' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.main_body: - successors: %bb.1.if, %bb.2.end - liveins: %vgpr0 - - %sgpr0_sgpr1 = COPY %exec - %vcc = V_CMP_EQ_I32_e64 0, killed %vgpr0, implicit %exec - %vgpr0 = V_MOV_B32_e32 4, implicit %exec - %sgpr2_sgpr3 = S_ANDN2_B64 killed %vcc, %sgpr0_sgpr1, implicit-def %scc - %exec = S_MOV_B64_term killed %sgpr2_sgpr3 - SI_MASK_BRANCH %bb.2.end, implicit %exec - S_BRANCH %bb.1.if - - bb.1.if: - successors: %bb.2.end - liveins: %sgpr0_sgpr1 - - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vgpr0 = BUFFER_LOAD_DWORD_OFFSET %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile load 4 from `i32 addrspace(1)* undef`) - - bb.2.end: - liveins: %vgpr0, %sgpr0_sgpr1 - - %exec = S_OR_B64 %exec, killed %sgpr0_sgpr1, implicit-def %scc - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into `i32 addrspace(1)* undef`) - S_ENDPGM - -... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/rename-independent-subregs.mir b/llvm/test/CodeGen/MIR/AMDGPU/rename-independent-subregs.mir deleted file mode 100644 index b928bc7086b..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/rename-independent-subregs.mir +++ /dev/null @@ -1,70 +0,0 @@ -# RUN: llc -march=amdgcn -verify-machineinstrs -run-pass simple-register-coalescing,rename-independent-subregs -o - %s | FileCheck %s ---- | - define void @test0() { ret void } - define void @test1() { ret void } -... ---- -# In the test below we have two independent def+use pairs of subregister1 which -# can be moved to a new virtual register. The third def of sub1 however is used -# in combination with sub0 and needs to stay with the original vreg. -# CHECK-LABEL: name: test0 -# CHECK: S_NOP 0, implicit-def undef %0.sub0 -# CHECK: S_NOP 0, implicit-def undef %2.sub1 -# CHECK: S_NOP 0, implicit %2.sub1 -# CHECK: S_NOP 0, implicit-def undef %1.sub1 -# CHECK: S_NOP 0, implicit %1.sub1 -# CHECK: S_NOP 0, implicit-def %0.sub1 -# CHECK: S_NOP 0, implicit %0 -name: test0 -registers: - - { id: 0, class: sreg_128 } -body: | - bb.0: - S_NOP 0, implicit-def undef %0.sub0 - S_NOP 0, implicit-def %0.sub1 - S_NOP 0, implicit %0.sub1 - S_NOP 0, implicit-def %0.sub1 - S_NOP 0, implicit %0.sub1 - S_NOP 0, implicit-def %0.sub1 - S_NOP 0, implicit %0 -... ---- -# Test for a bug where we would incorrectly query liveness at the instruction -# index in rewriteOperands(). This should pass the verifier afterwards. 
-# CHECK-LABEL: test1 -# CHECK: bb.0 -# CHECK: S_NOP 0, implicit-def undef %2.sub2 -# CHECK: bb.1 -# CHECK: S_NOP 0, implicit-def %2.sub1 -# CHECK-NEXT: S_NOP 0, implicit-def %2.sub3 -# CHECK-NEXT: S_NOP 0, implicit %2 -# CHECK-NEXT: S_NOP 0, implicit-def undef %0.sub0 -# CHECK-NEXT: S_NOP 0, implicit %2.sub1 -# CHECK-NEXT: S_NOP 0, implicit %0.sub0 -# CHECK: bb.2 -# CHECK: S_NOP 0, implicit %2.sub -name: test1 -registers: - - { id: 0, class: sreg_128 } - - { id: 1, class: sreg_128 } -body: | - bb.0: - successors: %bb.1, %bb.2 - S_NOP 0, implicit-def undef %0.sub2 - S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc - S_BRANCH %bb.2 - - bb.1: - S_NOP 0, implicit-def %0.sub1 - S_NOP 0, implicit-def %0.sub3 - %1 = COPY %0 - S_NOP 0, implicit %1 - - S_NOP 0, implicit-def %1.sub0 - S_NOP 0, implicit %1.sub1 - S_NOP 0, implicit %1.sub0 - - bb.2: - S_NOP 0, implicit %0.sub2 - -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/scalar-store-cache-flush.mir b/llvm/test/CodeGen/MIR/AMDGPU/scalar-store-cache-flush.mir deleted file mode 100644 index af71086e542..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/scalar-store-cache-flush.mir +++ /dev/null @@ -1,173 +0,0 @@ -# RUN: llc -march=amdgcn -run-pass si-insert-waits %s -o - | FileCheck %s - ---- | - define void @basic_insert_dcache_wb() { - ret void - } - - define void @explicit_flush_after() { - ret void - } - - define void @explicit_flush_before() { - ret void - } - - define void @no_scalar_store() { - ret void - } - - define void @multi_block_store() { - bb0: - br i1 undef, label %bb1, label %bb2 - - bb1: - ret void - - bb2: - ret void - } - - define void @one_block_store() { - bb0: - br i1 undef, label %bb1, label %bb2 - - bb1: - ret void - - bb2: - ret void - } - - define amdgpu_ps float @si_return() { - ret float undef - } - -... 
---- -# CHECK-LABEL: name: basic_insert_dcache_wb -# CHECK: bb.0: -# CHECK-NEXT: S_STORE_DWORD -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM - -name: basic_insert_dcache_wb -tracksRegLiveness: false - -body: | - bb.0: - S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 - S_ENDPGM -... ---- -# Already has an explicitly requested flush after the last store. -# CHECK-LABEL: name: explicit_flush_after -# CHECK: bb.0: -# CHECK-NEXT: S_STORE_DWORD -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM - -name: explicit_flush_after -tracksRegLiveness: false - -body: | - bb.0: - S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 - S_DCACHE_WB - S_ENDPGM -... ---- -# Already has an explicitly requested flush before the last store. -# CHECK-LABEL: name: explicit_flush_before -# CHECK: bb.0: -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_STORE_DWORD -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM - -name: explicit_flush_before -tracksRegLiveness: false - -body: | - bb.0: - S_DCACHE_WB - S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 - S_ENDPGM -... ---- -# CHECK-LABEL: no_scalar_store -# CHECK: bb.0 -# CHECK-NEXT: S_ENDPGM -name: no_scalar_store -tracksRegLiveness: false - -body: | - bb.0: - S_ENDPGM -... - -# CHECK-LABEL: name: multi_block_store -# CHECK: bb.0: -# CHECK-NEXT: S_STORE_DWORD -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM - -# CHECK: bb.1: -# CHECK-NEXT: S_STORE_DWORD -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM - -name: multi_block_store -tracksRegLiveness: false - -body: | - bb.0: - S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 - S_ENDPGM - - bb.1: - S_STORE_DWORD_SGPR undef %sgpr4, undef %sgpr6_sgpr7, undef %m0, 0 - S_ENDPGM -... -... - -# This one should be able to omit the flush in the storeless block but -# this isn't handled now. 
- -# CHECK-LABEL: name: one_block_store -# CHECK: bb.0: -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM - -# CHECK: bb.1: -# CHECK-NEXT: S_STORE_DWORD -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: S_ENDPGM - -name: one_block_store -tracksRegLiveness: false - -body: | - bb.0: - S_ENDPGM - - bb.1: - S_STORE_DWORD_SGPR undef %sgpr4, undef %sgpr6_sgpr7, undef %m0, 0 - S_ENDPGM -... ---- -# CHECK-LABEL: name: si_return -# CHECK: bb.0: -# CHECK-NEXT: S_STORE_DWORD -# CHECK-NEXT: S_WAITCNT -# CHECK-NEXT: S_DCACHE_WB -# CHECK-NEXT: SI_RETURN - -name: si_return -tracksRegLiveness: false - -body: | - bb.0: - S_STORE_DWORD_SGPR undef %sgpr2, undef %sgpr0_sgpr1, undef %m0, 0 - SI_RETURN undef %vgpr0 -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir b/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir deleted file mode 100644 index 0c08deb13a8..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/si-fix-sgpr-copies.mir +++ /dev/null @@ -1,43 +0,0 @@ -# RUN: llc -march=amdgcn -run-pass si-fix-sgpr-copies %s -o - | FileCheck %s -check-prefixes=GCN - ---- | - define void @phi_visit_order() { ret void } - -name: phi_visit_order -tracksRegLiveness: true -registers: - - { id: 0, class: sreg_32_xm0 } - - { id: 1, class: sreg_64 } - - { id: 2, class: sreg_32_xm0 } - - { id: 7, class: vgpr_32 } - - { id: 8, class: sreg_32_xm0 } - - { id: 9, class: vgpr_32 } - - { id: 10, class: sreg_64 } - - { id: 11, class: sreg_32_xm0 } - -body: | - ; GCN-LABEL: name: phi_visit_order - ; GCN: V_ADD_I32 - bb.0: - liveins: %vgpr0 - successors: %bb.1 - %7 = COPY %vgpr0 - %8 = S_MOV_B32 0 - - bb.1: - successors: %bb.1, %bb.2 - %0 = PHI %8, %bb.0, %0, %bb.1, %2, %bb.2 - %9 = V_MOV_B32_e32 9, implicit %exec - %10 = V_CMP_EQ_U32_e64 %7, %9, implicit %exec - %1 = SI_IF %10, %bb.2, implicit-def %exec, implicit-def %scc, implicit %exec - S_BRANCH %bb.1 - - bb.2: - successors: %bb.1 - SI_END_CF %1, implicit-def %exec, implicit-def %scc, implicit %exec - %11 = S_MOV_B32 1 - %2 = S_ADD_I32 %0, %11, 
implicit-def %scc - S_BRANCH %bb.1 - -... ---- diff --git a/llvm/test/CodeGen/MIR/AMDGPU/subreg-intervals.mir b/llvm/test/CodeGen/MIR/AMDGPU/subreg-intervals.mir deleted file mode 100644 index c4e00215159..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/subreg-intervals.mir +++ /dev/null @@ -1,51 +0,0 @@ -# RUN: llc -march=amdgcn -run-pass liveintervals -debug-only=regalloc -o /dev/null %s 2>&1 | FileCheck %s -# REQUIRES: asserts - -# CHECK: INTERVALS -# CHECK: vreg0 -# CHECK-LABEL: Machine code for function test0: - -# CHECK: INTERVALS -# CHECK: vreg0 -# CHECK-LABEL: Machine code for function test1: - ---- | - define void @test0() { ret void } - define void @test1() { ret void } -... ---- -name: test0 -registers: - - { id: 0, class: sreg_64 } -body: | - bb.0: - S_NOP 0, implicit-def %0 - S_NOP 0, implicit %0 - - S_NOP 0, implicit-def undef %0.sub0 - S_NOP 0, implicit %0 -... ---- -name: test1 -registers: - - { id: 0, class: sreg_64 } -body: | - bb.0: - successors: %bb.1, %bb.2 - S_CBRANCH_VCCNZ %bb.1, implicit undef %vcc - S_BRANCH %bb.2 - - bb.1: - successors: %bb.3 - S_NOP 0, implicit-def undef %0.sub0 - S_BRANCH %bb.3 - - bb.2: - successors: %bb.3 - S_NOP 0, implicit-def %0 - S_BRANCH %bb.3 - - bb.3: - S_NOP 0 - S_NOP 0, implicit %0 -... 
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/vccz-corrupt-bug-workaround.mir b/llvm/test/CodeGen/MIR/AMDGPU/vccz-corrupt-bug-workaround.mir deleted file mode 100644 index 03e473e3a0c..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/vccz-corrupt-bug-workaround.mir +++ /dev/null @@ -1,177 +0,0 @@ -# RUN: llc -run-pass si-insert-waits -march=amdgcn -mcpu=tahiti -o - %s | FileCheck %s ---- | - - define void @vccz_corrupt_workaround(float %cond, i32 addrspace(1)* %out) #0 { - entry: - %cmp0 = fcmp oeq float %cond, 0.000000e+00 - br i1 %cmp0, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 - - else: ; preds = %entry - store volatile i32 100, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - if: ; preds = %entry - store volatile i32 9, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - done: ; preds = %if, %else - %value = phi i32 [ 0, %if ], [ 1, %else ] - store i32 %value, i32 addrspace(1)* %out - ret void - } - - define void @vccz_corrupt_undef_vcc(float %cond, i32 addrspace(1)* %out) #0 { - entry: - br i1 undef, label %if, label %else, !structurizecfg.uniform !0, !amdgpu.uniform !0 - - else: ; preds = %entry - store volatile i32 100, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - if: ; preds = %entry - store volatile i32 9, i32 addrspace(1)* undef - br label %done, !structurizecfg.uniform !0 - - done: ; preds = %if, %else - %value = phi i32 [ 0, %if ], [ 1, %else ] - store i32 %value, i32 addrspace(1)* %out - ret void - } - - attributes #0 = { nounwind } - attributes #1 = { readnone } - - !0 = !{} - -... 
---- -# CHECK-LABEL: name: vccz_corrupt_workaround -# CHECK: %vcc = V_CMP_EQ_F32 -# CHECK-NEXT: %vcc = S_MOV_B64 %vcc -# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit killed %vcc - -name: vccz_corrupt_workaround -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%sgpr0_sgpr1' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - successors: %bb.2.if, %bb.1.else - liveins: %sgpr0_sgpr1 - - %sgpr2 = S_LOAD_DWORD_IMM %sgpr0_sgpr1, 9, 0 :: (non-temporal dereferenceable invariant load 4 from `float addrspace(2)* undef`) - %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - %vcc = V_CMP_EQ_F32_e64 0, 0, 0, %sgpr2, 0, 0, implicit %exec - S_CBRANCH_VCCZ %bb.1.else, implicit killed %vcc - - bb.2.if: - successors: %bb.3.done - liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %vgpr0 = V_MOV_B32_e32 9, implicit %exec - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) - %vgpr0 = V_MOV_B32_e32 0, implicit %exec - S_BRANCH %bb.3.done - - bb.1.else: - successors: %bb.3.done - liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %vgpr0 = V_MOV_B32_e32 100, implicit %exec - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) - %vgpr0 = V_MOV_B32_e32 1, implicit %exec - - bb.3.done: - liveins: %vgpr0, 
%sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) - S_ENDPGM - -... ---- -# CHECK-LABEL: name: vccz_corrupt_undef_vcc -# CHECK: S_WAITCNT -# CHECK-NEXT: S_CBRANCH_VCCZ %bb.2.else, implicit undef %vcc - -name: vccz_corrupt_undef_vcc -alignment: 0 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -tracksRegLiveness: true -liveins: - - { reg: '%sgpr0_sgpr1' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - successors: %bb.2.if, %bb.1.else - liveins: %sgpr0_sgpr1 - - %sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed %sgpr0_sgpr1, 11, 0 :: (non-temporal dereferenceable invariant load 8 from `i64 addrspace(2)* undef`) - %sgpr7 = S_MOV_B32 61440 - %sgpr6 = S_MOV_B32 -1 - S_CBRANCH_VCCZ %bb.1.else, implicit undef %vcc - - bb.2.if: - successors: %bb.3.done - liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %vgpr0 = V_MOV_B32_e32 9, implicit %exec - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) - %vgpr0 = V_MOV_B32_e32 0, implicit %exec - S_BRANCH %bb.3.done - - bb.1.else: - successors: %bb.3.done - liveins: %sgpr6, %sgpr7, %sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %vgpr0 = V_MOV_B32_e32 100, implicit %exec - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit %exec :: (volatile store 4 into `i32 addrspace(1)* undef`) - %vgpr0 = V_MOV_B32_e32 1, implicit %exec - - bb.3.done: - liveins: %vgpr0, 
%sgpr0_sgpr1_sgpr2_sgpr3:0x00000003 - - %sgpr3 = S_MOV_B32 61440 - %sgpr2 = S_MOV_B32 -1 - BUFFER_STORE_DWORD_OFFSET killed %vgpr0, killed %sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, implicit %exec :: (store 4 into %ir.out) - S_ENDPGM - -... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/waitcnt.mir b/llvm/test/CodeGen/MIR/AMDGPU/waitcnt.mir deleted file mode 100644 index cb5de6a2419..00000000000 --- a/llvm/test/CodeGen/MIR/AMDGPU/waitcnt.mir +++ /dev/null @@ -1,59 +0,0 @@ -# RUN: llc -march=amdgcn -mcpu=fiji -run-pass si-insert-waits %s -o - | FileCheck %s - ---- | - define void @flat_zero_waitcnt(i32 addrspace(1)* %global4, - <4 x i32> addrspace(1)* %global16, - i32 addrspace(4)* %flat4, - <4 x i32> addrspace(4)* %flat16) { - ret void - } -... ---- - -# CHECK-LABEL: name: flat_zero_waitcnt - -# CHECK-LABEL: bb.0: -# CHECK: FLAT_LOAD_DWORD -# CHECK: FLAT_LOAD_DWORDX4 -# Global loads will return in order so we should: -# s_waitcnt vmcnt(1) lgkmcnt(0) -# CHECK-NEXT: S_WAITCNT 113 - -# CHECK-LABEL: bb.1: -# CHECK: FLAT_LOAD_DWORD -# CHECK: FLAT_LOAD_DWORDX4 -# The first load has no mem operand, so we should assume it accesses the flat -# address space. -# s_waitcnt vmcnt(0) lgkmcnt(0) -# CHECK-NEXT: S_WAITCNT 112 - -# CHECK-LABEL: bb.2: -# CHECK: FLAT_LOAD_DWORD -# CHECK: FLAT_LOAD_DWORDX4 -# One outstand loads access the flat address space. 
-# s_waitcnt vmcnt(0) lgkmcnt(0) -# CHECK-NEXT: S_WAITCNT 112 - -name: flat_zero_waitcnt - -body: | - bb.0: - successors: %bb.1 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.global4) - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) - %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec - S_BRANCH %bb.1 - - bb.1: - successors: %bb.2 - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.global16) - %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec - S_BRANCH %bb.2 - - bb.2: - %vgpr0 = FLAT_LOAD_DWORD %vgpr1_vgpr2, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 4 from %ir.flat4) - %vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 %vgpr7_vgpr8, 0, 0, 0, implicit %exec, implicit %flat_scr :: (load 16 from %ir.flat16) - %vgpr0 = V_MOV_B32_e32 %vgpr1, implicit %exec - S_ENDPGM -... 
diff --git a/llvm/test/CodeGen/MIR/ARM/imm-peephole-arm.mir b/llvm/test/CodeGen/MIR/ARM/imm-peephole-arm.mir deleted file mode 100644 index cd30bdb74d5..00000000000 --- a/llvm/test/CodeGen/MIR/ARM/imm-peephole-arm.mir +++ /dev/null @@ -1,60 +0,0 @@ -# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s - -# CHECK: [[IN:%.*]] = COPY %r0 -# CHECK: [[SUM1TMP:%.*]] = ADDri [[IN]], 133 -# CHECK: [[SUM1:%.*]] = ADDri killed [[SUM1TMP]], 25600 - -# CHECK: [[SUM2TMP:%.*]] = SUBri [[IN]], 133 -# CHECK: [[SUM2:%.*]] = SUBri killed [[SUM2TMP]], 25600 - -# CHECK: [[SUM3TMP:%.*]] = SUBri [[IN]], 133 -# CHECK: [[SUM3:%.*]] = SUBri killed [[SUM3TMP]], 25600 - -# CHECK: [[SUM4TMP:%.*]] = ADDri killed [[IN]], 133 -# CHECK: [[SUM4:%.*]] = ADDri killed [[SUM4TMP]], 25600 - - ---- | - target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" - target triple = "armv7-apple-ios" - - define i32 @foo(i32 %in) { - ret i32 undef - } -... ---- -name: foo -registers: - - { id: 0, class: gprnopc } - - { id: 1, class: rgpr } - - { id: 2, class: rgpr } - - { id: 3, class: rgpr } - - { id: 4, class: rgpr } - - { id: 5, class: rgpr } - - { id: 6, class: rgpr } - - { id: 7, class: rgpr } - - { id: 8, class: rgpr } -liveins: - - { reg: '%r0', virtual-reg: '%0' } -body: | - bb.0 (%ir-block.0): - liveins: %r0 - - %0 = COPY %r0 - %1 = MOVi32imm -25733 - %2 = SUBrr %0, killed %1, 14, _, _ - - %3 = MOVi32imm 25733 - %4 = SUBrr %0, killed %3, 14, _, _ - - %5 = MOVi32imm -25733 - %6 = ADDrr %0, killed %5, 14, _, _ - - %7 = MOVi32imm 25733 - %8 = ADDrr killed %0, killed %7, 14, _, _ - - %r0 = COPY killed %8 - BX_RET 14, _, implicit %r0 - -... 
- diff --git a/llvm/test/CodeGen/MIR/ARM/imm-peephole-thumb.mir b/llvm/test/CodeGen/MIR/ARM/imm-peephole-thumb.mir deleted file mode 100644 index 3d342902d80..00000000000 --- a/llvm/test/CodeGen/MIR/ARM/imm-peephole-thumb.mir +++ /dev/null @@ -1,59 +0,0 @@ -# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s - -# CHECK: [[IN:%.*]] = COPY %r0 -# CHECK: [[SUM1TMP:%.*]] = t2ADDri [[IN]], 25600 -# CHECK: [[SUM1:%.*]] = t2ADDri killed [[SUM1TMP]], 133 - -# CHECK: [[SUM2TMP:%.*]] = t2SUBri [[IN]], 25600 -# CHECK: [[SUM2:%.*]] = t2SUBri killed [[SUM2TMP]], 133 - -# CHECK: [[SUM3TMP:%.*]] = t2SUBri [[IN]], 25600 -# CHECK: [[SUM3:%.*]] = t2SUBri killed [[SUM3TMP]], 133 - -# CHECK: [[SUM4TMP:%.*]] = t2ADDri killed [[IN]], 25600 -# CHECK: [[SUM4:%.*]] = t2ADDri killed [[SUM4TMP]], 133 - - ---- | - target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" - target triple = "thumbv7-apple-ios" - - define i32 @foo(i32 %in) { - ret i32 undef - } -... ---- -name: foo -registers: - - { id: 0, class: gprnopc } - - { id: 1, class: rgpr } - - { id: 2, class: rgpr } - - { id: 3, class: rgpr } - - { id: 4, class: rgpr } - - { id: 5, class: rgpr } - - { id: 6, class: rgpr } - - { id: 7, class: rgpr } - - { id: 8, class: rgpr } -liveins: - - { reg: '%r0', virtual-reg: '%0' } -body: | - bb.0 (%ir-block.0): - liveins: %r0 - %0 = COPY %r0 - %1 = t2MOVi32imm -25733 - %2 = t2SUBrr %0, killed %1, 14, _, _ - - %3 = t2MOVi32imm 25733 - %4 = t2SUBrr %0, killed %3, 14, _, _ - - %5 = t2MOVi32imm -25733 - %6= t2ADDrr %0, killed %5, 14, _, _ - - %7 = t2MOVi32imm 25733 - %8 = t2ADDrr killed %0, killed %7, 14, _, _ - - %r0 = COPY killed %8 - tBX_RET 14, _, implicit %r0 - -... 
- diff --git a/llvm/test/CodeGen/MIR/Hexagon/anti-dep-partial.mir b/llvm/test/CodeGen/MIR/Hexagon/anti-dep-partial.mir deleted file mode 100644 index 09bc49c508a..00000000000 --- a/llvm/test/CodeGen/MIR/Hexagon/anti-dep-partial.mir +++ /dev/null @@ -1,34 +0,0 @@ -# RUN: llc -march=hexagon -post-RA-scheduler -run-pass post-RA-sched %s -o - | FileCheck %s - ---- | - declare void @check(i64, i32, i32, i64) - define void @foo() { - ret void - } -... - ---- -name: foo -tracksRegLiveness: true -body: | - bb.0: - successors: - liveins: %r0, %r1, %d1, %d2, %r16, %r17, %r19, %r22, %r23 - %r2 = A2_add %r23, killed %r17 - %r6 = M2_mpyi %r16, %r16 - %r22 = M2_accii %r22, killed %r2, 2 - %r7 = A2_tfrsi 12345678 - %r3 = A2_tfr killed %r16 - %d2 = A2_tfrp killed %d0 - %r2 = L2_loadri_io %r29, 28 - %r2 = M2_mpyi killed %r6, killed %r2 - %r23 = S2_asr_i_r %r22, 31 - S2_storeri_io killed %r29, 0, killed %r7 - ; The anti-dependency on r23 between the first A2_add and the - ; S2_asr_i_r was causing d11 to be renamed, while r22 remained - ; unchanged. Check that the renaming of d11 does not happen. - ; CHECK: d11 - %d0 = A2_tfrp killed %d11 - J2_call @check, implicit-def %d0, implicit-def %d1, implicit-def %d2, implicit %d0, implicit %d1, implicit %d2 -... - diff --git a/llvm/test/CodeGen/MIR/Lanai/lit.local.cfg b/llvm/test/CodeGen/MIR/Lanai/lit.local.cfg deleted file mode 100644 index f1b8b4f4e21..00000000000 --- a/llvm/test/CodeGen/MIR/Lanai/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'Lanai' in config.root.targets: - config.unsupported = True diff --git a/llvm/test/CodeGen/MIR/Lanai/peephole-compare.mir b/llvm/test/CodeGen/MIR/Lanai/peephole-compare.mir deleted file mode 100644 index 5056a05ed1f..00000000000 --- a/llvm/test/CodeGen/MIR/Lanai/peephole-compare.mir +++ /dev/null @@ -1,678 +0,0 @@ -# RUN: llc -run-pass=peephole-opt %s -o - | FileCheck %s - -# Test the compare fold peephole. - -# CHECK-LABEL: name: test0a -# TODO: Enhance combiner to handle this case. 
This expands into: -# sub %r7, %r6, %r3 -# sub.f %r7, %r6, %r0 -# sel.eq %r18, %r3, %rv -# This is different from the pattern currently matched. If the lowered form had -# been sub.f %r3, 0, %r0 then it would have matched. - -# CHECK-LABEL: name: test1a -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 -# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr - -# CHECK-LABEL: name: test1b -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 -# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr - -# CHECK-LABEL: name: test2a -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 -# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr - -# CHECK-LABEL: name: test2b -# CHECK: [[IN1:%.*]] = COPY %r7 -# CHECK: [[IN2:%.*]] = COPY %r6 -# CHECK: SUB_F_R [[IN1]], [[IN2]], 0, implicit-def %sr - -# CHECK-LABEL: name: test3 -# CHECK: AND_F_R -# CHECK: AND_F_R -# CHECK: AND_F_R - ---- | - target datalayout = "E-m:e-p:32:32-i64:64-a:0:32-n32-S64" - target triple = "lanai-unknown-unknown" - - @a = global i32 -1, align 4 - @b = global i32 0, align 4 - - define i32 @test0a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %sub = sub i32 %b, %a - %cmp = icmp eq i32 %sub, 0 - %cond = select i1 %cmp, i32 %c, i32 %sub - ret i32 %cond - } - - define i32 @test0b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %cmp = icmp eq i32 %b, %a - %cond = select i1 %cmp, i32 %c, i32 %b - ret i32 %cond - } - - define i32 @test1a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %sub = sub i32 %b, %a - %cmp = icmp slt i32 %sub, 0 - %cond = select i1 %cmp, i32 %c, i32 %d - ret i32 %cond - } - - define i32 @test1b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %sub = sub i32 %b, %a - %cmp = icmp slt i32 %sub, 0 - %cond = select i1 %cmp, i32 %c, i32 %d - ret i32 %cond - } - - define i32 @test2a(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %sub = sub i32 %b, %a 
- %cmp = icmp sgt i32 %sub, -1 - %cond = select i1 %cmp, i32 %c, i32 %d - ret i32 %cond - } - - define i32 @test2b(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %sub = sub i32 %b, %a - %cmp = icmp sgt i32 %sub, -1 - %cond = select i1 %cmp, i32 %c, i32 %d - ret i32 %cond - } - - define i32 @test3(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %sub = sub i32 %b, %a - %cmp = icmp slt i32 %sub, 1 - %cond = select i1 %cmp, i32 %c, i32 %d - ret i32 %cond - } - - define i32 @test4(i32 inreg %a, i32 inreg %b, i32 inreg %c, i32 inreg %d) { - entry: - %cmp = icmp ne i32 %a, 0 - %cmp1 = icmp ult i32 %a, %b - %or.cond = and i1 %cmp, %cmp1 - br i1 %or.cond, label %return, label %if.end - - if.end: ; preds = %entry - %cmp2 = icmp ne i32 %b, 0 - %cmp4 = icmp ult i32 %b, %c - %or.cond29 = and i1 %cmp2, %cmp4 - br i1 %or.cond29, label %return, label %if.end6 - - if.end6: ; preds = %if.end - %cmp7 = icmp ne i32 %c, 0 - %cmp9 = icmp ult i32 %c, %d - %or.cond30 = and i1 %cmp7, %cmp9 - br i1 %or.cond30, label %return, label %if.end11 - - if.end11: ; preds = %if.end6 - %cmp12 = icmp ne i32 %d, 0 - %cmp14 = icmp ult i32 %d, %a - %or.cond31 = and i1 %cmp12, %cmp14 - %b. = select i1 %or.cond31, i32 %b, i32 21 - ret i32 %b. 
- - return: ; preds = %if.end6, %if.end, %entry - %retval.0 = phi i32 [ %c, %entry ], [ %d, %if.end ], [ %a, %if.end6 ] - ret i32 %retval.0 - } - - define void @testBB() { - entry: - %0 = load i32, i32* @a, align 4, !tbaa !0 - %1 = load i32, i32* @b, align 4, !tbaa !0 - %sub.i = sub i32 %1, %0 - %tobool = icmp sgt i32 %sub.i, -1 - br i1 %tobool, label %if.end, label %if.then - - if.then: ; preds = %entry - %call1 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() - br label %while.body - - while.body: ; preds = %while.body, %if.then - br label %while.body - - if.end: ; preds = %entry - %cmp.i = icmp slt i32 %sub.i, 1 - br i1 %cmp.i, label %if.then4, label %if.end7 - - if.then4: ; preds = %if.end - %call5 = tail call i32 bitcast (i32 (...)* @g to i32 ()*)() - br label %while.body6 - - while.body6: ; preds = %while.body6, %if.then4 - br label %while.body6 - - if.end7: ; preds = %if.end - ret void - } - - declare i32 @g(...) - - ; Function Attrs: nounwind - declare void @llvm.stackprotector(i8*, i8**) #0 - - attributes #0 = { nounwind } - - !0 = !{!1, !1, i64 0} - !1 = !{!"int", !2, i64 0} - !2 = !{!"omnipotent char", !3, i64 0} - !3 = !{!"Simple C/C++ TBAA"} - -... 
---- -name: test0a -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%0' } - - { reg: '%r7', virtual-reg: '%1' } - - { reg: '%r18', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: %r6, %r7, %r18 - - %2 = COPY %r18 - %1 = COPY %r7 - %0 = COPY %r6 - %4 = SUB_R %1, %0, 0 - SFSUB_F_RI_LO %4, 0, implicit-def %sr - %5 = SELECT %2, %4, 7, implicit %sr - %rv = COPY %5 - RET implicit %rca, implicit %rv - -... ---- -name: test0b -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%0' } - - { reg: '%r7', virtual-reg: '%1' } - - { reg: '%r18', virtual-reg: '%2' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: %r6, %r7, %r18 - - %2 = COPY %r18 - %1 = COPY %r7 - %0 = COPY %r6 - SFSUB_F_RR %1, %0, implicit-def %sr - %4 = SELECT %2, %1, 7, implicit %sr - %rv = COPY %4 - RET implicit %rca, implicit %rv - -... 
---- -name: test1a -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%0' } - - { reg: '%r7', virtual-reg: '%1' } - - { reg: '%r18', virtual-reg: '%2' } - - { reg: '%r19', virtual-reg: '%3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: %r6, %r7, %r18, %r19 - - %3 = COPY %r19 - %2 = COPY %r18 - %1 = COPY %r7 - %0 = COPY %r6 - %4 = SUB_R %1, %0, 0 - SFSUB_F_RI_LO killed %4, 0, implicit-def %sr - %5 = SELECT %2, %3, 11, implicit %sr - %rv = COPY %5 - RET implicit %rca, implicit %rv - -... 
---- -name: test1b -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%0' } - - { reg: '%r7', virtual-reg: '%1' } - - { reg: '%r18', virtual-reg: '%2' } - - { reg: '%r19', virtual-reg: '%3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: %r6, %r7, %r18, %r19 - - %3 = COPY %r19 - %2 = COPY %r18 - %1 = COPY %r7 - %0 = COPY %r6 - %4 = SUB_R %1, %0, 0 - SFSUB_F_RI_LO killed %4, 0, implicit-def %sr - %5 = SELECT %2, %3, 11, implicit %sr - %rv = COPY %5 - RET implicit %rca, implicit %rv - -... 
---- -name: test2a -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%0' } - - { reg: '%r7', virtual-reg: '%1' } - - { reg: '%r18', virtual-reg: '%2' } - - { reg: '%r19', virtual-reg: '%3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: %r6, %r7, %r18, %r19 - - %3 = COPY %r19 - %2 = COPY %r18 - %1 = COPY %r7 - %0 = COPY %r6 - %4 = SUB_R %1, %0, 0 - SFSUB_F_RI_LO killed %4, 0, implicit-def %sr - %5 = SELECT %2, %3, 10, implicit %sr - %rv = COPY %5 - RET implicit %rca, implicit %rv - -... 
---- -name: test2b -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%0' } - - { reg: '%r7', virtual-reg: '%1' } - - { reg: '%r18', virtual-reg: '%2' } - - { reg: '%r19', virtual-reg: '%3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: %r6, %r7, %r18, %r19 - - %3 = COPY %r19 - %2 = COPY %r18 - %1 = COPY %r7 - %0 = COPY %r6 - %4 = SUB_R %1, %0, 0 - SFSUB_F_RI_LO killed %4, 0, implicit-def %sr - %5 = SELECT %2, %3, 10, implicit %sr - %rv = COPY %5 - RET implicit %rca, implicit %rv - -... 
---- -name: test3 -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%0' } - - { reg: '%r7', virtual-reg: '%1' } - - { reg: '%r18', virtual-reg: '%2' } - - { reg: '%r19', virtual-reg: '%3' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - liveins: %r6, %r7, %r18, %r19 - - %3 = COPY %r19 - %2 = COPY %r18 - %1 = COPY %r7 - %0 = COPY %r6 - %4 = SUB_R %1, %0, 0 - SFSUB_F_RI_LO killed %4, 1, implicit-def %sr - %5 = SELECT %2, %3, 13, implicit %sr - %rv = COPY %5 - RET implicit %rca, implicit %rv - -... 
---- -name: test4 -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } - - { id: 6, class: gpr } - - { id: 7, class: gpr } - - { id: 8, class: gpr } - - { id: 9, class: gpr } - - { id: 10, class: gpr } - - { id: 11, class: gpr } - - { id: 12, class: gpr } - - { id: 13, class: gpr } - - { id: 14, class: gpr } - - { id: 15, class: gpr } - - { id: 16, class: gpr } - - { id: 17, class: gpr } - - { id: 18, class: gpr } - - { id: 19, class: gpr } - - { id: 20, class: gpr } - - { id: 21, class: gpr } - - { id: 22, class: gpr } -liveins: - - { reg: '%r6', virtual-reg: '%1' } - - { reg: '%r7', virtual-reg: '%2' } - - { reg: '%r18', virtual-reg: '%3' } - - { reg: '%r19', virtual-reg: '%4' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: false - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - successors: %bb.4.return, %bb.1.if.end - liveins: %r6, %r7, %r18, %r19 - - %4 = COPY %r19 - %3 = COPY %r18 - %2 = COPY %r7 - %1 = COPY %r6 - SFSUB_F_RI_LO %1, 0, implicit-def %sr - %5 = SCC 6, implicit %sr - SFSUB_F_RR %1, %2, implicit-def %sr - %6 = SCC 4, implicit %sr - %7 = AND_R killed %5, killed %6, 0 - %8 = SLI 1 - %9 = AND_R killed %7, %8, 0 - SFSUB_F_RI_LO killed %9, 0, implicit-def %sr - BRCC %bb.4.return, 6, implicit %sr - BT %bb.1.if.end - - bb.1.if.end: - successors: %bb.4.return, %bb.2.if.end6 - - SFSUB_F_RI_LO %2, 0, implicit-def %sr - %10 = SCC 6, implicit %sr - SFSUB_F_RR %2, %3, implicit-def %sr - %11 = SCC 4, implicit %sr - %12 = AND_R killed %10, killed %11, 0 - %14 = AND_R killed %12, %8, 0 - SFSUB_F_RI_LO killed %14, 0, implicit-def %sr - BRCC 
%bb.4.return, 6, implicit %sr - BT %bb.2.if.end6 - - bb.2.if.end6: - successors: %bb.4.return, %bb.3.if.end11 - - SFSUB_F_RI_LO %3, 0, implicit-def %sr - %15 = SCC 6, implicit %sr - SFSUB_F_RR %3, %4, implicit-def %sr - %16 = SCC 4, implicit %sr - %17 = AND_R killed %15, killed %16, 0 - %18 = SLI 1 - %19 = AND_R killed %17, killed %18, 0 - SFSUB_F_RI_LO killed %19, 0, implicit-def %sr - BRCC %bb.4.return, 6, implicit %sr - BT %bb.3.if.end11 - - bb.3.if.end11: - %20 = SLI 21 - SFSUB_F_RR %4, %1, implicit-def %sr - %21 = SELECT %2, %20, 4, implicit %sr - SFSUB_F_RI_LO %4, 0, implicit-def %sr - %22 = SELECT killed %21, %20, 6, implicit %sr - %rv = COPY %22 - RET implicit %rca, implicit %rv - - bb.4.return: - %0 = PHI %3, %bb.0.entry, %4, %bb.1.if.end, %1, %bb.2.if.end6 - %rv = COPY %0 - RET implicit %rca, implicit %rv - -... ---- -name: testBB -alignment: 2 -exposesReturnsTwice: false -tracksRegLiveness: true -registers: - - { id: 0, class: gpr } - - { id: 1, class: gpr } - - { id: 2, class: gpr } - - { id: 3, class: gpr } - - { id: 4, class: gpr } - - { id: 5, class: gpr } - - { id: 6, class: gpr } - - { id: 7, class: gpr } - - { id: 8, class: gpr } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 0 - adjustsStack: false - hasCalls: true - maxCallFrameSize: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false -body: | - bb.0.entry: - successors: %bb.3.if.end, %bb.1.if.then - - %1 = MOVHI target-flags(lanai-hi) @a - %2 = OR_I_LO killed %1, target-flags(lanai-lo) @a - %3 = LDW_RI killed %2, 0, 0 :: (load 4 from @a, !tbaa !0) - %4 = MOVHI target-flags(lanai-hi) @b - %5 = OR_I_LO killed %4, target-flags(lanai-lo) @b - %6 = LDW_RI killed %5, 0, 0 :: (load 4 from @b, !tbaa !0) - %0 = SUB_R killed %6, killed %3, 0 - SFSUB_F_RI_LO %0, 0, implicit-def %sr - BRCC %bb.3.if.end, 10, implicit %sr - BT %bb.1.if.then - - 
bb.1.if.then: - successors: %bb.2.while.body - - ADJCALLSTACKDOWN 0, implicit-def dead %sp, implicit %sp - CALL @g, csr, implicit-def dead %rca, implicit %sp, implicit-def %sp, implicit-def %rv - ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp - - bb.2.while.body: - successors: %bb.2.while.body - - BT %bb.2.while.body - - bb.3.if.end: - successors: %bb.4.if.then4, %bb.6.if.end7 - liveins: %sr - - BRCC %bb.6.if.end7, 14, implicit %sr - BT %bb.4.if.then4 - - bb.4.if.then4: - successors: %bb.5.while.body6 - - ADJCALLSTACKDOWN 0, implicit-def dead %sp, implicit %sp - CALL @g, csr, implicit-def dead %rca, implicit %sp, implicit-def %sp, implicit-def %rv - ADJCALLSTACKUP 0, 0, implicit-def dead %sp, implicit %sp - - bb.5.while.body6: - successors: %bb.5.while.body6 - - BT %bb.5.while.body6 - - bb.6.if.end7: - RET implicit %rca - -... -- cgit v1.2.3