diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-09 12:30:02 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-08-09 12:30:02 +0000 |
| commit | 01ae462fef748212bfe42c2555bc3eb1f4309d0f (patch) | |
| tree | 1a211d8b95013ae17c8f0c57a2a438c83b142a62 /llvm/test/CodeGen/X86/sse2-schedule.ll | |
| parent | bf7f18b79c1315eb482a580a9be81e3aa7dd55ac (diff) | |
| download | bcm5719-llvm-01ae462fef748212bfe42c2555bc3eb1f4309d0f.tar.gz bcm5719-llvm-01ae462fef748212bfe42c2555bc3eb1f4309d0f.zip | |
[X86][SSE] Combine (some) target shuffles with multiple uses
As discussed on D41794, we have many cases where we fail to combine shuffles as the input operands have other uses.
This patch permits these shuffles to be combined as long as they don't introduce additional variable shuffle masks, which should reduce instruction dependencies and allow the total number of shuffles to still drop without increasing the constant pool.
However, this may mean that some memory folds no longer occur, and pre-AVX targets may require the occasional extra register move.
This also exposes some poor PMULDQ/PMULUDQ codegen that performs unnecessary upper/lower calculations which in fact fold to zero/undef - the fix will be added in a follow-up commit.
Differential Revision: https://reviews.llvm.org/D50328
llvm-svn: 339335
Diffstat (limited to 'llvm/test/CodeGen/X86/sse2-schedule.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-schedule.ll | 132 |
1 files changed, 61 insertions, 71 deletions
diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 0af49688c52..8e293c2c491 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -15003,141 +15003,131 @@ define <2 x double> @test_unpckhpd(<2 x double> %a0, <2 x double> %a1, <2 x doub define <2 x double> @test_unpcklpd(<2 x double> %a0, <2 x double> %a1, <2 x double> *%a2) { ; GENERIC-LABEL: test_unpcklpd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; GENERIC-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] -; GENERIC-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; GENERIC-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; GENERIC-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] +; GENERIC-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; GENERIC-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_unpcklpd: ; ATOM: # %bb.0: -; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; ATOM-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] -; ATOM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00] -; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00] -; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] +; ATOM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [1:1.00] +; ATOM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] +; ATOM-NEXT: addpd %xmm2, %xmm0 # sched: [6:3.00] ; ATOM-NEXT: retq # sched: [79:39.50] ; ; SLM-LABEL: test_unpcklpd: ; SLM: # %bb.0: -; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SLM-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] -; SLM-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00] -; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; 
SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; SLM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] +; SLM-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [4:1.00] +; SLM-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] +; SLM-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; SLM-NEXT: retq # sched: [4:1.00] ; ; SANDY-SSE-LABEL: test_unpcklpd: ; SANDY-SSE: # %bb.0: -; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] -; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; SANDY-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] +; SANDY-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; SANDY-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; SANDY-SSE-NEXT: retq # sched: [1:1.00] ; ; SANDY-LABEL: test_unpcklpd: ; SANDY: # %bb.0: -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] -; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] +; SANDY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; SANDY-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-SSE-LABEL: test_unpcklpd: ; HASWELL-SSE: # %bb.0: -; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] -; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; HASWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 
= xmm2[0],xmm1[0] sched: [1:1.00] +; HASWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; HASWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; HASWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; HASWELL-LABEL: test_unpcklpd: ; HASWELL: # %bb.0: -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] -; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] +; HASWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; HASWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; HASWELL-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-SSE-LABEL: test_unpcklpd: ; BROADWELL-SSE: # %bb.0: -; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:1.00] -; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] +; BROADWELL-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] +; BROADWELL-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; BROADWELL-SSE-NEXT: retq # sched: [7:1.00] ; ; BROADWELL-LABEL: test_unpcklpd: ; BROADWELL: # %bb.0: -; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00] -; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] +; BROADWELL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] +; BROADWELL-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BROADWELL-NEXT: retq # sched: [7:1.00] ; ; 
SKYLAKE-SSE-LABEL: test_unpcklpd: ; SKYLAKE-SSE: # %bb.0: -; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] -; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] -; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] +; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; SKYLAKE-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] ; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00] ; ; SKYLAKE-LABEL: test_unpcklpd: ; SKYLAKE: # %bb.0: -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] -; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKYLAKE-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; SKYLAKE-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKYLAKE-NEXT: retq # sched: [7:1.00] ; ; SKX-SSE-LABEL: test_unpcklpd: ; SKX-SSE: # %bb.0: -; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; SKX-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.33] -; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00] -; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50] -; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33] +; SKX-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.33] +; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:1.00] +; SKX-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; SKX-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [4:0.50] ; SKX-SSE-NEXT: retq # sched: [7:1.00] ; ; SKX-LABEL: test_unpcklpd: ; SKX: # %bb.0: -; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:1.00] -; 
SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [7:1.00] -; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50] +; SKX-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:1.00] +; SKX-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [7:1.00] +; SKX-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [4:0.50] ; SKX-NEXT: retq # sched: [7:1.00] ; ; BTVER2-SSE-LABEL: test_unpcklpd: ; BTVER2-SSE: # %bb.0: -; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00] -; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] +; BTVER2-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50] +; BTVER2-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_unpcklpd: ; BTVER2: # %bb.0: -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50] +; BTVER2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [6:1.00] +; BTVER2-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_unpcklpd: ; ZNVER1-SSE: # %bb.0: -; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm1 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50] -; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00] -; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movapd %xmm0, %xmm2 # sched: 
[1:0.25] +; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50] +; ZNVER1-SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] sched: [1:0.50] +; ZNVER1-SSE-NEXT: addpd %xmm2, %xmm0 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_unpcklpd: ; ZNVER1: # %bb.0: -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] sched: [1:0.50] -; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0] sched: [8:0.50] -; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00] +; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] sched: [1:0.50] +; ZNVER1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0] sched: [8:0.50] +; ZNVER1-NEXT: vaddpd %xmm0, %xmm1, %xmm0 # sched: [3:1.00] ; ZNVER1-NEXT: retq # sched: [1:0.50] %1 = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2> %2 = load <2 x double>, <2 x double> *%a2, align 16 |

