summary | refs | log | tree | commit | diff | stats
path: root/llvm/test/CodeGen/X86/sse3-schedule.ll
diff options
context:
space:
mode:
author: Ashutosh Nema <ashu1212@gmail.com>, 2017-08-31 12:38:35 +0000
committer: Ashutosh Nema <ashu1212@gmail.com>, 2017-08-31 12:38:35 +0000
commit: bfcac0b4806ad528c93a65281d7eb0d5f66305e9 (patch)
tree: 76aa60776bceeab42df6dead6eb58005ee17e8b7 /llvm/test/CodeGen/X86/sse3-schedule.ll
parent: 23a86ea4b4399c651df88bcb7b18a4f7cb7b183f (diff)
download: bcm5719-llvm-bfcac0b4806ad528c93a65281d7eb0d5f66305e9.tar.gz
download: bcm5719-llvm-bfcac0b4806ad528c93a65281d7eb0d5f66305e9.zip
AMD family 17h (znver1) scheduler model update.
Summary: This patch enables the following:
1) Regex-based instruction itineraries for integer instructions.
2) The instructions are grouped according to their nature (move, arithmetic, logic, misc, control transfer).
3) FP instructions and their itineraries are added, including values for SSE4A, BMI, BMI2 and SHA instructions.

Patch by Ganesh Gopalasubramanian

Reviewers: RKSimon, craig.topper
Subscribers: vprasad, shivaram, ddibyend, andreadb, javed.absar, llvm-commits
Differential Revision: https://reviews.llvm.org/D36617
llvm-svn: 312237
Diffstat (limited to 'llvm/test/CodeGen/X86/sse3-schedule.ll')
-rw-r--r-- llvm/test/CodeGen/X86/sse3-schedule.ll | 40
1 files changed, 20 insertions, 20 deletions
diff --git a/llvm/test/CodeGen/X86/sse3-schedule.ll b/llvm/test/CodeGen/X86/sse3-schedule.ll
index bb70599c280..7f6d0221f55 100644
--- a/llvm/test/CodeGen/X86/sse3-schedule.ll
+++ b/llvm/test/CodeGen/X86/sse3-schedule.ll
@@ -56,7 +56,7 @@ define <2 x double> @test_addsubpd(<2 x double> %a0, <2 x double> %a1, <2 x doub
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %1, <2 x double> %2)
@@ -111,7 +111,7 @@ define <4 x float> @test_addsubps(<4 x float> %a0, <4 x float> %a1, <4 x float>
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vaddsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %1, <4 x float> %2)
@@ -164,9 +164,9 @@ define <2 x double> @test_haddpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; ZNVER1-LABEL: test_haddpd:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: vhaddpd (%rdi), %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %1, <2 x double> %2)
@@ -219,9 +219,9 @@ define <4 x float> @test_haddps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; ZNVER1-LABEL: test_haddps:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: vhaddps %xmm1, %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: vhaddps (%rdi), %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %1, <4 x float> %2)
@@ -274,9 +274,9 @@ define <2 x double> @test_hsubpd(<2 x double> %a0, <2 x double> %a1, <2 x double
;
; ZNVER1-LABEL: test_hsubpd:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: vhsubpd (%rdi), %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1)
%2 = load <2 x double>, <2 x double> *%a2, align 16
%3 = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %1, <2 x double> %2)
@@ -329,9 +329,9 @@ define <4 x float> @test_hsubps(<4 x float> %a0, <4 x float> %a1, <4 x float> *%
;
; ZNVER1-LABEL: test_hsubps:
; ZNVER1: # BB#0:
-; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [10:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: vhsubps %xmm1, %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: vhsubps (%rdi), %xmm0, %xmm0 # sched: [100:?]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1)
%2 = load <4 x float>, <4 x float> *%a2, align 16
%3 = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %1, <4 x float> %2)
@@ -380,7 +380,7 @@ define <16 x i8> @test_lddqu(i8* %a0) {
; ZNVER1-LABEL: test_lddqu:
; ZNVER1: # BB#0:
; ZNVER1-NEXT: vlddqu (%rdi), %xmm0 # sched: [8:0.50]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0)
ret <16 x i8> %1
}
@@ -441,7 +441,7 @@ define void @test_monitor(i8* %a0, i32 %a1, i32 %a2) {
; ZNVER1-NEXT: leaq (%rdi), %rax # sched: [1:0.25]
; ZNVER1-NEXT: movl %esi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: monitor # sched: [100:?]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void @llvm.x86.sse3.monitor(i8* %a0, i32 %a1, i32 %a2)
ret void
}
@@ -503,7 +503,7 @@ define <2 x double> @test_movddup(<2 x double> %a0, <2 x double> *%a1) {
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] sched: [8:0.50]
; ZNVER1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] sched: [1:0.50]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <2 x double> %a0, <2 x double> undef, <2 x i32> zeroinitializer
%2 = load <2 x double>, <2 x double> *%a1, align 16
%3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> zeroinitializer
@@ -567,7 +567,7 @@ define <4 x float> @test_movshdup(<4 x float> %a0, <4 x float> *%a1) {
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm1 = mem[1,1,3,3] sched: [8:0.50]
; ZNVER1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
@@ -631,7 +631,7 @@ define <4 x float> @test_movsldup(<4 x float> %a0, <4 x float> *%a1) {
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm1 = mem[0,0,2,2] sched: [8:0.50]
; ZNVER1-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] sched: [1:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
%1 = shufflevector <4 x float> %a0, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
%2 = load <4 x float>, <4 x float> *%a1, align 16
%3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
@@ -694,7 +694,7 @@ define void @test_mwait(i32 %a0, i32 %a1) {
; ZNVER1-NEXT: movl %edi, %ecx # sched: [1:0.25]
; ZNVER1-NEXT: movl %esi, %eax # sched: [1:0.25]
; ZNVER1-NEXT: mwait # sched: [100:?]
-; ZNVER1-NEXT: retq # sched: [5:0.50]
+; ZNVER1-NEXT: retq # sched: [1:0.50]
tail call void @llvm.x86.sse3.mwait(i32 %a0, i32 %a1)
ret void
}
OpenPOWER on IntegriCloud