summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
diff options
context:
space:
mode:
authorSanjay Patel <spatel@rotateright.com>2017-03-12 18:28:48 +0000
committerSanjay Patel <spatel@rotateright.com>2017-03-12 18:28:48 +0000
commitf06b963a2b4550128be28ca119c88486a6b3e478 (patch)
tree0e825356cca58c9758126a11d109e3a3ab9ff16a /llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
parentd5bd3a1e6abf1996dc883335a3957526d1186117 (diff)
downloadbcm5719-llvm-f06b963a2b4550128be28ca119c88486a6b3e478.tar.gz
bcm5719-llvm-f06b963a2b4550128be28ca119c88486a6b3e478.zip
[x86] don't blindly transform SETB into SBB
I noticed unnecessary 'sbb' instructions in D30472 and while looking at 'ptest' codegen recently. This happens because we were transforming any 'setb' - even when we only wanted a single-bit result. This patch moves those transforms under visitAdd/visitSub, so we we're only creating sbb/adc when it is a win. I don't know why we need a SETCC_CARRY node type, but I'm not proposing to change that existing behavior in this patch. Also, I'm skeptical that sbb/adc are a win for all micro-arches, so I added comments to the test files where this transform still fires. The test changes here are all cases where we no longer produce sbb/adc. Avoiding partial register stalls (generating an xor to clear a register) is not handled in some cases, but that's a separate issue. Differential Revision: https://reviews.llvm.org/D30611 llvm-svn: 297586
Diffstat (limited to 'llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll')
-rw-r--r--llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll40
1 files changed, 20 insertions, 20 deletions
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index fe4fc65ef71..4a86fa22f08 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -3310,16 +3310,16 @@ define <8 x float> @test_mm256_sub_ps(<8 x float> %a0, <8 x float> %a1) nounwind
define i32 @test_mm_testc_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; X32-LABEL: test_mm_testc_pd:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testc_pd:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
@@ -3329,17 +3329,17 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon
define i32 @test_mm256_testc_pd(<4 x double> %a0, <4 x double> %a1) nounwind {
; X32-LABEL: test_mm256_testc_pd:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestpd %ymm1, %ymm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_pd:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestpd %ymm1, %ymm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1)
@@ -3350,16 +3350,16 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea
define i32 @test_mm_testc_ps(<4 x float> %a0, <4 x float> %a1) nounwind {
; X32-LABEL: test_mm_testc_ps:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %xmm1, %xmm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: retl
;
; X64-LABEL: test_mm_testc_ps:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %xmm1, %xmm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1)
ret i32 %res
@@ -3369,17 +3369,17 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone
define i32 @test_mm256_testc_ps(<8 x float> %a0, <8 x float> %a1) nounwind {
; X32-LABEL: test_mm256_testc_ps:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vtestps %ymm1, %ymm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_ps:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vtestps %ymm1, %ymm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1)
@@ -3390,17 +3390,17 @@ declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readn
define i32 @test_mm256_testc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind {
; X32-LABEL: test_mm256_testc_si256:
; X32: # BB#0:
+; X32-NEXT: xorl %eax, %eax
; X32-NEXT: vptest %ymm1, %ymm0
-; X32-NEXT: sbbl %eax, %eax
-; X32-NEXT: andl $1, %eax
+; X32-NEXT: setb %al
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_testc_si256:
; X64: # BB#0:
+; X64-NEXT: xorl %eax, %eax
; X64-NEXT: vptest %ymm1, %ymm0
-; X64-NEXT: sbbl %eax, %eax
-; X64-NEXT: andl $1, %eax
+; X64-NEXT: setb %al
; X64-NEXT: vzeroupper
; X64-NEXT: retq
%res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1)
OpenPOWER on IntegriCloud