diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2017-03-12 18:28:48 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2017-03-12 18:28:48 +0000 |
| commit | f06b963a2b4550128be28ca119c88486a6b3e478 (patch) | |
| tree | 0e825356cca58c9758126a11d109e3a3ab9ff16a /llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll | |
| parent | d5bd3a1e6abf1996dc883335a3957526d1186117 (diff) | |
| download | bcm5719-llvm-f06b963a2b4550128be28ca119c88486a6b3e478.tar.gz bcm5719-llvm-f06b963a2b4550128be28ca119c88486a6b3e478.zip | |
[x86] don't blindly transform SETB into SBB
I noticed unnecessary 'sbb' instructions in D30472 and while looking at 'ptest' codegen recently.
This happens because we were transforming any 'setb' - even when we only wanted a single-bit result.
This patch moves those transforms under visitAdd/visitSub, so we're only creating sbb/adc when it
is a win. I don't know why we need a SETCC_CARRY node type, but I'm not proposing to change that
existing behavior in this patch.
Also, I'm skeptical that sbb/adc are a win for all micro-arches, so I added comments to the test files
where this transform still fires.
The test changes here are all cases where we no longer produce sbb/adc. Avoiding partial register
stalls (generating an xor to clear a register) is not handled in some cases, but that's a separate
issue.
Differential Revision: https://reviews.llvm.org/D30611
llvm-svn: 297586
Diffstat (limited to 'llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll | 40 |
1 files changed, 20 insertions, 20 deletions
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll index fe4fc65ef71..4a86fa22f08 100644 --- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll @@ -3310,16 +3310,16 @@ define <8 x float> @test_mm256_sub_ps(<8 x float> %a0, <8 x float> %a1) nounwind define i32 @test_mm_testc_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; X32-LABEL: test_mm_testc_pd: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testc_pd: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.pd(<2 x double> %a0, <2 x double> %a1) ret i32 %res @@ -3329,17 +3329,17 @@ declare i32 @llvm.x86.avx.vtestc.pd(<2 x double>, <2 x double>) nounwind readnon define i32 @test_mm256_testc_pd(<4 x double> %a0, <4 x double> %a1) nounwind { ; X32-LABEL: test_mm256_testc_pd: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestpd %ymm1, %ymm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testc_pd: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestpd %ymm1, %ymm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.pd.256(<4 x double> %a0, <4 x double> %a1) @@ -3350,16 +3350,16 @@ declare i32 @llvm.x86.avx.vtestc.pd.256(<4 x double>, <4 x double>) nounwind rea define i32 @test_mm_testc_ps(<4 x float> %a0, <4 x float> %a1) nounwind { ; X32-LABEL: test_mm_testc_ps: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %xmm1, %xmm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: 
andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: retl ; ; X64-LABEL: test_mm_testc_ps: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %xmm1, %xmm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.ps(<4 x float> %a0, <4 x float> %a1) ret i32 %res @@ -3369,17 +3369,17 @@ declare i32 @llvm.x86.avx.vtestc.ps(<4 x float>, <4 x float>) nounwind readnone define i32 @test_mm256_testc_ps(<8 x float> %a0, <8 x float> %a1) nounwind { ; X32-LABEL: test_mm256_testc_ps: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vtestps %ymm1, %ymm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testc_ps: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vtestps %ymm1, %ymm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.vtestc.ps.256(<8 x float> %a0, <8 x float> %a1) @@ -3390,17 +3390,17 @@ declare i32 @llvm.x86.avx.vtestc.ps.256(<8 x float>, <8 x float>) nounwind readn define i32 @test_mm256_testc_si256(<4 x i64> %a0, <4 x i64> %a1) nounwind { ; X32-LABEL: test_mm256_testc_si256: ; X32: # BB#0: +; X32-NEXT: xorl %eax, %eax ; X32-NEXT: vptest %ymm1, %ymm0 -; X32-NEXT: sbbl %eax, %eax -; X32-NEXT: andl $1, %eax +; X32-NEXT: setb %al ; X32-NEXT: vzeroupper ; X32-NEXT: retl ; ; X64-LABEL: test_mm256_testc_si256: ; X64: # BB#0: +; X64-NEXT: xorl %eax, %eax ; X64-NEXT: vptest %ymm1, %ymm0 -; X64-NEXT: sbbl %eax, %eax -; X64-NEXT: andl $1, %eax +; X64-NEXT: setb %al ; X64-NEXT: vzeroupper ; X64-NEXT: retq %res = call i32 @llvm.x86.avx.ptestc.256(<4 x i64> %a0, <4 x i64> %a1) |

