Diffstat (limited to 'llvm')
 -rw-r--r--   llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll    | 210
 -rw-r--r--   llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll          | 141
 -rw-r--r--   llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll   | 292
 -rw-r--r--   llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll         | 264
4 files changed, 492 insertions, 415 deletions
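The patch mechanically regenerates these four test files: the old X32/X64-only FileCheck prefixes are replaced by a shared scheme (CHECK, X86/X64, SSE/AVX, AVX1/AVX512, plus per-target variants such as X86-SSE and X64-AVX512), AVX and AVX512 RUN lines are added, and the retl/retq assertions are collapsed into the ret{{[l|q]}} regex so one block of checks can serve both 32-bit and 64-bit runs. As a condensed illustration of that convention (a sketch distilled from the patch, with an arbitrary function name, not a file the commit adds), a test using the shared prefixes looks like this:

; RUN: llc < %s -mtriple=i386-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
define <2 x double> @addsub(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: addsub:
; SSE:       addsubpd %xmm1, %xmm0
; SSE-NEXT:  ret{{[l|q]}}
;
; AVX-LABEL: addsub:
; AVX:       vaddsubpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:  ret{{[l|q]}}
  %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1)
  ret <2 x double> %res
}
declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone

The SSE and AVX check blocks are shared by the 32-bit and 64-bit RUN lines because the generated code differs only in the return instruction, which the ret{{[l|q]}} regex absorbs; checks that genuinely differ per target (for example loads through %esp vs. %rdi) fall back to the X86-SSE/X64-SSE style prefixes seen in the diff below.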
diff --git a/llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll index 5bf36a51c76..653a3a31d04 100644 --- a/llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/sse3-intrinsics-fast-isel.ll @@ -1,110 +1,125 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32 -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse3-builtins.c define <2 x double> @test_mm_addsub_pd(<2 x double> %a0, <2 x double> %a1) { -; X32-LABEL: test_mm_addsub_pd: -; X32: # %bb.0: -; X32-NEXT: addsubpd %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_addsub_pd: +; SSE: # %bb.0: +; SSE-NEXT: addsubpd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_addsub_pd: -; X64: # %bb.0: -; X64-NEXT: addsubpd %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_addsub_pd: +; AVX: # %bb.0: +; AVX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ret <2 x double> %res } declare <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double>, <2 x double>) nounwind readnone define <4 x float> @test_mm_addsub_ps(<4 x float> %a0, <4 x float> %a1) { -; X32-LABEL: test_mm_addsub_ps: -; X32: # %bb.0: -; X32-NEXT: addsubps %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_addsub_ps: +; SSE: # %bb.0: +; SSE-NEXT: addsubps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_addsub_ps: -; X64: # %bb.0: -; X64-NEXT: addsubps %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_addsub_ps: +; AVX: # %bb.0: +; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ret <4 x float> %res } declare <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float>, <4 x float>) nounwind readnone define <2 x double> @test_mm_hadd_pd(<2 x double> %a0, <2 x double> %a1) { -; X32-LABEL: test_mm_hadd_pd: -; X32: # %bb.0: -; X32-NEXT: haddpd %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hadd_pd: +; SSE: # %bb.0: +; SSE-NEXT: haddpd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hadd_pd: -; X64: # %bb.0: -; X64-NEXT: haddpd %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hadd_pd: +; AVX: # %bb.0: +; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %res = call <2 x double> 
@llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ret <2 x double> %res } declare <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double>, <2 x double>) nounwind readnone define <4 x float> @test_mm_hadd_ps(<4 x float> %a0, <4 x float> %a1) { -; X32-LABEL: test_mm_hadd_ps: -; X32: # %bb.0: -; X32-NEXT: haddps %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hadd_ps: +; SSE: # %bb.0: +; SSE-NEXT: haddps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hadd_ps: -; X64: # %bb.0: -; X64-NEXT: haddps %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hadd_ps: +; AVX: # %bb.0: +; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ret <4 x float> %res } declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone define <2 x double> @test_mm_hsub_pd(<2 x double> %a0, <2 x double> %a1) { -; X32-LABEL: test_mm_hsub_pd: -; X32: # %bb.0: -; X32-NEXT: hsubpd %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hsub_pd: +; SSE: # %bb.0: +; SSE-NEXT: hsubpd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hsub_pd: -; X64: # %bb.0: -; X64-NEXT: hsubpd %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hsub_pd: +; AVX: # %bb.0: +; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ret <2 x double> %res } declare <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double>, <2 x double>) nounwind readnone define <4 x float> @test_mm_hsub_ps(<4 x float> %a0, <4 x float> %a1) { -; X32-LABEL: test_mm_hsub_ps: -; X32: # %bb.0: -; X32-NEXT: hsubps %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hsub_ps: +; SSE: # %bb.0: +; SSE-NEXT: hsubps %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hsub_ps: -; X64: # %bb.0: -; X64-NEXT: hsubps %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hsub_ps: +; AVX: # %bb.0: +; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ret <4 x float> %res } declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind readnone define <2 x i64> @test_mm_lddqu_si128(<2 x i64>* %a0) { -; X32-LABEL: test_mm_lddqu_si128: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: lddqu (%eax), %xmm0 -; X32-NEXT: retl +; X86-SSE-LABEL: test_mm_lddqu_si128: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: lddqu (%eax), %xmm0 +; X86-SSE-NEXT: retl ; -; X64-LABEL: test_mm_lddqu_si128: -; X64: # %bb.0: -; X64-NEXT: lddqu (%rdi), %xmm0 -; X64-NEXT: retq +; X86-AVX-LABEL: test_mm_lddqu_si128: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vlddqu (%eax), %xmm0 +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: test_mm_lddqu_si128: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: lddqu (%rdi), %xmm0 +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: test_mm_lddqu_si128: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vlddqu (%rdi), %xmm0 +; X64-AVX-NEXT: retq %bc = bitcast <2 x i64>* %a0 to i8* %call = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %bc) %res = bitcast <16 x i8> %call to <2 x i64> @@ -113,16 +128,27 @@ define <2 x i64> @test_mm_lddqu_si128(<2 x i64>* %a0) { declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly define <2 x double> @test_mm_loaddup_pd(double* %a0) { -; X32-LABEL: test_mm_loaddup_pd: -; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: 
movddup {{.*#+}} xmm0 = mem[0,0] -; X32-NEXT: retl +; X86-SSE-LABEL: test_mm_loaddup_pd: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] +; X86-SSE-NEXT: retl +; +; X86-AVX-LABEL: test_mm_loaddup_pd: +; X86-AVX: # %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X86-AVX-NEXT: retl +; +; X64-SSE-LABEL: test_mm_loaddup_pd: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] +; X64-SSE-NEXT: retq ; -; X64-LABEL: test_mm_loaddup_pd: -; X64: # %bb.0: -; X64-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] -; X64-NEXT: retq +; X64-AVX-LABEL: test_mm_loaddup_pd: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] +; X64-AVX-NEXT: retq %ld = load double, double* %a0 %res0 = insertelement <2 x double> undef, double %ld, i32 0 %res1 = insertelement <2 x double> %res0, double %ld, i32 1 @@ -130,43 +156,43 @@ define <2 x double> @test_mm_loaddup_pd(double* %a0) { } define <2 x double> @test_mm_movedup_pd(<2 x double> %a0) { -; X32-LABEL: test_mm_movedup_pd: -; X32: # %bb.0: -; X32-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] -; X32-NEXT: retl +; SSE-LABEL: test_mm_movedup_pd: +; SSE: # %bb.0: +; SSE-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_movedup_pd: -; X64: # %bb.0: -; X64-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] -; X64-NEXT: retq +; AVX-LABEL: test_mm_movedup_pd: +; AVX: # %bb.0: +; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] +; AVX-NEXT: ret{{[l|q]}} %res = shufflevector <2 x double> %a0, <2 x double> %a0, <2 x i32> zeroinitializer ret <2 x double> %res } define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) { -; X32-LABEL: test_mm_movehdup_ps: -; X32: # %bb.0: -; X32-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; X32-NEXT: retl +; SSE-LABEL: test_mm_movehdup_ps: +; SSE: # %bb.0: +; SSE-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_movehdup_ps: -; X64: # %bb.0: -; X64-NEXT: movshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] -; X64-NEXT: retq +; AVX-LABEL: test_mm_movehdup_ps: +; AVX: # %bb.0: +; AVX-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3] +; AVX-NEXT: ret{{[l|q]}} %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 1, i32 1, i32 3, i32 3> ret <4 x float> %res } define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) { -; X32-LABEL: test_mm_moveldup_ps: -; X32: # %bb.0: -; X32-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] -; X32-NEXT: retl +; SSE-LABEL: test_mm_moveldup_ps: +; SSE: # %bb.0: +; SSE-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_moveldup_ps: -; X64: # %bb.0: -; X64-NEXT: movsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] -; X64-NEXT: retq +; AVX-LABEL: test_mm_moveldup_ps: +; AVX: # %bb.0: +; AVX-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2] +; AVX-NEXT: ret{{[l|q]}} %res = shufflevector <4 x float> %a0, <4 x float> %a0, <4 x i32> <i32 0, i32 0, i32 2, i32 2> ret <4 x float> %res } diff --git a/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll index 18bd2195cb9..f97bf08101f 100644 --- a/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse3-intrinsics-x86.ll @@ -1,18 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+sse3 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=SSE -; RUN: llc < %s 
-mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=VCHECK --check-prefix=AVX2 -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=CHECK --check-prefix=VCHECK --check-prefix=SKX +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 define <2 x double> @test_x86_sse3_addsub_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse3_addsub_pd: ; SSE: ## %bb.0: ; SSE-NEXT: addsubpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xd0,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse3_addsub_pd: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd0,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse3_addsub_pd: +; AVX: ## %bb.0: +; AVX-NEXT: vaddsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xd0,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse3.addsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -23,12 +26,12 @@ define <4 x float> @test_x86_sse3_addsub_ps(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse3_addsub_ps: ; SSE: ## %bb.0: ; SSE-NEXT: addsubps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0xd0,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse3_addsub_ps: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xd0,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse3_addsub_ps: +; AVX: ## %bb.0: +; AVX-NEXT: vaddsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0xd0,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse3.addsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -39,12 +42,12 @@ define <2 x double> @test_x86_sse3_hadd_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse3_hadd_pd: ; SSE: ## %bb.0: ; SSE-NEXT: haddpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x7c,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse3_hadd_pd: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7c,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse3_hadd_pd: +; AVX: ## %bb.0: +; AVX-NEXT: vhaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7c,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse3.hadd.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x 
double>> [#uses=1] ret <2 x double> %res } @@ -55,12 +58,12 @@ define <4 x float> @test_x86_sse3_hadd_ps(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse3_hadd_ps: ; SSE: ## %bb.0: ; SSE-NEXT: haddps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x7c,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse3_hadd_ps: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7c,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse3_hadd_ps: +; AVX: ## %bb.0: +; AVX-NEXT: vhaddps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7c,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -71,12 +74,12 @@ define <2 x double> @test_x86_sse3_hsub_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse3_hsub_pd: ; SSE: ## %bb.0: ; SSE-NEXT: hsubpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x7d,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse3_hsub_pd: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7d,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse3_hsub_pd: +; AVX: ## %bb.0: +; AVX-NEXT: vhsubpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x7d,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse3.hsub.pd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1] ret <2 x double> %res } @@ -87,12 +90,12 @@ define <4 x float> @test_x86_sse3_hsub_ps(<4 x float> %a0, <4 x float> %a1) { ; SSE-LABEL: test_x86_sse3_hsub_ps: ; SSE: ## %bb.0: ; SSE-NEXT: hsubps %xmm1, %xmm0 ## encoding: [0xf2,0x0f,0x7d,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse3_hsub_ps: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7d,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_sse3_hsub_ps: +; AVX: ## %bb.0: +; AVX-NEXT: vhsubps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xfb,0x7d,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float> %a0, <4 x float> %a1) ; <<4 x float>> [#uses=1] ret <4 x float> %res } @@ -100,17 +103,27 @@ declare <4 x float> @llvm.x86.sse3.hsub.ps(<4 x float>, <4 x float>) nounwind re define <16 x i8> @test_x86_sse3_ldu_dq(i8* %a0) { -; SSE-LABEL: test_x86_sse3_ldu_dq: -; SSE: ## %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; SSE-NEXT: lddqu (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xf0,0x00] -; SSE-NEXT: retl ## encoding: [0xc3] +; X86-SSE-LABEL: test_x86_sse3_ldu_dq: +; X86-SSE: ## %bb.0: +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: lddqu (%eax), %xmm0 ## encoding: [0xf2,0x0f,0xf0,0x00] +; X86-SSE-NEXT: retl ## encoding: [0xc3] +; +; X86-AVX-LABEL: test_x86_sse3_ldu_dq: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX-NEXT: vlddqu (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xf0,0x00] +; X86-AVX-NEXT: retl ## encoding: [0xc3] +; +; X64-SSE-LABEL: test_x86_sse3_ldu_dq: +; X64-SSE: ## %bb.0: +; X64-SSE-NEXT: lddqu (%rdi), %xmm0 ## encoding: [0xf2,0x0f,0xf0,0x07] +; X64-SSE-NEXT: retq ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_sse3_ldu_dq: -; 
VCHECK: ## %bb.0: -; VCHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; VCHECK-NEXT: vlddqu (%eax), %xmm0 ## encoding: [0xc5,0xfb,0xf0,0x00] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; X64-AVX-LABEL: test_x86_sse3_ldu_dq: +; X64-AVX: ## %bb.0: +; X64-AVX-NEXT: vlddqu (%rdi), %xmm0 ## encoding: [0xc5,0xfb,0xf0,0x07] +; X64-AVX-NEXT: retq ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.sse3.ldu.dq(i8* %a0) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -119,26 +132,40 @@ declare <16 x i8> @llvm.x86.sse3.ldu.dq(i8*) nounwind readonly ; Make sure instructions with no AVX equivalents, but are associated with SSEX feature flags still work define void @monitor(i8* %P, i32 %E, i32 %H) nounwind { -; CHECK-LABEL: monitor: -; CHECK: ## %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c] -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; CHECK-NEXT: leal (%eax), %eax ## encoding: [0x8d,0x00] -; CHECK-NEXT: monitor ## encoding: [0x0f,0x01,0xc8] -; CHECK-NEXT: retl ## encoding: [0xc3] +; X86-LABEL: monitor: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x0c] +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08] +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: leal (%eax), %eax ## encoding: [0x8d,0x00] +; X86-NEXT: monitor ## encoding: [0x0f,0x01,0xc8] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: monitor: +; X64: ## %bb.0: +; X64-NEXT: leaq (%rdi), %rax ## encoding: [0x48,0x8d,0x07] +; X64-NEXT: movl %esi, %ecx ## encoding: [0x89,0xf1] +; X64-NEXT: monitor ## encoding: [0x0f,0x01,0xc8] +; X64-NEXT: retq ## encoding: [0xc3] tail call void @llvm.x86.sse3.monitor(i8* %P, i32 %E, i32 %H) ret void } declare void @llvm.x86.sse3.monitor(i8*, i32, i32) nounwind define void @mwait(i32 %E, i32 %H) nounwind { -; CHECK-LABEL: mwait: -; CHECK: ## %bb.0: -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04] -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] -; CHECK-NEXT: mwait ## encoding: [0x0f,0x01,0xc9] -; CHECK-NEXT: retl ## encoding: [0xc3] +; X86-LABEL: mwait: +; X86: ## %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04] +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08] +; X86-NEXT: mwait ## encoding: [0x0f,0x01,0xc9] +; X86-NEXT: retl ## encoding: [0xc3] +; +; X64-LABEL: mwait: +; X64: ## %bb.0: +; X64-NEXT: movl %edi, %ecx ## encoding: [0x89,0xf9] +; X64-NEXT: movl %esi, %eax ## encoding: [0x89,0xf0] +; X64-NEXT: mwait ## encoding: [0x0f,0x01,0xc9] +; X64-NEXT: retq ## encoding: [0xc3] tail call void @llvm.x86.sse3.mwait(i32 %E, i32 %H) ret void } diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll index 74c5924b600..b0529640eb1 100644 --- a/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-fast-isel.ll @@ -1,22 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X32 -; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=ssse3 | FileCheck %s --check-prefix=ALL --check-prefix=X64 +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown 
-mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 +; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 +; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/ssse3-builtins.c define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) { -; X32-LABEL: test_mm_abs_epi8: -; X32: # %bb.0: -; X32-NEXT: pabsb %xmm0, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_abs_epi8: +; SSE: # %bb.0: +; SSE-NEXT: pabsb %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_abs_epi8: -; X64: # %bb.0: -; X64-NEXT: pabsb %xmm0, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_abs_epi8: +; AVX: # %bb.0: +; AVX-NEXT: vpabsb %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <16 x i8> %sub = sub <16 x i8> zeroinitializer, %arg - %cmp = icmp sgt <16 x i8> %arg, zeroinitializer + %cmp = icmp sgt <16 x i8> %arg, zeroinitializer %sel = select <16 x i1> %cmp, <16 x i8> %arg, <16 x i8> %sub %res = bitcast <16 x i8> %sel to <2 x i64> ret <2 x i64> %res @@ -24,18 +28,18 @@ define <2 x i64> @test_mm_abs_epi8(<2 x i64> %a0) { declare <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8>) nounwind readnone define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) { -; X32-LABEL: test_mm_abs_epi16: -; X32: # %bb.0: -; X32-NEXT: pabsw %xmm0, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_abs_epi16: +; SSE: # %bb.0: +; SSE-NEXT: pabsw %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_abs_epi16: -; X64: # %bb.0: -; X64-NEXT: pabsw %xmm0, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_abs_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vpabsw %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <8 x i16> %sub = sub <8 x i16> zeroinitializer, %arg - %cmp = icmp sgt <8 x i16> %arg, zeroinitializer + %cmp = icmp sgt <8 x i16> %arg, zeroinitializer %sel = select <8 x i1> %cmp, <8 x i16> %arg, <8 x i16> %sub %res = bitcast <8 x i16> %sel to <2 x i64> ret <2 x i64> %res @@ -43,18 +47,18 @@ define <2 x i64> @test_mm_abs_epi16(<2 x i64> %a0) { declare <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16>) nounwind readnone define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) { -; X32-LABEL: test_mm_abs_epi32: -; X32: # %bb.0: -; X32-NEXT: pabsd %xmm0, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_abs_epi32: +; SSE: # %bb.0: +; SSE-NEXT: pabsd %xmm0, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_abs_epi32: -; X64: # %bb.0: -; X64-NEXT: pabsd %xmm0, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_abs_epi32: +; AVX: # %bb.0: +; AVX-NEXT: vpabsd %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg = bitcast <2 x i64> %a0 to <4 x i32> %sub = sub <4 x i32> zeroinitializer, %arg - %cmp = icmp sgt <4 x i32> %arg, zeroinitializer + %cmp = icmp sgt <4 x i32> %arg, zeroinitializer %sel = select <4 x i1> %cmp, <4 x i32> %arg, <4 x i32> %sub %res = bitcast <4 x i32> %sel to <2 x i64> ret <2 x i64> %res @@ 
-62,17 +66,16 @@ define <2 x i64> @test_mm_abs_epi32(<2 x i64> %a0) { declare <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32>) nounwind readnone define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_alignr_epi8: -; X32: # %bb.0: -; X32-NEXT: palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1] -; X32-NEXT: movdqa %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_alignr_epi8: +; SSE: # %bb.0: +; SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_alignr_epi8: -; X64: # %bb.0: -; X64-NEXT: palignr {{.*#+}} xmm1 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1] -; X64-NEXT: movdqa %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_alignr_epi8: +; AVX: # %bb.0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1] +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17> @@ -81,17 +84,16 @@ define <2 x i64> @test_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) { } define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test2_mm_alignr_epi8: -; X32: # %bb.0: -; X32-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] -; X32-NEXT: movdqa %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test2_mm_alignr_epi8: +; SSE: # %bb.0: +; SSE-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; SSE-NEXT: movdqa %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test2_mm_alignr_epi8: -; X64: # %bb.0: -; X64-NEXT: palignr {{.*#+}} xmm1 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] -; X64-NEXT: movdqa %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test2_mm_alignr_epi8: +; AVX: # %bb.0: +; AVX-NEXT: vpalignr {{.*#+}} xmm0 = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0] +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %shuf = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16> @@ -100,15 +102,15 @@ define <2 x i64> @test2_mm_alignr_epi8(<2 x i64> %a0, <2 x i64> %a1) { } define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_hadd_epi16: -; X32: # %bb.0: -; X32-NEXT: phaddw %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hadd_epi16: +; SSE: # %bb.0: +; SSE-NEXT: phaddw %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hadd_epi16: -; X64: # %bb.0: -; X64-NEXT: phaddw %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hadd_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %call = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -118,15 +120,15 @@ define <2 x i64> @test_mm_hadd_epi16(<2 x i64> %a0, <2 x i64> %a1) { declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_hadd_epi32: -; X32: # %bb.0: -; X32-NEXT: phaddd %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: 
test_mm_hadd_epi32: +; SSE: # %bb.0: +; SSE-NEXT: phaddd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hadd_epi32: -; X64: # %bb.0: -; X64-NEXT: phaddd %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hadd_epi32: +; AVX: # %bb.0: +; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %call = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %arg0, <4 x i32> %arg1) @@ -136,15 +138,15 @@ define <2 x i64> @test_mm_hadd_epi32(<2 x i64> %a0, <2 x i64> %a1) { declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_hadds_epi16: -; X32: # %bb.0: -; X32-NEXT: phaddsw %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hadds_epi16: +; SSE: # %bb.0: +; SSE-NEXT: phaddsw %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hadds_epi16: -; X64: # %bb.0: -; X64-NEXT: phaddsw %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hadds_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %call = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -154,15 +156,15 @@ define <2 x i64> @test_mm_hadds_epi16(<2 x i64> %a0, <2 x i64> %a1) { declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_hsub_epi16: -; X32: # %bb.0: -; X32-NEXT: phsubw %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hsub_epi16: +; SSE: # %bb.0: +; SSE-NEXT: phsubw %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hsub_epi16: -; X64: # %bb.0: -; X64-NEXT: phsubw %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hsub_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %call = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -172,15 +174,15 @@ define <2 x i64> @test_mm_hsub_epi16(<2 x i64> %a0, <2 x i64> %a1) { declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_hsub_epi32: -; X32: # %bb.0: -; X32-NEXT: phsubd %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hsub_epi32: +; SSE: # %bb.0: +; SSE-NEXT: phsubd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hsub_epi32: -; X64: # %bb.0: -; X64-NEXT: phsubd %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hsub_epi32: +; AVX: # %bb.0: +; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %call = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %arg0, <4 x i32> %arg1) @@ -190,15 +192,15 @@ define <2 x i64> @test_mm_hsub_epi32(<2 x i64> %a0, <2 x i64> %a1) { declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_hsubs_epi16: -; X32: # %bb.0: -; X32-NEXT: phsubsw %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_hsubs_epi16: +; SSE: # %bb.0: +; SSE-NEXT: phsubsw %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_hsubs_epi16: 
-; X64: # %bb.0: -; X64-NEXT: phsubsw %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_hsubs_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %call = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -208,15 +210,15 @@ define <2 x i64> @test_mm_hsubs_epi16(<2 x i64> %a0, <2 x i64> %a1) { declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_maddubs_epi16: -; X32: # %bb.0: -; X32-NEXT: pmaddubsw %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_maddubs_epi16: +; SSE: # %bb.0: +; SSE-NEXT: pmaddubsw %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_maddubs_epi16: -; X64: # %bb.0: -; X64-NEXT: pmaddubsw %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_maddubs_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %call = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %arg0, <16 x i8> %arg1) @@ -226,15 +228,15 @@ define <2 x i64> @test_mm_maddubs_epi16(<2 x i64> %a0, <2 x i64> %a1) { declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_mulhrs_epi16: -; X32: # %bb.0: -; X32-NEXT: pmulhrsw %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_mulhrs_epi16: +; SSE: # %bb.0: +; SSE-NEXT: pmulhrsw %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_mulhrs_epi16: -; X64: # %bb.0: -; X64-NEXT: pmulhrsw %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_mulhrs_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %call = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -244,15 +246,15 @@ define <2 x i64> @test_mm_mulhrs_epi16(<2 x i64> %a0, <2 x i64> %a1) { declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_shuffle_epi8: -; X32: # %bb.0: -; X32-NEXT: pshufb %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_shuffle_epi8: +; SSE: # %bb.0: +; SSE-NEXT: pshufb %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_shuffle_epi8: -; X64: # %bb.0: -; X64-NEXT: pshufb %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_shuffle_epi8: +; AVX: # %bb.0: +; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %call = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %arg0, <16 x i8> %arg1) @@ -262,15 +264,15 @@ define <2 x i64> @test_mm_shuffle_epi8(<2 x i64> %a0, <2 x i64> %a1) { declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_sign_epi8: -; X32: # %bb.0: -; X32-NEXT: psignb %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_sign_epi8: +; SSE: # %bb.0: +; SSE-NEXT: psignb %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_sign_epi8: -; X64: # %bb.0: -; X64-NEXT: psignb %xmm1, %xmm0 -; X64-NEXT: retq +; 
AVX-LABEL: test_mm_sign_epi8: +; AVX: # %bb.0: +; AVX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %call = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %arg0, <16 x i8> %arg1) @@ -280,15 +282,15 @@ define <2 x i64> @test_mm_sign_epi8(<2 x i64> %a0, <2 x i64> %a1) { declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_sign_epi16: -; X32: # %bb.0: -; X32-NEXT: psignw %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_sign_epi16: +; SSE: # %bb.0: +; SSE-NEXT: psignw %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_sign_epi16: -; X64: # %bb.0: -; X64-NEXT: psignw %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_sign_epi16: +; AVX: # %bb.0: +; AVX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %call = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -298,15 +300,15 @@ define <2 x i64> @test_mm_sign_epi16(<2 x i64> %a0, <2 x i64> %a1) { declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_sign_epi32(<2 x i64> %a0, <2 x i64> %a1) { -; X32-LABEL: test_mm_sign_epi32: -; X32: # %bb.0: -; X32-NEXT: psignd %xmm1, %xmm0 -; X32-NEXT: retl +; SSE-LABEL: test_mm_sign_epi32: +; SSE: # %bb.0: +; SSE-NEXT: psignd %xmm1, %xmm0 +; SSE-NEXT: ret{{[l|q]}} ; -; X64-LABEL: test_mm_sign_epi32: -; X64: # %bb.0: -; X64-NEXT: psignd %xmm1, %xmm0 -; X64-NEXT: retq +; AVX-LABEL: test_mm_sign_epi32: +; AVX: # %bb.0: +; AVX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 +; AVX-NEXT: ret{{[l|q]}} %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %call = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %arg0, <4 x i32> %arg1) diff --git a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll index 66265d63a97..629a759332a 100644 --- a/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll @@ -1,23 +1,26 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=-avx,+ssse3 -show-mc-encoding | FileCheck %s --check-prefix=SSE -; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx2 -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=AVX2 -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=skx -show-mc-encoding | FileCheck %s --check-prefix=VCHECK --check-prefix=SKX +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1 +; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+ssse3 -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx -show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl -show-mc-encoding | 
FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512 define <16 x i8> @test_x86_ssse3_pabs_b_128(<16 x i8> %a0) { ; SSE-LABEL: test_x86_ssse3_pabs_b_128: ; SSE: ## %bb.0: ; SSE-NEXT: pabsb %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1c,0xc0] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_ssse3_pabs_b_128: -; AVX2: ## %bb.0: -; AVX2-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1c,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] +; AVX1-LABEL: test_x86_ssse3_pabs_b_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsb %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1c,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; SKX-LABEL: test_x86_ssse3_pabs_b_128: -; SKX: ## %bb.0: -; SKX-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] -; SKX-NEXT: retl ## encoding: [0xc3] +; AVX512-LABEL: test_x86_ssse3_pabs_b_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsb %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.ssse3.pabs.b.128(<16 x i8> %a0) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -28,17 +31,17 @@ define <4 x i32> @test_x86_ssse3_pabs_d_128(<4 x i32> %a0) { ; SSE-LABEL: test_x86_ssse3_pabs_d_128: ; SSE: ## %bb.0: ; SSE-NEXT: pabsd %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1e,0xc0] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_ssse3_pabs_d_128: -; AVX2: ## %bb.0: -; AVX2-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1e,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] +; AVX1-LABEL: test_x86_ssse3_pabs_d_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsd %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1e,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; SKX-LABEL: test_x86_ssse3_pabs_d_128: -; SKX: ## %bb.0: -; SKX-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] -; SKX-NEXT: retl ## encoding: [0xc3] +; AVX512-LABEL: test_x86_ssse3_pabs_d_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsd %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.ssse3.pabs.d.128(<4 x i32> %a0) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -49,17 +52,17 @@ define <8 x i16> @test_x86_ssse3_pabs_w_128(<8 x i16> %a0) { ; SSE-LABEL: test_x86_ssse3_pabs_w_128: ; SSE: ## %bb.0: ; SSE-NEXT: pabsw %xmm0, %xmm0 ## encoding: [0x66,0x0f,0x38,0x1d,0xc0] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_ssse3_pabs_w_128: -; AVX2: ## %bb.0: -; AVX2-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1d,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] +; AVX1-LABEL: test_x86_ssse3_pabs_w_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpabsw %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x1d,0xc0] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; SKX-LABEL: test_x86_ssse3_pabs_w_128: -; SKX: ## %bb.0: -; SKX-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] -; SKX-NEXT: retl ## encoding: [0xc3] +; AVX512-LABEL: test_x86_ssse3_pabs_w_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpabsw %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xc0] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pabs.w.128(<8 x i16> %a0) ; <<8 x i16>> [#uses=1] ret <8 x i16> 
%res } @@ -70,12 +73,12 @@ define <4 x i32> @test_x86_ssse3_phadd_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_x86_ssse3_phadd_d_128: ; SSE: ## %bb.0: ; SSE-NEXT: phaddd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x02,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_phadd_d_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x02,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_phadd_d_128: +; AVX: ## %bb.0: +; AVX-NEXT: vphaddd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x02,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -86,12 +89,12 @@ define <8 x i16> @test_x86_ssse3_phadd_sw_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_ssse3_phadd_sw_128: ; SSE: ## %bb.0: ; SSE-NEXT: phaddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x03,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_phadd_sw_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x03,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_phadd_sw_128: +; AVX: ## %bb.0: +; AVX-NEXT: vphaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x03,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -102,12 +105,12 @@ define <8 x i16> @test_x86_ssse3_phadd_w_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_ssse3_phadd_w_128: ; SSE: ## %bb.0: ; SSE-NEXT: phaddw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x01,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_phadd_w_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vphaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x01,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_phadd_w_128: +; AVX: ## %bb.0: +; AVX-NEXT: vphaddw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x01,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -118,12 +121,12 @@ define <4 x i32> @test_x86_ssse3_phsub_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_x86_ssse3_phsub_d_128: ; SSE: ## %bb.0: ; SSE-NEXT: phsubd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x06,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_phsub_d_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x06,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_phsub_d_128: +; AVX: ## %bb.0: +; AVX-NEXT: vphsubd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x06,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -134,12 +137,12 @@ define <8 x i16> @test_x86_ssse3_phsub_sw_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_ssse3_phsub_sw_128: ; SSE: ## %bb.0: ; SSE-NEXT: phsubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x07,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] 
+; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_phsub_sw_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x07,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_phsub_sw_128: +; AVX: ## %bb.0: +; AVX-NEXT: vphsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x07,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -150,12 +153,12 @@ define <8 x i16> @test_x86_ssse3_phsub_w_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_ssse3_phsub_w_128: ; SSE: ## %bb.0: ; SSE-NEXT: phsubw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x05,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_phsub_w_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vphsubw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x05,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_phsub_w_128: +; AVX: ## %bb.0: +; AVX-NEXT: vphsubw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x05,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -166,17 +169,17 @@ define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128: ; SSE: ## %bb.0: ; SSE-NEXT: pmaddubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x04,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_ssse3_pmadd_ub_sw_128: -; AVX2: ## %bb.0: -; AVX2-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x04,0xc1] -; AVX2-NEXT: retl ## encoding: [0xc3] +; AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x04,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; SKX-LABEL: test_x86_ssse3_pmadd_ub_sw_128: -; SKX: ## %bb.0: -; SKX-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] -; SKX-NEXT: retl ## encoding: [0xc3] +; AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpmaddubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -185,27 +188,46 @@ declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind ; Make sure we don't commute this operation. 
define <8 x i16> @test_x86_ssse3_pmadd_ub_sw_128_load_op0(<16 x i8>* %ptr, <16 x i8> %a1) { -; SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: -; SSE: ## %bb.0: -; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; SSE-NEXT: movdqa (%eax), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x08] -; SSE-NEXT: pmaddubsw %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x38,0x04,0xc8] -; SSE-NEXT: movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; X86-SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; X86-SSE: ## %bb.0: +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movdqa (%eax), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x08] +; X86-SSE-NEXT: pmaddubsw %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x38,0x04,0xc8] +; X86-SSE-NEXT: movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1] +; X86-SSE-NEXT: retl ## encoding: [0xc3] +; +; X86-AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; X86-AVX1: ## %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovdqa (%eax), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x08] +; X86-AVX1-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0x04,0xc0] +; X86-AVX1-NEXT: retl ## encoding: [0xc3] +; +; X86-AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; X86-AVX512: ## %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08] +; X86-AVX512-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0] +; X86-AVX512-NEXT: retl ## encoding: [0xc3] +; +; X64-SSE-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; X64-SSE: ## %bb.0: +; X64-SSE-NEXT: movdqa (%rdi), %xmm1 ## encoding: [0x66,0x0f,0x6f,0x0f] +; X64-SSE-NEXT: pmaddubsw %xmm0, %xmm1 ## encoding: [0x66,0x0f,0x38,0x04,0xc8] +; X64-SSE-NEXT: movdqa %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x6f,0xc1] +; X64-SSE-NEXT: retq ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: -; AVX2: ## %bb.0: -; AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; AVX2-NEXT: vmovdqa (%eax), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x08] -; AVX2-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0x04,0xc0] -; AVX2-NEXT: retl ## encoding: [0xc3] +; X64-AVX1-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; X64-AVX1: ## %bb.0: +; X64-AVX1-NEXT: vmovdqa (%rdi), %xmm1 ## encoding: [0xc5,0xf9,0x6f,0x0f] +; X64-AVX1-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0x71,0x04,0xc0] +; X64-AVX1-NEXT: retq ## encoding: [0xc3] ; -; SKX-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: -; SKX: ## %bb.0: -; SKX-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; SKX-NEXT: vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08] -; SKX-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0] -; SKX-NEXT: retl ## encoding: [0xc3] +; X64-AVX512-LABEL: test_x86_ssse3_pmadd_ub_sw_128_load_op0: +; X64-AVX512: ## %bb.0: +; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f] +; X64-AVX512-NEXT: vpmaddubsw %xmm0, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x04,0xc0] +; X64-AVX512-NEXT: retq ## encoding: [0xc3] %a0 = load <16 x i8>, <16 x i8>* %ptr %res = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x 
i8> %a0, <16 x i8> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res @@ -216,17 +238,17 @@ define <8 x i16> @test_x86_ssse3_pmul_hr_sw_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_ssse3_pmul_hr_sw_128: ; SSE: ## %bb.0: ; SSE-NEXT: pmulhrsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x0b,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_ssse3_pmul_hr_sw_128: -; AVX2: ## %bb.0: -; AVX2-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0b,0xc1] -; AVX2-NEXT: retl ## encoding: [0xc3] +; AVX1-LABEL: test_x86_ssse3_pmul_hr_sw_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0b,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; SKX-LABEL: test_x86_ssse3_pmul_hr_sw_128: -; SKX: ## %bb.0: -; SKX-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] -; SKX-NEXT: retl ## encoding: [0xc3] +; AVX512-LABEL: test_x86_ssse3_pmul_hr_sw_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpmulhrsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } @@ -237,17 +259,17 @@ define <16 x i8> @test_x86_ssse3_pshuf_b_128(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: test_x86_ssse3_pshuf_b_128: ; SSE: ## %bb.0: ; SSE-NEXT: pshufb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x00,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; AVX2-LABEL: test_x86_ssse3_pshuf_b_128: -; AVX2: ## %bb.0: -; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x00,0xc1] -; AVX2-NEXT: retl ## encoding: [0xc3] +; AVX1-LABEL: test_x86_ssse3_pshuf_b_128: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x00,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; SKX-LABEL: test_x86_ssse3_pshuf_b_128: -; SKX: ## %bb.0: -; SKX-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1] -; SKX-NEXT: retl ## encoding: [0xc3] +; AVX512-LABEL: test_x86_ssse3_pshuf_b_128: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -258,12 +280,12 @@ define <16 x i8> @test_x86_ssse3_psign_b_128(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: test_x86_ssse3_psign_b_128: ; SSE: ## %bb.0: ; SSE-NEXT: psignb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x08,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_psign_b_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vpsignb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x08,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_psign_b_128: +; AVX: ## %bb.0: +; AVX-NEXT: vpsignb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x08,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] ret <16 x i8> %res } @@ -274,12 +296,12 @@ define <4 x i32> @test_x86_ssse3_psign_d_128(<4 x i32> %a0, <4 x i32> %a1) { ; SSE-LABEL: test_x86_ssse3_psign_d_128: ; SSE: ## 
%bb.0: ; SSE-NEXT: psignd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x0a,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_psign_d_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vpsignd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0a,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_psign_d_128: +; AVX: ## %bb.0: +; AVX-NEXT: vpsignd %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x0a,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } @@ -290,12 +312,12 @@ define <8 x i16> @test_x86_ssse3_psign_w_128(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_ssse3_psign_w_128: ; SSE: ## %bb.0: ; SSE-NEXT: psignw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x38,0x09,0xc1] -; SSE-NEXT: retl ## encoding: [0xc3] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] ; -; VCHECK-LABEL: test_x86_ssse3_psign_w_128: -; VCHECK: ## %bb.0: -; VCHECK-NEXT: vpsignw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x09,0xc1] -; VCHECK-NEXT: retl ## encoding: [0xc3] +; AVX-LABEL: test_x86_ssse3_psign_w_128: +; AVX: ## %bb.0: +; AVX-NEXT: vpsignw %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x09,0xc1] +; AVX-NEXT: ret{{[l|q]}} ## encoding: [0xc3] %res = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] ret <8 x i16> %res } |
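Per the autogeneration NOTE at the top of each file, these CHECK blocks are not maintained by hand: they are rewritten by utils/update_llc_test_checks.py, which reruns every RUN line and emits assertions under the most specific prefix shared by runs that produce identical output. A typical invocation against one of the files touched here looks roughly like the following; the --llc-binary value and the build/bin path are assumptions about a local build tree, not something recorded in this commit:

  # Rerun all RUN lines and regenerate the CHECK lines in place
  python llvm/utils/update_llc_test_checks.py \
      --llc-binary=build/bin/llc \
      llvm/test/CodeGen/X86/ssse3-intrinsics-x86.ll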

