Diffstat (limited to 'llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll')
 llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll (-rw-r--r--) | 6156
 1 file changed, 3900 insertions(+), 2256 deletions(-)
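This update threads -show-mc-encoding through every RUN line, so the autogenerated FileCheck assertions now verify the exact instruction bytes as well as the assembly text. As a consequence, tests that previously shared a single AVX check block are split into separate AVX1 and AVX512 blocks: the AVX512 targets usually emit the same VEX bytes (annotated "EVEX TO VEX Compression" where the backend shrinks an EVEX-encodable instruction to its VEX form), while mask-register sequences such as compares into %k0 keep genuinely different, EVEX-only encodings.

A minimal sketch of how such assertions are regenerated; the script name comes from the test's own NOTE line, while the --llc-binary flag and build path are assumptions about a typical invocation:

    # Re-run the update script after editing the RUN lines (sketch):
    llvm/utils/update_llc_test_checks.py --llc-binary build/bin/llc \
        llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll

    # One of the RUN lines it replays, verbatim from the diff below:
    # llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown \
    #     -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE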
diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
index 31046e94c34..03acbaafe82 100644
--- a/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/sse2-intrinsics-fast-isel.ll
@@ -1,23 +1,28 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
-; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
-; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
-; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86,SSE,X86-SSE
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX1,X86-AVX1
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86,AVX,X86-AVX,AVX512,X86-AVX512
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X64,SSE,X64-SSE
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX1,X64-AVX1
+; RUN: llc < %s -show-mc-encoding -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64,AVX,X64-AVX,AVX512,X64-AVX512
 
 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/sse2-builtins.c
 
 define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_add_epi8:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfc,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_add_epi8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_add_epi8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfc,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_add_epi8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %res = add <16 x i8> %arg0, %arg1
@@ -28,13 +33,18 @@ define <2 x i64> @test_mm_add_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_add_epi16:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfd,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_add_epi16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_add_epi16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfd,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_add_epi16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = add <8 x i16> %arg0, %arg1
@@ -45,13 +55,18 @@ define <2 x i64> @test_mm_add_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_add_epi32:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfe,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_add_epi32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_add_epi32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_add_epi32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %res = add <4 x i32> %arg0, %arg1
@@ -62,13 +77,18 @@ define <2 x i64> @test_mm_add_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_add_epi64:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddq %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd4,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_add_epi64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_add_epi64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd4,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_add_epi64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = add <2 x i64> %a0, %a1
 ret <2 x i64> %res
 }
@@ -76,13 +96,18 @@ define <2 x i64> @test_mm_add_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_add_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: addpd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: addpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x58,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_add_pd:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_add_pd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x58,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_add_pd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = fadd <2 x double> %a0, %a1
 ret <2 x double> %res
 }
@@ -90,13 +115,18 @@ define <2 x double> @test_mm_add_pd(<2 x double> %a0, <2 x double> %a1) nounwind
 define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_add_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: addsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: addsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x58,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_add_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_add_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x58,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_add_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vaddsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x58,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %ext0 = extractelement <2 x double> %a0, i32 0
 %ext1 = extractelement <2 x double> %a1, i32 0
 %fadd = fadd double %ext0, %ext1
@@ -107,13 +137,18 @@ define <2 x double> @test_mm_add_sd(<2 x double> %a0, <2 x double> %a1) nounwind
 define <2 x i64> @test_mm_adds_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_adds_epi8:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddsb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xec,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_adds_epi8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_adds_epi8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xec,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_adds_epi8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %arg0, <16 x i8> %arg1)
@@ -125,13 +160,18 @@ declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone
 define <2 x i64> @test_mm_adds_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_adds_epi16:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddsw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xed,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_adds_epi16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddsw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_adds_epi16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xed,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_adds_epi16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %arg0, <8 x i16> %arg1)
@@ -143,13 +183,18 @@ declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone
 define <2 x i64> @test_mm_adds_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_adds_epu8:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddusb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdc,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_adds_epu8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddusb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_adds_epu8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdc,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_adds_epu8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdc,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %res = call <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8> %arg0, <16 x i8> %arg1)
@@ -161,13 +206,18 @@ declare <16 x i8> @llvm.x86.sse2.paddus.b(<16 x i8>, <16 x i8>) nounwind readnon
 define <2 x i64> @test_mm_adds_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_adds_epu16:
 ; SSE: # %bb.0:
-; SSE-NEXT: paddusw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: paddusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdd,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_adds_epu16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpaddusw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_adds_epu16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdd,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_adds_epu16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpaddusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdd,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %res = call <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16> %arg0, <8 x i16> %arg1)
@@ -179,13 +229,18 @@ declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnon
 define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_and_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: andps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_and_pd:
-; AVX: # %bb.0:
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_and_pd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_and_pd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
 %res = and <4 x i32> %arg0, %arg1
@@ -196,13 +251,18 @@ define <2 x double> @test_mm_and_pd(<2 x double> %a0, <2 x double> %a1) nounwind
 define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_and_si128:
 ; SSE: # %bb.0:
-; SSE-NEXT: andps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: andps %xmm1, %xmm0 # encoding: [0x0f,0x54,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_and_si128:
-; AVX: # %bb.0:
-; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_and_si128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_and_si128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vandps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x54,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = and <2 x i64> %a0, %a1
 ret <2 x i64> %res
 }
@@ -210,13 +270,18 @@ define <2 x i64> @test_mm_and_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_andnot_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: andnps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: andnps %xmm1, %xmm0 # encoding: [0x0f,0x55,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_andnot_pd:
-; AVX: # %bb.0:
-; AVX-NEXT: vandnps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_andnot_pd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_andnot_pd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vandnps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x55,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x double> %a0 to <4 x i32>
 %arg1 = bitcast <2 x double> %a1 to <4 x i32>
 %not = xor <4 x i32> %arg0, <i32 -1, i32 -1, i32 -1, i32 -1>
@@ -228,23 +293,23 @@ define <2 x double> @test_mm_andnot_pd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_andnot_si128:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqd %xmm2, %xmm2
-; SSE-NEXT: pxor %xmm2, %xmm0
-; SSE-NEXT: pand %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpeqd %xmm2, %xmm2 # encoding: [0x66,0x0f,0x76,0xd2]
+; SSE-NEXT: pxor %xmm2, %xmm0 # encoding: [0x66,0x0f,0xef,0xc2]
+; SSE-NEXT: pand %xmm1, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_andnot_si128:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
-; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0x76,0xd2]
+; AVX1-NEXT: vpxor %xmm2, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xef,0xc2]
+; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xdb,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_andnot_si128:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpternlogq $15, %xmm0, %xmm0, %xmm0 # encoding: [0x62,0xf3,0xfd,0x08,0x25,0xc0,0x0f]
+; AVX512-NEXT: vpand %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdb,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %not = xor <2 x i64> %a0, <i64 -1, i64 -1>
 %res = and <2 x i64> %not, %a1
 ret <2 x i64> %res
@@ -253,25 +318,27 @@ define <2 x i64> @test_mm_andnot_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_avg_epu8:
 ; SSE: # %bb.0:
-; SSE-NEXT: pavgb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pavgb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe0,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_avg_epu8:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpavgb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe0,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_avg_epu8:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
-; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0
-; AVX512-NEXT: vpmovwb %ymm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpmovzxbw %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc0]
+; AVX512-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
+; AVX512-NEXT: vpmovzxbw %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xc9]
+; AVX512-NEXT: # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
+; AVX512-NEXT: vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
+; AVX512-NEXT: vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
+; AVX512-NEXT: vpsrlw $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x71,0xd0,0x01]
+; AVX512-NEXT: vpmovwb %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %zext0 = zext <16 x i8> %arg0 to <16 x i16>
@@ -287,25 +354,27 @@ define <2 x i64> @test_mm_avg_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_avg_epu16:
 ; SSE: # %bb.0:
-; SSE-NEXT: pavgw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pavgw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe3,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_avg_epu16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe3,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_avg_epu16:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
-; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
-; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1
-; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0
-; AVX512-NEXT: vpmovdw %ymm0, %xmm0
-; AVX512-NEXT: vzeroupper
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpmovzxwd %xmm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc0]
+; AVX512-NEXT: # ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
+; AVX512-NEXT: vpmovzxwd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xc9]
+; AVX512-NEXT: # ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
+; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
+; AVX512-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0x76,0xc9]
+; AVX512-NEXT: vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
+; AVX512-NEXT: vpsrld $1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x72,0xd0,0x01]
+; AVX512-NEXT: vpmovdw %ymm0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x28,0x33,0xc0]
+; AVX512-NEXT: vzeroupper # encoding: [0xc5,0xf8,0x77]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %zext0 = zext <8 x i16> %arg0 to <8 x i32>
@@ -321,13 +390,21 @@ define <2 x i64> @test_mm_avg_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
 ; SSE-LABEL: test_mm_bslli_si128:
 ; SSE: # %bb.0:
-; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05]
+; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_bslli_si128:
-; AVX: # %bb.0:
-; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_bslli_si128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05]
+; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_bslli_si128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05]
+; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26>
 %bc = bitcast <16 x i8> %res to <2 x i64>
@@ -337,13 +414,21 @@ define <2 x i64> @test_mm_bslli_si128(<2 x i64> %a0) nounwind {
 define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
 ; SSE-LABEL: test_mm_bsrli_si128:
 ; SSE: # %bb.0:
-; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05]
+; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
-; AVX-LABEL: test_mm_bsrli_si128:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_bsrli_si128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05]
+; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_bsrli_si128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05]
+; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20>
 %bc = bitcast <16 x i8> %res to <2 x i64>
@@ -353,7 +438,7 @@ define <2 x i64> @test_mm_bsrli_si128(<2 x i64> %a0) nounwind {
 define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
 ; CHECK-LABEL: test_mm_castpd_ps:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = bitcast <2 x double> %a0 to <4 x float>
 ret <4 x float> %res
 }
@@ -361,7 +446,7 @@ define <4 x float> @test_mm_castpd_ps(<2 x double> %a0) nounwind {
 define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
 ; CHECK-LABEL: test_mm_castpd_si128:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = bitcast <2 x double> %a0 to <2 x i64>
 ret <2 x i64> %res
 }
@@ -369,7 +454,7 @@ define <2 x i64> @test_mm_castpd_si128(<2 x double> %a0) nounwind {
 define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
 ; CHECK-LABEL: test_mm_castps_pd:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = bitcast <4 x float> %a0 to <2 x double>
 ret <2 x double> %res
 }
@@ -377,7 +462,7 @@ define <2 x double> @test_mm_castps_pd(<4 x float> %a0) nounwind {
 define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
 ; CHECK-LABEL: test_mm_castps_si128:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = bitcast <4 x float> %a0 to <2 x i64>
 ret <2 x i64> %res
 }
@@ -385,7 +470,7 @@ define <2 x i64> @test_mm_castps_si128(<4 x float> %a0) nounwind {
 define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
 ; CHECK-LABEL: test_mm_castsi128_pd:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = bitcast <2 x i64> %a0 to <2 x double>
 ret <2 x double> %res
 }
@@ -393,7 +478,7 @@ define <2 x double> @test_mm_castsi128_pd(<2 x i64> %a0) nounwind {
 define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
 ; CHECK-LABEL: test_mm_castsi128_ps:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = bitcast <2 x i64> %a0 to <4 x float>
 ret <4 x float> %res
 }
@@ -401,14 +486,14 @@ define <4 x float> @test_mm_castsi128_ps(<2 x i64> %a0) nounwind {
 define void @test_mm_clflush(i8* %a0) nounwind {
 ; X86-LABEL: test_mm_clflush:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: clflush (%eax)
-; X86-NEXT: retl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: clflush (%eax) # encoding: [0x0f,0xae,0x38]
+; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_mm_clflush:
 ; X64: # %bb.0:
-; X64-NEXT: clflush (%rdi)
-; X64-NEXT: retq
+; X64-NEXT: clflush (%rdi) # encoding: [0x0f,0xae,0x3f]
+; X64-NEXT: retq # encoding: [0xc3]
 call void @llvm.x86.sse2.clflush(i8* %a0)
 ret void
 }
@@ -417,19 +502,19 @@ declare void @llvm.x86.sse2.clflush(i8*) nounwind readnone
 define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpeq_epi8:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpeqb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x74,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpeq_epi8:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x74,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpeq_epi8:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
+; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %cmp = icmp eq <16 x i8> %arg0, %arg1
@@ -441,19 +526,19 @@ define <2 x i64> @test_mm_cmpeq_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpeq_epi16:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpeqw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x75,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpeq_epi16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x75,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpeq_epi16:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
+; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %cmp = icmp eq <8 x i16> %arg0, %arg1
@@ -465,19 +550,19 @@ define <2 x i64> @test_mm_cmpeq_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpeq_epi32:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpeqd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x76,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpeq_epi32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x76,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpeq_epi32:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2d %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
+; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %cmp = icmp eq <4 x i32> %arg0, %arg1
@@ -489,19 +574,19 @@ define <2 x i64> @test_mm_cmpeq_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpeq_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpeqpd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpeqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x00]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpeq_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x00]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpeq_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpeqpd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x00]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp oeq <2 x double> %a0, %a1
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -511,13 +596,13 @@ define <2 x double> @test_mm_cmpeq_pd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x double> @test_mm_cmpeq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpeq_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpeqsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpeqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x00]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX-LABEL: test_mm_cmpeq_sd:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX-NEXT: vcmpeqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x00]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 0)
 ret <2 x double> %res
 }
@@ -526,20 +611,20 @@ declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8) nounw
 define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpge_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmplepd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmplepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x02]
+; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpge_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmplepd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmplepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x02]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpge_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmplepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x02]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp ole <2 x double> %a1, %a0
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -549,21 +634,24 @@ define <2 x double> @test_mm_cmpge_pd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpge_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmplesd %xmm0, %xmm1
-; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmplesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x02]
+; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
+; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpge_sd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmplesd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
+; AVX1-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
+; AVX1-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpge_sd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmplesd %xmm0, %xmm1, %xmm1
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmplesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x02]
+; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
+; AVX512-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 2)
 %ext0 = extractelement <2 x double> %cmp, i32 0
 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
@@ -575,19 +663,19 @@ define <2 x double> @test_mm_cmpge_sd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpgt_epi8:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpgtb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x64,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpgt_epi8:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x64,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpgt_epi8:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
+; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %cmp = icmp sgt <16 x i8> %arg0, %arg1
@@ -599,19 +687,19 @@ define <2 x i64> @test_mm_cmpgt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpgt_epi16:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpgtw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x65,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpgt_epi16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x65,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpgt_epi16:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
+; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %cmp = icmp sgt <8 x i16> %arg0, %arg1
@@ -623,19 +711,19 @@ define <2 x i64> @test_mm_cmpgt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpgt_epi32:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpgtd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x66,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpgt_epi32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x66,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpgt_epi32:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2d %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
+; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %cmp = icmp sgt <4 x i32> %arg0, %arg1
@@ -647,20 +735,20 @@ define <2 x i64> @test_mm_cmpgt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpgt_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpltpd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x01]
+; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpgt_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x01]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpgt_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x01]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp olt <2 x double> %a1, %a0
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -670,21 +758,24 @@ define <2 x double> @test_mm_cmpgt_pd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpgt_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpltsd %xmm0, %xmm1
-; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x01]
+; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
+; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpgt_sd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
+; AVX1-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
+; AVX1-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpgt_sd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x01]
+; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
+; AVX512-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 1)
 %ext0 = extractelement <2 x double> %cmp, i32 0
 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
@@ -696,19 +787,19 @@ define <2 x double> @test_mm_cmpgt_sd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmple_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmplepd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmplepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x02]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmple_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmplepd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmplepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x02]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmple_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmplepd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp ole <2 x double> %a0, %a1
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -718,13 +809,13 @@ define <2 x double> @test_mm_cmple_pd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmple_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmplesd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmplesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x02]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX-LABEL: test_mm_cmple_sd:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcmplesd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX-NEXT: vcmplesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x02]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 2)
 ret <2 x double> %res
 }
@@ -732,20 +823,20 @@ define <2 x double> @test_mm_cmple_sd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmplt_epi8:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtb %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpgtb %xmm0, %xmm1 # encoding: [0x66,0x0f,0x64,0xc8]
+; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmplt_epi8:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x64,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmplt_epi8:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k0
-; AVX512-NEXT: vpmovm2b %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpgtb %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc0]
+; AVX512-NEXT: vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
 %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
 %cmp = icmp sgt <16 x i8> %arg1, %arg0
@@ -757,20 +848,20 @@ define <2 x i64> @test_mm_cmplt_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmplt_epi16:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtw %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpgtw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x65,0xc8]
+; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmplt_epi16:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x65,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmplt_epi16:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0
-; AVX512-NEXT: vpmovm2w %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpgtw %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc0]
+; AVX512-NEXT: vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
 %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
 %cmp = icmp sgt <8 x i16> %arg1, %arg0
@@ -782,20 +873,20 @@ define <2 x i64> @test_mm_cmplt_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmplt_epi32:
 ; SSE: # %bb.0:
-; SSE-NEXT: pcmpgtd %xmm0, %xmm1
-; SSE-NEXT: movdqa %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: pcmpgtd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x66,0xc8]
+; SSE-NEXT: movdqa %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6f,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmplt_epi32:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x66,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmplt_epi32:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0
-; AVX512-NEXT: vpmovm2d %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vpcmpgtd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc0]
+; AVX512-NEXT: vpmovm2d %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %arg0 = bitcast <2 x i64> %a0 to <4 x i32>
 %arg1 = bitcast <2 x i64> %a1 to <4 x i32>
 %cmp = icmp sgt <4 x i32> %arg1, %arg0
@@ -807,19 +898,19 @@ define <2 x i64> @test_mm_cmplt_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
 define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmplt_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpltpd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x01]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmplt_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x01]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmplt_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpltpd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x01]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp olt <2 x double> %a0, %a1
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -829,13 +920,13 @@ define <2 x double> @test_mm_cmplt_pd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmplt_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpltsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x01]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX-LABEL: test_mm_cmplt_sd:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX-NEXT: vcmpltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x01]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 1)
 ret <2 x double> %res
 }
@@ -843,19 +934,19 @@ define <2 x double> @test_mm_cmplt_sd(<2 x double> %a0, <2 x double> %a1) nounwi
 define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpneq_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpneqpd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpneqpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x04]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpneq_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x04]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpneq_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpneqpd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpneqpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x04]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp une <2 x double> %a0, %a1
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -865,13 +956,13 @@ define <2 x double> @test_mm_cmpneq_pd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpneq_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpneqsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpneqsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x04]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX-LABEL: test_mm_cmpneq_sd:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x04]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 4)
 ret <2 x double> %res
 }
@@ -879,20 +970,20 @@ define <2 x double> @test_mm_cmpneq_sd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpnge_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnlepd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnlepd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x06]
+; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpnge_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x06]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpnge_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpnlepd %xmm0, %xmm1, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpnlepd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x06]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp ugt <2 x double> %a1, %a0
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -902,21 +993,24 @@ define <2 x double> @test_mm_cmpnge_pd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpnge_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnlesd %xmm0, %xmm1
-; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnlesd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x06]
+; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
+; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpnge_sd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
+; AVX1-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
+; AVX1-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpnge_sd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x06]
+; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
+; AVX512-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 6)
 %ext0 = extractelement <2 x double> %cmp, i32 0
 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
@@ -928,20 +1022,20 @@ define <2 x double> @test_mm_cmpnge_sd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpngt_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnltpd %xmm0, %xmm1
-; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnltpd %xmm0, %xmm1 # encoding: [0x66,0x0f,0xc2,0xc8,0x05]
+; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpngt_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnltpd %xmm0, %xmm1, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpnltpd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0xc2,0xc0,0x05]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpngt_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpnltpd %xmm0, %xmm1, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpnltpd %xmm0, %xmm1, %k0 # encoding: [0x62,0xf1,0xf5,0x08,0xc2,0xc0,0x05]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp uge <2 x double> %a1, %a0
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -951,21 +1045,24 @@ define <2 x double> @test_mm_cmpngt_pd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpngt_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnltsd %xmm0, %xmm1
-; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnltsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0xc2,0xc8,0x05]
+; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1]
+; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpngt_sd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1
-; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
+; AVX1-NEXT: vblendpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0d,0xc1,0x01]
+; AVX1-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpngt_sd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1
-; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpnltsd %xmm0, %xmm1, %xmm1 # encoding: [0xc5,0xf3,0xc2,0xc8,0x05]
+; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1]
+; AVX512-NEXT: # xmm0 = xmm1[0],xmm0[1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %cmp = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a1, <2 x double> %a0, i8 5)
 %ext0 = extractelement <2 x double> %cmp, i32 0
 %ins0 = insertelement <2 x double> undef, double %ext0, i32 0
@@ -977,19 +1074,19 @@ define <2 x double> @test_mm_cmpngt_sd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpnle_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnlepd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnlepd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x06]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpnle_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnlepd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpnlepd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x06]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpnle_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpnlepd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpnlepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x06]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp ugt <2 x double> %a0, %a1
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -999,13 +1096,13 @@ define <2 x double> @test_mm_cmpnle_pd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpnle_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnlesd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnlesd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x06]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX-LABEL: test_mm_cmpnle_sd:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcmpnlesd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX-NEXT: vcmpnlesd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x06]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 6)
 ret <2 x double> %res
 }
@@ -1013,19 +1110,19 @@ define <2 x double> @test_mm_cmpnle_sd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpnlt_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnltpd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnltpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x05]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpnlt_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpnltpd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpnltpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x05]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpnlt_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpnltpd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpnltpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x05]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp uge <2 x double> %a0, %a1
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -1035,13 +1132,13 @@ define <2 x double> @test_mm_cmpnlt_pd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpnlt_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpnltsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpnltsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x05]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX-LABEL: test_mm_cmpnlt_sd:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcmpnltsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX-NEXT: vcmpnltsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x05]
+; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 5)
 ret <2 x double> %res
 }
@@ -1049,19 +1146,19 @@ define <2 x double> @test_mm_cmpnlt_sd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpord_pd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpordpd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x07]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX1-LABEL: test_mm_cmpord_pd:
 ; AVX1: # %bb.0:
-; AVX1-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: ret{{[l|q]}}
+; AVX1-NEXT: vcmpordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x07]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
 ; AVX512-LABEL: test_mm_cmpord_pd:
 ; AVX512: # %bb.0:
-; AVX512-NEXT: vcmpordpd %xmm1, %xmm0, %k0
-; AVX512-NEXT: vpmovm2q %k0, %xmm0
-; AVX512-NEXT: ret{{[l|q]}}
+; AVX512-NEXT: vcmpordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x07]
+; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 %fcmp = fcmp ord <2 x double> %a0, %a1
 %sext = sext <2 x i1> %fcmp to <2 x i64>
 %res = bitcast <2 x i64> %sext to <2 x double>
@@ -1071,13 +1168,13 @@ define <2 x double> @test_mm_cmpord_pd(<2 x double> %a0, <2 x double> %a1) nounw
 define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
 ; SSE-LABEL: test_mm_cmpord_sd:
 ; SSE: # %bb.0:
-; SSE-NEXT: cmpordsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: cmpordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x07]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
 ;
; AVX-LABEL: test_mm_cmpord_sd: ; AVX: # %bb.0: -; AVX-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX-NEXT: vcmpordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x07] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 7) ret <2 x double> %res } @@ -1085,19 +1182,19 @@ define <2 x double> @test_mm_cmpord_sd(<2 x double> %a0, <2 x double> %a1) nounw define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_cmpunord_pd: ; SSE: # %bb.0: -; SSE-NEXT: cmpunordpd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cmpunordpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc2,0xc1,0x03] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX1-LABEL: test_mm_cmpunord_pd: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpunordpd %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: ret{{[l|q]}} +; AVX1-NEXT: vcmpunordpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc2,0xc1,0x03] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX512-LABEL: test_mm_cmpunord_pd: ; AVX512: # %bb.0: -; AVX512-NEXT: vcmpunordpd %xmm1, %xmm0, %k0 -; AVX512-NEXT: vpmovm2q %k0, %xmm0 -; AVX512-NEXT: ret{{[l|q]}} +; AVX512-NEXT: vcmpunordpd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x03] +; AVX512-NEXT: vpmovm2q %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x38,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %fcmp = fcmp uno <2 x double> %a0, %a1 %sext = sext <2 x i1> %fcmp to <2 x i64> %res = bitcast <2 x i64> %sext to <2 x double> @@ -1107,13 +1204,13 @@ define <2 x double> @test_mm_cmpunord_pd(<2 x double> %a0, <2 x double> %a1) nou define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_cmpunord_sd: ; SSE: # %bb.0: -; SSE-NEXT: cmpunordsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cmpunordsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0xc2,0xc1,0x03] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX-LABEL: test_mm_cmpunord_sd: ; AVX: # %bb.0: -; AVX-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX-NEXT: vcmpunordsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xc2,0xc1,0x03] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a0, <2 x double> %a1, i8 3) ret <2 x double> %res } @@ -1121,21 +1218,30 @@ define <2 x double> @test_mm_cmpunord_sd(<2 x double> %a0, <2 x double> %a1) nou define i32 @test_mm_comieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_comieq_sd: ; SSE: # %bb.0: -; SSE-NEXT: comisd %xmm1, %xmm0 -; SSE-NEXT: setnp %al -; SSE-NEXT: sete %cl -; SSE-NEXT: andb %al, %cl -; SSE-NEXT: movzbl %cl, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] +; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] +; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] +; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1] +; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_comieq_sd: -; AVX: # %bb.0: -; AVX-NEXT: vcomisd %xmm1, %xmm0 -; AVX-NEXT: setnp %al -; AVX-NEXT: sete %cl -; AVX-NEXT: andb %al, %cl -; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_comieq_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] +; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] +; AVX1-NEXT: andb %al, %cl # encoding: 
[0x20,0xc1] +; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_comieq_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0] +; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1] +; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1] +; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res } @@ -1144,17 +1250,24 @@ declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comige_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_comige_sd: ; SSE: # %bb.0: -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: comisd %xmm1, %xmm0 -; SSE-NEXT: setae %al -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] +; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_comige_sd: -; AVX: # %bb.0: -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: vcomisd %xmm1, %xmm0 -; AVX-NEXT: setae %al -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_comige_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_comige_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comige.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res } @@ -1163,17 +1276,24 @@ declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_comigt_sd: ; SSE: # %bb.0: -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: comisd %xmm1, %xmm0 -; SSE-NEXT: seta %al -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] +; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_comigt_sd: -; AVX: # %bb.0: -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: vcomisd %xmm1, %xmm0 -; AVX-NEXT: seta %al -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_comigt_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_comigt_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res } @@ -1182,17 +1302,24 @@ declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>) nounwind readno define i32 
@test_mm_comile_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_comile_sd: ; SSE: # %bb.0: -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: comisd %xmm0, %xmm1 -; SSE-NEXT: setae %al -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8] +; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_comile_sd: -; AVX: # %bb.0: -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: vcomisd %xmm0, %xmm1 -; AVX-NEXT: setae %al -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_comile_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8] +; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_comile_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] +; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comile.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res } @@ -1201,17 +1328,24 @@ declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_comilt_sd: ; SSE: # %bb.0: -; SSE-NEXT: xorl %eax, %eax -; SSE-NEXT: comisd %xmm0, %xmm1 -; SSE-NEXT: seta %al -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; SSE-NEXT: comisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2f,0xc8] +; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_comilt_sd: -; AVX: # %bb.0: -; AVX-NEXT: xorl %eax, %eax -; AVX-NEXT: vcomisd %xmm0, %xmm1 -; AVX-NEXT: seta %al -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_comilt_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX1-NEXT: vcomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2f,0xc8] +; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_comilt_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0] +; AVX512-NEXT: vcomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc8] +; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res } @@ -1220,21 +1354,30 @@ declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>) nounwind readno define i32 @test_mm_comineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_comineq_sd: ; SSE: # %bb.0: -; SSE-NEXT: comisd %xmm1, %xmm0 -; SSE-NEXT: setp %al -; SSE-NEXT: setne %cl -; SSE-NEXT: orb %al, %cl -; SSE-NEXT: movzbl %cl, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: comisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2f,0xc1] +; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] +; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] +; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1] +; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_comineq_sd: -; AVX: # %bb.0: -; AVX-NEXT: vcomisd %xmm1, %xmm0 -; AVX-NEXT: setp %al -; AVX-NEXT: setne %cl -; AVX-NEXT: orb %al, %cl 
-; AVX-NEXT: movzbl %cl, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_comineq_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vcomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] +; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] +; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1] +; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_comineq_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vcomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2f,0xc1] +; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0] +; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1] +; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1] +; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %a0, <2 x double> %a1) ret i32 %res } @@ -1243,13 +1386,18 @@ declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>) nounwind readn define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_cvtepi32_pd: ; SSE: # %bb.0: -; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtdq2pd %xmm0, %xmm0 # encoding: [0xf3,0x0f,0xe6,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtepi32_pd: -; AVX: # %bb.0: -; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtepi32_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtdq2pd %xmm0, %xmm0 # encoding: [0xc5,0xfa,0xe6,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtepi32_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtdq2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %ext = shufflevector <4 x i32> %arg0, <4 x i32> %arg0, <2 x i32> <i32 0, i32 1> %res = sitofp <2 x i32> %ext to <2 x double> @@ -1259,13 +1407,18 @@ define <2 x double> @test_mm_cvtepi32_pd(<2 x i64> %a0) nounwind { define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_cvtepi32_ps: ; SSE: # %bb.0: -; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtdq2ps %xmm0, %xmm0 # encoding: [0x0f,0x5b,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtepi32_ps: -; AVX: # %bb.0: -; AVX-NEXT: vcvtdq2ps %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtepi32_ps: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtdq2ps %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5b,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtepi32_ps: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtdq2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res = sitofp <4 x i32> %arg0 to <4 x float> ret <4 x float> %res @@ -1274,13 +1427,18 @@ define <4 x float> @test_mm_cvtepi32_ps(<2 x i64> %a0) nounwind { define <2 x i64> @test_mm_cvtpd_epi32(<2 x double> %a0) nounwind { ; SSE-LABEL: test_mm_cvtpd_epi32: ; SSE: # %bb.0: -; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtpd2dq %xmm0, %xmm0 # encoding: [0xf2,0x0f,0xe6,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtpd_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vcvtpd2dq %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtpd_epi32: +; AVX1: # %bb.0: +; 
AVX1-NEXT: vcvtpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xfb,0xe6,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtpd_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0xe6,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double> %a0) %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc @@ -1290,13 +1448,18 @@ declare <4 x i32> @llvm.x86.sse2.cvtpd2dq(<2 x double>) nounwind readnone define <4 x float> @test_mm_cvtpd_ps(<2 x double> %a0) nounwind { ; SSE-LABEL: test_mm_cvtpd_ps: ; SSE: # %bb.0: -; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtpd2ps %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5a,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtpd_ps: -; AVX: # %bb.0: -; AVX-NEXT: vcvtpd2ps %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtpd_ps: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtpd2ps %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5a,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtpd_ps: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtpd2ps %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5a,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double> %a0) ret <4 x float> %res } @@ -1305,13 +1468,18 @@ declare <4 x float> @llvm.x86.sse2.cvtpd2ps(<2 x double>) nounwind readnone define <2 x i64> @test_mm_cvtps_epi32(<4 x float> %a0) nounwind { ; SSE-LABEL: test_mm_cvtps_epi32: ; SSE: # %bb.0: -; SSE-NEXT: cvtps2dq %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtps2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0x5b,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtps_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vcvtps2dq %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtps_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtps2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5b,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtps_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5b,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float> %a0) %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc @@ -1321,13 +1489,18 @@ declare <4 x i32> @llvm.x86.sse2.cvtps2dq(<4 x float>) nounwind readnone define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind { ; SSE-LABEL: test_mm_cvtps_pd: ; SSE: # %bb.0: -; SSE-NEXT: cvtps2pd %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtps2pd %xmm0, %xmm0 # encoding: [0x0f,0x5a,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtps_pd: -; AVX: # %bb.0: -; AVX-NEXT: vcvtps2pd %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtps_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtps2pd %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x5a,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtps_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ext = shufflevector <4 x float> %a0, <4 x float> %a0, <2 x i32> <i32 0, i32 1> %res = fpext <2 x float> %ext to <2 x double> ret <2 x double> %res @@ -1336,31 +1509,43 @@ define <2 x double> @test_mm_cvtps_pd(<4 x float> %a0) nounwind { define double @test_mm_cvtsd_f64(<2 x 
double> %a0) nounwind { ; X86-SSE-LABEL: test_mm_cvtsd_f64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %ebp -; X86-SSE-NEXT: movl %esp, %ebp -; X86-SSE-NEXT: andl $-8, %esp -; X86-SSE-NEXT: subl $8, %esp -; X86-SSE-NEXT: movlps %xmm0, (%esp) -; X86-SSE-NEXT: fldl (%esp) -; X86-SSE-NEXT: movl %ebp, %esp -; X86-SSE-NEXT: popl %ebp -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_cvtsd_f64: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: pushl %ebp -; X86-AVX-NEXT: movl %esp, %ebp -; X86-AVX-NEXT: andl $-8, %esp -; X86-AVX-NEXT: subl $8, %esp -; X86-AVX-NEXT: vmovlps %xmm0, (%esp) -; X86-AVX-NEXT: fldl (%esp) -; X86-AVX-NEXT: movl %ebp, %esp -; X86-AVX-NEXT: popl %ebp -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] +; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] +; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] +; X86-SSE-NEXT: movlps %xmm0, (%esp) # encoding: [0x0f,0x13,0x04,0x24] +; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] +; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_cvtsd_f64: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] +; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] +; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] +; X86-AVX1-NEXT: vmovlps %xmm0, (%esp) # encoding: [0xc5,0xf8,0x13,0x04,0x24] +; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] +; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_cvtsd_f64: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] +; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] +; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] +; X86-AVX512-NEXT: vmovlps %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x04,0x24] +; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] +; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_mm_cvtsd_f64: ; X64: # %bb.0: -; X64-NEXT: retq +; X64-NEXT: retq # encoding: [0xc3] %res = extractelement <2 x double> %a0, i32 0 ret double %res } @@ -1368,13 +1553,18 @@ define double @test_mm_cvtsd_f64(<2 x double> %a0) nounwind { define i32 @test_mm_cvtsd_si32(<2 x double> %a0) nounwind { ; SSE-LABEL: test_mm_cvtsd_si32: ; SSE: # %bb.0: -; SSE-NEXT: cvtsd2si %xmm0, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2d,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtsd_si32: -; AVX: # %bb.0: -; AVX-NEXT: vcvtsd2si %xmm0, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtsd_si32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2d,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtsd_si32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2d,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %a0) ret i32 %res } @@ -1383,13 +1573,13 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x 
double>) nounwind readnone define <4 x float> @test_mm_cvtsd_ss(<4 x float> %a0, <2 x double> %a1) { ; SSE-LABEL: test_mm_cvtsd_ss: ; SSE: # %bb.0: -; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtsd2ss %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5a,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX-LABEL: test_mm_cvtsd_ss: ; AVX: # %bb.0: -; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX-NEXT: vcvtsd2ss %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0xc1] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ret <4 x float> %res } @@ -1398,25 +1588,25 @@ declare <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float>, <2 x double>) nounwind define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) { ; X86-SSE-LABEL: test_mm_cvtsd_ss_load: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: cvtsd2ss (%eax), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX-LABEL: test_mm_cvtsd_ss_load: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX-NEXT: vcvtsd2ss (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x00] +; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_cvtsd_ss_load: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: cvtsd2ss (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x5a,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX-LABEL: test_mm_cvtsd_ss_load: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX-NEXT: vcvtsd2ss (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5a,0x07] +; X64-AVX-NEXT: retq # encoding: [0xc3] %a1 = load <2 x double>, <2 x double>* %p1 %res = call <4 x float> @llvm.x86.sse2.cvtsd2ss(<4 x float> %a0, <2 x double> %a1) ret <4 x float> %res @@ -1425,13 +1615,18 @@ define <4 x float> @test_mm_cvtsd_ss_load(<4 x float> %a0, <2 x double>* %p1) { define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_cvtsi128_si32: ; SSE: # %bb.0: -; SSE-NEXT: movd %xmm0, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: movd %xmm0, %eax # encoding: [0x66,0x0f,0x7e,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtsi128_si32: -; AVX: # %bb.0: -; AVX-NEXT: vmovd %xmm0, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtsi128_si32: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovd %xmm0, %eax # encoding: [0xc5,0xf9,0x7e,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtsi128_si32: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovd %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7e,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res = extractelement <4 x i32> %arg0, i32 0 ret i32 %res @@ -1440,23 +1635,33 @@ define i32 @test_mm_cvtsi128_si32(<2 x i64> %a0) nounwind { define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind { ; X86-SSE-LABEL: test_mm_cvtsi32_sd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: cvtsi2sdl {{[0-9]+}}(%esp), %xmm0 # encoding: 
[0xf2,0x0f,0x2a,0x44,0x24,0x04] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_cvtsi32_sd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_cvtsi32_sd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_cvtsi32_sd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vcvtsi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0x44,0x24,0x04] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_cvtsi32_sd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: cvtsi2sdl %edi, %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: cvtsi2sdl %edi, %xmm0 # encoding: [0xf2,0x0f,0x2a,0xc7] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_cvtsi32_sd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_cvtsi32_sd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x2a,0xc7] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_cvtsi32_sd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vcvtsi2sdl %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2a,0xc7] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %cvt = sitofp i32 %a1 to double %res = insertelement <2 x double> %a0, double %cvt, i32 0 ret <2 x double> %res @@ -1465,23 +1670,36 @@ define <2 x double> @test_mm_cvtsi32_sd(<2 x double> %a0, i32 %a1) nounwind { define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind { ; X86-SSE-LABEL: test_mm_cvtsi32_si128: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_cvtsi32_si128: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_cvtsi32_si128: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_cvtsi32_si128: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_cvtsi32_si128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_cvtsi32_si128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_cvtsi32_si128: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_cvtsi32_si128: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 0, i32 1 
%res2 = insertelement <4 x i32> %res1, i32 0, i32 2 @@ -1493,13 +1711,18 @@ define <2 x i64> @test_mm_cvtsi32_si128(i32 %a0) nounwind { define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwind { ; SSE-LABEL: test_mm_cvtss_sd: ; SSE: # %bb.0: -; SSE-NEXT: cvtss2sd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvtss2sd %xmm1, %xmm0 # encoding: [0xf3,0x0f,0x5a,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvtss_sd: -; AVX: # %bb.0: -; AVX-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvtss_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5a,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvtss_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvtss2sd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5a,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ext = extractelement <4 x float> %a1, i32 0 %cvt = fpext float %ext to double %res = insertelement <2 x double> %a0, double %cvt, i32 0 @@ -1509,13 +1732,18 @@ define <2 x double> @test_mm_cvtss_sd(<2 x double> %a0, <4 x float> %a1) nounwin define <2 x i64> @test_mm_cvttpd_epi32(<2 x double> %a0) nounwind { ; SSE-LABEL: test_mm_cvttpd_epi32: ; SSE: # %bb.0: -; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvttpd2dq %xmm0, %xmm0 # encoding: [0x66,0x0f,0xe6,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvttpd_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvttpd_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttpd2dq %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe6,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvttpd_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvttpd2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe6,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double> %a0) %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc @@ -1525,13 +1753,18 @@ declare <4 x i32> @llvm.x86.sse2.cvttpd2dq(<2 x double>) nounwind readnone define <2 x i64> @test_mm_cvttps_epi32(<4 x float> %a0) nounwind { ; SSE-LABEL: test_mm_cvttps_epi32: ; SSE: # %bb.0: -; SSE-NEXT: cvttps2dq %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvttps2dq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x5b,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvttps_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vcvttps2dq %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvttps_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttps2dq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x5b,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvttps_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvttps2dq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float> %a0) %bc = bitcast <4 x i32> %res to <2 x i64> ret <2 x i64> %bc @@ -1541,13 +1774,18 @@ declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>) nounwind readnone define i32 @test_mm_cvttsd_si32(<2 x double> %a0) nounwind { ; SSE-LABEL: test_mm_cvttsd_si32: ; SSE: # %bb.0: -; SSE-NEXT: cvttsd2si %xmm0, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: cvttsd2si %xmm0, %eax # encoding: [0xf2,0x0f,0x2c,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_cvttsd_si32: -; AVX: 
# %bb.0: -; AVX-NEXT: vcvttsd2si %xmm0, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_cvttsd_si32: +; AVX1: # %bb.0: +; AVX1-NEXT: vcvttsd2si %xmm0, %eax # encoding: [0xc5,0xfb,0x2c,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_cvttsd_si32: +; AVX512: # %bb.0: +; AVX512-NEXT: vcvttsd2si %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x2c,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %a0) ret i32 %res } @@ -1556,13 +1794,18 @@ declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>) nounwind readnone define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_div_pd: ; SSE: # %bb.0: -; SSE-NEXT: divpd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: divpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5e,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_div_pd: -; AVX: # %bb.0: -; AVX-NEXT: vdivpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_div_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5e,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_div_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5e,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = fdiv <2 x double> %a0, %a1 ret <2 x double> %res } @@ -1570,13 +1813,18 @@ define <2 x double> @test_mm_div_pd(<2 x double> %a0, <2 x double> %a1) nounwind define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_div_sd: ; SSE: # %bb.0: -; SSE-NEXT: divsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: divsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5e,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_div_sd: -; AVX: # %bb.0: -; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_div_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5e,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_div_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vdivsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5e,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ext0 = extractelement <2 x double> %a0, i32 0 %ext1 = extractelement <2 x double> %a1, i32 0 %fdiv = fdiv double %ext0, %ext1 @@ -1587,15 +1835,21 @@ define <2 x double> @test_mm_div_sd(<2 x double> %a0, <2 x double> %a1) nounwind define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_extract_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pextrw $1, %xmm0, %eax -; SSE-NEXT: movzwl %ax, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pextrw $1, %xmm0, %eax # encoding: [0x66,0x0f,0xc5,0xc0,0x01] +; SSE-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_extract_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpextrw $1, %xmm0, %eax -; AVX-NEXT: movzwl %ax, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_extract_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpextrw $1, %xmm0, %eax # encoding: [0xc5,0xf9,0xc5,0xc0,0x01] +; AVX1-NEXT: movzwl %ax, %eax # encoding: [0x0f,0xb7,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_extract_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpextrw $1, %xmm0, %eax # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc5,0xc0,0x01] +; AVX512-NEXT: movzwl %ax, %eax 
# encoding: [0x0f,0xb7,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %ext = extractelement <8 x i16> %arg0, i32 1 %res = zext i16 %ext to i32 @@ -1605,25 +1859,36 @@ define i32 @test_mm_extract_epi16(<2 x i64> %a0) nounwind { define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind { ; X86-SSE-LABEL: test_mm_insert_epi16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: pinsrw $1, %eax, %xmm0 -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-SSE-NEXT: pinsrw $1, %eax, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc0,0x01] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_insert_epi16: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_insert_epi16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_insert_epi16: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_insert_epi16: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: pinsrw $1, %edi, %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: pinsrw $1, %edi, %xmm0 # encoding: [0x66,0x0f,0xc4,0xc7,0x01] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_insert_epi16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_insert_epi16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x01] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_insert_epi16: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vpinsrw $1, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x01] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = insertelement <8 x i16> %arg0, i16 %a1,i32 1 %bc = bitcast <8 x i16> %res to <2 x i64> @@ -1633,8 +1898,8 @@ define <2 x i64> @test_mm_insert_epi16(<2 x i64> %a0, i16 %a1) nounwind { define void @test_mm_lfence() nounwind { ; CHECK-LABEL: test_mm_lfence: ; CHECK: # %bb.0: -; CHECK-NEXT: lfence -; CHECK-NEXT: ret{{[l|q]}} +; CHECK-NEXT: lfence # encoding: [0x0f,0xae,0xe8] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] call void @llvm.x86.sse2.lfence() ret void } @@ -1643,25 +1908,36 @@ declare void @llvm.x86.sse2.lfence() nounwind readnone define <2 x double> @test_mm_load_pd(double* %a0) nounwind { ; X86-SSE-LABEL: test_mm_load_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movaps (%eax), %xmm0 -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_load_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovaps (%eax), %xmm0 -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_load_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl 
{{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_load_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_load_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps (%rdi), %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_load_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovaps (%rdi), %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_load_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_load_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %res = load <2 x double>, <2 x double>* %arg0, align 16 ret <2 x double> %res @@ -1670,25 +1946,42 @@ define <2 x double> @test_mm_load_pd(double* %a0) nounwind { define <2 x double> @test_mm_load_sd(double* %a0) nounwind { ; X86-SSE-LABEL: test_mm_load_sd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_load_sd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_load_sd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_load_sd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_load_sd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07] +; X64-SSE-NEXT: # xmm0 = mem[0],zero +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_load_sd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_load_sd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07] +; X64-AVX1-NEXT: # xmm0 = mem[0],zero +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_load_sd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-AVX512-NEXT: # xmm0 = mem[0],zero +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, 
double* %a0, align 1 %res0 = insertelement <2 x double> undef, double %ld, i32 0 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 @@ -1698,25 +1991,36 @@ define <2 x double> @test_mm_load_sd(double* %a0) nounwind { define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind { ; X86-SSE-LABEL: test_mm_load_si128: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movaps (%eax), %xmm0 -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movaps (%eax), %xmm0 # encoding: [0x0f,0x28,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_load_si128: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovaps (%eax), %xmm0 -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_load_si128: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovaps (%eax), %xmm0 # encoding: [0xc5,0xf8,0x28,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_load_si128: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_load_si128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps (%rdi), %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movaps (%rdi), %xmm0 # encoding: [0x0f,0x28,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_load_si128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovaps (%rdi), %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_load_si128: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovaps (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x28,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_load_si128: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res = load <2 x i64>, <2 x i64>* %a0, align 16 ret <2 x i64> %res } @@ -1724,27 +2028,46 @@ define <2 x i64> @test_mm_load_si128(<2 x i64>* %a0) nounwind { define <2 x double> @test_mm_load1_pd(double* %a0) nounwind { ; X86-SSE-LABEL: test_mm_load1_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_load1_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovddup (%eax), %xmm0 # encoding: [0xc5,0xfb,0x12,0x00] +; X86-AVX1-NEXT: # xmm0 = mem[0,0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_load1_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X86-AVX-NEXT: retl +; X86-AVX512-LABEL: test_mm_load1_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovddup (%eax), %xmm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xfb,0x12,0x00] +; X86-AVX512-NEXT: # xmm0 = mem[0,0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_load1_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07] +; X64-SSE-NEXT: # xmm0 = mem[0],zero +; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_load1_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_load1_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovddup (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x12,0x07] +; X64-AVX1-NEXT: # xmm0 = mem[0,0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_load1_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovddup (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0x07] +; X64-AVX512-NEXT: # xmm0 = mem[0,0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a0, align 8 %res0 = insertelement <2 x double> undef, double %ld, i32 0 %res1 = insertelement <2 x double> %res0, double %ld, i32 1 @@ -1754,25 +2077,42 @@ define <2 x double> @test_mm_load1_pd(double* %a0) nounwind { define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind { ; X86-SSE-LABEL: test_mm_loadh_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movhpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x16,0x00] +; X86-SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_loadh_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_loadh_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x00] +; X86-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_loadh_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovhpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x00] +; X86-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadh_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movhpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x16,0x07] +; X64-SSE-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_loadh_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_loadh_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x16,0x07] +; X64-AVX1-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_loadh_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovhpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x16,0x07] +; 
X64-AVX512-NEXT: # xmm0 = xmm0[0],mem[0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 1 ret <2 x double> %res @@ -1781,25 +2121,42 @@ define <2 x double> @test_mm_loadh_pd(<2 x double> %a0, double* %a1) nounwind { define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind { ; X86-SSE-LABEL: test_mm_loadl_epi64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movsd (%eax), %xmm0 # encoding: [0xf2,0x0f,0x10,0x00] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_loadl_epi64: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_loadl_epi64: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovsd (%eax), %xmm0 # encoding: [0xc5,0xfb,0x10,0x00] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_loadl_epi64: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovsd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x00] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadl_epi64: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movsd (%rdi), %xmm0 # encoding: [0xf2,0x0f,0x10,0x07] +; X64-SSE-NEXT: # xmm0 = mem[0],zero +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_loadl_epi64: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_loadl_epi64: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovsd (%rdi), %xmm0 # encoding: [0xc5,0xfb,0x10,0x07] +; X64-AVX1-NEXT: # xmm0 = mem[0],zero +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_loadl_epi64: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovsd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07] +; X64-AVX512-NEXT: # xmm0 = mem[0],zero +; X64-AVX512-NEXT: retq # encoding: [0xc3] %bc = bitcast <2 x i64>* %a1 to i64* %ld = load i64, i64* %bc, align 1 %res0 = insertelement <2 x i64> undef, i64 %ld, i32 0 @@ -1810,25 +2167,42 @@ define <2 x i64> @test_mm_loadl_epi64(<2 x i64> %a0, <2 x i64>* %a1) nounwind { define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind { ; X86-SSE-LABEL: test_mm_loadl_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movlpd (%eax), %xmm0 # encoding: [0x66,0x0f,0x12,0x00] +; X86-SSE-NEXT: # xmm0 = mem[0],xmm0[1] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_loadl_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_loadl_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: 
vmovlpd (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x00] +; X86-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_loadl_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovlpd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x00] +; X86-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadl_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlpd {{.*#+}} xmm0 = mem[0],xmm0[1] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movlpd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x12,0x07] +; X64-SSE-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_loadl_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovlpd {{.*#+}} xmm0 = mem[0],xmm0[1] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_loadl_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x12,0x07] +; X64-AVX1-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_loadl_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovlpd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x12,0x07] +; X64-AVX512-NEXT: # xmm0 = mem[0],xmm0[1] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ld = load double, double* %a1, align 8 %res = insertelement <2 x double> %a0, double %ld, i32 0 ret <2 x double> %res @@ -1837,27 +2211,44 @@ define <2 x double> @test_mm_loadl_pd(<2 x double> %a0, double* %a1) nounwind { define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind { ; X86-SSE-LABEL: test_mm_loadr_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movapd (%eax), %xmm0 -; X86-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movapd (%eax), %xmm0 # encoding: [0x66,0x0f,0x28,0x00] +; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] +; X86-SSE-NEXT: # xmm0 = xmm0[1,0] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_loadr_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_loadr_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vpermilpd $1, (%eax), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01] +; X86-AVX1-NEXT: # xmm0 = mem[1,0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_loadr_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpermilpd $1, (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x00,0x01] +; X86-AVX512-NEXT: # xmm0 = mem[1,0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadr_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movapd (%rdi), %xmm0 -; X64-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movapd (%rdi), %xmm0 # encoding: [0x66,0x0f,0x28,0x07] +; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] +; X64-SSE-NEXT: # xmm0 = xmm0[1,0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_loadr_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0] -; 
X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_loadr_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpermilpd $1, (%rdi), %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01] +; X64-AVX1-NEXT: # xmm0 = mem[1,0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_loadr_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vpermilpd $1, (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0x07,0x01] +; X64-AVX512-NEXT: # xmm0 = mem[1,0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %ld = load <2 x double>, <2 x double>* %arg0, align 16 %res = shufflevector <2 x double> %ld, <2 x double> undef, <2 x i32> <i32 1, i32 0> @@ -1867,25 +2258,36 @@ define <2 x double> @test_mm_loadr_pd(double* %a0) nounwind { define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind { ; X86-SSE-LABEL: test_mm_loadu_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movups (%eax), %xmm0 -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_loadu_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovups (%eax), %xmm0 -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_loadu_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_loadu_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadu_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movups (%rdi), %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_loadu_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovups (%rdi), %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_loadu_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_loadu_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %res = load <2 x double>, <2 x double>* %arg0, align 1 ret <2 x double> %res @@ -1894,25 +2296,36 @@ define <2 x double> @test_mm_loadu_pd(double* %a0) nounwind { define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind { ; X86-SSE-LABEL: test_mm_loadu_si128: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movups (%eax), %xmm0 -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movups (%eax), %xmm0 # encoding: [0x0f,0x10,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_loadu_si128: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovups (%eax), %xmm0 -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_loadu_si128: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: 
[0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovups (%eax), %xmm0 # encoding: [0xc5,0xf8,0x10,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_loadu_si128: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_loadu_si128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movups (%rdi), %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movups (%rdi), %xmm0 # encoding: [0x0f,0x10,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_loadu_si128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovups (%rdi), %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_loadu_si128: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovups (%rdi), %xmm0 # encoding: [0xc5,0xf8,0x10,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_loadu_si128: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res = load <2 x i64>, <2 x i64>* %a0, align 1 ret <2 x i64> %res } @@ -1920,13 +2333,18 @@ define <2 x i64> @test_mm_loadu_si128(<2 x i64>* %a0) nounwind { define <2 x i64> @test_mm_madd_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_madd_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pmaddwd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pmaddwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf5,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_madd_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_madd_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf5,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_madd_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmaddwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %arg0, <8 x i16> %arg1) @@ -1938,29 +2356,29 @@ declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnon define void @test_mm_maskmoveu_si128(<2 x i64> %a0, <2 x i64> %a1, i8* %a2) nounwind { ; X86-SSE-LABEL: test_mm_maskmoveu_si128: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: pushl %edi -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-SSE-NEXT: maskmovdqu %xmm1, %xmm0 -; X86-SSE-NEXT: popl %edi -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: pushl %edi # encoding: [0x57] +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08] +; X86-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1] +; X86-SSE-NEXT: popl %edi # encoding: [0x5f] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX-LABEL: test_mm_maskmoveu_si128: ; X86-AVX: # %bb.0: -; X86-AVX-NEXT: pushl %edi -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 -; X86-AVX-NEXT: popl %edi -; X86-AVX-NEXT: retl +; X86-AVX-NEXT: pushl %edi # encoding: [0x57] +; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %edi # encoding: [0x8b,0x7c,0x24,0x08] +; X86-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1] +; X86-AVX-NEXT: popl %edi # encoding: [0x5f] +; X86-AVX-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: 
test_mm_maskmoveu_si128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: maskmovdqu %xmm1, %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: maskmovdqu %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf7,0xc1] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX-LABEL: test_mm_maskmoveu_si128: ; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 -; X64-AVX-NEXT: retq +; X64-AVX-NEXT: vmaskmovdqu %xmm1, %xmm0 # encoding: [0xc5,0xf9,0xf7,0xc1] +; X64-AVX-NEXT: retq # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %arg0, <16 x i8> %arg1, i8* %a2) @@ -1971,13 +2389,18 @@ declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*) nounwind define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_max_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pmaxsw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pmaxsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xee,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_max_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_max_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xee,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_max_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmaxsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %cmp = icmp sgt <8 x i16> %arg0, %arg1 @@ -1989,13 +2412,18 @@ define <2 x i64> @test_mm_max_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_max_epu8: ; SSE: # %bb.0: -; SSE-NEXT: pmaxub %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pmaxub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xde,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_max_epu8: -; AVX: # %bb.0: -; AVX-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_max_epu8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xde,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_max_epu8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmaxub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xde,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %cmp = icmp ugt <16 x i8> %arg0, %arg1 @@ -2007,13 +2435,18 @@ define <2 x i64> @test_mm_max_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { define <2 x double> @test_mm_max_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_max_pd: ; SSE: # %bb.0: -; SSE-NEXT: maxpd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: maxpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5f,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_max_pd: -; AVX: # %bb.0: -; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_max_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5f,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_max_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vmaxpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5f,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # 
encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.max.pd(<2 x double> %a0, <2 x double> %a1) ret <2 x double> %res } @@ -2022,13 +2455,18 @@ declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_mm_max_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_max_sd: ; SSE: # %bb.0: -; SSE-NEXT: maxsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: maxsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5f,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_max_sd: -; AVX: # %bb.0: -; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_max_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5f,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_max_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vmaxsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5f,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a0, <2 x double> %a1) ret <2 x double> %res } @@ -2037,8 +2475,8 @@ declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind define void @test_mm_mfence() nounwind { ; CHECK-LABEL: test_mm_mfence: ; CHECK: # %bb.0: -; CHECK-NEXT: mfence -; CHECK-NEXT: ret{{[l|q]}} +; CHECK-NEXT: mfence # encoding: [0x0f,0xae,0xf0] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] call void @llvm.x86.sse2.mfence() ret void } @@ -2047,13 +2485,18 @@ declare void @llvm.x86.sse2.mfence() nounwind readnone define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_min_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pminsw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pminsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xea,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_min_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpminsw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_min_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xea,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_min_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpminsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %cmp = icmp slt <8 x i16> %arg0, %arg1 @@ -2065,13 +2508,18 @@ define <2 x i64> @test_mm_min_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind { define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_min_epu8: ; SSE: # %bb.0: -; SSE-NEXT: pminub %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pminub %xmm1, %xmm0 # encoding: [0x66,0x0f,0xda,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_min_epu8: -; AVX: # %bb.0: -; AVX-NEXT: vpminub %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_min_epu8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpminub %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xda,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_min_epu8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpminub %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xda,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %cmp = icmp ult <16 x i8> %arg0, %arg1 @@ -2083,13 +2531,18 @@ 
define <2 x i64> @test_mm_min_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { define <2 x double> @test_mm_min_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_min_pd: ; SSE: # %bb.0: -; SSE-NEXT: minpd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: minpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5d,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_min_pd: -; AVX: # %bb.0: -; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_min_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vminpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5d,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_min_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vminpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5d,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.min.pd(<2 x double> %a0, <2 x double> %a1) ret <2 x double> %res } @@ -2098,13 +2551,18 @@ declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind define <2 x double> @test_mm_min_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_min_sd: ; SSE: # %bb.0: -; SSE-NEXT: minsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: minsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5d,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_min_sd: -; AVX: # %bb.0: -; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_min_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vminsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5d,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_min_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vminsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5d,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a0, <2 x double> %a1) ret <2 x double> %res } @@ -2113,13 +2571,21 @@ declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_move_epi64: ; SSE: # %bb.0: -; SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0] +; SSE-NEXT: # xmm0 = xmm0[0],zero +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_move_epi64: -; AVX: # %bb.0: -; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_move_epi64: +; AVX1: # %bb.0: +; AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] +; AVX1-NEXT: # xmm0 = xmm0[0],zero +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_move_epi64: +; AVX512: # %bb.0: +; AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] +; AVX512-NEXT: # xmm0 = xmm0[0],zero +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2> ret <2 x i64> %res } @@ -2127,18 +2593,21 @@ define <2 x i64> @test_mm_move_epi64(<2 x i64> %a0) nounwind { define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_move_sd: ; SSE: # %bb.0: -; SSE-NEXT: movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: movsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x10,0xc1] +; SSE-NEXT: # xmm0 = xmm1[0],xmm0[1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX1-LABEL: test_mm_move_sd: 
; AVX1: # %bb.0: -; AVX1-NEXT: vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3] -; AVX1-NEXT: ret{{[l|q]}} +; AVX1-NEXT: vblendps $3, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x03] +; AVX1-NEXT: # xmm0 = xmm1[0,1],xmm0[2,3] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX512-LABEL: test_mm_move_sd: ; AVX512: # %bb.0: -; AVX512-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1] -; AVX512-NEXT: ret{{[l|q]}} +; AVX512-NEXT: vmovsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0xc1] +; AVX512-NEXT: # xmm0 = xmm1[0],xmm0[1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ext0 = extractelement <2 x double> %a1, i32 0 %res0 = insertelement <2 x double> undef, double %ext0, i32 0 %ext1 = extractelement <2 x double> %a0, i32 1 @@ -2149,13 +2618,13 @@ define <2 x double> @test_mm_move_sd(<2 x double> %a0, <2 x double> %a1) nounwin define i32 @test_mm_movemask_epi8(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_movemask_epi8: ; SSE: # %bb.0: -; SSE-NEXT: pmovmskb %xmm0, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pmovmskb %xmm0, %eax # encoding: [0x66,0x0f,0xd7,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX-LABEL: test_mm_movemask_epi8: ; AVX: # %bb.0: -; AVX-NEXT: vpmovmskb %xmm0, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX-NEXT: vpmovmskb %xmm0, %eax # encoding: [0xc5,0xf9,0xd7,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %res = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %arg0) ret i32 %res @@ -2165,42 +2634,58 @@ declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone define i32 @test_mm_movemask_pd(<2 x double> %a0) nounwind { ; SSE-LABEL: test_mm_movemask_pd: ; SSE: # %bb.0: -; SSE-NEXT: movmskpd %xmm0, %eax -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: movmskpd %xmm0, %eax # encoding: [0x66,0x0f,0x50,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX-LABEL: test_mm_movemask_pd: ; AVX: # %bb.0: -; AVX-NEXT: vmovmskpd %xmm0, %eax -; AVX-NEXT: ret{{[l|q]}} +; AVX-NEXT: vmovmskpd %xmm0, %eax # encoding: [0xc5,0xf9,0x50,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0) ret i32 %res } declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind { -; SSE-LABEL: test_mm_mul_epu32: -; SSE: # %bb.0: -; SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0] -; SSE-NEXT: pand %xmm2, %xmm0 -; SSE-NEXT: pand %xmm2, %xmm1 -; SSE-NEXT: pmuludq %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; X86-SSE-LABEL: test_mm_mul_epu32: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0] +; X86-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A] +; X86-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2] +; X86-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca] +; X86-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; AVX1-LABEL: test_mm_mul_epu32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] -; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] -; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 -; AVX1-NEXT: ret{{[l|q]}} +; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 # encoding: [0xc5,0xe9,0xef,0xd2] +; AVX1-NEXT: vpblendw $204, %xmm2, %xmm0, %xmm0 # encoding: 
[0xc4,0xe3,0x79,0x0e,0xc2,0xcc] +; AVX1-NEXT: # xmm0 = xmm0[0,1],xmm2[2,3],xmm0[4,5],xmm2[6,7] +; AVX1-NEXT: vpblendw $204, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x0e,0xca,0xcc] +; AVX1-NEXT: # xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7] +; AVX1-NEXT: vpmuludq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf4,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX512-LABEL: test_mm_mul_epu32: ; AVX512: # %bb.0: -; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] -; AVX512-NEXT: vpblendd {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] -; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 -; AVX512-NEXT: ret{{[l|q]}} +; AVX512-NEXT: vpxor %xmm2, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xd2] +; AVX512-NEXT: vpblendd $10, %xmm2, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc2,0x0a] +; AVX512-NEXT: # xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] +; AVX512-NEXT: vpblendd $10, %xmm2, %xmm1, %xmm1 # encoding: [0xc4,0xe3,0x71,0x02,0xca,0x0a] +; AVX512-NEXT: # xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] +; AVX512-NEXT: vpmullq %xmm1, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x40,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; X64-SSE-LABEL: test_mm_mul_epu32: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: movdqa {{.*#+}} xmm2 = [4294967295,0,4294967295,0] +; X64-SSE-NEXT: # encoding: [0x66,0x0f,0x6f,0x15,A,A,A,A] +; X64-SSE-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte +; X64-SSE-NEXT: pand %xmm2, %xmm0 # encoding: [0x66,0x0f,0xdb,0xc2] +; X64-SSE-NEXT: pand %xmm2, %xmm1 # encoding: [0x66,0x0f,0xdb,0xca] +; X64-SSE-NEXT: pmuludq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf4,0xc1] +; X64-SSE-NEXT: retq # encoding: [0xc3] %A = and <2 x i64> %a0, <i64 4294967295, i64 4294967295> %B = and <2 x i64> %a1, <i64 4294967295, i64 4294967295> %res = mul nuw <2 x i64> %A, %B @@ -2210,13 +2695,18 @@ define <2 x i64> @test_mm_mul_epu32(<2 x i64> %a0, <2 x i64> %a1) nounwind { define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_mul_pd: ; SSE: # %bb.0: -; SSE-NEXT: mulpd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: mulpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x59,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_mul_pd: -; AVX: # %bb.0: -; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_mul_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x59,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_mul_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x59,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = fmul <2 x double> %a0, %a1 ret <2 x double> %res } @@ -2224,13 +2714,18 @@ define <2 x double> @test_mm_mul_pd(<2 x double> %a0, <2 x double> %a1) nounwind define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_mul_sd: ; SSE: # %bb.0: -; SSE-NEXT: mulsd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: mulsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x59,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_mul_sd: -; AVX: # %bb.0: -; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_mul_sd: +; AVX1: # %bb.0: +; AVX1-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x59,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: 
test_mm_mul_sd: +; AVX512: # %bb.0: +; AVX512-NEXT: vmulsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x59,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ext0 = extractelement <2 x double> %a0, i32 0 %ext1 = extractelement <2 x double> %a1, i32 0 %fmul = fmul double %ext0, %ext1 @@ -2241,13 +2736,18 @@ define <2 x double> @test_mm_mul_sd(<2 x double> %a0, <2 x double> %a1) nounwind define <2 x i64> @test_mm_mulhi_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_mulhi_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pmulhw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pmulhw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe5,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_mulhi_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_mulhi_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe5,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_mulhi_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmulhw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %arg0, <8 x i16> %arg1) @@ -2259,13 +2759,18 @@ declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_mulhi_epu16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_mulhi_epu16: ; SSE: # %bb.0: -; SSE-NEXT: pmulhuw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pmulhuw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe4,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_mulhi_epu16: -; AVX: # %bb.0: -; AVX-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_mulhi_epu16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe4,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_mulhi_epu16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmulhuw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %arg0, <8 x i16> %arg1) @@ -2277,13 +2782,18 @@ declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnon define <2 x i64> @test_mm_mullo_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_mullo_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pmullw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pmullw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd5,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_mullo_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpmullw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_mullo_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd5,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_mullo_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = mul <8 x i16> %arg0, %arg1 @@ -2294,13 +2804,18 @@ define <2 x i64> @test_mm_mullo_epi16(<2 x i64> 
%a0, <2 x i64> %a1) { define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_or_pd: ; SSE: # %bb.0: -; SSE-NEXT: orps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_or_pd: -; AVX: # %bb.0: -; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_or_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_or_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x double> %a0 to <4 x i32> %arg1 = bitcast <2 x double> %a1 to <4 x i32> %res = or <4 x i32> %arg0, %arg1 @@ -2311,13 +2826,18 @@ define <2 x double> @test_mm_or_pd(<2 x double> %a0, <2 x double> %a1) nounwind define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_or_si128: ; SSE: # %bb.0: -; SSE-NEXT: orps %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: orps %xmm1, %xmm0 # encoding: [0x0f,0x56,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_or_si128: -; AVX: # %bb.0: -; AVX-NEXT: vorps %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_or_si128: +; AVX1: # %bb.0: +; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_or_si128: +; AVX512: # %bb.0: +; AVX512-NEXT: vorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x56,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = or <2 x i64> %a0, %a1 ret <2 x i64> %res } @@ -2325,13 +2845,18 @@ define <2 x i64> @test_mm_or_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind { define <2 x i64> @test_mm_packs_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_packs_epi16: ; SSE: # %bb.0: -; SSE-NEXT: packsswb %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: packsswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x63,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_packs_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_packs_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x63,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_packs_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -2343,13 +2868,18 @@ declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind rea define <2 x i64> @test_mm_packs_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_packs_epi32: ; SSE: # %bb.0: -; SSE-NEXT: packssdw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: packssdw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6b,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_packs_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_packs_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # 
encoding: [0xc5,0xf9,0x6b,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_packs_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %res = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %arg0, <4 x i32> %arg1) @@ -2361,13 +2891,18 @@ declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind rea define <2 x i64> @test_mm_packus_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_packus_epi16: ; SSE: # %bb.0: -; SSE-NEXT: packuswb %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: packuswb %xmm1, %xmm0 # encoding: [0x66,0x0f,0x67,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_packus_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_packus_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x67,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_packus_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %arg0, <8 x i16> %arg1) @@ -2379,8 +2914,8 @@ declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind rea define void @test_mm_pause() nounwind { ; CHECK-LABEL: test_mm_pause: ; CHECK: # %bb.0: -; CHECK-NEXT: pause -; CHECK-NEXT: ret{{[l|q]}} +; CHECK-NEXT: pause # encoding: [0xf3,0x90] +; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3] call void @llvm.x86.sse2.pause() ret void } @@ -2389,13 +2924,18 @@ declare void @llvm.x86.sse2.pause() nounwind readnone define <2 x i64> @test_mm_sad_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind { ; SSE-LABEL: test_mm_sad_epu8: ; SSE: # %bb.0: -; SSE-NEXT: psadbw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psadbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf6,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_sad_epu8: -; AVX: # %bb.0: -; AVX-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_sad_epu8: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf6,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_sad_epu8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsadbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf6,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %arg1 = bitcast <2 x i64> %a1 to <16 x i8> %res = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %arg0, <16 x i8> %arg1) @@ -2406,177 +2946,279 @@ declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { ; X86-SSE-LABEL: test_mm_set_epi8: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm1 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm3 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm1 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm3 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm4 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_set_epi8: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; 
X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; 
X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] +; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_set_epi8: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] +; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: 
[0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] +; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] +; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] +; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] +; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] +; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set_epi8: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x40] +; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] +; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # 
encoding: [0x0f,0xb6,0x44,0x24,0x2c] +; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] +; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] +; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] +; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set_epi8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movzbl %dil, %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl %sil, %eax -; X64-SSE-NEXT: movd %eax, %xmm1 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64-SSE-NEXT: movzbl %dl, %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl %cl, %eax -; X64-SSE-NEXT: movd %eax, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X64-SSE-NEXT: movzbl %r8b, %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl %r9b, %eax -; X64-SSE-NEXT: movd %eax, %xmm3 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm1 -; X64-SSE-NEXT: 
punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm3 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm4 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: test_mm_set_epi8: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vmovd %eax, %xmm0 -; X64-AVX-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %r9b, %eax -; X64-AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %r8b, %eax -; X64-AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %cl, %eax -; X64-AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %dl, %eax -; X64-AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %sil, %eax -; X64-AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %dil, %eax -; X64-AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; 
X64-AVX-NEXT: retq +; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X64-SSE-NEXT: # xmm3 = 
xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] +; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] +; +; X64-AVX1-LABEL: test_mm_set_epi8: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] +; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X64-AVX1-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: 
[0x0f,0xb6,0x44,0x24,0x08] +; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X64-AVX1-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set_epi8: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb6,0x54,0x24,0x48] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] +; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X64-AVX512-NEXT: vpinsrb $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc3,0x79,0x20,0xc2,0x01] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X64-AVX512-NEXT: 
vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X64-AVX512-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a15, i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a14, i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a13, i32 2 @@ -2600,85 +3242,133 @@ define <2 x i64> @test_mm_set_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { ; X86-SSE-LABEL: test_mm_set_epi16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm1 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm3 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm4 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm5 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm6 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm7 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_set_epi16: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovd %eax, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: 
[0x0f,0xb7,0x44,0x24,0x04] +; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] +; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] +; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] +; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] +; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3] +; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2] +; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5] +; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] +; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] +; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] +; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_set_epi16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] +; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] +; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] +; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] +; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] +; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X86-AVX1-NEXT: movzwl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set_epi16: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] +; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] +; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] +; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] +; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] +; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set_epi16: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d -; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: movd %esi, %xmm1 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-SSE-NEXT: movd %edx, %xmm0 -; X64-SSE-NEXT: movd %ecx, %xmm2 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X64-SSE-NEXT: movd %r8d, %xmm0 -; X64-SSE-NEXT: movd %r9d, %xmm1 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-SSE-NEXT: movd %eax, %xmm3 -; X64-SSE-NEXT: movd %r10d, %xmm0 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: test_mm_set_epi16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d -; X64-AVX-NEXT: vmovd %eax, %xmm0 -; X64-AVX-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; 
X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] +; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] +; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-SSE-NEXT: movd %edx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc2] +; X64-SSE-NEXT: movd %ecx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd1] +; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-SSE-NEXT: movd %r8d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movd %r9d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xc9] +; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X64-SSE-NEXT: movd %r10d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc2] +; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] +; +; X64-AVX1-LABEL: test_mm_set_epi16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] +; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X64-AVX1-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] +; X64-AVX1-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] +; X64-AVX1-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] +; X64-AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X64-AVX1-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x05] +; X64-AVX1-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x06] +; X64-AVX1-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set_epi16: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] +; X64-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X64-AVX512-NEXT: vpinsrw $1, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x01] +; X64-AVX512-NEXT: vpinsrw $2, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x02] +; X64-AVX512-NEXT: vpinsrw $3, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x03] +; X64-AVX512-NEXT: vpinsrw $4, %ecx, %xmm0, 
%xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04] +; X64-AVX512-NEXT: vpinsrw $5, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x05] +; X64-AVX512-NEXT: vpinsrw $6, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x06] +; X64-AVX512-NEXT: vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a7, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a6, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a5, i32 2 @@ -2694,41 +3384,69 @@ define <2 x i64> @test_mm_set_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { ; X86-SSE-LABEL: test_mm_set_epi32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_set_epi32: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x08] +; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x0c] +; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_set_epi32: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01] +; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02] +; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set_epi32: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x10] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x01] +; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x02] +; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x03] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set_epi32: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: movd %esi, %xmm1 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-SSE-NEXT: movd %edx, %xmm2 -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: test_mm_set_epi32: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovd %ecx, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: movd %esi, %xmm1 # encoding: [0x66,0x0f,0x6e,0xce] +; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-SSE-NEXT: movd %edx, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd2] +; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] +; +; X64-AVX1-LABEL: test_mm_set_epi32: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X64-AVX1-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] +; X64-AVX1-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] +; X64-AVX1-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set_epi32: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X64-AVX512-NEXT: vpinsrd $1, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x01] +; X64-AVX512-NEXT: vpinsrd $2, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x02] +; X64-AVX512-NEXT: vpinsrd $3, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a3, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a2, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a1, i32 2 @@ -2742,36 +3460,63 @@ define <2 x i64> @test_mm_set_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { ; X86-SSE-LABEL: test_mm_set_epi64x: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; 
X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_set_epi64x: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x04] +; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x08] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x0c] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x10] +; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_set_epi64x: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01] +; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02] +; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set_epi64x: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x0c] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x01] +; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x04,0x02] +; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x03] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set_epi64x: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq %rdi, %xmm1 -; X64-SSE-NEXT: movq %rsi, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movq %rdi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xcf] +; X64-SSE-NEXT: movq %rsi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc6] +; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_set_epi64x: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovq %rdi, %xmm0 -; X64-AVX-NEXT: vmovq %rsi, %xmm1 -; X64-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_set_epi64x: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] +; X64-AVX1-NEXT: vmovq %rsi, %xmm1 
# encoding: [0xc4,0xe1,0xf9,0x6e,0xce] +; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set_epi64x: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovq %rdi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] +; X64-AVX512-NEXT: vmovq %rsi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xce] +; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a1, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 ret <2 x i64> %res1 @@ -2780,28 +3525,52 @@ define <2 x i64> @test_mm_set_epi64x(i64 %a0, i64 %a1) nounwind { define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind { ; X86-SSE-LABEL: test_mm_set_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_set_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X86-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x0c] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x04] +; X86-SSE-NEXT: # xmm1 = mem[0],zero +; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_set_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] +; X86-AVX1-NEXT: # xmm1 = mem[0],zero +; X86-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] +; X86-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] +; X86-AVX512-NEXT: # xmm1 = mem[0],zero +; X86-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] +; X86-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] -; X64-SSE-NEXT: movaps %xmm1, %xmm0 -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movlhps %xmm0, %xmm1 # encoding: [0x0f,0x16,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0] +; X64-SSE-NEXT: movaps %xmm1, %xmm0 # encoding: [0x0f,0x28,0xc1] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_set_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_set_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # 
encoding: [0xc5,0xf0,0x16,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a1, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -2810,25 +3579,45 @@ define <2 x double> @test_mm_set_pd(double %a0, double %a1) nounwind { define <2 x double> @test_mm_set_pd1(double %a0) nounwind { ; X86-SSE-LABEL: test_mm_set_pd1: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_set_pd1: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_set_pd1: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set_pd1: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set_pd1: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_set_pd1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_set_pd1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set_pd1: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -2837,25 +3626,45 @@ define <2 x double> @test_mm_set_pd1(double %a0) nounwind { define <2 x double> @test_mm_set_sd(double %a0) nounwind { ; X86-SSE-LABEL: test_mm_set_sd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x7e,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: movq %xmm0, %xmm0 # 
encoding: [0xf3,0x0f,0x7e,0xc0] +; X86-SSE-NEXT: # xmm0 = xmm0[0],zero +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_set_sd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_set_sd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] +; X86-AVX1-NEXT: # xmm0 = xmm0[0],zero +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set_sd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovq {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x44,0x24,0x04] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; X86-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] +; X86-AVX512-NEXT: # xmm0 = xmm0[0],zero +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set_sd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movq %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x7e,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[0],zero +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_set_sd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_set_sd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovq %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x7e,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm0[0],zero +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set_sd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm0[0],zero +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double 0.0, i32 1 ret <2 x double> %res1 @@ -2864,48 +3673,54 @@ define <2 x double> @test_mm_set_sd(double %a0) nounwind { define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind { ; X86-SSE-LABEL: test_mm_set1_epi8: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X86-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_set1_epi8: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX1-NEXT: vmovd %eax, %xmm0 -; X86-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X86-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; X86-AVX1-NEXT: retl +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; 
X86-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] +; X86-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1] +; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_set1_epi8: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 -; X86-AVX512-NEXT: retl +; X86-AVX512-NEXT: movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpbroadcastb %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set1_epi8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movzbl %dil, %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] -; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm0 # encoding: [0x66,0x0f,0x60,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] +; X64-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_set1_epi8: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: movzbl %dil, %eax -; X64-AVX1-NEXT: vmovd %eax, %xmm0 -; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 -; X64-AVX1-NEXT: retq +; X64-AVX1-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X64-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X64-AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0xef,0xc9] +; X64-AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x00,0xc1] +; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_set1_epi8: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 -; X64-AVX512-NEXT: retq +; X64-AVX512-NEXT: vpbroadcastb %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc7] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a0, i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a0, i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a0, i32 2 @@ -2929,44 +3744,52 @@ define <2 x i64> @test_mm_set1_epi8(i8 %a0) nounwind { define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind { ; X86-SSE-LABEL: test_mm_set1_epi16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_set1_epi16: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX1-NEXT: vmovd %eax, %xmm0 -; X86-AVX1-NEXT: 
vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] -; X86-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X86-AVX1-NEXT: retl +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-AVX1-NEXT: vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X86-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_set1_epi16: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX512-NEXT: vpbroadcastw %eax, %xmm0 -; X86-AVX512-NEXT: retl +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpbroadcastw %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set1_epi16: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: pshuflw $224, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0xe0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_set1_epi16: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovd %edi, %xmm0 -; X64-AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,2,3,4,5,6,7] -; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X64-AVX1-NEXT: retq +; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X64-AVX1-NEXT: vpshuflw $224, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0xe0] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,2,3,4,5,6,7] +; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_set1_epi16: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 -; X64-AVX512-NEXT: retq +; X64-AVX512-NEXT: vpbroadcastw %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc7] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a0, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a0, i32 2 @@ -2982,38 +3805,44 @@ define <2 x i64> @test_mm_set1_epi16(i16 %a0) nounwind { define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind { ; X86-SSE-LABEL: test_mm_set1_epi32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_set1_epi32: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X86-AVX1-NEXT: retl +; X86-AVX1-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfa,0x10,0x44,0x24,0x04] +; 
X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_set1_epi32: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX512-NEXT: vpbroadcastd %eax, %xmm0 -; X86-AVX512-NEXT: retl +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpbroadcastd %eax, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set1_epi32: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_set1_epi32: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovd %edi, %xmm0 -; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; X64-AVX1-NEXT: retq +; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X64-AVX1-NEXT: vpshufd $0, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x00] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_set1_epi32: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 -; X64-AVX512-NEXT: retq +; X64-AVX512-NEXT: vpbroadcastd %edi, %xmm0 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc7] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a0, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a0, i32 2 @@ -3027,45 +3856,52 @@ define <2 x i64> @test_mm_set1_epi32(i32 %a0) nounwind { define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind { ; X86-SSE-LABEL: test_mm_set1_epi64x: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm0 # encoding: [0x66,0x0f,0x6e,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movd {{[0-9]+}}(%esp), %xmm1 # encoding: [0x66,0x0f,0x6e,0x4c,0x24,0x08] +; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44] +; X86-SSE-NEXT: # xmm0 = xmm0[0,1,0,1] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; ; X86-AVX1-LABEL: test_mm_set1_epi64x: ; X86-AVX1: # %bb.0: -; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-AVX1-NEXT: vmovd %ecx, %xmm0 -; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 -; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 -; X86-AVX1-NEXT: retl +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08] +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04] +; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X86-AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: 
[0xc4,0xe3,0x79,0x22,0xc0,0x01] +; X86-AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02] +; X86-AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03] +; X86-AVX1-NEXT: retl # encoding: [0xc3] ; ; X86-AVX512-LABEL: test_mm_set1_epi64x: ; X86-AVX512: # %bb.0: -; X86-AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX512-NEXT: vpbroadcastq %xmm0, %xmm0 -; X86-AVX512-NEXT: retl +; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] +; X86-AVX512-NEXT: vpbroadcastq %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xc0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set1_epi64x: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq %rdi, %xmm0 -; X64-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: pshufd $68, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x44] +; X64-SSE-NEXT: # xmm0 = xmm0[0,1,0,1] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; ; X64-AVX1-LABEL: test_mm_set1_epi64x: ; X64-AVX1: # %bb.0: -; X64-AVX1-NEXT: vmovq %rdi, %xmm0 -; X64-AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] -; X64-AVX1-NEXT: retq +; X64-AVX1-NEXT: vmovq %rdi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc7] +; X64-AVX1-NEXT: vpshufd $68, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x70,0xc0,0x44] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,1,0,1] +; X64-AVX1-NEXT: retq # encoding: [0xc3] ; ; X64-AVX512-LABEL: test_mm_set1_epi64x: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 -; X64-AVX512-NEXT: retq +; X64-AVX512-NEXT: vpbroadcastq %rdi, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xc7] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a0, i32 1 ret <2 x i64> %res1 @@ -3074,25 +3910,45 @@ define <2 x i64> @test_mm_set1_epi64x(i64 %a0) nounwind { define <2 x double> @test_mm_set1_pd(double %a0) nounwind { ; X86-SSE-LABEL: test_mm_set1_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X86-SSE-NEXT: # xmm0 = xmm0[0,0] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_set1_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_set1_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_set1_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x04] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; 
X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_set1_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_set1_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_set1_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_set1_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a0, i32 1 ret <2 x double> %res1 @@ -3101,177 +3957,279 @@ define <2 x double> @test_mm_set1_pd(double %a0) nounwind { define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 %a5, i8 %a6, i8 %a7, i8 %a8, i8 %a9, i8 %a10, i8 %a11, i8 %a12, i8 %a13, i8 %a14, i8 %a15) nounwind { ; X86-SSE-LABEL: test_mm_setr_epi8: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm1 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm3 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm1 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; 
X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm3 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm4 -; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_setr_epi8: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-AVX-NEXT: vmovd %ecx, %xmm0 -; X86-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] +; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: 
[0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] +; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] +; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X86-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X86-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] +; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X86-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X86-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] 
+; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X86-SSE-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X86-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X86-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X86-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_setr_epi8: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04] +; X86-AVX1-NEXT: vmovd %ecx, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc1] +; X86-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X86-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] +; X86-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X86-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] +; X86-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X86-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] +; X86-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X86-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] +; X86-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X86-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] +; X86-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X86-AVX1-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X86-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] +; X86-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X86-AVX1-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X86-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_setr_epi8: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04] +; X86-AVX512-NEXT: vmovd %ecx, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc1] +; X86-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c] +; X86-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X86-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x14] +; X86-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X86-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x1c] +; X86-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X86-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x24] +; X86-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X86-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x2c] +; X86-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X86-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x34] +; X86-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X86-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X86-AVX512-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x3c] +; X86-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X86-AVX512-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X86-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_setr_epi8: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm1 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm3 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm1 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movd %eax, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-SSE-NEXT: movzbl %r9b, %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl %r8b, %eax -; X64-SSE-NEXT: movd %eax, %xmm3 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] -; X64-SSE-NEXT: movzbl %cl, %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movzbl %dl, %eax -; X64-SSE-NEXT: movd %eax, %xmm2 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] -; X64-SSE-NEXT: movzbl %sil, %eax -; X64-SSE-NEXT: movd %eax, %xmm4 -; X64-SSE-NEXT: movzbl %dil, %eax -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: test_mm_setr_epi8: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movzbl %sil, %eax -; X64-AVX-NEXT: movzbl %dil, %esi -; X64-AVX-NEXT: vmovd %esi, %xmm0 -; X64-AVX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %dl, %eax -; X64-AVX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %cl, %eax -; X64-AVX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %r8b, %eax -; X64-AVX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl %r9b, %eax -; X64-AVX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: movzbl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] +; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X64-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X64-SSE-NEXT: movzbl 
{{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X64-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm1 # encoding: [0x66,0x0f,0x60,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] +; X64-SSE-NEXT: punpcklwd %xmm3, %xmm1 # encoding: [0x66,0x0f,0x61,0xcb] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] +; X64-SSE-NEXT: punpckldq %xmm2, %xmm1 # encoding: [0x66,0x0f,0x62,0xca] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X64-SSE-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X64-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm3 # encoding: [0x66,0x0f,0x60,0xd8] +; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] +; X64-SSE-NEXT: punpcklwd %xmm2, %xmm3 # encoding: [0x66,0x0f,0x61,0xda] +; X64-SSE-NEXT: # xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] +; X64-SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X64-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X64-SSE-NEXT: punpcklbw %xmm0, %xmm2 # encoding: [0x66,0x0f,0x60,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] +; X64-SSE-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X64-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X64-SSE-NEXT: movzbl %dil, %eax # encoding: [0x40,0x0f,0xb6,0xc7] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: punpcklbw %xmm4, %xmm0 # encoding: [0x66,0x0f,0x60,0xc4] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] +; X64-SSE-NEXT: punpcklwd %xmm2, %xmm0 # encoding: [0x66,0x0f,0x61,0xc2] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] +; X64-SSE-NEXT: punpckldq %xmm3, %xmm0 # encoding: [0x66,0x0f,0x62,0xc3] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] +; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] +; +; X64-AVX1-LABEL: test_mm_setr_epi8: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: movzbl %sil, %eax 
# encoding: [0x40,0x0f,0xb6,0xc6] +; X64-AVX1-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] +; X64-AVX1-NEXT: vmovd %esi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc6] +; X64-AVX1-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X64-AVX1-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X64-AVX1-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X64-AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X64-AVX1-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; X64-AVX1-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X64-AVX1-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X64-AVX1-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X64-AVX1-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X64-AVX1-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X64-AVX1-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X64-AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X64-AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X64-AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X64-AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X64-AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X64-AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] +; X64-AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X64-AVX1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] +; X64-AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_setr_epi8: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: movzbl %sil, %eax # encoding: [0x40,0x0f,0xb6,0xc6] +; X64-AVX512-NEXT: movzbl %dil, %esi # encoding: [0x40,0x0f,0xb6,0xf7] +; X64-AVX512-NEXT: vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6] +; X64-AVX512-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x01] +; X64-AVX512-NEXT: movzbl %dl, %eax # encoding: [0x0f,0xb6,0xc2] +; X64-AVX512-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x02] +; X64-AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1] +; X64-AVX512-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x03] +; 
X64-AVX512-NEXT: movzbl %r8b, %eax # encoding: [0x41,0x0f,0xb6,0xc0] +; X64-AVX512-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x04] +; X64-AVX512-NEXT: movzbl %r9b, %eax # encoding: [0x41,0x0f,0xb6,0xc1] +; X64-AVX512-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x05] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08] +; X64-AVX512-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x06] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x10] +; X64-AVX512-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x07] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x18] +; X64-AVX512-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x08] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x20] +; X64-AVX512-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x09] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x28] +; X64-AVX512-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0a] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x30] +; X64-AVX512-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0b] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x38] +; X64-AVX512-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0c] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x40] +; X64-AVX512-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0d] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x48] +; X64-AVX512-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0e] +; X64-AVX512-NEXT: movzbl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x50] +; X64-AVX512-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x20,0xc0,0x0f] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <16 x i8> undef, i8 %a0 , i32 0 %res1 = insertelement <16 x i8> %res0, i8 %a1 , i32 1 %res2 = insertelement <16 x i8> %res1, i8 %a2 , i32 2 @@ -3295,85 +4253,133 @@ define <2 x i64> @test_mm_setr_epi8(i8 %a0, i8 %a1, i8 %a2, i8 %a3, i8 %a4, i8 % define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4, i16 %a5, i16 %a6, i16 %a7) nounwind { ; X86-SSE-LABEL: test_mm_setr_epi16: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm1 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm2 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm3 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm4 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm5 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm6 -; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm7 -; X86-SSE-NEXT: movzwl 
{{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movd %eax, %xmm0 -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] -; X86-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] -; X86-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] -; X86-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm4[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_setr_epi16: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovd %eax, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] +; X86-SSE-NEXT: movd %eax, %xmm1 # encoding: [0x66,0x0f,0x6e,0xc8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] +; X86-SSE-NEXT: movd %eax, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd0] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] +; X86-SSE-NEXT: movd %eax, %xmm3 # encoding: [0x66,0x0f,0x6e,0xd8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] +; X86-SSE-NEXT: movd %eax, %xmm4 # encoding: [0x66,0x0f,0x6e,0xe0] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X86-SSE-NEXT: movd %eax, %xmm5 # encoding: [0x66,0x0f,0x6e,0xe8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] +; X86-SSE-NEXT: movd %eax, %xmm6 # encoding: [0x66,0x0f,0x6e,0xf0] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X86-SSE-NEXT: movd %eax, %xmm7 # encoding: [0x66,0x0f,0x6e,0xf8] +; X86-SSE-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X86-SSE-NEXT: punpcklwd %xmm1, %xmm2 # encoding: [0x66,0x0f,0x61,0xd1] +; X86-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] +; X86-SSE-NEXT: punpcklwd %xmm3, %xmm4 # encoding: [0x66,0x0f,0x61,0xe3] +; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] +; X86-SSE-NEXT: punpckldq %xmm2, %xmm4 # encoding: [0x66,0x0f,0x62,0xe2] +; X86-SSE-NEXT: # xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] +; X86-SSE-NEXT: punpcklwd %xmm5, %xmm6 # encoding: [0x66,0x0f,0x61,0xf5] +; X86-SSE-NEXT: # xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3] +; X86-SSE-NEXT: punpcklwd %xmm7, %xmm0 # encoding: [0x66,0x0f,0x61,0xc7] +; X86-SSE-NEXT: # xmm0 = 
xmm0[0],xmm7[0],xmm0[1],xmm7[1],xmm0[2],xmm7[2],xmm0[3],xmm7[3] +; X86-SSE-NEXT: punpckldq %xmm6, %xmm0 # encoding: [0x66,0x0f,0x62,0xc6] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1] +; X86-SSE-NEXT: punpcklqdq %xmm4, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc4] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm4[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_setr_epi16: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovd %eax, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X86-AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] +; X86-AVX1-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X86-AVX1-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x03] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] +; X86-AVX1-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] +; X86-AVX1-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] +; X86-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X86-AVX1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] +; X86-AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_setr_epi16: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovd %eax, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc0] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X86-AVX512-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x0c] +; X86-AVX512-NEXT: vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X86-AVX512-NEXT: vpinsrw $3, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x03] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x14] +; X86-AVX512-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x18] +; X86-AVX512-NEXT: vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x1c] +; X86-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X86-AVX512-NEXT: movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x20] +; X86-AVX512-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07] +; X86-AVX512-NEXT: retl # encoding: 
[0xc3] ; ; X64-SSE-LABEL: test_mm_setr_epi16: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d -; X64-SSE-NEXT: movd %eax, %xmm0 -; X64-SSE-NEXT: movd %r10d, %xmm1 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-SSE-NEXT: movd %r9d, %xmm0 -; X64-SSE-NEXT: movd %r8d, %xmm2 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: movd %edx, %xmm1 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] -; X64-SSE-NEXT: movd %esi, %xmm3 -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: test_mm_setr_epi16: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d -; X64-AVX-NEXT: movzwl {{[0-9]+}}(%rsp), %eax -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x10] +; X64-SSE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x08] +; X64-SSE-NEXT: movd %eax, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc0] +; X64-SSE-NEXT: movd %r10d, %xmm1 # encoding: [0x66,0x41,0x0f,0x6e,0xca] +; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-SSE-NEXT: movd %r9d, %xmm0 # encoding: [0x66,0x41,0x0f,0x6e,0xc1] +; X64-SSE-NEXT: movd %r8d, %xmm2 # encoding: [0x66,0x41,0x0f,0x6e,0xd0] +; X64-SSE-NEXT: punpcklwd %xmm0, %xmm2 # encoding: [0x66,0x0f,0x61,0xd0] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] +; X64-SSE-NEXT: punpckldq %xmm1, %xmm2 # encoding: [0x66,0x0f,0x62,0xd1] +; X64-SSE-NEXT: # xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] +; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] +; X64-SSE-NEXT: punpcklwd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x61,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] +; X64-SSE-NEXT: movd %esi, %xmm3 # encoding: [0x66,0x0f,0x6e,0xde] +; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: punpcklwd %xmm3, %xmm0 # encoding: [0x66,0x0f,0x61,0xc3] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] +; X64-SSE-NEXT: punpckldq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x62,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-SSE-NEXT: punpcklqdq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc2] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] +; +; X64-AVX1-LABEL: test_mm_setr_epi16: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: movzwl 
{{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] +; X64-AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; X64-AVX1-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc6,0x01] +; X64-AVX1-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc2,0x02] +; X64-AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X64-AVX1-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] +; X64-AVX1-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] +; X64-AVX1-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X64-AVX1-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_setr_epi16: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d # encoding: [0x44,0x0f,0xb7,0x54,0x24,0x10] +; X64-AVX512-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x08] +; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X64-AVX512-NEXT: vpinsrw $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc6,0x01] +; X64-AVX512-NEXT: vpinsrw $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc2,0x02] +; X64-AVX512-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x03] +; X64-AVX512-NEXT: vpinsrw $4, %r8d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc0,0x04] +; X64-AVX512-NEXT: vpinsrw $5, %r9d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc1,0x05] +; X64-AVX512-NEXT: vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06] +; X64-AVX512-NEXT: vpinsrw $7, %r10d, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0xc4,0xc2,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <8 x i16> undef, i16 %a0, i32 0 %res1 = insertelement <8 x i16> %res0, i16 %a1, i32 1 %res2 = insertelement <8 x i16> %res1, i16 %a2, i32 2 @@ -3389,41 +4395,69 @@ define <2 x i64> @test_mm_setr_epi16(i16 %a0, i16 %a1, i16 %a2, i16 %a3, i16 %a4 define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind { ; X86-SSE-LABEL: test_mm_setr_epi32: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_setr_epi32: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] +; 
X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] +; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_setr_epi32: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] +; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] +; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_setr_epi32: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] +; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] +; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_setr_epi32: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movd %ecx, %xmm0 -; X64-SSE-NEXT: movd %edx, %xmm1 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X64-SSE-NEXT: movd %esi, %xmm2 -; X64-SSE-NEXT: movd %edi, %xmm0 -; X64-SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-SSE-NEXT: retq -; -; X64-AVX-LABEL: test_mm_setr_epi32: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovd %edi, %xmm0 -; X64-AVX-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 -; X64-AVX-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 -; X64-AVX-NEXT: retq +; X64-SSE-NEXT: movd %ecx, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc1] +; X64-SSE-NEXT: movd %edx, %xmm1 # encoding: [0x66,0x0f,0x6e,0xca] +; X64-SSE-NEXT: punpckldq %xmm0, %xmm1 # encoding: [0x66,0x0f,0x62,0xc8] +; X64-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X64-SSE-NEXT: movd %esi, %xmm2 # encoding: [0x66,0x0f,0x6e,0xd6] +; X64-SSE-NEXT: movd %edi, %xmm0 # encoding: [0x66,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: punpckldq %xmm2, %xmm0 # encoding: [0x66,0x0f,0x62,0xc2] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] +; +; X64-AVX1-LABEL: test_mm_setr_epi32: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovd %edi, %xmm0 # encoding: [0xc5,0xf9,0x6e,0xc7] +; 
X64-AVX1-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] +; X64-AVX1-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; X64-AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_setr_epi32: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7] +; X64-AVX512-NEXT: vpinsrd $1, %esi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc6,0x01] +; X64-AVX512-NEXT: vpinsrd $2, %edx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02] +; X64-AVX512-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x03] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <4 x i32> undef, i32 %a0, i32 0 %res1 = insertelement <4 x i32> %res0, i32 %a1, i32 1 %res2 = insertelement <4 x i32> %res1, i32 %a2, i32 2 @@ -3437,36 +4471,63 @@ define <2 x i64> @test_mm_setr_epi32(i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwin define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind { ; X86-SSE-LABEL: test_mm_setr_epi64x: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; X86-SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-SSE-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero -; X86-SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_setr_epi64x: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero -; X86-AVX-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf3,0x0f,0x10,0x4c,0x24,0x0c] +; X86-SSE-NEXT: # xmm1 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x10] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm0, %xmm1 # encoding: [0x0f,0x14,0xc8] +; X86-SSE-NEXT: # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf3,0x0f,0x10,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-SSE-NEXT: movss {{[0-9]+}}(%esp), %xmm2 # encoding: [0xf3,0x0f,0x10,0x54,0x24,0x08] +; X86-SSE-NEXT: # xmm2 = mem[0],zero,zero,zero +; X86-SSE-NEXT: unpcklps %xmm2, %xmm0 # encoding: [0x0f,0x14,0xc2] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] +; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_setr_epi64x: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX1-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] +; X86-AVX1-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] +; X86-AVX1-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # encoding: 
[0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_setr_epi64x: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0x44,0x24,0x04] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero,zero,zero +; X86-AVX512-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x08,0x01] +; X86-AVX512-NEXT: vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x0c,0x02] +; X86-AVX512-NEXT: vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x22,0x44,0x24,0x10,0x03] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_setr_epi64x: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq %rsi, %xmm1 -; X64-SSE-NEXT: movq %rdi, %xmm0 -; X64-SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movq %rsi, %xmm1 # encoding: [0x66,0x48,0x0f,0x6e,0xce] +; X64-SSE-NEXT: movq %rdi, %xmm0 # encoding: [0x66,0x48,0x0f,0x6e,0xc7] +; X64-SSE-NEXT: punpcklqdq %xmm1, %xmm0 # encoding: [0x66,0x0f,0x6c,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_setr_epi64x: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovq %rsi, %xmm0 -; X64-AVX-NEXT: vmovq %rdi, %xmm1 -; X64-AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_setr_epi64x: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovq %rsi, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] +; X64-AVX1-NEXT: vmovq %rdi, %xmm1 # encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] +; X64-AVX1-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf1,0x6c,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_setr_epi64x: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovq %rsi, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc6] +; X64-AVX512-NEXT: vmovq %rdi, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xcf] +; X64-AVX512-NEXT: vpunpcklqdq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x i64> undef, i64 %a0, i32 0 %res1 = insertelement <2 x i64> %res0, i64 %a1, i32 1 ret <2 x i64> %res1 @@ -3475,27 +4536,51 @@ define <2 x i64> @test_mm_setr_epi64x(i64 %a0, i64 %a1) nounwind { define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind { ; X86-SSE-LABEL: test_mm_setr_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero -; X86-SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X86-SSE-NEXT: retl -; -; X86-AVX-LABEL: test_mm_setr_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; X86-AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; X86-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0] -; X86-AVX-NEXT: retl +; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xf2,0x0f,0x10,0x4c,0x24,0x0c] +; X86-SSE-NEXT: # xmm1 = mem[0],zero +; X86-SSE-NEXT: movsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x44,0x24,0x04] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X86-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; 
X86-AVX1-LABEL: test_mm_setr_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] +; X86-AVX1-NEXT: # xmm1 = mem[0],zero +; X86-AVX1-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf0,0x16,0xc0] +; X86-AVX1-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_setr_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x44,0x24,0x0c] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; X86-AVX512-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x4c,0x24,0x04] +; X86-AVX512-NEXT: # xmm1 = mem[0],zero +; X86-AVX512-NEXT: vmovlhps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x16,0xc0] +; X86-AVX512-NEXT: # xmm0 = xmm1[0],xmm0[0] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_setr_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1] +; X64-SSE-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_setr_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_setr_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1] +; X64-AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_setr_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1] +; X64-AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %res0 = insertelement <2 x double> undef, double %a0, i32 0 %res1 = insertelement <2 x double> %res0, double %a1, i32 1 ret <2 x double> %res1 @@ -3504,44 +4589,56 @@ define <2 x double> @test_mm_setr_pd(double %a0, double %a1) nounwind { define <2 x double> @test_mm_setzero_pd() { ; SSE-LABEL: test_mm_setzero_pd: ; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_setzero_pd: -; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_setzero_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_setzero_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] ret <2 x double> zeroinitializer } define <2 x i64> @test_mm_setzero_si128() { ; SSE-LABEL: test_mm_setzero_si128: ; SSE: # %bb.0: -; SSE-NEXT: xorps %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: xorps %xmm0, %xmm0 # encoding: [0x0f,0x57,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_setzero_si128: -; AVX: # %bb.0: -; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_setzero_si128: +; AVX1: # %bb.0: +; AVX1-NEXT: vxorps %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # 
encoding: [0xc3] +; +; AVX512-LABEL: test_mm_setzero_si128: +; AVX512: # %bb.0: +; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] ret <2 x i64> zeroinitializer } define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { ; SSE-LABEL: test_mm_shuffle_epi32: ; SSE: # %bb.0: -; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pshufd $0, %xmm0, %xmm0 # encoding: [0x66,0x0f,0x70,0xc0,0x00] +; SSE-NEXT: # xmm0 = xmm0[0,0,0,0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX1-LABEL: test_mm_shuffle_epi32: ; AVX1: # %bb.0: -; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0] -; AVX1-NEXT: ret{{[l|q]}} +; AVX1-NEXT: vpermilps $0, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x04,0xc0,0x00] +; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX512-LABEL: test_mm_shuffle_epi32: ; AVX512: # %bb.0: -; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 -; AVX512-NEXT: ret{{[l|q]}} +; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res = shufflevector <4 x i32> %arg0, <4 x i32> undef, <4 x i32> zeroinitializer %bc = bitcast <4 x i32> %res to <2 x i64> @@ -3551,13 +4648,21 @@ define <2 x i64> @test_mm_shuffle_epi32(<2 x i64> %a0) { define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_mm_shuffle_pd: ; SSE: # %bb.0: -; SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: shufpd $1, %xmm1, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc1,0x01] +; SSE-NEXT: # xmm0 = xmm0[1],xmm1[0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_shuffle_pd: -; AVX: # %bb.0: -; AVX-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0] -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_shuffle_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc6,0xc1,0x01] +; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_shuffle_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vshufpd $1, %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xc1,0x01] +; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 2> ret <2 x double> %res } @@ -3565,13 +4670,21 @@ define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) { define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_shufflehi_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pshufhw $0, %xmm0, %xmm0 # encoding: [0xf3,0x0f,0x70,0xc0,0x00] +; SSE-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_shufflehi_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4] -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_shufflehi_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshufhw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfa,0x70,0xc0,0x00] +; AVX1-NEXT: # xmm0 = xmm0[0,1,2,3,4,4,4,4] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_shufflehi_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshufhw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xc0,0x00] +; AVX512-NEXT: # xmm0 
= xmm0[0,1,2,3,4,4,4,4] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4> %bc = bitcast <8 x i16> %res to <2 x i64> @@ -3581,13 +4694,21 @@ define <2 x i64> @test_mm_shufflehi_epi16(<2 x i64> %a0) { define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_shufflelo_epi16: ; SSE: # %bb.0: -; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pshuflw $0, %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x70,0xc0,0x00] +; SSE-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_shufflelo_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_shufflelo_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpshuflw $0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x70,0xc0,0x00] +; AVX1-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_shufflelo_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpshuflw $0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xc0,0x00] +; AVX512-NEXT: # xmm0 = xmm0[0,0,0,0,4,5,6,7] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7> %bc = bitcast <8 x i16> %res to <2 x i64> @@ -3597,13 +4718,18 @@ define <2 x i64> @test_mm_shufflelo_epi16(<2 x i64> %a0) { define <2 x i64> @test_mm_sll_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_sll_epi16: ; SSE: # %bb.0: -; SSE-NEXT: psllw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psllw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf1,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_sll_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_sll_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf1,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_sll_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %arg0, <8 x i16> %arg1) @@ -3615,13 +4741,18 @@ declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_sll_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_sll_epi32: ; SSE: # %bb.0: -; SSE-NEXT: pslld %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pslld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf2,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_sll_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vpslld %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_sll_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpslld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf2,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_sll_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpslld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x 
i64> %a1 to <4 x i32> %res = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %arg0, <4 x i32> %arg1) @@ -3633,13 +4764,18 @@ declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_mm_sll_epi64(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_sll_epi64: ; SSE: # %bb.0: -; SSE-NEXT: psllq %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psllq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf3,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_sll_epi64: -; AVX: # %bb.0: -; AVX-NEXT: vpsllq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_sll_epi64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf3,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_sll_epi64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf3,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1) ret <2 x i64> %res } @@ -3648,13 +4784,18 @@ declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone define <2 x i64> @test_mm_slli_epi16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_slli_epi16: ; SSE: # %bb.0: -; SSE-NEXT: psllw $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psllw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xf0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_slli_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpsllw $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_slli_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsllw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xf0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_slli_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xf0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %arg0, i32 1) %bc = bitcast <8 x i16> %res to <2 x i64> @@ -3665,13 +4806,18 @@ declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) nounwind readnone define <2 x i64> @test_mm_slli_epi32(<2 x i64> %a0) { ; SSE-LABEL: test_mm_slli_epi32: ; SSE: # %bb.0: -; SSE-NEXT: pslld $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pslld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xf0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_slli_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vpslld $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_slli_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpslld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xf0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_slli_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpslld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xf0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res = call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %arg0, i32 1) %bc = bitcast <4 x i32> %res to <2 x i64> @@ -3682,13 +4828,18 @@ declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) nounwind readnone define <2 x i64> @test_mm_slli_epi64(<2 x i64> %a0) { ; SSE-LABEL: test_mm_slli_epi64: ; SSE: # %bb.0: -; SSE-NEXT: psllq $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psllq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xf0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_slli_epi64: -; 
AVX: # %bb.0: -; AVX-NEXT: vpsllq $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_slli_epi64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsllq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_slli_epi64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %a0, i32 1) ret <2 x i64> %res } @@ -3697,13 +4848,21 @@ declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) nounwind readnone define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_slli_si128: ; SSE: # %bb.0: -; SSE-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: pslldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xf8,0x05] +; SSE-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_slli_si128: -; AVX: # %bb.0: -; AVX-NEXT: vpslldq {{.*#+}} xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_slli_si128: +; AVX1: # %bb.0: +; AVX1-NEXT: vpslldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xf8,0x05] +; AVX1-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_slli_si128: +; AVX512: # %bb.0: +; AVX512-NEXT: vpslldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xf8,0x05] +; AVX512-NEXT: # xmm0 = zero,zero,zero,zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %res = shufflevector <16 x i8> zeroinitializer, <16 x i8> %arg0, <16 x i32> <i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26> %bc = bitcast <16 x i8> %res to <2 x i64> @@ -3713,13 +4872,18 @@ define <2 x i64> @test_mm_slli_si128(<2 x i64> %a0) nounwind { define <2 x double> @test_mm_sqrt_pd(<2 x double> %a0) nounwind { ; SSE-LABEL: test_mm_sqrt_pd: ; SSE: # %bb.0: -; SSE-NEXT: sqrtpd %xmm0, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: sqrtpd %xmm0, %xmm0 # encoding: [0x66,0x0f,0x51,0xc0] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_sqrt_pd: -; AVX: # %bb.0: -; AVX-NEXT: vsqrtpd %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_sqrt_pd: +; AVX1: # %bb.0: +; AVX1-NEXT: vsqrtpd %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x51,0xc0] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_sqrt_pd: +; AVX512: # %bb.0: +; AVX512-NEXT: vsqrtpd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x51,0xc0] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %a0) ret <2 x double> %res } @@ -3728,14 +4892,14 @@ declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) nounwind readnone define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwind { ; SSE-LABEL: test_mm_sqrt_sd: ; SSE: # %bb.0: -; SSE-NEXT: sqrtsd %xmm0, %xmm1 -; SSE-NEXT: movapd %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: sqrtsd %xmm0, %xmm1 # encoding: [0xf2,0x0f,0x51,0xc8] +; SSE-NEXT: movapd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x28,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; ; AVX-LABEL: test_mm_sqrt_sd: ; AVX: # %bb.0: -; 
AVX-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX-NEXT: vsqrtsd %xmm0, %xmm1, %xmm0 # encoding: [0xc5,0xf3,0x51,0xc0] +; AVX-NEXT: ret{{[l|q]}} # encoding: [0xc3] %ext = extractelement <2 x double> %a0, i32 0 %sqrt = call double @llvm.sqrt.f64(double %ext) %ins = insertelement <2 x double> %a1, double %sqrt, i32 0 @@ -3743,16 +4907,81 @@ define <2 x double> @test_mm_sqrt_sd(<2 x double> %a0, <2 x double> %a1) nounwin } declare double @llvm.sqrt.f64(double) nounwind readnone +; This doesn't match a clang test, but helps with fast-isel coverage. +define double @test_mm_sqrt_sd_scalar(double %a0) nounwind { +; X86-SSE-LABEL: test_mm_sqrt_sd_scalar: +; X86-SSE: # %bb.0: +; X86-SSE-NEXT: pushl %ebp # encoding: [0x55] +; X86-SSE-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-SSE-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] +; X86-SSE-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] +; X86-SSE-NEXT: movsd 8(%ebp), %xmm0 # encoding: [0xf2,0x0f,0x10,0x45,0x08] +; X86-SSE-NEXT: # xmm0 = mem[0],zero +; X86-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] +; X86-SSE-NEXT: movsd %xmm0, (%esp) # encoding: [0xf2,0x0f,0x11,0x04,0x24] +; X86-SSE-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] +; X86-SSE-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-SSE-NEXT: popl %ebp # encoding: [0x5d] +; X86-SSE-NEXT: retl # encoding: [0xc3] +; +; X86-AVX1-LABEL: test_mm_sqrt_sd_scalar: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: pushl %ebp # encoding: [0x55] +; X86-AVX1-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-AVX1-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] +; X86-AVX1-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] +; X86-AVX1-NEXT: vmovsd 8(%ebp), %xmm0 # encoding: [0xc5,0xfb,0x10,0x45,0x08] +; X86-AVX1-NEXT: # xmm0 = mem[0],zero +; X86-AVX1-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] +; X86-AVX1-NEXT: vmovsd %xmm0, (%esp) # encoding: [0xc5,0xfb,0x11,0x04,0x24] +; X86-AVX1-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] +; X86-AVX1-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-AVX1-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_sqrt_sd_scalar: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: pushl %ebp # encoding: [0x55] +; X86-AVX512-NEXT: movl %esp, %ebp # encoding: [0x89,0xe5] +; X86-AVX512-NEXT: andl $-8, %esp # encoding: [0x83,0xe4,0xf8] +; X86-AVX512-NEXT: subl $8, %esp # encoding: [0x83,0xec,0x08] +; X86-AVX512-NEXT: vmovsd 8(%ebp), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x45,0x08] +; X86-AVX512-NEXT: # xmm0 = mem[0],zero +; X86-AVX512-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x51,0xc0] +; X86-AVX512-NEXT: vmovsd %xmm0, (%esp) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x04,0x24] +; X86-AVX512-NEXT: fldl (%esp) # encoding: [0xdd,0x04,0x24] +; X86-AVX512-NEXT: movl %ebp, %esp # encoding: [0x89,0xec] +; X86-AVX512-NEXT: popl %ebp # encoding: [0x5d] +; X86-AVX512-NEXT: retl # encoding: [0xc3] +; +; X64-SSE-LABEL: test_mm_sqrt_sd_scalar: +; X64-SSE: # %bb.0: +; X64-SSE-NEXT: sqrtsd %xmm0, %xmm0 # encoding: [0xf2,0x0f,0x51,0xc0] +; X64-SSE-NEXT: retq # encoding: [0xc3] +; +; X64-AVX-LABEL: test_mm_sqrt_sd_scalar: +; X64-AVX: # %bb.0: +; X64-AVX-NEXT: vsqrtsd %xmm0, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x51,0xc0] +; X64-AVX-NEXT: retq # encoding: [0xc3] + %sqrt = call double @llvm.sqrt.f64(double %a0) + ret double %sqrt +} + define <2 x i64> @test_mm_sra_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; 
SSE-LABEL: test_mm_sra_epi16: ; SSE: # %bb.0: -; SSE-NEXT: psraw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psraw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe1,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_sra_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpsraw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_sra_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe1,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_sra_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsraw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %arg0, <8 x i16> %arg1) @@ -3764,13 +4993,18 @@ declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_sra_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_sra_epi32: ; SSE: # %bb.0: -; SSE-NEXT: psrad %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrad %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe2,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_sra_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vpsrad %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_sra_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe2,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_sra_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrad %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %res = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %arg0, <4 x i32> %arg1) @@ -3782,13 +5016,18 @@ declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_mm_srai_epi16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_srai_epi16: ; SSE: # %bb.0: -; SSE-NEXT: psraw $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psraw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xe0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srai_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpsraw $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srai_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsraw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xe0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srai_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsraw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xe0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %arg0, i32 1) %bc = bitcast <8 x i16> %res to <2 x i64> @@ -3799,13 +5038,18 @@ declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) nounwind readnone define <2 x i64> @test_mm_srai_epi32(<2 x i64> %a0) { ; SSE-LABEL: test_mm_srai_epi32: ; SSE: # %bb.0: -; SSE-NEXT: psrad $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrad $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xe0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srai_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vpsrad $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srai_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrad $1, %xmm0, %xmm0 # encoding: 
[0xc5,0xf9,0x72,0xe0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srai_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrad $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xe0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res = call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %arg0, i32 1) %bc = bitcast <4 x i32> %res to <2 x i64> @@ -3816,13 +5060,18 @@ declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) nounwind readnone define <2 x i64> @test_mm_srl_epi16(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_srl_epi16: ; SSE: # %bb.0: -; SSE-NEXT: psrlw %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrlw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd1,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srl_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srl_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd1,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srl_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %arg1 = bitcast <2 x i64> %a1 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %arg0, <8 x i16> %arg1) @@ -3834,13 +5083,18 @@ declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone define <2 x i64> @test_mm_srl_epi32(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_srl_epi32: ; SSE: # %bb.0: -; SSE-NEXT: psrld %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrld %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd2,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srl_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vpsrld %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srl_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd2,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srl_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrld %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %arg1 = bitcast <2 x i64> %a1 to <4 x i32> %res = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %arg0, <4 x i32> %arg1) @@ -3852,13 +5106,18 @@ declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone define <2 x i64> @test_mm_srl_epi64(<2 x i64> %a0, <2 x i64> %a1) { ; SSE-LABEL: test_mm_srl_epi64: ; SSE: # %bb.0: -; SSE-NEXT: psrlq %xmm1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrlq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd3,0xc1] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srl_epi64: -; AVX: # %bb.0: -; AVX-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srl_epi64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd3,0xc1] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srl_epi64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xc1] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1) ret <2 x i64> %res } @@ -3867,13 +5126,18 @@ declare <2 
x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone define <2 x i64> @test_mm_srli_epi16(<2 x i64> %a0) { ; SSE-LABEL: test_mm_srli_epi16: ; SSE: # %bb.0: -; SSE-NEXT: psrlw $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrlw $1, %xmm0 # encoding: [0x66,0x0f,0x71,0xd0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srli_epi16: -; AVX: # %bb.0: -; AVX-NEXT: vpsrlw $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srli_epi16: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrlw $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x71,0xd0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srli_epi16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlw $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x71,0xd0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <8 x i16> %res = call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %arg0, i32 1) %bc = bitcast <8 x i16> %res to <2 x i64> @@ -3884,13 +5148,18 @@ declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone define <2 x i64> @test_mm_srli_epi32(<2 x i64> %a0) { ; SSE-LABEL: test_mm_srli_epi32: ; SSE: # %bb.0: -; SSE-NEXT: psrld $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrld $1, %xmm0 # encoding: [0x66,0x0f,0x72,0xd0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srli_epi32: -; AVX: # %bb.0: -; AVX-NEXT: vpsrld $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srli_epi32: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrld $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x72,0xd0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srli_epi32: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrld $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x72,0xd0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <4 x i32> %res = call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %arg0, i32 1) %bc = bitcast <4 x i32> %res to <2 x i64> @@ -3901,13 +5170,18 @@ declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) nounwind readnone define <2 x i64> @test_mm_srli_epi64(<2 x i64> %a0) { ; SSE-LABEL: test_mm_srli_epi64: ; SSE: # %bb.0: -; SSE-NEXT: psrlq $1, %xmm0 -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrlq $1, %xmm0 # encoding: [0x66,0x0f,0x73,0xd0,0x01] +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srli_epi64: -; AVX: # %bb.0: -; AVX-NEXT: vpsrlq $1, %xmm0, %xmm0 -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srli_epi64: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrlq $1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd0,0x01] +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srli_epi64: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlq $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd0,0x01] +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %res = call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %a0, i32 1) ret <2 x i64> %res } @@ -3916,13 +5190,21 @@ declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) nounwind readnone define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind { ; SSE-LABEL: test_mm_srli_si128: ; SSE: # %bb.0: -; SSE-NEXT: psrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero -; SSE-NEXT: ret{{[l|q]}} +; SSE-NEXT: psrldq $5, %xmm0 # encoding: [0x66,0x0f,0x73,0xd8,0x05] +; SSE-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero +; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3] ; -; AVX-LABEL: test_mm_srli_si128: -; 
AVX: # %bb.0: -; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero -; AVX-NEXT: ret{{[l|q]}} +; AVX1-LABEL: test_mm_srli_si128: +; AVX1: # %bb.0: +; AVX1-NEXT: vpsrldq $5, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x73,0xd8,0x05] +; AVX1-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero +; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3] +; +; AVX512-LABEL: test_mm_srli_si128: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrldq $5, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x73,0xd8,0x05] +; AVX512-NEXT: # xmm0 = xmm0[5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero +; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3] %arg0 = bitcast <2 x i64> %a0 to <16 x i8> %res = shufflevector <16 x i8> %arg0, <16 x i8> zeroinitializer, <16 x i32> <i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20> %bc = bitcast <16 x i8> %res to <2 x i64> @@ -3932,25 +5214,36 @@ define <2 x i64> @test_mm_srli_si128(<2 x i64> %a0) nounwind { define void @test_mm_store_pd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_store_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movaps %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_store_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovaps %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_store_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_store_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_store_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_store_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovaps %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_store_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_store_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* store <2 x double> %a1, <2 x double>* %arg0, align 16 ret void @@ -3959,29 +5252,48 @@ define void @test_mm_store_pd(double *%a0, <2 x double> %a1) { define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_store_pd1: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X86-SSE-NEXT: movaps %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X86-SSE-NEXT: # 
xmm0 = xmm0[0,0] +; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_store_pd1: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X86-AVX-NEXT: vmovapd %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_store_pd1: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_store_pd1: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_store_pd1: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0] +; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_store_pd1: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X64-AVX-NEXT: vmovapd %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_store_pd1: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_store_pd1: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double * %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -3991,25 +5303,36 @@ define void @test_mm_store_pd1(double *%a0, <2 x double> %a1) { define void @test_mm_store_sd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_store_sd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movsd %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_store_sd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovsd %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_store_sd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_store_sd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # 
encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_store_sd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movsd %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_store_sd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_store_sd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_store_sd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 0 store double %ext, double* %a0, align 1 ret void @@ -4018,25 +5341,36 @@ define void @test_mm_store_sd(double *%a0, <2 x double> %a1) { define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) { ; X86-SSE-LABEL: test_mm_store_si128: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movaps %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_store_si128: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovaps %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_store_si128: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovaps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x29,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_store_si128: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_store_si128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_store_si128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovaps %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_store_si128: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovaps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x29,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_store_si128: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovaps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] store <2 x i64> %a1, <2 x i64>* %a0, align 16 ret void } @@ -4044,29 +5378,48 @@ define void @test_mm_store_si128(<2 x i64> *%a0, <2 x i64> %a1) { define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_store1_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X86-SSE-NEXT: movaps %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X86-SSE-NEXT: # 
xmm0 = xmm0[0,0] +; X86-SSE-NEXT: movaps %xmm0, (%eax) # encoding: [0x0f,0x29,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_store1_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X86-AVX-NEXT: vmovapd %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_store1_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_store1_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X86-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_store1_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] -; X64-SSE-NEXT: movaps %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movlhps %xmm0, %xmm0 # encoding: [0x0f,0x16,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[0,0] +; X64-SSE-NEXT: movaps %xmm0, (%rdi) # encoding: [0x0f,0x29,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_store1_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] -; X64-AVX-NEXT: vmovapd %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_store1_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovddup %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX1-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_store1_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovddup %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xc0] +; X64-AVX512-NEXT: # xmm0 = xmm0[0,0] +; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double * %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> zeroinitializer store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -4076,29 +5429,48 @@ define void @test_mm_store1_pd(double *%a0, <2 x double> %a1) { define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_storeh_sd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; X86-SSE-NEXT: movsd %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] +; X86-SSE-NEXT: # xmm0 = xmm0[1,1] +; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_storeh_sd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; X86-AVX-NEXT: vmovsd %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_storeh_sd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: 
vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_storeh_sd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_storeh_sd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] -; X64-SSE-NEXT: movsd %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movhlps %xmm0, %xmm0 # encoding: [0x0f,0x12,0xc0] +; X64-SSE-NEXT: # xmm0 = xmm0[1,1] +; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_storeh_sd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_storeh_sd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_storeh_sd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 1 store double %ext, double* %a0, align 8 ret void @@ -4107,27 +5479,39 @@ define void @test_mm_storeh_sd(double *%a0, <2 x double> %a1) { define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) { ; X86-SSE-LABEL: test_mm_storel_epi64: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movlps %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movlps %xmm0, (%eax) # encoding: [0x0f,0x13,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_storel_epi64: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovlps %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_storel_epi64: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovlps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x13,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_storel_epi64: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovlps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x13,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_storel_epi64: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movq %xmm0, %rax -; X64-SSE-NEXT: movq %rax, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movq %xmm0, %rax # encoding: [0x66,0x48,0x0f,0x7e,0xc0] +; X64-SSE-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; 
X64-AVX-LABEL: test_mm_storel_epi64: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovq %xmm0, %rax -; X64-AVX-NEXT: movq %rax, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_storel_epi64: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovq %xmm0, %rax # encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X64-AVX1-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_storel_epi64: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovq %xmm0, %rax # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x7e,0xc0] +; X64-AVX512-NEXT: movq %rax, (%rdi) # encoding: [0x48,0x89,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x i64> %a1, i32 0 %bc = bitcast <2 x i64> *%a0 to i64* store i64 %ext, i64* %bc, align 8 @@ -4137,25 +5521,36 @@ define void @test_mm_storel_epi64(<2 x i64> *%a0, <2 x i64> %a1) { define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_storel_sd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movsd %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movsd %xmm0, (%eax) # encoding: [0xf2,0x0f,0x11,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_storel_sd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovsd %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_storel_sd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovsd %xmm0, (%eax) # encoding: [0xc5,0xfb,0x11,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_storel_sd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovsd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_storel_sd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movsd %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movsd %xmm0, (%rdi) # encoding: [0xf2,0x0f,0x11,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_storel_sd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovsd %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_storel_sd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovsd %xmm0, (%rdi) # encoding: [0xc5,0xfb,0x11,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_storel_sd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovsd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %ext = extractelement <2 x double> %a1, i32 0 store double %ext, double* %a0, align 8 ret void @@ -4164,29 +5559,48 @@ define void @test_mm_storel_sd(double *%a0, <2 x double> %a1) { define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_storer_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] -; X86-SSE-NEXT: movapd %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] +; X86-SSE-NEXT: # xmm0 = xmm0[1,0] +; X86-SSE-NEXT: movapd %xmm0, (%eax) # encoding: [0x66,0x0f,0x29,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_storer_pd: -; 
X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; X86-AVX-NEXT: vmovapd %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_storer_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X86-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X86-AVX1-NEXT: vmovapd %xmm0, (%eax) # encoding: [0xc5,0xf9,0x29,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_storer_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X86-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X86-AVX512-NEXT: vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_storer_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: shufpd {{.*#+}} xmm0 = xmm0[1,0] -; X64-SSE-NEXT: movapd %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: shufpd $1, %xmm0, %xmm0 # encoding: [0x66,0x0f,0xc6,0xc0,0x01] +; X64-SSE-NEXT: # xmm0 = xmm0[1,0] +; X64-SSE-NEXT: movapd %xmm0, (%rdi) # encoding: [0x66,0x0f,0x29,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_storer_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] -; X64-AVX-NEXT: vmovapd %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_storer_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vpermilpd $1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X64-AVX1-NEXT: # xmm0 = xmm0[1,0] +; X64-AVX1-NEXT: vmovapd %xmm0, (%rdi) # encoding: [0xc5,0xf9,0x29,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_storer_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vpermilpd $1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xc0,0x01] +; X64-AVX512-NEXT: # xmm0 = xmm0[1,0] +; X64-AVX512-NEXT: vmovapd %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* %shuf = shufflevector <2 x double> %a1, <2 x double> undef, <2 x i32> <i32 1, i32 0> store <2 x double> %shuf, <2 x double>* %arg0, align 16 @@ -4196,25 +5610,36 @@ define void @test_mm_storer_pd(double *%a0, <2 x double> %a1) { define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_storeu_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movups %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_storeu_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovups %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_storeu_pd: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_storeu_pd: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO 
VEX Compression encoding: [0xc5,0xf8,0x11,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_storeu_pd: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movups %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_storeu_pd: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovups %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_storeu_pd: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_storeu_pd: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] %arg0 = bitcast double* %a0 to <2 x double>* store <2 x double> %a1, <2 x double>* %arg0, align 1 ret void @@ -4223,25 +5648,36 @@ define void @test_mm_storeu_pd(double *%a0, <2 x double> %a1) { define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) { ; X86-SSE-LABEL: test_mm_storeu_si128: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movups %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movups %xmm0, (%eax) # encoding: [0x0f,0x11,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_storeu_si128: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-AVX-NEXT: vmovups %xmm0, (%eax) -; X86-AVX-NEXT: retl +; X86-AVX1-LABEL: test_mm_storeu_si128: +; X86-AVX1: # %bb.0: +; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX1-NEXT: vmovups %xmm0, (%eax) # encoding: [0xc5,0xf8,0x11,0x00] +; X86-AVX1-NEXT: retl # encoding: [0xc3] +; +; X86-AVX512-LABEL: test_mm_storeu_si128: +; X86-AVX512: # %bb.0: +; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-AVX512-NEXT: vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00] +; X86-AVX512-NEXT: retl # encoding: [0xc3] ; ; X64-SSE-LABEL: test_mm_storeu_si128: ; X64-SSE: # %bb.0: -; X64-SSE-NEXT: movups %xmm0, (%rdi) -; X64-SSE-NEXT: retq +; X64-SSE-NEXT: movups %xmm0, (%rdi) # encoding: [0x0f,0x11,0x07] +; X64-SSE-NEXT: retq # encoding: [0xc3] ; -; X64-AVX-LABEL: test_mm_storeu_si128: -; X64-AVX: # %bb.0: -; X64-AVX-NEXT: vmovups %xmm0, (%rdi) -; X64-AVX-NEXT: retq +; X64-AVX1-LABEL: test_mm_storeu_si128: +; X64-AVX1: # %bb.0: +; X64-AVX1-NEXT: vmovups %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x11,0x07] +; X64-AVX1-NEXT: retq # encoding: [0xc3] +; +; X64-AVX512-LABEL: test_mm_storeu_si128: +; X64-AVX512: # %bb.0: +; X64-AVX512-NEXT: vmovups %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x07] +; X64-AVX512-NEXT: retq # encoding: [0xc3] store <2 x i64> %a1, <2 x i64>* %a0, align 1 ret void } @@ -4249,25 +5685,36 @@ define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) { define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) { ; X86-SSE-LABEL: test_mm_stream_pd: ; X86-SSE: # %bb.0: -; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SSE-NEXT: movntps %xmm0, (%eax) -; X86-SSE-NEXT: retl +; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00] +; X86-SSE-NEXT: retl # encoding: [0xc3] ; -; X86-AVX-LABEL: test_mm_stream_pd: -; X86-AVX: # %bb.0: -; X86-AVX-NEXT: movl 
@@ -4249,25 +5685,36 @@ define void @test_mm_storeu_si128(<2 x i64> *%a0, <2 x i64> %a1) {
define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
; X86-SSE-LABEL: test_mm_stream_pd:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movntps %xmm0, (%eax)
-; X86-SSE-NEXT: retl
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
+; X86-SSE-NEXT: retl # encoding: [0xc3]
;
-; X86-AVX-LABEL: test_mm_stream_pd:
-; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT: vmovntps %xmm0, (%eax)
-; X86-AVX-NEXT: retl
+; X86-AVX1-LABEL: test_mm_stream_pd:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
+; X86-AVX1-NEXT: retl # encoding: [0xc3]
+;
+; X86-AVX512-LABEL: test_mm_stream_pd:
+; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
+; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_stream_pd:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movntps %xmm0, (%rdi)
-; X64-SSE-NEXT: retq
+; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
+; X64-SSE-NEXT: retq # encoding: [0xc3]
;
-; X64-AVX-LABEL: test_mm_stream_pd:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovntps %xmm0, (%rdi)
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_mm_stream_pd:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_stream_pd:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
%arg0 = bitcast double* %a0 to <2 x double>*
store <2 x double> %a1, <2 x double>* %arg0, align 16, !nontemporal !0
ret void
@@ -4276,15 +5723,15 @@ define void @test_mm_stream_pd(double *%a0, <2 x double> %a1) {
define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
; X86-LABEL: test_mm_stream_si32:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movntil %eax, (%ecx)
-; X86-NEXT: retl
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
+; X86-NEXT: movntil %eax, (%ecx) # encoding: [0x0f,0xc3,0x01]
+; X86-NEXT: retl # encoding: [0xc3]
;
; X64-LABEL: test_mm_stream_si32:
; X64: # %bb.0:
-; X64-NEXT: movntil %esi, (%rdi)
-; X64-NEXT: retq
+; X64-NEXT: movntil %esi, (%rdi) # encoding: [0x0f,0xc3,0x37]
+; X64-NEXT: retq # encoding: [0xc3]
store i32 %a1, i32* %a0, align 1, !nontemporal !0
ret void
}
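; NOTE: The !nontemporal !0 metadata is what selects the streaming stores here
; (movntps/vmovntps for the vector cases, movnti for the i32 case). A minimal
; sketch of the shape being matched, assuming the usual single-element metadata
; node (the real !0 is defined once at the end of this file):
;
;   define void @stream_sketch(i32* %p, i32 %v) {
;     store i32 %v, i32* %p, align 1, !nontemporal !0
;     ret void
;   }
;   !0 = !{i32 1}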
@@ -4292,25 +5739,36 @@ define void @test_mm_stream_si32(i32 *%a0, i32 %a1) {
define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
; X86-SSE-LABEL: test_mm_stream_si128:
; X86-SSE: # %bb.0:
-; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-SSE-NEXT: movntps %xmm0, (%eax)
-; X86-SSE-NEXT: retl
+; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-SSE-NEXT: movntps %xmm0, (%eax) # encoding: [0x0f,0x2b,0x00]
+; X86-SSE-NEXT: retl # encoding: [0xc3]
;
-; X86-AVX-LABEL: test_mm_stream_si128:
-; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-AVX-NEXT: vmovntps %xmm0, (%eax)
-; X86-AVX-NEXT: retl
+; X86-AVX1-LABEL: test_mm_stream_si128:
+; X86-AVX1: # %bb.0:
+; X86-AVX1-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX1-NEXT: vmovntps %xmm0, (%eax) # encoding: [0xc5,0xf8,0x2b,0x00]
+; X86-AVX1-NEXT: retl # encoding: [0xc3]
+;
+; X86-AVX512-LABEL: test_mm_stream_si128:
+; X86-AVX512: # %bb.0:
+; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-AVX512-NEXT: vmovntps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x00]
+; X86-AVX512-NEXT: retl # encoding: [0xc3]
;
; X64-SSE-LABEL: test_mm_stream_si128:
; X64-SSE: # %bb.0:
-; X64-SSE-NEXT: movntps %xmm0, (%rdi)
-; X64-SSE-NEXT: retq
+; X64-SSE-NEXT: movntps %xmm0, (%rdi) # encoding: [0x0f,0x2b,0x07]
+; X64-SSE-NEXT: retq # encoding: [0xc3]
;
-; X64-AVX-LABEL: test_mm_stream_si128:
-; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vmovntps %xmm0, (%rdi)
-; X64-AVX-NEXT: retq
+; X64-AVX1-LABEL: test_mm_stream_si128:
+; X64-AVX1: # %bb.0:
+; X64-AVX1-NEXT: vmovntps %xmm0, (%rdi) # encoding: [0xc5,0xf8,0x2b,0x07]
+; X64-AVX1-NEXT: retq # encoding: [0xc3]
+;
+; X64-AVX512-LABEL: test_mm_stream_si128:
+; X64-AVX512: # %bb.0:
+; X64-AVX512-NEXT: vmovntps %xmm0, (%rdi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x2b,0x07]
+; X64-AVX512-NEXT: retq # encoding: [0xc3]
store <2 x i64> %a1, <2 x i64>* %a0, align 16, !nontemporal !0
ret void
}
@@ -4318,13 +5776,18 @@ define void @test_mm_stream_si128(<2 x i64> *%a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi8:
; SSE: # %bb.0:
-; SSE-NEXT: psubb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf8,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_sub_epi8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_sub_epi8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf8,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_sub_epi8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf8,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = sub <16 x i8> %arg0, %arg1
@@ -4335,13 +5798,18 @@ define <2 x i64> @test_mm_sub_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi16:
; SSE: # %bb.0:
-; SSE-NEXT: psubw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xf9,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_sub_epi16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_sub_epi16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xf9,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_sub_epi16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = sub <8 x i16> %arg0, %arg1
@@ -4352,13 +5820,18 @@ define <2 x i64> @test_mm_sub_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi32:
; SSE: # %bb.0:
-; SSE-NEXT: psubd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubd %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfa,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_sub_epi32:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_sub_epi32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfa,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_sub_epi32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = sub <4 x i32> %arg0, %arg1
@@ -4369,13 +5842,18 @@ define <2 x i64> @test_mm_sub_epi32(<2 x i64> %a0, <2 x i64> %a1) nounwind {
define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_sub_epi64:
; SSE: # %bb.0:
-; SSE-NEXT: psubq %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubq %xmm1, %xmm0 # encoding: [0x66,0x0f,0xfb,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_sub_epi64:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubq %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_sub_epi64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfb,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_sub_epi64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfb,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = sub <2 x i64> %a0, %a1
ret <2 x i64> %res
}
@@ -4383,13 +5861,18 @@ define <2 x i64> @test_mm_sub_epi64(<2 x i64> %a0, <2 x i64> %a1) nounwind {
define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sub_pd:
; SSE: # %bb.0:
-; SSE-NEXT: subpd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: subpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x5c,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_sub_pd:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_sub_pd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x5c,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_sub_pd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x5c,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = fsub <2 x double> %a0, %a1
ret <2 x double> %res
}
@@ -4397,13 +5880,18 @@ define <2 x double> @test_mm_sub_pd(<2 x double> %a0, <2 x double> %a1) nounwind
define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_sub_sd:
; SSE: # %bb.0:
-; SSE-NEXT: subsd %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: subsd %xmm1, %xmm0 # encoding: [0xf2,0x0f,0x5c,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_sub_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_sub_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xfb,0x5c,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_sub_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vsubsd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x5c,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%ext0 = extractelement <2 x double> %a0, i32 0
%ext1 = extractelement <2 x double> %a1, i32 0
%fsub = fsub double %ext0, %ext1
@@ -4414,13 +5902,18 @@ define <2 x double> @test_mm_sub_sd(<2 x double> %a0, <2 x double> %a1) nounwind
define <2 x i64> @test_mm_subs_epi8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epi8:
; SSE: # %bb.0:
-; SSE-NEXT: psubsb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubsb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe8,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_subs_epi8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_subs_epi8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe8,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_subs_epi8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %arg0, <16 x i8> %arg1)
@@ -4432,13 +5925,18 @@ declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone
define <2 x i64> @test_mm_subs_epi16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epi16:
; SSE: # %bb.0:
-; SSE-NEXT: psubsw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubsw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xe9,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_subs_epi16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubsw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_subs_epi16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xe9,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_subs_epi16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %arg0, <8 x i16> %arg1)
@@ -4450,13 +5948,18 @@ declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone
define <2 x i64> @test_mm_subs_epu8(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu8:
; SSE: # %bb.0:
-; SSE-NEXT: psubusb %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubusb %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd8,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_subs_epu8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubusb %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_subs_epu8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd8,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_subs_epu8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubusb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd8,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = call <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8> %arg0, <16 x i8> %arg1)
@@ -4468,13 +5971,18 @@ declare <16 x i8> @llvm.x86.sse2.psubus.b(<16 x i8>, <16 x i8>) nounwind readnon
define <2 x i64> @test_mm_subs_epu16(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_subs_epu16:
; SSE: # %bb.0:
-; SSE-NEXT: psubusw %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: psubusw %xmm1, %xmm0 # encoding: [0x66,0x0f,0xd9,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_subs_epu16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpsubusw %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_subs_epu16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xd9,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_subs_epu16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpsubusw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd9,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = call <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16> %arg0, <8 x i16> %arg1)
@@ -4486,21 +5994,30 @@ declare <8 x i16> @llvm.x86.sse2.psubus.w(<8 x i16>, <8 x i16>) nounwind readnon
define i32 @test_mm_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomieq_sd:
; SSE: # %bb.0:
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setnp %al
-; SSE-NEXT: sete %cl
-; SSE-NEXT: andb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
+; SSE-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
+; SSE-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
+; SSE-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
+; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_ucomieq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setnp %al
-; AVX-NEXT: sete %cl
-; AVX-NEXT: andb %al, %cl
-; AVX-NEXT: movzbl %cl, %eax
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_ucomieq_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX1-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
+; AVX1-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
+; AVX1-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
+; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_ucomieq_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX512-NEXT: setnp %al # encoding: [0x0f,0x9b,0xc0]
+; AVX512-NEXT: sete %cl # encoding: [0x0f,0x94,0xc1]
+; AVX512-NEXT: andb %al, %cl # encoding: [0x20,0xc1]
+; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
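; NOTE: test_mm_ucomieq_sd needs three flag-reading instructions because
; ucomisd signals an unordered (NaN) operand by setting PF: "equal" is ZF=1
; and PF=0, hence setnp + sete + andb, with movzbl widening the i8 result.
; test_mm_ucomineq_sd below is the dual case, "not equal or unordered", built
; from setp + setne + orb.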
@@ -4509,17 +6026,24 @@ declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_mm_ucomige_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomige_sd:
; SSE: # %bb.0:
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setae %al
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
+; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_ucomige_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setae %al
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_ucomige_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_ucomige_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
@@ -4528,17 +6052,24 @@ declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_mm_ucomigt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomigt_sd:
; SSE: # %bb.0:
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: seta %al
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
+; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_ucomigt_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: seta %al
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_ucomigt_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_ucomigt_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
@@ -4547,17 +6078,24 @@ declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_mm_ucomile_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomile_sd:
; SSE: # %bb.0:
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: ucomisd %xmm0, %xmm1
-; SSE-NEXT: setae %al
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
+; SSE-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_ucomile_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vucomisd %xmm0, %xmm1
-; AVX-NEXT: setae %al
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_ucomile_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
+; AVX1-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_ucomile_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
+; AVX512-NEXT: setae %al # encoding: [0x0f,0x93,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
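; NOTE: The single-flag predicates (ge/gt, and le/lt via swapped operands) can
; instead zero the result register with xorl before the compare and let one
; setae/seta write the final i32 directly; the eq/neq forms above and below
; cannot use this trick because they must combine two setcc results first.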
@@ -4566,17 +6104,24 @@ declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_mm_ucomilt_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomilt_sd:
; SSE: # %bb.0:
-; SSE-NEXT: xorl %eax, %eax
-; SSE-NEXT: ucomisd %xmm0, %xmm1
-; SSE-NEXT: seta %al
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; SSE-NEXT: ucomisd %xmm0, %xmm1 # encoding: [0x66,0x0f,0x2e,0xc8]
+; SSE-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_ucomilt_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: xorl %eax, %eax
-; AVX-NEXT: vucomisd %xmm0, %xmm1
-; AVX-NEXT: seta %al
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_ucomilt_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX1-NEXT: vucomisd %xmm0, %xmm1 # encoding: [0xc5,0xf9,0x2e,0xc8]
+; AVX1-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_ucomilt_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: xorl %eax, %eax # encoding: [0x31,0xc0]
+; AVX512-NEXT: vucomisd %xmm0, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc8]
+; AVX512-NEXT: seta %al # encoding: [0x0f,0x97,0xc0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
@@ -4585,21 +6130,30 @@ declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>) nounwind readn
define i32 @test_mm_ucomineq_sd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_ucomineq_sd:
; SSE: # %bb.0:
-; SSE-NEXT: ucomisd %xmm1, %xmm0
-; SSE-NEXT: setp %al
-; SSE-NEXT: setne %cl
-; SSE-NEXT: orb %al, %cl
-; SSE-NEXT: movzbl %cl, %eax
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: ucomisd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x2e,0xc1]
+; SSE-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
+; SSE-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
+; SSE-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
+; SSE-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_ucomineq_sd:
-; AVX: # %bb.0:
-; AVX-NEXT: vucomisd %xmm1, %xmm0
-; AVX-NEXT: setp %al
-; AVX-NEXT: setne %cl
-; AVX-NEXT: orb %al, %cl
-; AVX-NEXT: movzbl %cl, %eax
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_ucomineq_sd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vucomisd %xmm1, %xmm0 # encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX1-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
+; AVX1-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
+; AVX1-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
+; AVX1-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_ucomineq_sd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vucomisd %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x2e,0xc1]
+; AVX512-NEXT: setp %al # encoding: [0x0f,0x9a,0xc0]
+; AVX512-NEXT: setne %cl # encoding: [0x0f,0x95,0xc1]
+; AVX512-NEXT: orb %al, %cl # encoding: [0x08,0xc1]
+; AVX512-NEXT: movzbl %cl, %eax # encoding: [0x0f,0xb6,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %a0, <2 x double> %a1)
ret i32 %res
}
@@ -4608,27 +6162,35 @@ declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>) nounwind read
define <2 x double> @test_mm_undefined_pd() {
; CHECK-LABEL: test_mm_undefined_pd:
; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
ret <2 x double> undef
}

define <2 x i64> @test_mm_undefined_si128() {
; CHECK-LABEL: test_mm_undefined_si128:
; CHECK: # %bb.0:
-; CHECK-NEXT: ret{{[l|q]}}
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
ret <2 x i64> undef
}

define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi8:
; SSE: # %bb.0:
-; SSE-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: punpckhbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x68,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpackhi_epi8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpunpckhbw {{.*#+}} xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpackhi_epi8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x68,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpackhi_epi8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
@@ -4639,13 +6201,21 @@ define <2 x i64> @test_mm_unpackhi_epi8(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi16:
; SSE: # %bb.0:
-; SSE-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: punpckhwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x69,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpackhi_epi16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpunpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpackhi_epi16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x69,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpackhi_epi16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
@@ -4656,13 +6226,21 @@ define <2 x i64> @test_mm_unpackhi_epi16(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi32:
; SSE: # %bb.0:
-; SSE-NEXT: unpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: unpckhps %xmm1, %xmm0 # encoding: [0x0f,0x15,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpackhi_epi32:
-; AVX: # %bb.0:
-; AVX-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpackhi_epi32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x15,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpackhi_epi32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
@@ -4673,13 +6251,21 @@ define <2 x i64> @test_mm_unpackhi_epi32(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpackhi_epi64:
; SSE: # %bb.0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpackhi_epi64:
-; AVX: # %bb.0:
-; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpackhi_epi64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpackhi_epi64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
ret <2 x i64> %res
}
@@ -4687,13 +6273,21 @@ define <2 x i64> @test_mm_unpackhi_epi64(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpackhi_pd:
; SSE: # %bb.0:
-; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: unpckhpd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x15,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[1],xmm1[1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpackhi_pd:
-; AVX: # %bb.0:
-; AVX-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpackhi_pd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x15,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[1],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpackhi_pd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[1],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 1, i32 3>
ret <2 x double> %res
}
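; NOTE: The epi32 unpacks are matched to the floating-point shuffles (unpckhps
; here, unpcklps/movlhps below) rather than punpckhdq/punpckldq: for a pure bit
; shuffle the execution domain does not change the result, and the SSE1 forms
; are a byte shorter; compare unpckhps [0x0f,0x15,0xc1] with unpckhpd
; [0x66,0x0f,0x15,0xc1] above.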
@@ -4701,13 +6295,21 @@ define <2 x double> @test_mm_unpackhi_pd(<2 x double> %a0, <2 x double> %a1) {
define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi8:
; SSE: # %bb.0:
-; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: punpcklbw %xmm1, %xmm0 # encoding: [0x66,0x0f,0x60,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpacklo_epi8:
-; AVX: # %bb.0:
-; AVX-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpacklo_epi8:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x60,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpacklo_epi8:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <16 x i8>
%arg1 = bitcast <2 x i64> %a1 to <16 x i8>
%res = shufflevector <16 x i8> %arg0, <16 x i8> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
@@ -4718,13 +6320,21 @@ define <2 x i64> @test_mm_unpacklo_epi8(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi16:
; SSE: # %bb.0:
-; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: punpcklwd %xmm1, %xmm0 # encoding: [0x66,0x0f,0x61,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpacklo_epi16:
-; AVX: # %bb.0:
-; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpacklo_epi16:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0x61,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpacklo_epi16:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <8 x i16>
%arg1 = bitcast <2 x i64> %a1 to <8 x i16>
%res = shufflevector <8 x i16> %arg0, <8 x i16> %arg1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
@@ -4735,13 +6345,21 @@ define <2 x i64> @test_mm_unpacklo_epi16(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi32:
; SSE: # %bb.0:
-; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: unpcklps %xmm1, %xmm0 # encoding: [0x0f,0x14,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpacklo_epi32:
-; AVX: # %bb.0:
-; AVX-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpacklo_epi32:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x14,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpacklo_epi32:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res = shufflevector <4 x i32> %arg0,<4 x i32> %arg1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
@@ -4752,13 +6370,21 @@ define <2 x i64> @test_mm_unpacklo_epi32(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
; SSE-LABEL: test_mm_unpacklo_epi64:
; SSE: # %bb.0:
-; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpacklo_epi64:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpacklo_epi64:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpacklo_epi64:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
ret <2 x i64> %res
}
@@ -4766,13 +6392,21 @@ define <2 x i64> @test_mm_unpacklo_epi64(<2 x i64> %a0, <2 x i64> %a1) {
define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
; SSE-LABEL: test_mm_unpacklo_pd:
; SSE: # %bb.0:
-; SSE-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: movlhps %xmm1, %xmm0 # encoding: [0x0f,0x16,0xc1]
+; SSE-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_unpacklo_pd:
-; AVX: # %bb.0:
-; AVX-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_unpacklo_pd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x16,0xc1]
+; AVX1-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_unpacklo_pd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x16,0xc1]
+; AVX512-NEXT: # xmm0 = xmm0[0],xmm1[0]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = shufflevector <2 x double> %a0, <2 x double> %a1, <2 x i32> <i32 0, i32 2>
ret <2 x double> %res
}
@@ -4780,13 +6414,18 @@ define <2 x double> @test_mm_unpacklo_pd(<2 x double> %a0, <2 x double> %a1) {
define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind {
; SSE-LABEL: test_mm_xor_pd:
; SSE: # %bb.0:
-; SSE-NEXT: xorps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_xor_pd:
-; AVX: # %bb.0:
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_xor_pd:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_xor_pd:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%arg0 = bitcast <2 x double> %a0 to <4 x i32>
%arg1 = bitcast <2 x double> %a1 to <4 x i32>
%res = xor <4 x i32> %arg0, %arg1
@@ -4797,13 +6436,18 @@ define <2 x double> @test_mm_xor_pd(<2 x double> %a0, <2 x double> %a1) nounwind
define <2 x i64> @test_mm_xor_si128(<2 x i64> %a0, <2 x i64> %a1) nounwind {
; SSE-LABEL: test_mm_xor_si128:
; SSE: # %bb.0:
-; SSE-NEXT: xorps %xmm1, %xmm0
-; SSE-NEXT: ret{{[l|q]}}
+; SSE-NEXT: xorps %xmm1, %xmm0 # encoding: [0x0f,0x57,0xc1]
+; SSE-NEXT: ret{{[l|q]}} # encoding: [0xc3]
;
-; AVX-LABEL: test_mm_xor_si128:
-; AVX: # %bb.0:
-; AVX-NEXT: vxorps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: ret{{[l|q]}}
+; AVX1-LABEL: test_mm_xor_si128:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
+; AVX1-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+;
+; AVX512-LABEL: test_mm_xor_si128:
+; AVX512: # %bb.0:
+; AVX512-NEXT: vxorps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x57,0xc1]
+; AVX512-NEXT: ret{{[l|q]}} # encoding: [0xc3]
%res = xor <2 x i64> %a0, %a1
ret <2 x i64> %res
}
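; NOTE: Both test_mm_xor_pd and test_mm_xor_si128 come out as xorps
; [0x0f,0x57,0xc1]: bitwise logic is domain-agnostic, so the backend can use
; the SSE1 opcode, which avoids the 0x66 prefix that xorpd or pxor would add.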