Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--  llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll    |  44
-rw-r--r--  llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll            | 117
-rw-r--r--  llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll    | 214
-rw-r--r--  llvm/test/CodeGen/X86/avx512bw-intrinsics.ll            | 215
-rw-r--r--  llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll  | 681
-rw-r--r--  llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll          | 680
-rw-r--r--  llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll    |  84
-rw-r--r--  llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll            |  84
8 files changed, 1049 insertions, 1070 deletions
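The hunks below all follow two patterns. In the *-upgrade.ll files, tests for the legacy x86-specific saturating add/subtract intrinsics (llvm.x86.avx2.padds.b and friends) are re-expressed with the generic llvm.sadd.sat / llvm.ssub.sat intrinsics, which select the same vpaddsb/vpsubsw instructions. The masked AVX-512 tests are rewritten to call the unmasked intrinsic and apply the mask with a bitcast plus select, which instruction selection folds back into a single {%k1}-predicated instruction. A minimal standalone sketch of both patterns (illustrative function names, not part of the commit):

; Pattern 1: generic saturating add in place of the x86-specific
; @llvm.x86.avx2.padds.b; both forms lower to vpaddsb.
define <32 x i8> @saturating_add_upgraded(<32 x i8> %a, <32 x i8> %b) {
  %r = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a, <32 x i8> %b)
  ret <32 x i8> %r
}

; Pattern 2: unmasked intrinsic plus bitcast/select; the backend folds
; the select into a masked vpaddsw %zmm1, %zmm0, %zmm2 {%k1}.
define <32 x i16> @masked_saturating_add(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
  %r = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
  %m = bitcast i32 %mask to <32 x i1>
  %sel = select <32 x i1> %m, <32 x i16> %r, <32 x i16> %passThru
  ret <32 x i16> %sel
}

declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>)
declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)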
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
index 81e10a5d242..25d7d088420 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86-upgrade.ll
@@ -614,6 +614,28 @@ define <4 x i64> @test_x86_avx2_pmul_dq(<8 x i32> %a0, <8 x i32> %a1) {
 declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
 
 
+define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
+; CHECK-LABEL: test_x86_avx2_padds_b:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpaddsb %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: test_x86_avx2_padds_w:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpaddsw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
+
+
 define <32 x i8> @test_x86_avx2_paddus_b(<32 x i8> %a0, <32 x i8> %a1) {
 ; CHECK-LABEL: test_x86_avx2_paddus_b:
 ; CHECK: ## %bb.0:
@@ -636,6 +658,28 @@ define <16 x i16> @test_x86_avx2_paddus_w(<16 x i16> %a0, <16 x i16> %a1) {
 declare <16 x i16> @llvm.x86.avx2.paddus.w(<16 x i16>, <16 x i16>) nounwind readnone
 
 
+define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
+; CHECK-LABEL: test_x86_avx2_psubs_b:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpsubsb %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
+ ret <32 x i8> %res
+}
+declare <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8>, <32 x i8>) nounwind readnone
+
+
+define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
+; CHECK-LABEL: test_x86_avx2_psubs_w:
+; CHECK: ## %bb.0:
+; CHECK-NEXT: vpsubsw %ymm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %res = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
+ ret <16 x i16> %res
+}
+declare <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16>, <16 x i16>) nounwind readnone
+
+
 define <32 x i8> @test_x86_avx2_psubus_b(<32 x i8> %a0, <32 x i8> %a1) {
 ; CHECK-LABEL: test_x86_avx2_psubus_b:
 ; CHECK: ## %bb.0:
diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
index 101448e22ac..614973aa66f 100644
--- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -151,38 +151,6 @@ define <32 x i8> @test_x86_avx2_packuswb_fold() {
 }
 
 
-define <32 x i8> @test_x86_avx2_padds_b(<32 x i8> %a0, <32 x i8> %a1) {
-; AVX2-LABEL: test_x86_avx2_padds_b:
-; AVX2: ## %bb.0:
-; AVX2-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xec,0xc1]
-; AVX2-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_x86_avx2_padds_b:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xec,0xc1]
-; AVX512VL-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
- ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_padds_w(<16 x i16> %a0, <16 x i16> %a1) {
-; AVX2-LABEL: test_x86_avx2_padds_w:
-; AVX2: ## %bb.0:
-; AVX2-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xed,0xc1]
-; AVX2-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_x86_avx2_padds_w:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xed,0xc1]
-; AVX512VL-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
- ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) nounwind readnone
-
-
 define <8 x i32> @test_x86_avx2_pmadd_wd(<16 x i16> %a0, <16 x i16> %a1) {
 ; AVX2-LABEL: test_x86_avx2_pmadd_wd:
 ; AVX2: ## %bb.0:
@@ -607,37 +575,6 @@ define <16 x i16> @test_x86_avx2_psrli_w(<16 x i16> %a0) {
 declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) nounwind readnone
 
-
-define <32 x i8> @test_x86_avx2_psubs_b(<32 x i8> %a0, <32 x i8> %a1) {
-; AVX2-LABEL: test_x86_avx2_psubs_b:
-; AVX2: ## %bb.0:
-; AVX2-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe8,0xc1]
-; AVX2-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_x86_avx2_psubs_b:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe8,0xc1]
-; AVX512VL-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a0, <32 x i8> %a1) ; <<32 x i8>> [#uses=1]
- ret <32 x i8> %res
-}
-declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) nounwind readnone
-
-
-define <16 x i16> @test_x86_avx2_psubs_w(<16 x i16> %a0, <16 x i16> %a1) {
-; AVX2-LABEL: test_x86_avx2_psubs_w:
-; AVX2: ## %bb.0:
-; AVX2-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0xc5,0xfd,0xe9,0xc1]
-; AVX2-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
-;
-; AVX512VL-LABEL: test_x86_avx2_psubs_w:
-; AVX512VL: ## %bb.0:
-; AVX512VL-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe9,0xc1]
-; AVX512VL-NEXT: ret{{[l|q]}} ## encoding: [0xc3]
- %res = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a0, <16 x i16> %a1) ; <<16 x i16>> [#uses=1]
- ret <16 x i16> %res
-}
-declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) nounwind readnone
-
 define <8 x i32> @test_x86_avx2_phadd_d(<8 x i32> %a0, <8 x i32> %a1) {
 ; CHECK-LABEL: test_x86_avx2_phadd_d:
 ; CHECK: ## %bb.0:
@@ -848,28 +785,28 @@ define <16 x i16> @test_x86_avx2_packusdw_fold() {
 ; X86-AVX: ## %bb.0:
 ; X86-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
 ; X86-AVX-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
-; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI51_0, kind: FK_Data_4
+; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI47_0, kind: FK_Data_4
 ; X86-AVX-NEXT: retl ## encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
 ; X86-AVX512VL: ## %bb.0:
-; X86-AVX512VL-NEXT: vmovaps LCPI51_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
+; X86-AVX512VL-NEXT: vmovaps LCPI47_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
 ; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI51_0, kind: FK_Data_4
+; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI47_0, kind: FK_Data_4
 ; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_packusdw_fold:
 ; X64-AVX: ## %bb.0:
 ; X64-AVX-NEXT: vmovaps {{.*#+}} ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
 ; X64-AVX-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
-; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI51_0-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI47_0-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: retq ## encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_packusdw_fold:
 ; X64-AVX512VL: ## %bb.0:
 ; X64-AVX512VL-NEXT: vmovaps {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [0,0,0,0,255,32767,65535,0,0,0,0,0,0,0,0,0]
 ; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfc,0x28,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI51_0-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI47_0-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
 %res = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> zeroinitializer, <8 x i32> <i32 255, i32 32767, i32 65535, i32 -1, i32 -32767, i32 -65535, i32 0, i32 -256>)
 ret <16 x i16> %res
@@ -1376,36 +1313,36 @@ define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) {
 ; X86-AVX: ## %bb.0:
 ; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
 ; X86-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
-; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI83_0, kind: FK_Data_4
-; X86-AVX-NEXT: vpsravd LCPI83_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
-; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI83_1, kind: FK_Data_4
+; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI79_0, kind: FK_Data_4
+; X86-AVX-NEXT: vpsravd LCPI79_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
+; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI79_1, kind: FK_Data_4
 ; X86-AVX-NEXT: retl ## encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
 ; X86-AVX512VL: ## %bb.0:
-; X86-AVX512VL-NEXT: vmovdqa LCPI83_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
+; X86-AVX512VL-NEXT: vmovdqa LCPI79_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
 ; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI83_0, kind: FK_Data_4
-; X86-AVX512VL-NEXT: vpsravd LCPI83_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI83_1, kind: FK_Data_4
+; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI79_0, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsravd LCPI79_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI79_1, kind: FK_Data_4
 ; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrav_d_const:
 ; X64-AVX: ## %bb.0:
 ; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23]
 ; X64-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
-; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI83_0-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI79_0-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
-; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI83_1-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI79_1-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: retq ## encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const:
 ; X64-AVX512VL: ## %bb.0:
 ; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23]
 ; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI83_0-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI79_0-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI83_1-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI79_1-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
 %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> <i32 2, i32 9, i32 -12, i32 23>, <4 x i32> <i32 1, i32 18, i32 35, i32 52>)
 ret <4 x i32> %res
@@ -1431,36 +1368,36 @@ define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1)
 ; X86-AVX: ## %bb.0:
 ; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
 ; X86-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
-; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4
-; X86-AVX-NEXT: vpsravd LCPI85_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
-; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4
+; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI81_0, kind: FK_Data_4
+; X86-AVX-NEXT: vpsravd LCPI81_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
+; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI81_1, kind: FK_Data_4
 ; X86-AVX-NEXT: retl ## encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
 ; X86-AVX512VL: ## %bb.0:
-; X86-AVX512VL-NEXT: vmovdqa LCPI85_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
+; X86-AVX512VL-NEXT: vmovdqa LCPI81_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
 ; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI85_0, kind: FK_Data_4
-; X86-AVX512VL-NEXT: vpsravd LCPI85_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI85_1, kind: FK_Data_4
+; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI81_0, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsravd LCPI81_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI81_1, kind: FK_Data_4
 ; X86-AVX512VL-NEXT: retl ## encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const:
 ; X64-AVX: ## %bb.0:
 ; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
 ; X64-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
-; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI85_0-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI81_0-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
-; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI85_1-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI81_1-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: retq ## encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const:
 ; X64-AVX512VL: ## %bb.0:
 ; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
 ; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI85_0-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI81_0-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI85_1-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI81_1-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: retq ## encoding: [0xc3]
 %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>)
 ret <8 x i32> %res
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
index c623d8bbf2d..d8e7d247c30 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics-upgrade.ll
@@ -3285,6 +3285,113 @@ define <64 x i8> @test_mask_subs_epu8_rmkz_512(<64 x i8> %a, <64 x i8>* %ptr_b,
 declare <64 x i8> @llvm.x86.avx512.mask.psubus.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
 
 
+define <32 x i16> @test_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+; CHECK-LABEL: test_adds_epi16_rr_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @test_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+; X86-LABEL: test_adds_epi16_rrk_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
+; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rrk_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
+; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @test_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+; X86-LABEL: test_adds_epi16_rrkz_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rrkz_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @test_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+; X86-LABEL: test_adds_epi16_rm_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rm_512:
+; X64: # %bb.0:
+; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @test_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+; X86-LABEL: test_adds_epi16_rmk_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rmk_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @test_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+; X86-LABEL: test_adds_epi16_rmkz_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rmkz_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
+}
+
+declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)
+
 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
 ; CHECK-LABEL: test_mask_adds_epi16_rr_512:
 ; CHECK: # %bb.0:
@@ -3384,6 +3491,113 @@ define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr
 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
 
 
+define <32 x i16> @test_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
+; CHECK-LABEL: test_subs_epi16_rr_512:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @test_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
+; X86-LABEL: test_subs_epi16_rrk_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
+; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_subs_epi16_rrk_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
+; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @test_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
+; X86-LABEL: test_subs_epi16_rrkz_512:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_subs_epi16_rrkz_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @test_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
+; X86-LABEL: test_subs_epi16_rm_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_subs_epi16_rm_512:
+; X64: # %bb.0:
+; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
+ ret <32 x i16> %1
+}
+
+define <32 x i16> @test_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
+; X86-LABEL: test_subs_epi16_rmk_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
+; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_subs_epi16_rmk_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
+; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
+ ret <32 x i16> %3
+}
+
+define <32 x i16> @test_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
+; X86-LABEL: test_subs_epi16_rmkz_512:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_subs_epi16_rmkz_512:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i16>, <32 x i16>* %ptr_b
+ %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
+ ret <32 x i16> %3
+}
+
+declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>)
+
 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
 ; CHECK-LABEL: test_mask_subs_epi16_rr_512:
 ; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
index 17101447f01..348c2748fac 100644
--- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -683,221 +683,6 @@ define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %pt
 declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
 
 
-define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
-; CHECK-LABEL: test_mask_adds_epi16_rr_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
- ret <32 x i16> %1
-}
-
-define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
-; X86-LABEL: test_mask_adds_epi16_rrk_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
-; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_adds_epi16_rrk_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
-; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
- ret <32 x i16> %3
-}
-
-define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
-; X86-LABEL: test_mask_adds_epi16_rrkz_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_adds_epi16_rrkz_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
- ret <32 x i16> %3
-}
-
-define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
-; X86-LABEL: test_mask_adds_epi16_rm_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_adds_epi16_rm_512:
-; X64: # %bb.0:
-; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %b = load <32 x i16>, <32 x i16>* %ptr_b
- %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
- ret <32 x i16> %1
-}
-
-define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
-; X86-LABEL: test_mask_adds_epi16_rmk_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
-; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_adds_epi16_rmk_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
-; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
- %b = load <32 x i16>, <32 x i16>* %ptr_b
- %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
- ret <32 x i16> %3
-}
-
-define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
-; X86-LABEL: test_mask_adds_epi16_rmkz_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_adds_epi16_rmkz_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %b = load <32 x i16>, <32 x i16>* %ptr_b
- %1 = call <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
- ret <32 x i16> %3
-}
-
-declare <32 x i16> @llvm.x86.avx512.padds.w.512(<32 x i16>, <32 x i16>)
-
-define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
-; CHECK-LABEL: test_mask_subs_epi16_rr_512:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
-; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
- ret <32 x i16> %1
-}
-
-define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
-; X86-LABEL: test_mask_subs_epi16_rrk_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
-; X86-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_subs_epi16_rrk_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
-; X64-NEXT: vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
-; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
- ret <32 x i16> %3
-}
-
-define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
-; X86-LABEL: test_mask_subs_epi16_rrkz_512:
-; X86: # %bb.0:
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
-; X86-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_subs_epi16_rrkz_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
-; X64-NEXT: vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
- %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
- ret <32 x i16> %3
-}
-
-define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
-; X86-LABEL: test_mask_subs_epi16_rm_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_subs_epi16_rm_512:
-; X64: # %bb.0:
-; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %b = load <32 x i16>, <32 x i16>* %ptr_b
- %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
- ret <32 x i16> %1
-}
-
-define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
-; X86-LABEL: test_mask_subs_epi16_rmk_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
-; X86-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_subs_epi16_rmk_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
-; X64-NEXT: vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
-; X64-NEXT: retq # encoding: [0xc3]
- %b = load <32 x i16>, <32 x i16>* %ptr_b
- %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
- ret <32 x i16> %3
-}
-
-define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
-; X86-LABEL: test_mask_subs_epi16_rmkz_512:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
-; X86-NEXT: vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
-; X86-NEXT: retl # encoding: [0xc3]
-;
-; X64-LABEL: test_mask_subs_epi16_rmkz_512:
-; X64: # %bb.0:
-; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
-; X64-NEXT: vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
-; X64-NEXT: retq # encoding: [0xc3]
- %b = load <32 x i16>, <32 x i16>* %ptr_b
- %1 = call <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16> %a, <32 x i16> %b)
- %2 = bitcast i32 %mask to <32 x i1>
- %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
- ret <32 x i16> %3
-}
-
-declare <32 x i16> @llvm.x86.avx512.psubs.w.512(<32 x i16>, <32 x i16>)
-
-
 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
 ; X86: # %bb.0:
diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
index 21231b07109..e853bd5b1b2 100644
--- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
+++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics-upgrade.ll
@@ -7348,6 +7348,338 @@ define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr
 declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
 
 
+declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) #0
+
+define <8 x i16> @test_test_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+; X86-LABEL: test_test_subs_epi16_rrk_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
+; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rrk_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
+; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_test_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+; X86-LABEL: test_test_subs_epi16_rrkz_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rrkz_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_test_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+; X86-LABEL: test_test_subs_epi16_rmk_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
+; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rmk_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_test_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+; X86-LABEL: test_test_subs_epi16_rmkz_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
+; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rmkz_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
+ ret <8 x i16> %3
+}
+
+declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) #0
+
+define <16 x i16> @test_test_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+; X86-LABEL: test_test_subs_epi16_rrk_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
+; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rrk_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
+; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_test_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+; X86-LABEL: test_test_subs_epi16_rrkz_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rrkz_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_test_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
+; X86-LABEL: test_test_subs_epi16_rmk_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rmk_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_test_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
+; X86-LABEL: test_test_subs_epi16_rmkz_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi16_rmkz_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <16 x i16>, <16 x i16>* %ptr_b
+ %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
+ ret <16 x i16> %3
+}
+
+declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) #0
+
+define <16 x i8> @test_test_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
+; X86-LABEL: test_test_subs_epi8_rrk_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
+; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rrk_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
+; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @test_test_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+; X86-LABEL: test_test_subs_epi8_rrkz_128:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rrkz_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @test_test_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
+; X86-LABEL: test_test_subs_epi8_rmk_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rmk_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru
+ ret <16 x i8> %3
+}
+
+define <16 x i8> @test_test_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
+; X86-LABEL: test_test_subs_epi8_rmkz_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rmkz_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <16 x i8>, <16 x i8>* %ptr_b
+ %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer
+ ret <16 x i8> %3
+}
+
+declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) #0
+
+define <32 x i8> @test_test_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
+; X86-LABEL: test_test_subs_epi8_rrk_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
+; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rrk_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
+; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @test_test_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+; X86-LABEL: test_test_subs_epi8_rrkz_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rrkz_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @test_test_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
+; X86-LABEL: test_test_subs_epi8_rmk_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08]
+; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rmk_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
+; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru
+ ret <32 x i8> %3
+}
+
+define <32 x i8> @test_test_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
+; X86-LABEL: test_test_subs_epi8_rmkz_256:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
+; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_test_subs_epi8_rmkz_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <32 x i8>, <32 x i8>* %ptr_b
+ %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b)
+ %2 = bitcast i32 %mask to <32 x i1>
+ %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer
+ ret <32 x i8> %3
+}
+
 define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: test_mask_subs_epi16_rr_128:
 ; CHECK: # %bb.0:
@@ -7550,6 +7882,354 @@ define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr
 declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
 
 
+declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) #0
+
+define <8 x i16> @test_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
+; X86-LABEL: test_adds_epi16_rrk_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
+; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rrk_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
+; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
+; X86-LABEL: test_adds_epi16_rrkz_128:
+; X86: # %bb.0:
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
+; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
+; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rrkz_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
+; X86-LABEL: test_adds_epi16_rmk_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
+; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08]
+; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rmk_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
+; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @test_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
+; X86-LABEL: test_adds_epi16_rmkz_128:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
+; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
+; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rmkz_128:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
+; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
+; X64-NEXT: retq # encoding: [0xc3]
+ %b = load <8 x i16>, <8 x i16>* %ptr_b
+ %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b)
+ %2 = bitcast i8 %mask to <8 x i1>
+ %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer
+ ret <8 x i16> %3
+}
+
+declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) #0
+
+define <16 x i16> @test_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
+; X86-LABEL: test_adds_epi16_rrk_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
+; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rrk_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
+; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
+; X86-LABEL: test_adds_epi16_rrkz_256:
+; X86: # %bb.0:
+; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
+; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
+; X86-NEXT: retl # encoding: [0xc3]
+;
+; X64-LABEL: test_adds_epi16_rrkz_256:
+; X64: # %bb.0:
+; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
+; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
+; X64-NEXT: retq # encoding: [0xc3]
+ %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b)
+ %2 = bitcast i16 %mask to <16 x i1>
+ %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer
+ ret <16 x i16> %3
+}
+
+define <16 x i16> @test_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x
i16> %passThru, i16 %mask) { +; X86-LABEL: test_adds_epi16_rmk_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi16_rmk_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] +; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru + ret <16 x i16> %3 +} + +define <16 x i16> @test_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { +; X86-LABEL: test_adds_epi16_rmkz_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi16_rmkz_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] +; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %b = load <16 x i16>, <16 x i16>* %ptr_b + %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer + ret <16 x i16> %3 +} + +declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) #0 + +define <16 x i8> @test_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { +; X86-LABEL: test_adds_epi8_rrk_128: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] +; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rrk_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] +; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru + ret <16 x i8> %3 +} + +define <16 x i8> @test_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { +; X86-LABEL: test_adds_epi8_rrkz_128: +; X86: # %bb.0: +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rrkz_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: 
[0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer + ret <16 x i8> %3 +} + +define <16 x i8> @test_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { +; X86-LABEL: test_adds_epi8_rm_128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rm_128: +; X64: # %bb.0: +; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %b = load <16 x i8>, <16 x i8>* %ptr_b + %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) + ret <16 x i8> %1 +} + +define <16 x i8> @test_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { +; X86-LABEL: test_adds_epi8_rmk_128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08] +; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rmk_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] +; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] +; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %b = load <16 x i8>, <16 x i8>* %ptr_b + %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru + ret <16 x i8> %3 +} + +define <16 x i8> @test_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { +; X86-LABEL: test_adds_epi8_rmkz_128: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rmkz_128: +; X64: # %bb.0: +; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] +; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %b = load <16 x i8>, <16 x i8>* %ptr_b + %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) + %2 = bitcast i16 %mask to <16 x i1> + %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer + ret <16 x i8> %3 +} + +declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) #0 + +define <32 x i8> @test_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { +; X86-LABEL: test_adds_epi8_rrk_256: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] +; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX 
Compression encoding: [0xc5,0xfd,0x6f,0xc2] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rrk_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] +; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru + ret <32 x i8> %3 +} + +define <32 x i8> @test_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { +; X86-LABEL: test_adds_epi8_rrkz_256: +; X86: # %bb.0: +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] +; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rrkz_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] +; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer + ret <32 x i8> %3 +} + +define <32 x i8> @test_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { +; X86-LABEL: test_adds_epi8_rmk_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08] +; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rmk_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] +; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] +; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] +; X64-NEXT: retq # encoding: [0xc3] + %b = load <32 x i8>, <32 x i8>* %ptr_b + %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru + ret <32 x i8> %3 +} + +define <32 x i8> @test_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { +; X86-LABEL: test_adds_epi8_rmkz_256: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] +; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] +; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00] +; X86-NEXT: retl # encoding: [0xc3] +; +; X64-LABEL: test_adds_epi8_rmkz_256: +; X64: # %bb.0: +; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] +; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07] +; X64-NEXT: retq # encoding: [0xc3] + %b = load <32 x i8>, <32 x i8>* %ptr_b + %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) + %2 = bitcast i32 %mask to <32 x i1> + %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer + ret <32 x i8> %3 +} + define <16 x i8> 
@test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: test_mask_adds_epi8_rr_128: ; CHECK: # %bb.0: @@ -7945,4 +8625,3 @@ define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, } declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) - diff --git a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll index 568cdeccdec..7c4091850fd 100644 --- a/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -1127,686 +1127,6 @@ define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %pt declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) -declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) #0 - -define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { -; X86-LABEL: test_mask_adds_epi16_rrk_128: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] -; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rrk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1] -; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru - ret <8 x i16> %3 -} - -define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { -; X86-LABEL: test_mask_adds_epi16_rrkz_128: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rrkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer - ret <8 x i16> %3 -} - -define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { -; X86-LABEL: test_mask_adds_epi16_rmk_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] -; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] -; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x08] -; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rmk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsw (%rdi), %xmm0, 
%xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f] -; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <8 x i16>, <8 x i16>* %ptr_b - %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru - ret <8 x i16> %3 -} - -define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { -; X86-LABEL: test_mask_adds_epi16_rmkz_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] -; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] -; X86-NEXT: vpaddsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rmkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <8 x i16>, <8 x i16>* %ptr_b - %1 = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer - ret <8 x i16> %3 -} - -declare <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16>, <16 x i16>) #0 - -define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { -; X86-LABEL: test_mask_adds_epi16_rrk_256: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] -; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rrk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1] -; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru - ret <16 x i16> %3 -} - -define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { -; X86-LABEL: test_mask_adds_epi16_rrkz_256: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rrkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer - ret <16 x i16> %3 -} - -define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { -; X86-LABEL: test_mask_adds_epi16_rmk_256: -; X86: # %bb.0: 
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x08] -; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rmk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f] -; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i16>, <16 x i16>* %ptr_b - %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru - ret <16 x i16> %3 -} - -define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { -; X86-LABEL: test_mask_adds_epi16_rmkz_256: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpaddsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi16_rmkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i16>, <16 x i16>* %ptr_b - %1 = call <16 x i16> @llvm.x86.avx2.padds.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer - ret <16 x i16> %3 -} - -declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) #0 - -define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) { -; X86-LABEL: test_mask_subs_epi16_rrk_128: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] -; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi16_rrk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1] -; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru - ret <8 x i16> %3 -} - -define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) { -; X86-LABEL: test_mask_subs_epi16_rrkz_128: -; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04] -; X86-NEXT: kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8] -; X86-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: 
test_mask_subs_epi16_rrkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer - ret <8 x i16> %3 -} - -define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) { -; X86-LABEL: test_mask_subs_epi16_rmk_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] -; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] -; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x08] -; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi16_rmk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f] -; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <8 x i16>, <8 x i16>* %ptr_b - %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passThru - ret <8 x i16> %3 -} - -define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) { -; X86-LABEL: test_mask_subs_epi16_rmkz_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08] -; X86-NEXT: kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9] -; X86-NEXT: vpsubsw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi16_rmkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <8 x i16>, <8 x i16>* %ptr_b - %1 = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a, <8 x i16> %b) - %2 = bitcast i8 %mask to <8 x i1> - %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> zeroinitializer - ret <8 x i16> %3 -} - -declare <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16>, <16 x i16>) #0 - -define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) { -; X86-LABEL: test_mask_subs_epi16_rrk_256: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] -; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi16_rrk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1] -; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - 
%1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru - ret <16 x i16> %3 -} - -define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) { -; X86-LABEL: test_mask_subs_epi16_rrkz_256: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi16_rrkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer - ret <16 x i16> %3 -} - -define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) { -; X86-LABEL: test_mask_subs_epi16_rmk_256: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x08] -; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi16_rmk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f] -; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i16>, <16 x i16>* %ptr_b - %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passThru - ret <16 x i16> %3 -} - -define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) { -; X86-LABEL: test_mask_subs_epi16_rmkz_256: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsubsw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi16_rmkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i16>, <16 x i16>* %ptr_b - %1 = call <16 x i16> @llvm.x86.avx2.psubs.w(<16 x i16> %a, <16 x i16> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> zeroinitializer - ret <16 x i16> %3 -} - -declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) #0 - -define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { -; X86-LABEL: test_mask_adds_epi8_rrk_128: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: 
[0x62,0xf1,0x7d,0x09,0xec,0xd1] -; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rrk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1] -; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru - ret <16 x i8> %3 -} - -define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { -; X86-LABEL: test_mask_adds_epi8_rrkz_128: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rrkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer - ret <16 x i8> %3 -} - -define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) { -; X86-LABEL: test_mask_adds_epi8_rm_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rm_128: -; X64: # %bb.0: -; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i8>, <16 x i8>* %ptr_b - %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) - ret <16 x i8> %1 -} - -define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { -; X86-LABEL: test_mask_adds_epi8_rmk_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x08] -; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rmk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f] -; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i8>, <16 x i8>* %ptr_b - %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru - ret <16 x i8> %3 -} - -define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { -; X86-LABEL: test_mask_adds_epi8_rmkz_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), 
%eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpaddsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rmkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i8>, <16 x i8>* %ptr_b - %1 = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer - ret <16 x i8> %3 -} - -declare <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8>, <32 x i8>) #0 - -define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { -; X86-LABEL: test_mask_adds_epi8_rrk_256: -; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] -; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rrk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1] -; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru - ret <32 x i8> %3 -} - -define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { -; X86-LABEL: test_mask_adds_epi8_rrkz_256: -; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rrkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer - ret <32 x i8> %3 -} - -define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { -; X86-LABEL: test_mask_adds_epi8_rmk_256: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x08] -; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rmk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f] -; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <32 x i8>, <32 x i8>* %ptr_b - %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru - ret <32 x i8> %3 -} - -define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { -; X86-LABEL: test_mask_adds_epi8_rmkz_256: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpaddsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_adds_epi8_rmkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <32 x i8>, <32 x i8>* %ptr_b - %1 = call <32 x i8> @llvm.x86.avx2.padds.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer - ret <32 x i8> %3 -} - -declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) #0 - -define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) { -; X86-LABEL: test_mask_subs_epi8_rrk_128: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] -; X86-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rrk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1] -; X64-NEXT: vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru - ret <16 x i8> %3 -} - -define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) { -; X86-LABEL: test_mask_subs_epi8_rrkz_128: -; X86: # %bb.0: -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rrkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer - ret <16 x i8> %3 -} - -define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) { -; X86-LABEL: test_mask_subs_epi8_rmk_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm1 {%k1} # 
encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x08] -; X86-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rmk_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f] -; X64-NEXT: vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i8>, <16 x i8>* %ptr_b - %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passThru - ret <16 x i8> %3 -} - -define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) { -; X86-LABEL: test_mask_subs_epi8_rmkz_128: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsubsb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rmkz_128: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <16 x i8>, <16 x i8>* %ptr_b - %1 = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a, <16 x i8> %b) - %2 = bitcast i16 %mask to <16 x i1> - %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> zeroinitializer - ret <16 x i8> %3 -} - -declare <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8>, <32 x i8>) #0 - -define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) { -; X86-LABEL: test_mask_subs_epi8_rrk_256: -; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] -; X86-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rrk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1] -; X64-NEXT: vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru - ret <32 x i8> %3 -} - -define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) { -; X86-LABEL: test_mask_subs_epi8_rrkz_256: -; X86: # %bb.0: -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rrkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select 
<32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer - ret <32 x i8> %3 -} - -define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) { -; X86-LABEL: test_mask_subs_epi8_rmk_256: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x08] -; X86-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rmk_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f] -; X64-NEXT: vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <32 x i8>, <32 x i8>* %ptr_b - %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passThru - ret <32 x i8> %3 -} - -define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) { -; X86-LABEL: test_mask_subs_epi8_rmkz_256: -; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08] -; X86-NEXT: vpsubsb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x00] -; X86-NEXT: retl # encoding: [0xc3] -; -; X64-LABEL: test_mask_subs_epi8_rmkz_256: -; X64: # %bb.0: -; X64-NEXT: kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce] -; X64-NEXT: vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07] -; X64-NEXT: retq # encoding: [0xc3] - %b = load <32 x i8>, <32 x i8>* %ptr_b - %1 = call <32 x i8> @llvm.x86.avx2.psubs.b(<32 x i8> %a, <32 x i8> %b) - %2 = bitcast i32 %mask to <32 x i1> - %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> zeroinitializer - ret <32 x i8> %3 -} - define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128: ; X86: # %bb.0: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll index 7216f192797..7e4703f6957 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86-upgrade.ll @@ -884,6 +884,48 @@ define <4 x float> @test_x86_sse2_cvtdq2ps(<4 x i32> %a0) { declare <4 x float> @llvm.x86.sse2.cvtdq2ps(<4 x i32>) nounwind readnone +define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { +; SSE-LABEL: test_x86_sse2_padds_b: +; SSE: ## %bb.0: +; SSE-NEXT: paddsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xec,0xc1] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_sse2_padds_b: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xec,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_sse2_padds_b: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x 
i8> %res +} +declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { +; SSE-LABEL: test_x86_sse2_padds_w: +; SSE: ## %bb.0: +; SSE-NEXT: paddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xed,0xc1] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_sse2_padds_w: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xed,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_sse2_padds_w: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone + + define <16 x i8> @test_x86_sse2_paddus_b(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: test_x86_sse2_paddus_b: ; SSE: ## %bb.0: @@ -926,6 +968,48 @@ define <8 x i16> @test_x86_sse2_paddus_w(<8 x i16> %a0, <8 x i16> %a1) { declare <8 x i16> @llvm.x86.sse2.paddus.w(<8 x i16>, <8 x i16>) nounwind readnone +define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { +; SSE-LABEL: test_x86_sse2_psubs_b: +; SSE: ## %bb.0: +; SSE-NEXT: psubsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe8,0xc1] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_sse2_psubs_b: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe8,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_sse2_psubs_b: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] + ret <16 x i8> %res +} +declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone + + +define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { +; SSE-LABEL: test_x86_sse2_psubs_w: +; SSE: ## %bb.0: +; SSE-NEXT: psubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe9,0xc1] +; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX1-LABEL: test_x86_sse2_psubs_w: +; AVX1: ## %bb.0: +; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe9,0xc1] +; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] +; +; AVX512-LABEL: test_x86_sse2_psubs_w: +; AVX512: ## %bb.0: +; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] +; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] + %res = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] + ret <8 x i16> %res +} +declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone + + define <16 x i8> @test_x86_sse2_psubus_b(<16 x i8> %a0, <16 x i8> %a1) { ; SSE-LABEL: test_x86_sse2_psubus_b: ; SSE: ## %bb.0: diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index 8dedce5fc8b..f8a9074bc00 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -919,48 +919,6 @@ define <16 x i8> @test_x86_sse2_packuswb_128_fold() { } -define <16 x i8> @test_x86_sse2_padds_b(<16 x i8> %a0, <16 x i8> %a1) { -; SSE-LABEL: test_x86_sse2_padds_b: -; SSE: ## %bb.0: -; SSE-NEXT: paddsb %xmm1, 
%xmm0 ## encoding: [0x66,0x0f,0xec,0xc1] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse2_padds_b: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xec,0xc1] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse2_padds_b: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xec,0xc1] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] - ret <16 x i8> %res -} -declare <16 x i8> @llvm.x86.sse2.padds.b(<16 x i8>, <16 x i8>) nounwind readnone - - -define <8 x i16> @test_x86_sse2_padds_w(<8 x i16> %a0, <8 x i16> %a1) { -; SSE-LABEL: test_x86_sse2_padds_w: -; SSE: ## %bb.0: -; SSE-NEXT: paddsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xed,0xc1] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse2_padds_w: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xed,0xc1] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse2_padds_w: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpaddsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xed,0xc1] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.sse2.padds.w(<8 x i16>, <8 x i16>) nounwind readnone - - define <4 x i32> @test_x86_sse2_pmadd_wd(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: test_x86_sse2_pmadd_wd: ; SSE: ## %bb.0: @@ -1520,48 +1478,6 @@ define <8 x i16> @test_x86_sse2_psrli_w(<8 x i16> %a0) { declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) nounwind readnone -define <16 x i8> @test_x86_sse2_psubs_b(<16 x i8> %a0, <16 x i8> %a1) { -; SSE-LABEL: test_x86_sse2_psubs_b: -; SSE: ## %bb.0: -; SSE-NEXT: psubsb %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe8,0xc1] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse2_psubs_b: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe8,0xc1] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse2_psubs_b: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe8,0xc1] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8> %a0, <16 x i8> %a1) ; <<16 x i8>> [#uses=1] - ret <16 x i8> %res -} -declare <16 x i8> @llvm.x86.sse2.psubs.b(<16 x i8>, <16 x i8>) nounwind readnone - - -define <8 x i16> @test_x86_sse2_psubs_w(<8 x i16> %a0, <8 x i16> %a1) { -; SSE-LABEL: test_x86_sse2_psubs_w: -; SSE: ## %bb.0: -; SSE-NEXT: psubsw %xmm1, %xmm0 ## encoding: [0x66,0x0f,0xe9,0xc1] -; SSE-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX1-LABEL: test_x86_sse2_psubs_w: -; AVX1: ## %bb.0: -; AVX1-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xe9,0xc1] -; AVX1-NEXT: ret{{[l|q]}} ## encoding: [0xc3] -; -; AVX512-LABEL: test_x86_sse2_psubs_w: -; AVX512: ## %bb.0: -; AVX512-NEXT: vpsubsw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe9,0xc1] -; AVX512-NEXT: ret{{[l|q]}} ## encoding: [0xc3] - %res = call <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] - ret <8 x i16> %res -} -declare <8 x i16> @llvm.x86.sse2.psubs.w(<8 x i16>, <8 x i16>) nounwind readnone - - define i32 
@test_x86_sse2_ucomieq_sd(<2 x double> %a0, <2 x double> %a1) { ; SSE-LABEL: test_x86_sse2_ucomieq_sd: ; SSE: ## %bb.0: |