| author | Craig Topper <craig.topper@gmail.com> | 2016-10-09 06:41:47 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@gmail.com> | 2016-10-09 06:41:47 +0000 |
| commit | 43973154ddc67624b5cbc7d013b2102fed43406b (patch) | |
| tree | f272128fa98add775ae08f5848925014d6f2e2aa | |
| parent | cc723cccabfc74c93bf99458f1a92caff0fc9504 (diff) | |
[AVX-512] Fix execution domain for EVEX encoded VINSERTPS.
llvm-svn: 283692
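Background on the fix (not part of the original commit message): x86 cores split vector execution into integer and floating-point domains, and LLVM's execution-domain fixup pass picks domain-matching opcodes for domain-agnostic operations such as register moves and 128-bit inserts based on the `ExeDomain` of neighboring instructions. The `AVX512AIi8` format class used to define the EVEX-encoded VINSERTPS supplies the packed-integer domain, so surrounding moves were being emitted as integer-domain `vmovdqa`/`vinserti128` instead of FP-domain `vmovaps`/`vinsertf128`, as the test updates below show. A minimal TableGen sketch of the pattern the patch applies (the body is abbreviated and illustrative, not the verbatim definition):

```
// Sketch only: a `let ExeDomain = ...` block overrides the packed-integer
// domain that the AVX512AIi8 class would otherwise supply, so the domain
// fixup pass treats the instruction as a packed-single FP operation.
let ExeDomain = SSEPackedSingle in {
def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
      (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
      "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
      [(set VR128X:$dst, (X86insertps VR128X:$src1, VR128X:$src2,
                                      imm:$src3))]>, EVEX_4V;
} // ExeDomain = SSEPackedSingle
```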
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 2 |
| -rw-r--r-- | llvm/test/CodeGen/X86/pr29112.ll | 6 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-half-conversions.ll | 12 |
| -rw-r--r-- | llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll | 16 |
4 files changed, 16 insertions, 20 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index f51b3235368..d0ef44c92ba 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -571,6 +571,7 @@ defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info,
                vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>;
 
 // vinsertps - insert f32 to XMM
+let ExeDomain = SSEPackedSingle in {
 def VINSERTPSZrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst),
       (ins VR128X:$src1, VR128X:$src2, u8imm:$src3),
       "vinsertps\t{$src3, $src2, $src1, $dst|$dst, $src1, $src2, $src3}",
@@ -582,6 +583,7 @@ def VINSERTPSZrm: AVX512AIi8<0x21, MRMSrcMem, (outs VR128X:$dst),
       [(set VR128X:$dst, (X86insertps VR128X:$src1,
                 (v4f32 (scalar_to_vector (loadf32 addr:$src2))),
                 imm:$src3))]>, EVEX_4V, EVEX_CD8<32, CD8VT1>;
+}
 
 //===----------------------------------------------------------------------===//
 // AVX-512 VECTOR EXTRACT
diff --git a/llvm/test/CodeGen/X86/pr29112.ll b/llvm/test/CodeGen/X86/pr29112.ll
index 08d6aa69315..ab114bebacd 100644
--- a/llvm/test/CodeGen/X86/pr29112.ll
+++ b/llvm/test/CodeGen/X86/pr29112.ll
@@ -20,7 +20,7 @@ define <4 x float> @bar(<4 x float>* %a1p, <4 x float>* %a2p, <4 x float> %a3, <
 ; CHECK-NEXT: vblendps {{.*#+}} xmm4 = xmm8[0],xmm1[1],xmm8[2,3]
 ; CHECK-NEXT: vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm2[1],xmm4[3]
 ; CHECK-NEXT: vinsertps {{.*#+}} xmm6 = xmm4[0,1,2],xmm3[1]
-; CHECK-NEXT: vmovdqa %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps %xmm6, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; CHECK-NEXT: vextractf32x4 $2, %zmm3, %xmm4
 ; CHECK-NEXT: vblendps {{.*#+}} xmm4 = xmm0[0,1,2],xmm4[3]
 ; CHECK-NEXT: vpermilps {{.*#+}} xmm5 = xmm2[3,1,2,3]
@@ -49,8 +49,8 @@ define <4 x float> @bar(<4 x float>* %a1p, <4 x float>* %a2p, <4 x float> %a3, <
 ; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm9[0,1],xmm2[3],xmm9[3]
 ; CHECK-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm12[0]
 ; CHECK-NEXT: vaddps %xmm3, %xmm2, %xmm2
-; CHECK-NEXT: vmovdqa %xmm15, %xmm1
-; CHECK-NEXT: vmovdqa %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
+; CHECK-NEXT: vmovaps %xmm15, %xmm1
+; CHECK-NEXT: vmovaps %xmm1, {{[0-9]+}}(%rsp) # 16-byte Spill
 ; CHECK-NEXT: vaddps %xmm0, %xmm1, %xmm9
 ; CHECK-NEXT: vaddps %xmm14, %xmm10, %xmm0
 ; CHECK-NEXT: vaddps %xmm1, %xmm1, %xmm8
diff --git a/llvm/test/CodeGen/X86/vector-half-conversions.ll b/llvm/test/CodeGen/X86/vector-half-conversions.ll
index 57705ad7101..11827d66a39 100644
--- a/llvm/test/CodeGen/X86/vector-half-conversions.ll
+++ b/llvm/test/CodeGen/X86/vector-half-conversions.ll
@@ -412,7 +412,7 @@ define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) nounwind {
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
+; AVX512F-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: cvt_8i16_to_8f32:
@@ -744,14 +744,14 @@ define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) nounwind {
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm11[0],xmm1[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm10[0],xmm1[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm9[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
+; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm8[0],xmm7[0],xmm8[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm6[0],xmm2[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm5[0]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm16[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
+; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512F-NEXT: retq
 ;
@@ -1193,7 +1193,7 @@ define <8 x float> @load_cvt_8i16_to_8f32(<8 x i16>* %a0) nounwind {
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm4, %ymm0, %ymm0
+; AVX512F-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0
 ; AVX512F-NEXT: retq
 ;
 ; AVX512VL-LABEL: load_cvt_8i16_to_8f32:
@@ -1425,14 +1425,14 @@ define <16 x float> @load_cvt_16i16_to_16f32(<16 x i16>* %a0) nounwind {
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm10[0],xmm11[0],xmm10[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm9[0],xmm1[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm8[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
+; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm6[0],xmm7[0],xmm6[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm5[0],xmm1[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm17[0],xmm2[3]
 ; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm16[0]
-; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
+; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
 ; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0
 ; AVX512F-NEXT: retq
 ;
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
index 23dd6c5ca2c..667863267c9 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -201,17 +201,11 @@ define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
 ; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8f32_08080808:
-; AVX2: # BB#0:
-; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
-; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512VL-LABEL: shuffle_v8f32_08080808:
-; AVX512VL: # BB#0:
-; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
-; AVX512VL-NEXT: vpbroadcastq %xmm0, %ymm0
-; AVX512VL-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8f32_08080808:
+; AVX2OR512VL: # BB#0:
+; AVX2OR512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
+; AVX2OR512VL-NEXT: vbroadcastsd %xmm0, %ymm0
+; AVX2OR512VL-NEXT: retq
 %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
 ret <8 x float> %shuffle
 }

