diff options
| author | Craig Topper <craig.topper@intel.com> | 2018-10-15 21:51:32 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2018-10-15 21:51:32 +0000 |
| commit | 2909a3d9d02132583137374114175cb982892320 (patch) | |
| tree | 2dfd4a426f6f0a491b4cfd7f953d44ffd425e2a0 | |
| parent | 548e8b5b47f4937b7c165b8be481466f51d4e529 (diff) | |
| download | bcm5719-llvm-2909a3d9d02132583137374114175cb982892320.tar.gz bcm5719-llvm-2909a3d9d02132583137374114175cb982892320.zip | |
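[X86] Fix a bad bitcast in the load form of vXi16 uniform shift patterns for EVEX-encoded instructions. The W (v8i16) variant passed bc_v2i64 as its bitcast fragment; the pattern now bitcasts the v2i64 load to SrcVT directly, so the load folds into the shift.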
llvm-svn: 344563
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 19 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512bw-intrinsics.ll | 6 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll | 6 |
4 files changed, 16 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index b1cb1545ec4..158aba447ed 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -5826,7 +5826,7 @@ multiclass avx512_shift_rmbi<bits<8> opc, Format ImmFormM, multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, X86FoldableSchedWrite sched, ValueType SrcVT, - PatFrag bc_frag, X86VectorVTInfo _> { + X86VectorVTInfo _> { // src2 is always 128-bit let ExeDomain = _.ExeDomain in { defm rr : AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst), @@ -5837,7 +5837,8 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, defm rm : AVX512_maskable<opc, MRMSrcMem, _, (outs _.RC:$dst), (ins _.RC:$src1, i128mem:$src2), OpcodeStr, "$src2, $src1", "$src1, $src2", - (_.VT (OpNode _.RC:$src1, (bc_frag (loadv2i64 addr:$src2))))>, + (_.VT (OpNode _.RC:$src1, + (SrcVT (bitconvert (loadv2i64 addr:$src2)))))>, AVX512BIBase, EVEX_4V, Sched<[sched.Folded, sched.ReadAfterFold]>; } @@ -5845,18 +5846,18 @@ multiclass avx512_shift_rrm<bits<8> opc, string OpcodeStr, SDNode OpNode, multiclass avx512_shift_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode, X86SchedWriteWidths sched, ValueType SrcVT, - PatFrag bc_frag, AVX512VLVectorVTInfo VTInfo, + AVX512VLVectorVTInfo VTInfo, Predicate prd> { let Predicates = [prd] in defm Z : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.ZMM, SrcVT, - bc_frag, VTInfo.info512>, EVEX_V512, + VTInfo.info512>, EVEX_V512, EVEX_CD8<VTInfo.info512.EltSize, CD8VQ> ; let Predicates = [prd, HasVLX] in { defm Z256 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.YMM, SrcVT, - bc_frag, VTInfo.info256>, EVEX_V256, + VTInfo.info256>, EVEX_V256, EVEX_CD8<VTInfo.info256.EltSize, CD8VH>; defm Z128 : avx512_shift_rrm<opc, OpcodeStr, OpNode, sched.XMM, SrcVT, - bc_frag, VTInfo.info128>, EVEX_V128, + VTInfo.info128>, EVEX_V128, EVEX_CD8<VTInfo.info128.EltSize, CD8VF>; } } @@ -5866,12 +5867,12 @@ 
multiclass avx512_shift_types<bits<8> opcd, bits<8> opcq, bits<8> opcw, X86SchedWriteWidths sched, bit NotEVEX2VEXConvertibleQ = 0> { defm D : avx512_shift_sizes<opcd, OpcodeStr#"d", OpNode, sched, v4i32, - bc_v4i32, avx512vl_i32_info, HasAVX512>; + avx512vl_i32_info, HasAVX512>; let notEVEX2VEXConvertible = NotEVEX2VEXConvertibleQ in defm Q : avx512_shift_sizes<opcq, OpcodeStr#"q", OpNode, sched, v2i64, - bc_v2i64, avx512vl_i64_info, HasAVX512>, VEX_W; + avx512vl_i64_info, HasAVX512>, VEX_W; defm W : avx512_shift_sizes<opcw, OpcodeStr#"w", OpNode, sched, v8i16, - bc_v2i64, avx512vl_i16_info, HasBWI>; + avx512vl_i16_info, HasBWI>; } multiclass avx512_shift_rmi_sizes<bits<8> opc, Format ImmFormR, Format ImmFormM, diff --git a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll index bba70b139e2..101448e22ac 100644 --- a/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -541,8 +541,7 @@ define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, <8 x i16>* %p) { ; X86-AVX512VL-LABEL: test_x86_avx2_psrl_w_load: ; X86-AVX512VL: ## %bb.0: ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512VL-NEXT: vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08] -; X86-AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xc1] +; X86-AVX512VL-NEXT: vpsrlw (%eax), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x00] ; X86-AVX512VL-NEXT: retl ## encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx2_psrl_w_load: @@ -552,8 +551,7 @@ define <16 x i16> @test_x86_avx2_psrl_w_load(<16 x i16> %a0, <8 x i16>* %p) { ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrl_w_load: ; X64-AVX512VL: ## %bb.0: -; X64-AVX512VL-NEXT: vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f] -; X64-AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: 
[0xc5,0xfd,0xd1,0xc1] +; X64-AVX512VL-NEXT: vpsrlw (%rdi), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0x07] ; X64-AVX512VL-NEXT: retq ## encoding: [0xc3] %a1 = load <8 x i16>, <8 x i16>* %p %res = call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %a0, <8 x i16> %a1) ; <<16 x i16>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll index 650235d51b3..cf52746c3a5 100644 --- a/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1952,14 +1952,12 @@ define <32 x i16> @test_x86_avx512_psrl_w_512_load(<32 x i16> %a0, <8 x i16>* %p ; X86-LABEL: test_x86_avx512_psrl_w_512_load: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04] -; X86-NEXT: vmovdqa (%eax), %xmm1 # encoding: [0xc5,0xf9,0x6f,0x08] -; X86-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1] +; X86-NEXT: vpsrlw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x00] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_x86_avx512_psrl_w_512_load: ; X64: # %bb.0: -; X64-NEXT: vmovdqa (%rdi), %xmm1 # encoding: [0xc5,0xf9,0x6f,0x0f] -; X64-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1] +; X64-NEXT: vpsrlw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0x07] ; X64-NEXT: retq # encoding: [0xc3] %a1 = load <8 x i16>, <8 x i16>* %p %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1] diff --git a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll index 020c4985943..8dedce5fc8b 100644 --- a/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll +++ b/llvm/test/CodeGen/X86/sse2-intrinsics-x86.ll @@ -1434,8 +1434,7 @@ define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, <8 x i16>* %p) { ; X86-AVX512-LABEL: test_x86_sse2_psrl_w_load: ; X86-AVX512: ## %bb.0: ; X86-AVX512-NEXT: movl 
{{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04] -; X86-AVX512-NEXT: vmovdqa (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x08] -; X86-AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] +; X86-AVX512-NEXT: vpsrlw (%eax), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0x00] ; X86-AVX512-NEXT: retl ## encoding: [0xc3] ; ; X64-SSE-LABEL: test_x86_sse2_psrl_w_load: @@ -1450,8 +1449,7 @@ define <8 x i16> @test_x86_sse2_psrl_w_load(<8 x i16> %a0, <8 x i16>* %p) { ; ; X64-AVX512-LABEL: test_x86_sse2_psrl_w_load: ; X64-AVX512: ## %bb.0: -; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x0f] -; X64-AVX512-NEXT: vpsrlw %xmm1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xc1] +; X64-AVX512-NEXT: vpsrlw (%rdi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0x07] ; X64-AVX512-NEXT: retq ## encoding: [0xc3] %a1 = load <8 x i16>, <8 x i16>* %p %res = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1) ; <<8 x i16>> [#uses=1] |

