-rw-r--r--   llvm/lib/Target/X86/X86InstrAVX512.td   68
-rw-r--r--   llvm/test/CodeGen/X86/avx512-cvt.ll       3
-rw-r--r--   llvm/test/CodeGen/X86/vec_fpext.ll        7
3 files changed, 62 insertions, 16 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index d93059b44f3..f6e4e851192 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -7677,19 +7677,73 @@ multiclass avx512_vcvt_fp_rc<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                            EVEX, EVEX_B, EVEX_RC, Sched<[sched]>;
 }
 
+// Similar to avx512_vcvt_fp, but uses an extload for the memory form.
+multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
+                                X86VectorVTInfo _Src, SDNode OpNode,
+                                X86FoldableSchedWrite sched,
+                                string Broadcast = _.BroadcastStr,
+                                string Alias = "", X86MemOperand MemOp = _Src.MemOp,
+                                RegisterClass MaskRC = _.KRCWM,
+                                PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> {
+
+  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
+                         (ins _Src.RC:$src),
+                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
+                         (ins MaskRC:$mask, _Src.RC:$src),
+                         OpcodeStr, "$src", "$src",
+                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+                         (vselect MaskRC:$mask,
+                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
+                                  _.RC:$src0),
+                         vselect, "$src0 = $dst">,
+                         EVEX, Sched<[sched]>;
+
+  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins MemOp:$src),
+                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
+                         (ins MaskRC:$mask, MemOp:$src),
+                         OpcodeStr#Alias, "$src", "$src",
+                         (_.VT (LdFrag addr:$src)),
+                         (vselect MaskRC:$mask,
+                                  (_.VT (OpNode (_Src.VT
+                                                 (_Src.LdFrag addr:$src)))),
+                                  _.RC:$src0),
+                         vselect, "$src0 = $dst">,
+                         EVEX, Sched<[sched.Folded]>;
+
+  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
+                         (ins _Src.ScalarMemOp:$src),
+                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
+                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
+                         OpcodeStr,
+                         "${src}"##Broadcast, "${src}"##Broadcast,
+                         (_.VT (OpNode (_Src.VT
+                                        (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
+                                )),
+                         (vselect MaskRC:$mask,
+                                  (_.VT
+                                   (OpNode
+                                    (_Src.VT
+                                     (X86VBroadcast
+                                      (_Src.ScalarLdFrag addr:$src))))),
+                                  _.RC:$src0),
+                         vselect, "$src0 = $dst">,
+                         EVEX, EVEX_B, Sched<[sched.Folded]>;
+}
+
 // Extend Float to Double
 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
                            X86SchedWriteWidths sched> {
   let Predicates = [HasAVX512] in {
-    defm Z : avx512_vcvt_fp<opc, OpcodeStr, v8f64_info, v8f32x_info,
+    defm Z : avx512_vcvt_fpextend<opc, OpcodeStr, v8f64_info, v8f32x_info,
                             fpextend, sched.ZMM>,
              avx512_vcvt_fp_sae<opc, OpcodeStr, v8f64_info, v8f32x_info,
                                 X86vfpextSAE, sched.ZMM>, EVEX_V512;
   }
   let Predicates = [HasVLX] in {
-    defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4f32x_info,
+    defm Z128 : avx512_vcvt_fpextend<opc, OpcodeStr, v2f64x_info, v4f32x_info,
                                X86vfpext, sched.XMM, "{1to2}", "", f64mem>, EVEX_V128;
-    defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
+    defm Z256 : avx512_vcvt_fpextend<opc, OpcodeStr, v4f64x_info, v4f32x_info, fpextend,
                                sched.YMM>, EVEX_V256;
   }
 }
@@ -7784,9 +7838,6 @@ let Predicates = [HasAVX512] in {
                    (fpround (v8f64 (X86VBroadcast (loadf64 addr:$src)))),
                    v8f32x_info.ImmAllZerosV),
             (VCVTPD2PSZrmbkz VK8WM:$mask, addr:$src)>;
-
-  def : Pat<(v8f64 (extloadv8f32 addr:$src)),
-            (VCVTPS2PDZrm addr:$src)>;
 }
 
 let Predicates = [HasVLX] in {
@@ -7819,11 +7870,6 @@ let Predicates = [HasVLX] in {
                    v4f32x_info.ImmAllZerosV),
             (VCVTPD2PSZ256rmbkz VK4WM:$mask, addr:$src)>;
 
-  def : Pat<(v2f64 (extloadv2f32 addr:$src)),
-            (VCVTPS2PDZ128rm addr:$src)>;
-  def : Pat<(v4f64 (extloadv4f32 addr:$src)),
-            (VCVTPS2PDZ256rm addr:$src)>;
-
   // Special patterns to allow use of X86vmfpround for masking. Instruction
   // patterns have been disabled with null_frag.
   def : Pat<(X86vfpround (v2f64 VR128X:$src)),
diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll
index aa7f533c1d4..48293c08a5c 100644
--- a/llvm/test/CodeGen/X86/avx512-cvt.ll
+++ b/llvm/test/CodeGen/X86/avx512-cvt.ll
@@ -791,9 +791,8 @@ define <4 x double> @f32to4f64_mask_load(<4 x float>* %p, <4 x double> %b1, <4 x
 ;
 ; VL-LABEL: f32to4f64_mask_load:
 ; VL:       # %bb.0:
-; VL-NEXT:    vcvtps2pd (%rdi), %ymm2
 ; VL-NEXT:    vcmpltpd %ymm1, %ymm0, %k1
-; VL-NEXT:    vmovapd %ymm2, %ymm0 {%k1} {z}
+; VL-NEXT:    vcvtps2pd (%rdi), %ymm0 {%k1} {z}
 ; VL-NEXT:    retq
   %b = load <4 x float>, <4 x float>* %p
   %a = fpext <4 x float> %b to <4 x double>
diff --git a/llvm/test/CodeGen/X86/vec_fpext.ll b/llvm/test/CodeGen/X86/vec_fpext.ll
index 082a5336657..46ad1f16f3c 100644
--- a/llvm/test/CodeGen/X86/vec_fpext.ll
+++ b/llvm/test/CodeGen/X86/vec_fpext.ll
@@ -300,7 +300,6 @@ entry:
 }
 
 ; Make sure we don't narrow a volatile load.
-; FIXME: We incorrectly narrow it for avx512vl.
 define <2 x double> @PR42079(<4 x float>* %x) {
 ; X32-SSE-LABEL: PR42079:
 ; X32-SSE:       # %bb.0:
@@ -319,7 +318,8 @@ define <2 x double> @PR42079(<4 x float>* %x) {
 ; X32-AVX512VL-LABEL: PR42079:
 ; X32-AVX512VL:       # %bb.0:
 ; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
-; X32-AVX512VL-NEXT:    vcvtps2pd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x00]
+; X32-AVX512VL-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
+; X32-AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
 ; X32-AVX512VL-NEXT:    retl # encoding: [0xc3]
 ;
 ; X64-SSE-LABEL: PR42079:
@@ -336,7 +336,8 @@ define <2 x double> @PR42079(<4 x float>* %x) {
 ;
 ; X64-AVX512VL-LABEL: PR42079:
 ; X64-AVX512VL:       # %bb.0:
-; X64-AVX512VL-NEXT:    vcvtps2pd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0x07]
+; X64-AVX512VL-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
+; X64-AVX512VL-NEXT:    vcvtps2pd %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xc0]
 ; X64-AVX512VL-NEXT:    retq # encoding: [0xc3]
   %a = load volatile <4 x float>, <4 x float>* %x
   %b = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
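Worked example (a minimal sketch, not part of the commit): because the memory form of vcvtps2pd is now matched as an extending load inside the avx512_vcvt_fpextend multiclass rather than via separate standalone Pats, a fpext fed by an ordinary load should still fold into the memory operand, while a volatile load, as in PR42079 above, must stay 128 bits wide and be converted in a register. Assuming llc -mtriple=x86_64-- -mattr=+avx512vl, and with hypothetical function names:

; Non-volatile: the <4 x float> load may be narrowed to the low <2 x float>,
; and the extloadv2f32 pattern folds it into the memory form, so this should
; compile to a single vcvtps2pd (%rdi), %xmm0.
define <2 x double> @fold_load(<4 x float>* %x) {
  %a = load <4 x float>, <4 x float>* %x
  %lo = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
  %b = fpext <2 x float> %lo to <2 x double>
  ret <2 x double> %b
}

; Volatile: the load may not be narrowed, so it is emitted as a full-width
; vmovaps followed by a register-form vcvtps2pd %xmm0, %xmm0, matching the
; updated PR42079 checks above.
define <2 x double> @no_fold_volatile(<4 x float>* %x) {
  %a = load volatile <4 x float>, <4 x float>* %x
  %lo = shufflevector <4 x float> %a, <4 x float> %a, <2 x i32> <i32 0, i32 1>
  %b = fpext <2 x float> %lo to <2 x double>
  ret <2 x double> %b
}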

