diff options
| author | Craig Topper <craig.topper@intel.com> | 2019-05-31 07:38:26 +0000 |
|---|---|---|
| committer | Craig Topper <craig.topper@intel.com> | 2019-05-31 07:38:26 +0000 |
| commit | 31d00d80a21ffbc5bc03c7b90de030b29660a3bd (patch) | |
| tree | a2b857102222464ecde9938d10c2998f82effd5d /llvm/lib | |
| parent | cded5737109524d7cc756ac364a29cde835942b3 (diff) | |
| download | bcm5719-llvm-31d00d80a21ffbc5bc03c7b90de030b29660a3bd.tar.gz bcm5719-llvm-31d00d80a21ffbc5bc03c7b90de030b29660a3bd.zip | |
[X86] Remove patterns for X86VSintToFP/X86VUintToFP+loadv4i32 to v2f64.
These patterns can incorrectly narrow a volatile load from 128-bits to 64-bits.
Similar to PR42079.
Switch to using (v4i32 (bitcast (v2i64 (scalar_to_vector (loadi64))))) as the
load pattern used in the instructions.
This probably still has issues in 32-bit mode where loadi64 isn't legal. Maybe
we should use VZMOVL for widened loads even when we don't need the upper bits
as zeroes?
llvm-svn: 362203
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 63 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 14 |
2 files changed, 20 insertions, 57 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index f6e4e851192..753f1b71b07 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -7608,7 +7608,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, X86FoldableSchedWrite sched, string Broadcast = _.BroadcastStr, string Alias = "", X86MemOperand MemOp = _Src.MemOp, - RegisterClass MaskRC = _.KRCWM> { + RegisterClass MaskRC = _.KRCWM, + dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> { defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst), (ins _Src.RC:$src), @@ -7627,8 +7628,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _, (ins _.RC:$src0, MaskRC:$mask, MemOp:$src), (ins MaskRC:$mask, MemOp:$src), OpcodeStr#Alias, "$src", "$src", - (_.VT (OpNode (_Src.VT - (_Src.LdFrag addr:$src)))), + LdDAG, (vselect MaskRC:$mask, (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src)))), @@ -7683,53 +7683,10 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _ X86FoldableSchedWrite sched, string Broadcast = _.BroadcastStr, string Alias = "", X86MemOperand MemOp = _Src.MemOp, - RegisterClass MaskRC = _.KRCWM, - PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> { - - defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst), - (ins _Src.RC:$src), - (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src), - (ins MaskRC:$mask, _Src.RC:$src), - OpcodeStr, "$src", "$src", - (_.VT (OpNode (_Src.VT _Src.RC:$src))), - (vselect MaskRC:$mask, - (_.VT (OpNode (_Src.VT _Src.RC:$src))), - _.RC:$src0), - vselect, "$src0 = $dst">, - EVEX, Sched<[sched]>; - - defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins MemOp:$src), - (ins _.RC:$src0, MaskRC:$mask, MemOp:$src), - (ins MaskRC:$mask, MemOp:$src), - OpcodeStr#Alias, "$src", "$src", - (_.VT (LdFrag addr:$src)), - (vselect MaskRC:$mask, - (_.VT (OpNode (_Src.VT - 
(_Src.LdFrag addr:$src)))), - _.RC:$src0), - vselect, "$src0 = $dst">, - EVEX, Sched<[sched.Folded]>; - - defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst), - (ins _Src.ScalarMemOp:$src), - (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src), - (ins MaskRC:$mask, _Src.ScalarMemOp:$src), - OpcodeStr, - "${src}"##Broadcast, "${src}"##Broadcast, - (_.VT (OpNode (_Src.VT - (X86VBroadcast (_Src.ScalarLdFrag addr:$src))) - )), - (vselect MaskRC:$mask, - (_.VT - (OpNode - (_Src.VT - (X86VBroadcast - (_Src.ScalarLdFrag addr:$src))))), - _.RC:$src0), - vselect, "$src0 = $dst">, - EVEX, EVEX_B, Sched<[sched.Folded]>; -} + RegisterClass MaskRC = _.KRCWM> + : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias, + MemOp, MaskRC, + (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>; // Extend Float to Double multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr, @@ -7910,7 +7867,11 @@ multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode, let Predicates = [HasVLX] in { defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info, - OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128; + OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM, + (v2f64 (OpNode128 (bc_v4i32 + (v2i64 + (scalar_to_vector (loadi64 addr:$src))))))>, + EVEX_V128; defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode, sched.YMM>, EVEX_V256; } diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 18d9af8bdcd..23aea3ea908 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -1655,7 +1655,10 @@ let hasSideEffects = 0, mayLoad = 1 in def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>, + (v2f64 (X86VSintToFP + (bc_v4i32 + (v2i64 (scalar_to_vector + (loadi64 addr:$src)))))))]>, VEX, Sched<[WriteCvtI2PDLd]>, 
VEX_WIG; def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "vcvtdq2pd\t{$src, $dst|$dst, $src}", @@ -1679,7 +1682,10 @@ let hasSideEffects = 0, mayLoad = 1 in def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, - (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>, + (v2f64 (X86VSintToFP + (bc_v4i32 + (v2i64 (scalar_to_vector + (loadi64 addr:$src)))))))]>, Sched<[WriteCvtI2PDLd]>; def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), "cvtdq2pd\t{$src, $dst|$dst, $src}", @@ -1689,16 +1695,12 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src), // AVX register conversion intrinsics let Predicates = [HasAVX, NoVLX] in { - def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), - (VCVTDQ2PDrm addr:$src)>; def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), (VCVTDQ2PDrm addr:$src)>; } // Predicates = [HasAVX, NoVLX] // SSE2 register conversion intrinsics let Predicates = [UseSSE2] in { - def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))), - (CVTDQ2PDrm addr:$src)>; def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))), (CVTDQ2PDrm addr:$src)>; } // Predicates = [UseSSE2] |

