Diffstat (limited to 'llvm')
-rw-r--r--   llvm/lib/Target/X86/X86InstrAVX512.td   133
-rw-r--r--   llvm/lib/Target/X86/X86InstrFMA.td         1
-rw-r--r--   llvm/lib/Target/X86/X86InstrMMX.td         2
-rw-r--r--   llvm/lib/Target/X86/X86InstrSSE.td       186
4 files changed, 139 insertions, 183 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c425dc467eb..85fc440fd40 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4317,12 +4317,10 @@ def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                 VR128X:$src1, VR128X:$src2), 0>;

 let Predicates = [HasAVX512] in {
-  let AddedComplexity = 15 in {
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
             (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
             (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
-  }

   // Move low f32 and clear high bits.
   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
@@ -4342,7 +4340,6 @@ let Predicates = [HasAVX512] in {
              (VMOVSSZrr (v4i32 (AVX512_128_SET0)),
              (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;

-  let AddedComplexity = 20 in {
   // MOVSSrm zeros the high parts of the register; represent this
   // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
   def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
@@ -4398,7 +4395,7 @@ let Predicates = [HasAVX512] in {
             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
   def : Pat<(v8f64 (X86vzload addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
-  }
+
   def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
                    (v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
             (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
@@ -4442,7 +4439,6 @@ let Predicates = [HasAVX512] in {
 }

 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
-let AddedComplexity = 15 in
 def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
                                   (ins VR128X:$src),
                                   "vmovq\t{$src, $dst|$dst, $src}",
@@ -4452,42 +4448,39 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
 }

 let Predicates = [HasAVX512] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
-              (VMOVDI2PDIZrr GR32:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+            (VMOVDI2PDIZrr GR32:$src)>;

-    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
-              (VMOV64toPQIZrr GR64:$src)>;
+  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+            (VMOV64toPQIZrr GR64:$src)>;

-    def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-                     (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
+  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
+
+  def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
+                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+            (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;

-    def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
-                     (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-              (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
-  }
   // AVX 128-bit movd/movq instruction write zeros in the high 128-bit part.
-  let AddedComplexity = 20 in {
-    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
-              (VMOVDI2PDIZrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-              (VMOVDI2PDIZrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
-              (VMOVDI2PDIZrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzload addr:$src)),
-              (VMOVDI2PDIZrm addr:$src)>;
-    def : Pat<(v8i32 (X86vzload addr:$src)),
-              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
-    def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
-              (VMOVQI2PQIZrm addr:$src)>;
-    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
-              (VMOVZPQILo2PQIZrr VR128X:$src)>;
-    def : Pat<(v2i64 (X86vzload addr:$src)),
-              (VMOVQI2PQIZrm addr:$src)>;
-    def : Pat<(v4i64 (X86vzload addr:$src)),
-              (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
-  }
+  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+            (VMOVDI2PDIZrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+            (VMOVDI2PDIZrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+            (VMOVDI2PDIZrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzload addr:$src)),
+            (VMOVDI2PDIZrm addr:$src)>;
+  def : Pat<(v8i32 (X86vzload addr:$src)),
+            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
+  def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+            (VMOVQI2PQIZrm addr:$src)>;
+  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
+            (VMOVZPQILo2PQIZrr VR128X:$src)>;
+  def : Pat<(v2i64 (X86vzload addr:$src)),
+            (VMOVQI2PQIZrm addr:$src)>;
+  def : Pat<(v4i64 (X86vzload addr:$src)),
+            (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;

   // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
@@ -7721,14 +7714,12 @@ def : Pat<(v8f64 (extloadv8f32 addr:$src)),
             (VCVTPS2PDZrm addr:$src)>;

 let Predicates = [HasVLX] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
-              (VCVTPD2PSZ128rr VR128X:$src)>;
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-              (VCVTPD2PSZ128rm addr:$src)>;
-  }
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
+            (VCVTPD2PSZ128rr VR128X:$src)>;
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
+            (VCVTPD2PSZ128rm addr:$src)>;
   def : Pat<(v2f64 (extloadv2f32 addr:$src)),
             (VCVTPS2PDZ128rm addr:$src)>;
   def : Pat<(v4f64 (extloadv4f32 addr:$src)),
@@ -8224,26 +8215,24 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
 }

 let Predicates = [HasAVX512, HasVLX] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
-              (VCVTPD2DQZ128rr VR128X:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-              (VCVTPD2DQZ128rm addr:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
-              (VCVTPD2UDQZ128rr VR128X:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
-              (VCVTTPD2DQZ128rr VR128X:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-              (VCVTTPD2DQZ128rm addr:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
-              (VCVTTPD2UDQZ128rr VR128X:$src)>;
-  }
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
+            (VCVTPD2DQZ128rr VR128X:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+            (VCVTPD2DQZ128rm addr:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
+            (VCVTPD2UDQZ128rr VR128X:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
+            (VCVTTPD2DQZ128rr VR128X:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+            (VCVTTPD2DQZ128rm addr:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
+            (VCVTTPD2UDQZ128rr VR128X:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32
                                   (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
             (VCVTDQ2PDZ128rm addr:$src)>;
@@ -8264,14 +8253,12 @@ let Predicates = [HasAVX512] in {
 }

 let Predicates = [HasDQI, HasVLX] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
-              (VCVTQQ2PSZ128rr VR128X:$src)>;
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
-              (VCVTUQQ2PSZ128rr VR128X:$src)>;
-  }
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
+            (VCVTQQ2PSZ128rr VR128X:$src)>;
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
+            (VCVTUQQ2PSZ128rr VR128X:$src)>;
 }

 let Predicates = [HasDQI, NoVLX] in {
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 594eb3baa49..376f643050f 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -589,7 +589,6 @@ multiclass scalar_fma4_patterns<SDNode Op,
                                 string Name, ValueType VT, ValueType EltVT,
                                 RegisterClass RC, PatFrag mem_frag> {
   let Predicates = [HasFMA4] in {
-    let AddedComplexity = 15 in
     def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
                                   (Op RC:$src1, RC:$src2, RC:$src3))))),
               (!cast<Instruction>(Name#"rr_Int")
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index e9dc4f6a68b..aefeffedfc1 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -273,11 +273,9 @@ def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
                         Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;

 let Predicates = [HasMMX] in {
-  let AddedComplexity = 15 in
   // movd to MMX register zero-extends
   def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
             (MMX_MOVD64rr GR32:$src)>;
-  let AddedComplexity = 20 in
   def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
             (MMX_MOVD64rm addr:$src)>;
 }
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 407b37c6a7c..74b843d988f 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -248,7 +248,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {

 // Patterns
 let Predicates = [UseAVX] in {
-  let AddedComplexity = 20 in {
   // MOVSSrm zeros the high parts of the register; represent this
   // with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
   def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
@@ -285,7 +284,6 @@ let Predicates = [UseAVX] in {
             (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
   def : Pat<(v4f64 (X86vzload addr:$src)),
             (SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
-  }

   // Extract and store.
   def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
@@ -308,7 +306,7 @@ let Predicates = [UseAVX] in {
 }

 let Predicates = [UseSSE1] in {
-  let Predicates = [NoSSE41], AddedComplexity = 15 in {
+  let Predicates = [NoSSE41] in {
   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
   // MOVSS to the lower bits.
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
@@ -317,7 +315,6 @@ let Predicates = [UseSSE1] in {
             (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
   }

-  let AddedComplexity = 20 in {
   // MOVSSrm already zeros the high parts of the register.
   def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
             (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
@@ -327,7 +324,6 @@ let Predicates = [UseSSE1] in {
             (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
   def : Pat<(v4f32 (X86vzload addr:$src)),
             (COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
-  }

   // Extract and store.
   def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
@@ -343,7 +339,6 @@ let Predicates = [UseSSE1] in {
 }

 let Predicates = [UseSSE2] in {
-  let AddedComplexity = 20 in {
   // MOVSDrm already zeros the high parts of the register.
   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
             (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
@@ -355,7 +350,6 @@ let Predicates = [UseSSE2] in {
             (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
   def : Pat<(v2f64 (X86vzload addr:$src)),
             (COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
-  }

   // Shuffle with MOVSD
   def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
@@ -1637,20 +1631,18 @@ let Predicates = [HasAVX, NoVLX] in {
 }

 let Predicates = [HasAVX, NoVLX] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-              (VCVTPD2DQrr VR128:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
-              (VCVTPD2DQrm addr:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-              (VCVTTPD2DQrr VR128:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
-              (VCVTTPD2DQrm addr:$src)>;
-  }
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+            (VCVTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+            (VCVTPD2DQrm addr:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+            (VCVTTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+            (VCVTTPD2DQrm addr:$src)>;
 } // Predicates = [HasAVX, NoVLX]

 def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1665,20 +1657,18 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
                        Sched<[WriteCvtPD2ILd]>;

 let Predicates = [UseSSE2] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
-              (CVTPD2DQrr VR128:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
-              (CVTPD2DQrm addr:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
-              (CVTTPD2DQrr VR128:$src)>;
-    def : Pat<(X86vzmovl (v2i64 (bitconvert
-                                 (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
-              (CVTTPD2DQrm addr:$src)>;
-  }
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+            (CVTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
+            (CVTPD2DQrm addr:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+            (CVTTPD2DQrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2i64 (bitconvert
+                               (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
+            (CVTTPD2DQrm addr:$src)>;
 } // Predicates = [UseSSE2]

 // Convert packed single to packed double
@@ -1819,26 +1809,22 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),

 let Predicates = [HasAVX, NoVLX] in {
   // Match fpround and fpextend for 128/256-bit conversions
-  let AddedComplexity = 15 in {
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-              (VCVTPD2PSrr VR128:$src)>;
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
-              (VCVTPD2PSrm addr:$src)>;
-  }
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
+            (VCVTPD2PSrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
+            (VCVTPD2PSrm addr:$src)>;
 }

 let Predicates = [UseSSE2] in {
   // Match fpround and fpextend for 128 conversions
-  let AddedComplexity = 15 in {
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
-              (CVTPD2PSrr VR128:$src)>;
-    def : Pat<(X86vzmovl (v2f64 (bitconvert
-                                 (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
-              (CVTPD2PSrm addr:$src)>;
-  }
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
+            (CVTPD2PSrr VR128:$src)>;
+  def : Pat<(X86vzmovl (v2f64 (bitconvert
+                               (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
+            (CVTPD2PSrm addr:$src)>;
 }

 //===----------------------------------------------------------------------===//
@@ -4165,34 +4151,30 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
 } // ExeDomain = SSEPackedInt, isCodeGenOnly = 1

 let Predicates = [UseAVX] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
-              (VMOVDI2PDIrr GR32:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+            (VMOVDI2PDIrr GR32:$src)>;

-    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
-              (VMOV64toPQIrr GR64:$src)>;
+  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+            (VMOV64toPQIrr GR64:$src)>;

-    def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
-                     (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
-              (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>;
-  }
+  def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+                   (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+            (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>;

   // AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
   // These instructions also write zeros in the high part of a 256-bit register.
-  let AddedComplexity = 20 in {
-    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
-              (VMOVDI2PDIrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-              (VMOVDI2PDIrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
-              (VMOVDI2PDIrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzload addr:$src)),
-              (VMOVDI2PDIrm addr:$src)>;
-    def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
-                     (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
-              (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
-    def : Pat<(v8i32 (X86vzload addr:$src)),
-              (SUBREG_TO_REG (i64 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
-  }
+  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+            (VMOVDI2PDIrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+            (VMOVDI2PDIrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+            (VMOVDI2PDIrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzload addr:$src)),
+            (VMOVDI2PDIrm addr:$src)>;
+  def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+                   (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+            (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
+  def : Pat<(v8i32 (X86vzload addr:$src)),
+            (SUBREG_TO_REG (i64 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;

   // Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
   def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
                    (v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
@@ -4200,23 +4182,19 @@ let Predicates = [UseAVX] in {
 }

 let Predicates = [UseSSE2] in {
-  let AddedComplexity = 15 in {
-    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
-              (MOVDI2PDIrr GR32:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+            (MOVDI2PDIrr GR32:$src)>;

-    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
-              (MOV64toPQIrr GR64:$src)>;
-  }
-  let AddedComplexity = 20 in {
-    def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
-              (MOVDI2PDIrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
-              (MOVDI2PDIrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
-              (MOVDI2PDIrm addr:$src)>;
-    def : Pat<(v4i32 (X86vzload addr:$src)),
-              (MOVDI2PDIrm addr:$src)>;
-  }
+  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+            (MOV64toPQIrr GR64:$src)>;
+  def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+            (MOVDI2PDIrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+            (MOVDI2PDIrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+            (MOVDI2PDIrm addr:$src)>;
+  def : Pat<(v4i32 (X86vzload addr:$src)),
+            (MOVDI2PDIrm addr:$src)>;
 }

 // Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
@@ -4287,7 +4265,7 @@ def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
 def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
                 (MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;

-let Predicates = [UseAVX], AddedComplexity = 20 in {
+let Predicates = [UseAVX] in {
   def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
             (VMOVQI2PQIrm addr:$src)>;
   def : Pat<(v2i64 (X86vzload addr:$src)),
@@ -4299,7 +4277,7 @@ let Predicates = [UseAVX], AddedComplexity = 20 in {
             (SUBREG_TO_REG (i64 0), (VMOVQI2PQIrm addr:$src), sub_xmm)>;
 }

-let Predicates = [UseSSE2], AddedComplexity = 20 in {
+let Predicates = [UseSSE2] in {
   def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
             (MOVQI2PQIrm addr:$src)>;
   def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
@@ -4310,27 +4288,23 @@ let Predicates = [UseSSE2], AddedComplexity = 20 in {
 // IA32 document. movq xmm1, xmm2 does clear the high bits.
 //
 let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
-let AddedComplexity = 15 in
 def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                          "vmovq\t{$src, $dst|$dst, $src}",
                          [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                          XS, VEX, Requires<[UseAVX]>, VEX_WIG;
-let AddedComplexity = 15 in
 def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                         "movq\t{$src, $dst|$dst, $src}",
                         [(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
                         XS, Requires<[UseSSE2]>;
 } // ExeDomain, SchedRW

-let AddedComplexity = 20 in {
-  let Predicates = [UseAVX] in {
-    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
-              (VMOVZPQILo2PQIrr VR128:$src)>;
-  }
-  let Predicates = [UseSSE2] in {
-    def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
-              (MOVZPQILo2PQIrr VR128:$src)>;
-  }
+let Predicates = [UseAVX] in {
+  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+            (VMOVZPQILo2PQIrr VR128:$src)>;
+}
+let Predicates = [UseSSE2] in {
+  def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+            (MOVZPQILo2PQIrr VR128:$src)>;
 }

 //===---------------------------------------------------------------------===//
@@ -6438,7 +6412,6 @@ let Predicates = [HasAVX2] in {
 // blends because blends have better throughput on SandyBridge and Haswell, but
 // movs[s/d] are 1-2 byte shorter instructions.
 let Predicates = [UseAVX] in {
-  let AddedComplexity = 15 in {
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
             (VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
@@ -6451,7 +6424,6 @@ let Predicates = [UseAVX] in {
   // Move low f64 and clear high bits.
   def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
             (VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
-  }

   // These will incur an FP/int domain crossing penalty, but it may be the only
   // way without AVX2. Do not add any complexity because we may be able to match
@@ -6466,7 +6438,7 @@
 // on targets where they have equal performance. These were changed to use
 // blends because blends have better throughput on SandyBridge and Haswell, but
 // movs[s/d] are 1-2 byte shorter instructions.
-let Predicates = [UseSSE41], AddedComplexity = 15 in {
+let Predicates = [UseSSE41] in {
   // With SSE41 we can use blends for these patterns.
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
             (BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
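The deletions above all target `AddedComplexity` wrappers, or drop the field from a combined `let` clause; the patterns themselves are unchanged. For background: TableGen computes a complexity score for every selection pattern from the size of the DAG it matches, and the generated matcher tries higher-scoring patterns first; `AddedComplexity` is simply added on top of that score. A minimal sketch of the mechanism, reusing the MMX pattern pair from this diff (the annotations are illustrative and not part of the commit):

// Both patterns match an X86vzmovl of a scalar_to_vector. The load form
// matches a larger DAG, so its computed complexity is already higher and
// it is tried first even without an explicit bump.
let AddedComplexity = 20 in   // pre-patch form: manual bias added to the score
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
          (MMX_MOVD64rm addr:$src)>;

def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
          (MMX_MOVD64rr GR32:$src)>;

With the wrappers removed, these patterns are ordered by their computed complexity alone.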

