Diffstat (limited to 'llvm')
-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td | 133
-rw-r--r--  llvm/lib/Target/X86/X86InstrFMA.td    |   1
-rw-r--r--  llvm/lib/Target/X86/X86InstrMMX.td    |   2
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td    | 186
4 files changed, 139 insertions, 183 deletions
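This commit strips explicit AddedComplexity overrides from the X86 zero-extending-move (X86vzmovl) selection patterns. For background, AddedComplexity is a bias that TableGen adds to a pattern's computed complexity when the DAG instruction selector ranks competing patterns. A minimal sketch of the wrapper form being deleted throughout, built from one of the patterns in the diff below (illustrative only, not a line of this commit):

// Force this pattern to outrank others matching the same node,
// regardless of its inherent size.
let AddedComplexity = 20 in
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
          (VMOVQI2PQIrm addr:$src)>;

Without the override, the pattern still participates in selection; it is simply ranked by its inherent complexity alone.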
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index c425dc467eb..85fc440fd40 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4317,12 +4317,10 @@ def : InstAlias<"vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
VR128X:$src1, VR128X:$src2), 0>;
let Predicates = [HasAVX512] in {
- let AddedComplexity = 15 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
(VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
(VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
- }
// Move low f32 and clear high bits.
def : Pat<(v8f32 (X86vzmovl (v8f32 VR256X:$src))),
@@ -4342,7 +4340,6 @@ let Predicates = [HasAVX512] in {
(VMOVSSZrr (v4i32 (AVX512_128_SET0)),
(EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm)), sub_xmm)>;
- let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
@@ -4398,7 +4395,7 @@ let Predicates = [HasAVX512] in {
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
def : Pat<(v8f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDZrm addr:$src), sub_xmm)>;
- }
+
def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
(v2i64 (scalar_to_vector (loadi64 addr:$src))), (iPTR 0)))),
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
@@ -4442,7 +4439,6 @@ let Predicates = [HasAVX512] in {
}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
-let AddedComplexity = 15 in
def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
(ins VR128X:$src),
"vmovq\t{$src, $dst|$dst, $src}",
@@ -4452,42 +4448,39 @@ def VMOVZPQILo2PQIZrr : AVX512XSI<0x7E, MRMSrcReg, (outs VR128X:$dst),
}
let Predicates = [HasAVX512] in {
- let AddedComplexity = 15 in {
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
- (VMOVDI2PDIZrr GR32:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (VMOVDI2PDIZrr GR32:$src)>;
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
- (VMOV64toPQIZrr GR64:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (VMOV64toPQIZrr GR64:$src)>;
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
- def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
- }
+
+ def : Pat<(v8i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIZrr GR64:$src), sub_xmm)>;
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
- let AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
- (VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
- (VMOVDI2PDIZrm addr:$src)>;
- def : Pat<(v8i32 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
- def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
- (VMOVQI2PQIZrm addr:$src)>;
- def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
- (VMOVZPQILo2PQIZrr VR128X:$src)>;
- def : Pat<(v2i64 (X86vzload addr:$src)),
- (VMOVQI2PQIZrm addr:$src)>;
- def : Pat<(v4i64 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
- }
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzload addr:$src)),
+ (VMOVDI2PDIZrm addr:$src)>;
+ def : Pat<(v8i32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIZrm addr:$src), sub_xmm)>;
+ def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
+ (VMOVQI2PQIZrm addr:$src)>;
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128X:$src))),
+ (VMOVZPQILo2PQIZrr VR128X:$src)>;
+ def : Pat<(v2i64 (X86vzload addr:$src)),
+ (VMOVQI2PQIZrm addr:$src)>;
+ def : Pat<(v4i64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (VMOVQI2PQIZrm addr:$src), sub_xmm)>;
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
@@ -7721,14 +7714,12 @@ def : Pat<(v8f64 (extloadv8f32 addr:$src)),
(VCVTPS2PDZrm addr:$src)>;
let Predicates = [HasVLX] in {
- let AddedComplexity = 15 in {
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
- (VCVTPD2PSZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
- (VCVTPD2PSZ128rm addr:$src)>;
- }
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
+ (VCVTPD2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
+ (VCVTPD2PSZ128rm addr:$src)>;
def : Pat<(v2f64 (extloadv2f32 addr:$src)),
(VCVTPS2PDZ128rm addr:$src)>;
def : Pat<(v4f64 (extloadv4f32 addr:$src)),
@@ -8224,26 +8215,24 @@ def : Pat<(v2f64 (X86VUintToFP (v4i32 VR128X:$src1))),
}
let Predicates = [HasAVX512, HasVLX] in {
- let AddedComplexity = 15 in {
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
- (VCVTPD2DQZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
- (VCVTPD2DQZ128rm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
- (VCVTPD2UDQZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
- (VCVTTPD2DQZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
- (VCVTTPD2DQZ128rm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
- (VCVTTPD2UDQZ128rr VR128X:$src)>;
- }
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (v2f64 VR128X:$src)))))),
+ (VCVTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+ (VCVTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2UInt (v2f64 VR128X:$src)))))),
+ (VCVTPD2UDQZ128rr VR128X:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (v2f64 VR128X:$src)))))),
+ (VCVTTPD2DQZ128rr VR128X:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+ (VCVTTPD2DQZ128rm addr:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2ui (v2f64 VR128X:$src)))))),
+ (VCVTTPD2UDQZ128rr VR128X:$src)>;
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
(VCVTDQ2PDZ128rm addr:$src)>;
@@ -8264,14 +8253,12 @@ let Predicates = [HasAVX512] in {
}
let Predicates = [HasDQI, HasVLX] in {
- let AddedComplexity = 15 in {
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
- (VCVTQQ2PSZ128rr VR128X:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
- (VCVTUQQ2PSZ128rr VR128X:$src)>;
- }
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86VSintToFP (v2i64 VR128X:$src)))))),
+ (VCVTQQ2PSZ128rr VR128X:$src)>;
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86VUintToFP (v2i64 VR128X:$src)))))),
+ (VCVTUQQ2PSZ128rr VR128X:$src)>;
}
let Predicates = [HasDQI, NoVLX] in {
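A note on the conversion hunks above: the X86vzmovl wrapper (move the low element, zero the upper lanes) can fold directly into the VCVTPD2PS/VCVTPD2DQ-family instructions because, with a 128-bit source, those instructions only produce the low 64 bits of the result and zero the upper destination lanes anyway. Restating one pattern from the diff for illustration:

def : Pat<(X86vzmovl (v2f64 (bitconvert
                     (v4f32 (X86vfpround (v2f64 VR128X:$src)))))),
          (VCVTPD2PSZ128rr VR128X:$src)>;

The same reasoning applies to the signed and unsigned integer conversions (VCVTPD2DQ, VCVTTPD2UDQ, and friends) matched by the neighboring patterns.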
diff --git a/llvm/lib/Target/X86/X86InstrFMA.td b/llvm/lib/Target/X86/X86InstrFMA.td
index 594eb3baa49..376f643050f 100644
--- a/llvm/lib/Target/X86/X86InstrFMA.td
+++ b/llvm/lib/Target/X86/X86InstrFMA.td
@@ -589,7 +589,6 @@ multiclass scalar_fma4_patterns<SDNode Op, string Name,
ValueType VT, ValueType EltVT,
RegisterClass RC, PatFrag mem_frag> {
let Predicates = [HasFMA4] in {
- let AddedComplexity = 15 in
def : Pat<(VT (X86vzmovl (VT (scalar_to_vector
(Op RC:$src1, RC:$src2, RC:$src3))))),
(!cast<Instruction>(Name#"rr_Int")
diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td
index e9dc4f6a68b..aefeffedfc1 100644
--- a/llvm/lib/Target/X86/X86InstrMMX.td
+++ b/llvm/lib/Target/X86/X86InstrMMX.td
@@ -273,11 +273,9 @@ def MMX_MOVNTQmr : MMXI<0xE7, MRMDestMem, (outs), (ins i64mem:$dst, VR64:$src),
Sched<[SchedWriteVecMoveLSNT.MMX.MR]>;
let Predicates = [HasMMX] in {
- let AddedComplexity = 15 in
// movd to MMX register zero-extends
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
(MMX_MOVD64rr GR32:$src)>;
- let AddedComplexity = 20 in
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
(MMX_MOVD64rm addr:$src)>;
}
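Why dropping the overrides is safe in pairs like the MMX one above, roughly: the generated instruction selector ranks patterns by a complexity score that grows with the size of the source pattern, and AddedComplexity is only an explicit bias on top of that score. The load form matches a strictly larger DAG than the register form, so it still takes priority for a load operand without the old 15/20 biases. The two shapes in question, as they read after this change (a sketch assembled from the hunk above):

// Register form: smaller pattern, selected when the operand is in GR32.
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector GR32:$src)))),
          (MMX_MOVD64rr GR32:$src)>;
// Load form: the extra load node makes this pattern inherently more
// complex, so it wins whenever the operand is a load.
def : Pat<(x86mmx (X86vzmovl (x86mmx (scalar_to_vector (loadi32 addr:$src))))),
          (MMX_MOVD64rm addr:$src)>;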
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 407b37c6a7c..74b843d988f 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -248,7 +248,6 @@ let canFoldAsLoad = 1, isReMaterializable = 1 in {
// Patterns
let Predicates = [UseAVX] in {
- let AddedComplexity = 20 in {
// MOVSSrm zeros the high parts of the register; represent this
// with SUBREG_TO_REG. The AVX versions also write: DST[255:128] <- 0
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
@@ -285,7 +284,6 @@ let Predicates = [UseAVX] in {
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
def : Pat<(v4f64 (X86vzload addr:$src)),
(SUBREG_TO_REG (i32 0), (VMOVSDrm addr:$src), sub_xmm)>;
- }
// Extract and store.
def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
@@ -308,7 +306,7 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseSSE1] in {
- let Predicates = [NoSSE41], AddedComplexity = 15 in {
+ let Predicates = [NoSSE41] in {
// Move scalar to XMM zero-extended, zeroing a VR128 then do a
// MOVSS to the lower bits.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
@@ -317,7 +315,6 @@ let Predicates = [UseSSE1] in {
(MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
}
- let AddedComplexity = 20 in {
// MOVSSrm already zeros the high parts of the register.
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
@@ -327,7 +324,6 @@ let Predicates = [UseSSE1] in {
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
def : Pat<(v4f32 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (MOVSSrm addr:$src), VR128)>;
- }
// Extract and store.
def : Pat<(store (f32 (extractelt (v4f32 VR128:$src), (iPTR 0))),
@@ -343,7 +339,6 @@ let Predicates = [UseSSE1] in {
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 20 in {
// MOVSDrm already zeros the high parts of the register.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
@@ -355,7 +350,6 @@ let Predicates = [UseSSE2] in {
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
def : Pat<(v2f64 (X86vzload addr:$src)),
(COPY_TO_REGCLASS (MOVSDrm addr:$src), VR128)>;
- }
// Shuffle with MOVSD
def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
@@ -1637,20 +1631,18 @@ let Predicates = [HasAVX, NoVLX] in {
}
let Predicates = [HasAVX, NoVLX] in {
- let AddedComplexity = 15 in {
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
- (VCVTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
- (VCVTPD2DQrm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
- (VCVTTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
- (VCVTTPD2DQrm addr:$src)>;
- }
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+ (VCVTPD2DQrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (loadv2f64 addr:$src)))))),
+ (VCVTPD2DQrm addr:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+ (VCVTTPD2DQrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (loadv2f64 addr:$src)))))),
+ (VCVTTPD2DQrm addr:$src)>;
} // Predicates = [HasAVX, NoVLX]
def CVTTPD2DQrr : PDI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
@@ -1665,20 +1657,18 @@ def CVTTPD2DQrm : PDI<0xE6, MRMSrcMem, (outs VR128:$dst),(ins f128mem:$src),
Sched<[WriteCvtPD2ILd]>;
let Predicates = [UseSSE2] in {
- let AddedComplexity = 15 in {
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
- (CVTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
- (CVTPD2DQrm addr:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
- (CVTTPD2DQrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2i64 (bitconvert
- (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
- (CVTTPD2DQrm addr:$src)>;
- }
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (v2f64 VR128:$src)))))),
+ (CVTPD2DQrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvtp2Int (memopv2f64 addr:$src)))))),
+ (CVTPD2DQrm addr:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (v2f64 VR128:$src)))))),
+ (CVTTPD2DQrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2i64 (bitconvert
+ (v4i32 (X86cvttp2si (memopv2f64 addr:$src)))))),
+ (CVTTPD2DQrm addr:$src)>;
} // Predicates = [UseSSE2]
// Convert packed single to packed double
@@ -1819,26 +1809,22 @@ def CVTPD2PSrm : PDI<0x5A, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
let Predicates = [HasAVX, NoVLX] in {
// Match fpround and fpextend for 128/256-bit conversions
- let AddedComplexity = 15 in {
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
- (VCVTPD2PSrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
- (VCVTPD2PSrm addr:$src)>;
- }
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
+ (VCVTPD2PSrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (loadv2f64 addr:$src)))))),
+ (VCVTPD2PSrm addr:$src)>;
}
let Predicates = [UseSSE2] in {
// Match fpround and fpextend for 128-bit conversions
- let AddedComplexity = 15 in {
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
- (CVTPD2PSrr VR128:$src)>;
- def : Pat<(X86vzmovl (v2f64 (bitconvert
- (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
- (CVTPD2PSrm addr:$src)>;
- }
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (v2f64 VR128:$src)))))),
+ (CVTPD2PSrr VR128:$src)>;
+ def : Pat<(X86vzmovl (v2f64 (bitconvert
+ (v4f32 (X86vfpround (memopv2f64 addr:$src)))))),
+ (CVTPD2PSrm addr:$src)>;
}
//===----------------------------------------------------------------------===//
@@ -4165,34 +4151,30 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in {
} // ExeDomain = SSEPackedInt, isCodeGenOnly = 1
let Predicates = [UseAVX] in {
- let AddedComplexity = 15 in {
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
- (VMOVDI2PDIrr GR32:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (VMOVDI2PDIrr GR32:$src)>;
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
- (VMOV64toPQIrr GR64:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (VMOV64toPQIrr GR64:$src)>;
- def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
- (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
- (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>;
- }
+ def : Pat<(v4i64 (X86vzmovl (insert_subvector undef,
+ (v2i64 (scalar_to_vector GR64:$src)),(iPTR 0)))),
+ (SUBREG_TO_REG (i64 0), (VMOV64toPQIrr GR64:$src), sub_xmm)>;
// AVX 128-bit movd/movq instructions write zeros in the high 128-bit part.
// These instructions also write zeros in the high part of a 256-bit register.
- let AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
- (VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
- (VMOVDI2PDIrm addr:$src)>;
- def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
- (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
- (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
- def : Pat<(v8i32 (X86vzload addr:$src)),
- (SUBREG_TO_REG (i64 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
- }
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzload addr:$src)),
+ (VMOVDI2PDIrm addr:$src)>;
+ def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
+ (v4i32 (scalar_to_vector (loadi32 addr:$src))), (iPTR 0)))),
+ (SUBREG_TO_REG (i32 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
+ def : Pat<(v8i32 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (VMOVDI2PDIrm addr:$src), sub_xmm)>;
// Use regular 128-bit instructions to match 256-bit scalar_to_vec+zext.
def : Pat<(v8i32 (X86vzmovl (insert_subvector undef,
(v4i32 (scalar_to_vector GR32:$src)),(iPTR 0)))),
@@ -4200,23 +4182,19 @@ let Predicates = [UseAVX] in {
}
let Predicates = [UseSSE2] in {
- let AddedComplexity = 15 in {
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
- (MOVDI2PDIrr GR32:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector GR32:$src)))),
+ (MOVDI2PDIrr GR32:$src)>;
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
- (MOV64toPQIrr GR64:$src)>;
- }
- let AddedComplexity = 20 in {
- def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
- (MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
- (MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
- (MOVDI2PDIrm addr:$src)>;
- def : Pat<(v4i32 (X86vzload addr:$src)),
- (MOVDI2PDIrm addr:$src)>;
- }
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector GR64:$src)))),
+ (MOV64toPQIrr GR64:$src)>;
+ def : Pat<(v2i64 (X86vzmovl (v2i64 (scalar_to_vector (zextloadi64i32 addr:$src))))),
+ (MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (v4i32 (scalar_to_vector (loadi32 addr:$src))))),
+ (MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzmovl (bc_v4i32 (loadv2i64 addr:$src)))),
+ (MOVDI2PDIrm addr:$src)>;
+ def : Pat<(v4i32 (X86vzload addr:$src)),
+ (MOVDI2PDIrm addr:$src)>;
}
// Before the MC layer of LLVM existed, clang emitted "movd" assembly instead of
@@ -4287,7 +4265,7 @@ def : InstAlias<"vmovq.s\t{$src, $dst|$dst, $src}",
def : InstAlias<"movq.s\t{$src, $dst|$dst, $src}",
(MOVPQI2QIrr VR128:$dst, VR128:$src), 0>;
-let Predicates = [UseAVX], AddedComplexity = 20 in {
+let Predicates = [UseAVX] in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(VMOVQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)),
@@ -4299,7 +4277,7 @@ let Predicates = [UseAVX], AddedComplexity = 20 in {
(SUBREG_TO_REG (i64 0), (VMOVQI2PQIrm addr:$src), sub_xmm)>;
}
-let Predicates = [UseSSE2], AddedComplexity = 20 in {
+let Predicates = [UseSSE2] in {
def : Pat<(v2i64 (X86vzmovl (loadv2i64 addr:$src))),
(MOVQI2PQIrm addr:$src)>;
def : Pat<(v2i64 (X86vzload addr:$src)), (MOVQI2PQIrm addr:$src)>;
@@ -4310,27 +4288,23 @@ let Predicates = [UseSSE2], AddedComplexity = 20 in {
// IA32 document. movq xmm1, xmm2 does clear the high bits.
//
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
-let AddedComplexity = 15 in
def VMOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"vmovq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, VEX, Requires<[UseAVX]>, VEX_WIG;
-let AddedComplexity = 15 in
def MOVZPQILo2PQIrr : I<0x7E, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (v2i64 (X86vzmovl (v2i64 VR128:$src))))]>,
XS, Requires<[UseSSE2]>;
} // ExeDomain, SchedRW
-let AddedComplexity = 20 in {
- let Predicates = [UseAVX] in {
- def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (VMOVZPQILo2PQIrr VR128:$src)>;
- }
- let Predicates = [UseSSE2] in {
- def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
- (MOVZPQILo2PQIrr VR128:$src)>;
- }
+let Predicates = [UseAVX] in {
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (VMOVZPQILo2PQIrr VR128:$src)>;
+}
+let Predicates = [UseSSE2] in {
+ def : Pat<(v2f64 (X86vzmovl (v2f64 VR128:$src))),
+ (MOVZPQILo2PQIrr VR128:$src)>;
}
//===---------------------------------------------------------------------===//
@@ -6438,7 +6412,6 @@ let Predicates = [HasAVX2] in {
// blends because blends have better throughput on SandyBridge and Haswell, but
// movs[s/d] are 1-2 byte shorter instructions.
let Predicates = [UseAVX] in {
- let AddedComplexity = 15 in {
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(VBLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
@@ -6451,7 +6424,6 @@ let Predicates = [UseAVX] in {
// Move low f64 and clear high bits.
def : Pat<(v4f64 (X86vzmovl (v4f64 VR256:$src))),
(VBLENDPDYrri (v4f64 (AVX_SET0)), VR256:$src, (i8 1))>;
- }
// These will incur an FP/int domain crossing penalty, but it may be the only
// way without AVX2. Do not add any complexity because we may be able to match
@@ -6466,7 +6438,7 @@ let Predicates = [UseAVX] in {
// on targets where they have equal performance. These were changed to use
// blends because blends have better throughput on SandyBridge and Haswell, but
// movs[s/d] are 1-2 byte shorter instructions.
-let Predicates = [UseSSE41], AddedComplexity = 15 in {
+let Predicates = [UseSSE41] in {
// With SSE41 we can use blends for these patterns.
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
(BLENDPSrri (v4f32 (V_SET0)), VR128:$src, (i8 1))>;