author    Craig Topper <craig.topper@intel.com>  2018-07-10 21:00:22 +0000
committer Craig Topper <craig.topper@intel.com>  2018-07-10 21:00:22 +0000
commit    dea0b88b04d9ee5549fe1964bb81357ac2e52be6 (patch)
tree      55ee7a8bb56221ded55f439693d46414692c3221 /llvm/lib
parent    9960b8f13aed041cf48633a6510ad865349c0341 (diff)
[X86] Remove X86ISD::MOVLPS and X86ISD::MOVLPD. NFCI
These ISD nodes try to select the MOVLPS and MOVLPD instructions, which are special load-only instructions. They load data and merge it into the lower 64 bits of an XMM register. They are logically equivalent to our MOVSD node plus a load.

There was only one place in X86ISelLowering that used MOVLPD, and no place that selected MOVLPS. The one place that selected MOVLPD had to choose between it and MOVSD based on whether there was a load, but lowering is too early to tell whether the load can really be folded. So at isel time we have patterns that fall back to MOVSD for a MOVLPD node when no load can be found. We also had patterns that select the MOVLPD instruction for a MOVSD node when a load can be found, but for some reason those did not use the MOVLPD ISD opcode. It therefore seems better to standardize on the MOVSD ISD opcode and manage the MOVSD vs. MOVLPD instruction choice with isel patterns.

llvm-svn: 336728
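To make the stated equivalence concrete, here is a minimal standalone sketch using SSE2 intrinsics (an editor's illustration, not part of the patch): _mm_loadl_pd compiles to MOVLPD, while _mm_move_sd over a scalar load expresses the same operation as a MOVSD-style merge plus a load. Lane comments are written {low, high}.

    #include <immintrin.h>
    #include <cstdio>

    int main() {
      alignas(16) double vals[2] = {1.0, 2.0};
      double mem = 42.0;

      __m128d a = _mm_load_pd(vals);                          // a = {1.0, 2.0}

      // MOVLPD: load 64 bits from memory into the low element,
      // preserving the high element of the destination.
      __m128d via_movlpd = _mm_loadl_pd(a, &mem);             // {42.0, 2.0}

      // MOVSD plus a load: scalar load into lane 0, then merge
      // that low element over 'a', keeping a's high element.
      __m128d via_movsd = _mm_move_sd(a, _mm_load_sd(&mem));  // {42.0, 2.0}

      double r1[2], r2[2];
      _mm_storeu_pd(r1, via_movlpd);
      _mm_storeu_pd(r2, via_movsd);
      std::printf("movlpd: {%g, %g}  movsd+load: {%g, %g}\n",
                  r1[0], r1[1], r2[0], r2[1]);
      return 0;
    }

Both forms produce {42.0, 2.0}, which is why isel is free to pick the MOVLPD instruction when the load folds and the register form of MOVSD when it does not.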
Diffstat (limited to 'llvm/lib')
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp      | 11
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h        |  2
-rw-r--r-- llvm/lib/Target/X86/X86InstrAVX512.td        | 25
-rw-r--r-- llvm/lib/Target/X86/X86InstrFragmentsSIMD.td |  3
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td           | 57
5 files changed, 8 insertions(+), 90 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 7f49e6cc206..9c74575916e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4369,8 +4369,6 @@ static bool isTargetShuffle(unsigned Opcode) {
case X86ISD::VSRLDQ:
case X86ISD::MOVLHPS:
case X86ISD::MOVHLPS:
- case X86ISD::MOVLPS:
- case X86ISD::MOVLPD:
case X86ISD::MOVSHDUP:
case X86ISD::MOVSLDUP:
case X86ISD::MOVDDUP:
@@ -5951,10 +5949,6 @@ static bool getTargetShuffleMask(SDNode *N, MVT VT, bool AllowSentinelZero,
DecodeMOVDDUPMask(NumElems, Mask);
IsUnary = true;
break;
- case X86ISD::MOVLPD:
- case X86ISD::MOVLPS:
- // Not yet implemented
- return false;
case X86ISD::VPERMIL2: {
assert(N->getOperand(0).getValueType() == VT && "Unexpected value type");
assert(N->getOperand(1).getValueType() == VT && "Unexpected value type");
@@ -11363,8 +11357,7 @@ static SDValue lowerV2F64VectorShuffle(const SDLoc &DL, ArrayRef<int> Mask,
// We can either use a special instruction to load over the low double or
// to move just the low double.
return DAG.getNode(
- isShuffleFoldableLoad(V1S) ? X86ISD::MOVLPD : X86ISD::MOVSD,
- DL, MVT::v2f64, V2,
+ X86ISD::MOVSD, DL, MVT::v2f64, V2,
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v2f64, V1S));
if (Subtarget.hasSSE41())
@@ -26041,8 +26034,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::SHUF128: return "X86ISD::SHUF128";
case X86ISD::MOVLHPS: return "X86ISD::MOVLHPS";
case X86ISD::MOVHLPS: return "X86ISD::MOVHLPS";
- case X86ISD::MOVLPS: return "X86ISD::MOVLPS";
- case X86ISD::MOVLPD: return "X86ISD::MOVLPD";
case X86ISD::MOVDDUP: return "X86ISD::MOVDDUP";
case X86ISD::MOVSHDUP: return "X86ISD::MOVSHDUP";
case X86ISD::MOVSLDUP: return "X86ISD::MOVSLDUP";
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 4fadf0543c6..64ddcf7e4fe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -408,8 +408,6 @@ namespace llvm {
MOVSLDUP,
MOVLHPS,
MOVHLPS,
- MOVLPS,
- MOVLPD,
MOVSD,
MOVSS,
UNPCKL,
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 6830de62969..c425dc467eb 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4439,11 +4439,6 @@ let Predicates = [HasAVX512] in {
def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
(VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
-
- def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
- (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
- def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
- (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
}
let ExeDomain = SSEPackedInt, SchedRW = [SchedWriteVecLogic.XMM] in {
@@ -6405,7 +6400,8 @@ def VMOVHLPSZrr : AVX512PSI<0x12, MRMSrcReg, (outs VR128X:$dst),
// All patterns was taken from SSS implementation.
//===----------------------------------------------------------------------===//
-multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr, SDNode OpNode,
+multiclass avx512_mov_hilo_packed<bits<8> opc, string OpcodeStr,
+ SDPatternOperator OpNode,
X86VectorVTInfo _> {
let ExeDomain = _.ExeDomain in
def rm : AVX512<opc, MRMSrcMem, (outs _.RC:$dst),
@@ -6423,9 +6419,9 @@ defm VMOVHPSZ128 : avx512_mov_hilo_packed<0x16, "vmovhps", X86Movlhps,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
defm VMOVHPDZ128 : avx512_mov_hilo_packed<0x16, "vmovhpd", X86Unpckl,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
-defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", X86Movlps,
+defm VMOVLPSZ128 : avx512_mov_hilo_packed<0x12, "vmovlps", null_frag,
v4f32x_info>, EVEX_CD8<32, CD8VT2>, PS;
-defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", X86Movlpd,
+defm VMOVLPDZ128 : avx512_mov_hilo_packed<0x12, "vmovlpd", null_frag,
v2f64x_info>, EVEX_CD8<64, CD8VT1>, PD, VEX_W;
let Predicates = [HasAVX512] in {
@@ -6440,12 +6436,7 @@ let Predicates = [HasAVX512] in {
def : Pat<(v2f64 (X86Unpckl VR128X:$src1,
(bc_v2f64 (v2i64 (scalar_to_vector (loadi64 addr:$src2)))))),
(VMOVHPDZ128rm VR128X:$src1, addr:$src2)>;
- // VMOVLPS patterns
- def : Pat<(v4f32 (X86Movlps VR128X:$src1, (load addr:$src2))),
- (VMOVLPSZ128rm VR128X:$src1, addr:$src2)>;
// VMOVLPD patterns
- def : Pat<(v2f64 (X86Movlpd VR128X:$src1, (load addr:$src2))),
- (VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Movsd VR128X:$src1,
(v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVLPDZ128rm VR128X:$src1, addr:$src2)>;
@@ -6487,14 +6478,6 @@ let Predicates = [HasAVX512] in {
(v2f64 (X86VPermilpi VR128X:$src, (i8 1))),
(iPTR 0))), addr:$dst),
(VMOVHPDZ128mr addr:$dst, VR128X:$src)>;
- // VMOVLPS patterns
- def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128X:$src2)),
- addr:$src1),
- (VMOVLPSZ128mr addr:$src1, VR128X:$src2)>;
- // VMOVLPD patterns
- def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128X:$src2)),
- addr:$src1),
- (VMOVLPDZ128mr addr:$src1, VR128X:$src2)>;
}
//===----------------------------------------------------------------------===//
// FMA - Fused Multiply Operations
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index b29e2ae9174..1ff02e4feca 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -374,9 +374,6 @@ def X86Movss : SDNode<"X86ISD::MOVSS", SDTShuff2Op>;
def X86Movlhps : SDNode<"X86ISD::MOVLHPS", SDTShuff2Op>;
def X86Movhlps : SDNode<"X86ISD::MOVHLPS", SDTShuff2Op>;
-def X86Movlps : SDNode<"X86ISD::MOVLPS", SDTShuff2Op>;
-def X86Movlpd : SDNode<"X86ISD::MOVLPD", SDTShuff2Op>;
-
def SDTPack : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisInt<0>,
SDTCisVec<1>, SDTCisInt<1>,
SDTCisSameSizeAs<0,1>,
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 85b1dd16aa0..407b37c6a7c 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -305,15 +305,6 @@ let Predicates = [UseAVX] in {
def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
(VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>;
-
- // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold cause
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (VMOVSDrr VR128:$src1, VR128:$src2)>;
}
let Predicates = [UseSSE1] in {
@@ -372,15 +363,6 @@ let Predicates = [UseSSE2] in {
def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
(MOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>;
-
- // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
- // is during lowering, where it's not possible to recognize the fold because
- // it has two uses through a bitcast. One use disappears at isel time and the
- // fold opportunity reappears.
- def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, VR128:$src2)>;
- def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
- (MOVSDrr VR128:$src1, VR128:$src2)>;
}
// Aliases to help the assembler pick two byte VEX encodings by swapping the
@@ -692,8 +674,8 @@ multiclass sse12_mov_hilo_packed_base<bits<8>opc, SDNode psnode, SDNode pdnode,
Sched<[SchedWriteFShuffle.XMM.Folded, ReadAfterLd]>;
}
-multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
- string base_opc> {
+multiclass sse12_mov_hilo_packed<bits<8>opc, SDPatternOperator psnode,
+ SDPatternOperator pdnode, string base_opc> {
let Predicates = [UseAVX] in
defm V#NAME : sse12_mov_hilo_packed_base<opc, psnode, pdnode, base_opc,
"\t{$src2, $src1, $dst|$dst, $src1, $src2}">,
@@ -704,7 +686,7 @@ multiclass sse12_mov_hilo_packed<bits<8>opc, SDNode psnode, SDNode pdnode,
"\t{$src2, $dst|$dst, $src2}">;
}
-defm MOVL : sse12_mov_hilo_packed<0x12, X86Movlps, X86Movlpd, "movlp">;
+defm MOVL : sse12_mov_hilo_packed<0x12, null_frag, null_frag, "movlp">;
let SchedRW = [WriteFStore] in {
let Predicates = [UseAVX] in {
@@ -730,24 +712,10 @@ def MOVLPDmr : PDI<0x13, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
} // SchedRW
let Predicates = [UseAVX] in {
- // Shuffle with VMOVLPS
- def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
- (VMOVLPSrm VR128:$src1, addr:$src2)>;
-
// Shuffle with VMOVLPD
- def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
- (VMOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Movsd VR128:$src1,
(v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(VMOVLPDrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
- addr:$src1),
- (VMOVLPSmr addr:$src1, VR128:$src2)>;
- def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
- addr:$src1),
- (VMOVLPDmr addr:$src1, VR128:$src2)>;
}
let Predicates = [UseSSE1] in {
@@ -755,32 +723,13 @@ let Predicates = [UseSSE1] in {
def : Pat<(store (i64 (extractelt (bc_v2i64 (v4f32 VR128:$src2)),
(iPTR 0))), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>;
-
- // Shuffle with MOVLPS
- def : Pat<(v4f32 (X86Movlps VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
- def : Pat<(X86Movlps VR128:$src1,
- (bc_v4f32 (v2i64 (scalar_to_vector (loadi64 addr:$src2))))),
- (MOVLPSrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (v4f32 (X86Movlps (load addr:$src1), VR128:$src2)),
- addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>;
}
let Predicates = [UseSSE2] in {
// Shuffle with MOVLPD
- def : Pat<(v2f64 (X86Movlpd VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (X86Movsd VR128:$src1,
(v2f64 (scalar_to_vector (loadf64 addr:$src2))))),
(MOVLPDrm VR128:$src1, addr:$src2)>;
-
- // Store patterns
- def : Pat<(store (v2f64 (X86Movlpd (load addr:$src1), VR128:$src2)),
- addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>;
}
//===----------------------------------------------------------------------===//