Diffstat (limited to 'llvm/lib')
 llvm/lib/Target/X86/X86ISelLowering.cpp |  6 ++++++
 llvm/lib/Target/X86/X86InstrInfo.cpp    | 23 +++++++++++++++++++
 2 files changed, 29 insertions(+), 0 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f853c79020a..51b02e8cfca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -9022,6 +9022,12 @@ static SDValue lowerV4F32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
       return BlendPerm;
   }
 
+  // Use low/high mov instructions.
+  if (isShuffleEquivalent(V1, V2, Mask, {0, 1, 4, 5}))
+    return DAG.getNode(X86ISD::MOVLHPS, DL, MVT::v4f32, V1, V2);
+  if (isShuffleEquivalent(V1, V2, Mask, {2, 3, 6, 7}))
+    return DAG.getNode(X86ISD::MOVHLPS, DL, MVT::v4f32, V2, V1);
+
   // Use dedicated unpack instructions for masks that match their pattern.
   if (SDValue V =
           lowerVectorShuffleWithUNPCK(DL, MVT::v4f32, Mask, V1, V2, DAG))
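
The two new cases map straight onto the MOVLHPS/MOVHLPS semantics. As a minimal sketch of the equivalence, written with SSE intrinsics (function names are illustrative, not part of the patch):

    #include <xmmintrin.h>

    // Mask {0, 1, 4, 5}: the low halves of both inputs -> MOVLHPS V1, V2.
    __m128 lo_halves(__m128 v1, __m128 v2) {
      return _mm_movelh_ps(v1, v2);    // { v1[0], v1[1], v2[0], v2[1] }
    }

    // Mask {2, 3, 6, 7}: the high halves of both inputs -> MOVHLPS V2, V1.
    // Note the swapped operands: MOVHLPS puts the *second* source's high
    // half into the low lanes of the result, hence getNode(..., V2, V1).
    __m128 hi_halves(__m128 v1, __m128 v2) {
      return _mm_movehl_ps(v2, v1);    // { v1[2], v1[3], v2[2], v2[3] }
    }
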
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 092e443d7d6..bc7344050bb 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -998,6 +998,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::MINSDrr_Int,     X86::MINSDrm_Int,     0 },
     { X86::MINSSrr,         X86::MINSSrm,         0 },
     { X86::MINSSrr_Int,     X86::MINSSrm_Int,     0 },
+    { X86::MOVLHPSrr,       X86::MOVHPSrm,        TB_NO_REVERSE },
     { X86::MPSADBWrri,      X86::MPSADBWrmi,      TB_ALIGN_16 },
     { X86::MULPDrr,         X86::MULPDrm,         TB_ALIGN_16 },
     { X86::MULPSrr,         X86::MULPSrm,         TB_ALIGN_16 },
@@ -1298,6 +1299,7 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     { X86::VMINSDrr_Int,    X86::VMINSDrm_Int,    0 },
     { X86::VMINSSrr,        X86::VMINSSrm,        0 },
     { X86::VMINSSrr_Int,    X86::VMINSSrm_Int,    0 },
+    { X86::VMOVLHPSrr,      X86::VMOVHPSrm,       TB_NO_REVERSE },
     { X86::VMPSADBWrri,     X86::VMPSADBWrmi,     0 },
     { X86::VMULPDrr,        X86::VMULPDrm,        0 },
     { X86::VMULPSrr,        X86::VMULPSrm,        0 },
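
These fold-table entries let (V)MOVLHPSrr be rewritten as (V)MOVHPSrm when its second operand has to be reloaded from a spill slot: MOVLHPS only reads the low 64 bits of its second source, and MOVHPS loads exactly those 64 bits from memory into the destination's high half. TB_NO_REVERSE marks the mapping as one-way, since the memory form reads 8 bytes while the register form reads a full 16-byte register. A hedged intrinsics sketch of the equivalence (names are illustrative):

    #include <xmmintrin.h>

    // Unfolded: reload b from its 16-byte spill slot, then movlhps a, b.
    // Folded:   movhps (slot), a -- slot[0..1] goes straight into a's high half.
    __m128 movlhps_fold(__m128 a, const float *slot) {
      __m128 b = _mm_load_ps(slot);
      __m128 unfolded = _mm_movelh_ps(a, b);                  // { a0, a1, b0, b1 }
      __m128 folded = _mm_loadh_pi(a, (const __m64 *)slot);   // same four lanes
      (void)unfolded;
      return folded;
    }
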
@@ -5531,6 +5533,23 @@ MachineInstr *X86InstrInfo::foldMemoryOperandCustom(
       }
     }
     break;
+  case X86::MOVHLPSrr:
+  case X86::VMOVHLPSrr:
+    // Move the upper 64-bits of the second operand to the lower 64-bits.
+    // To fold the load, adjust the pointer to the upper and use (V)MOVLPS.
+    // TODO: In most cases AVX doesn't have an 8-byte alignment requirement.
+    if (OpNum == 2) {
+      unsigned RCSize = getRegClass(MI->getDesc(), OpNum, &RI, MF)->getSize();
+      if (Size <= RCSize && 8 <= Align) {
+        unsigned NewOpCode =
+            (MI->getOpcode() == X86::VMOVHLPSrr ? X86::VMOVLPSrm
+                                                : X86::MOVLPSrm);
+        MachineInstr *NewMI =
+            FuseInst(MF, NewOpCode, OpNum, MOs, InsertPt, MI, *this, 8);
+        return NewMI;
+      }
+    }
+    break;
   };
 
   return nullptr;
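
The custom fold handles the trickier (V)MOVHLPSrr case: taking the high half of a value that sits in a 16-byte spill slot is the same as a (V)MOVLPS load from the slot's address plus 8, which is what the trailing 8 passed to FuseInst encodes as a pointer adjustment. A rough intrinsics model of that equivalence (names are illustrative):

    #include <xmmintrin.h>

    // Unfolded: reload src from its spill slot, then movhlps dst, src.
    // Folded:   movlps 8(slot), dst -- read only the slot's upper 64 bits.
    __m128 movhlps_fold(__m128 dst, const float *slot) {
      __m128 src = _mm_load_ps(slot);
      __m128 unfolded = _mm_movehl_ps(dst, src);  // { src2, src3, dst2, dst3 }
      __m128 folded =
          _mm_loadl_pi(dst, (const __m64 *)(slot + 2)); // movlps from slot+8
      (void)unfolded;
      return folded;
    }
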
@@ -5741,6 +5760,10 @@ static bool hasPartialRegUpdate(unsigned Opcode) {
   case X86::CVTSS2SDrm:
   case X86::Int_CVTSS2SDrr:
   case X86::Int_CVTSS2SDrm:
+  case X86::MOVHPDrm:
+  case X86::MOVHPSrm:
+  case X86::MOVLPDrm:
+  case X86::MOVLPSrm:
   case X86::RCPSSr:
   case X86::RCPSSm:
   case X86::RCPSSr_Int:
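
The MOVH/MOVL load forms belong in hasPartialRegUpdate because they write only 64 bits of the destination and leave the other half untouched, so the old register value is an implicit input and a stale one can create a false dependence. A small illustration (function name is illustrative):

    #include <xmmintrin.h>

    // movlps writes only xmm[63:0]; bits [127:64] keep their previous
    // contents, so the CPU must merge with (and thus wait on) the old value.
    __m128 partial_update(__m128 old_value, const __m64 *p) {
      return _mm_loadl_pi(old_value, p);  // compiles to: movlps (mem), %xmm
    }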