summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.cpp50
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td6
2 files changed, 41 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index f6f4e7d2b2c..e39819f4ac6 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1542,20 +1542,39 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
case X86::VMOVSDrr:
case X86::VMOVSSrr:{
// On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
- assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
+ if (Subtarget.hasSSE41()) {
+ unsigned Mask, Opc;
+ switch (MI.getOpcode()) {
+ default: llvm_unreachable("Unreachable!");
+ case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
+ case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
+ case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
+ case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ }
- unsigned Mask, Opc;
- switch (MI.getOpcode()) {
- default: llvm_unreachable("Unreachable!");
- case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
- case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
- case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
- case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(Opc));
+ WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
}
+ // Convert to SHUFPD.
+ assert(MI.getOpcode() == X86::MOVSDrr &&
+ "Can only commute MOVSDrr without SSE4.1");
+
auto &WorkingMI = cloneIfNew(MI);
- WorkingMI.setDesc(get(Opc));
- WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+ WorkingMI.setDesc(get(X86::SHUFPDrri));
+ WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
+ return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+ OpIdx1, OpIdx2);
+ }
+ case X86::SHUFPDrri: {
+ // Commute to MOVSD.
+ assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
+ auto &WorkingMI = cloneIfNew(MI);
+ WorkingMI.setDesc(get(X86::MOVSDrr));
+ WorkingMI.RemoveOperand(3);
return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
OpIdx1, OpIdx2);
}
@@ -1874,13 +1893,18 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
}
return false;
}
- case X86::MOVSDrr:
case X86::MOVSSrr:
- case X86::VMOVSDrr:
- case X86::VMOVSSrr:
+ // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
+ // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
+ // AVX implies sse4.1.
if (Subtarget.hasSSE41())
return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
return false;
+ case X86::SHUFPDrri:
+ // We can commute this to MOVSD.
+ if (MI.getOperand(3).getImm() == 0x02)
+ return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+ return false;
case X86::MOVHLPSrr:
case X86::UNPCKHPDrr:
case X86::VMOVHLPSrr:
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index f904f5a23d5..d25d216db19 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1951,12 +1951,14 @@ let Predicates = [UseSSE1] in {
/// sse12_shuffle - sse 1 & 2 fp shuffle instructions
multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
ValueType vt, string asm, PatFrag mem_frag,
- X86FoldableSchedWrite sched, Domain d> {
+ X86FoldableSchedWrite sched, Domain d,
+ bit IsCommutable = 0> {
def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
(ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
(i8 imm:$src3))))], d>,
Sched<[sched.Folded, sched.ReadAfterFold]>;
+ let isCommutable = IsCommutable in
def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
(ins RC:$src1, RC:$src2, u8imm:$src3), asm,
[(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -1988,7 +1990,7 @@ let Constraints = "$src1 = $dst" in {
memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
"shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
- memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
+ memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
}
//===----------------------------------------------------------------------===//
OpenPOWER on IntegriCloud