diff options
Diffstat (limited to 'llvm/lib')
-rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp | 50
-rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td   |  6
2 files changed, 41 insertions, 15 deletions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index f6f4e7d2b2c..e39819f4ac6 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1542,20 +1542,39 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
   case X86::VMOVSDrr:
   case X86::VMOVSSrr:{
     // On SSE41 or later we can commute a MOVSS/MOVSD to a BLENDPS/BLENDPD.
-    assert(Subtarget.hasSSE41() && "Commuting MOVSD/MOVSS requires SSE41!");
+    if (Subtarget.hasSSE41()) {
+      unsigned Mask, Opc;
+      switch (MI.getOpcode()) {
+      default: llvm_unreachable("Unreachable!");
+      case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
+      case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
+      case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
+      case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+      }
 
-    unsigned Mask, Opc;
-    switch (MI.getOpcode()) {
-    default: llvm_unreachable("Unreachable!");
-    case X86::MOVSDrr: Opc = X86::BLENDPDrri; Mask = 0x02; break;
-    case X86::MOVSSrr: Opc = X86::BLENDPSrri; Mask = 0x0E; break;
-    case X86::VMOVSDrr: Opc = X86::VBLENDPDrri; Mask = 0x02; break;
-    case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
+      auto &WorkingMI = cloneIfNew(MI);
+      WorkingMI.setDesc(get(Opc));
+      WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+      return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                     OpIdx1, OpIdx2);
     }
 
+    // Convert to SHUFPD.
+    assert(MI.getOpcode() == X86::MOVSDrr &&
+           "Can only commute MOVSDrr without SSE4.1");
+
     auto &WorkingMI = cloneIfNew(MI);
-    WorkingMI.setDesc(get(Opc));
-    WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
+    WorkingMI.setDesc(get(X86::SHUFPDrri));
+    WorkingMI.addOperand(MachineOperand::CreateImm(0x02));
+    return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
+                                                   OpIdx1, OpIdx2);
+  }
+  case X86::SHUFPDrri: {
+    // Commute to MOVSD.
+    assert(MI.getOperand(3).getImm() == 0x02 && "Unexpected immediate!");
+    auto &WorkingMI = cloneIfNew(MI);
+    WorkingMI.setDesc(get(X86::MOVSDrr));
+    WorkingMI.RemoveOperand(3);
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
   }
@@ -1874,13 +1893,18 @@ bool X86InstrInfo::findCommutedOpIndices(MachineInstr &MI, unsigned &SrcOpIdx1,
     }
     return false;
   }
-  case X86::MOVSDrr:
   case X86::MOVSSrr:
-  case X86::VMOVSDrr:
-  case X86::VMOVSSrr:
+    // X86::MOVSDrr is always commutable. MOVSS is only commutable if we can
+    // form sse4.1 blend. We assume VMOVSSrr/VMOVSDrr is always commutable since
+    // AVX implies sse4.1.
     if (Subtarget.hasSSE41())
       return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
     return false;
+  case X86::SHUFPDrri:
+    // We can commute this to MOVSD.
+    if (MI.getOperand(3).getImm() == 0x02)
+      return TargetInstrInfo::findCommutedOpIndices(MI, SrcOpIdx1, SrcOpIdx2);
+    return false;
   case X86::MOVHLPSrr:
   case X86::UNPCKHPDrr:
   case X86::VMOVHLPSrr:
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index f904f5a23d5..d25d216db19 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -1951,12 +1951,14 @@ let Predicates = [UseSSE1] in {
 /// sse12_shuffle - sse 1 & 2 fp shuffle instructions
 multiclass sse12_shuffle<RegisterClass RC, X86MemOperand x86memop,
                          ValueType vt, string asm, PatFrag mem_frag,
-                         X86FoldableSchedWrite sched, Domain d> {
+                         X86FoldableSchedWrite sched, Domain d,
+                         bit IsCommutable = 0> {
   def rmi : PIi8<0xC6, MRMSrcMem, (outs RC:$dst),
                    (ins RC:$src1, x86memop:$src2, u8imm:$src3), asm,
                    [(set RC:$dst, (vt (X86Shufp RC:$src1, (mem_frag addr:$src2),
                                        (i8 imm:$src3))))], d>,
             Sched<[sched.Folded, sched.ReadAfterFold]>;
+  let isCommutable = IsCommutable in
   def rri : PIi8<0xC6, MRMSrcReg, (outs RC:$dst),
                  (ins RC:$src1, RC:$src2, u8imm:$src3), asm,
                  [(set RC:$dst, (vt (X86Shufp RC:$src1, RC:$src2,
@@ -1988,7 +1990,7 @@ let Constraints = "$src1 = $dst" in {
            memopv4f32, SchedWriteFShuffle.XMM, SSEPackedSingle>, PS;
   defm SHUFPD : sse12_shuffle<VR128, f128mem, v2f64,
                  "shufpd\t{$src3, $src2, $dst|$dst, $src2, $src3}",
-                 memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble>, PD;
+                 memopv2f64, SchedWriteFShuffle.XMM, SSEPackedDouble, 1>, PD;
 }
 
 //===----------------------------------------------------------------------===//