Diffstat (limited to 'llvm/lib/Target')
 -rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td | 73
 -rw-r--r--  llvm/lib/Target/X86/X86InstrInfo.cpp  | 10
 -rw-r--r--  llvm/lib/Target/X86/X86InstrSSE.td    | 67
 3 files changed, 73 insertions, 77 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index 7be8d4568f6..0c52fa889e8 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -3323,28 +3323,25 @@ def VMOVQI2PQIZrm : AVX512XSI<0x7E, MRMSrcMem, (outs VR128X:$dst),
 multiclass avx512_move_scalar<string asm, SDNode OpNode,
                               X86VectorVTInfo _> {
   def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
-             (ins _.RC:$src1, _.FRC:$src2),
+             (ins _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"),
-             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1,
-                                     (scalar_to_vector _.FRC:$src2))))],
+             [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, _.RC:$src2)))],
              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V;
   def rrkz : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
-              (ins _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
+              (ins _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
               !strconcat(asm, "\t{$src2, $src1, $dst {${mask}} {z}|",
               "$dst {${mask}} {z}, $src1, $src2}"),
               [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
-                                      (_.VT (OpNode _.RC:$src1,
-                                             (scalar_to_vector _.FRC:$src2))),
+                                      (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                       _.ImmAllZerosV)))],
               _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_KZ;
   let Constraints = "$src0 = $dst" in
   def rrk : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst),
-             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.FRC:$src2),
+             (ins _.RC:$src0, _.KRCWM:$mask, _.RC:$src1, _.RC:$src2),
              !strconcat(asm, "\t{$src2, $src1, $dst {${mask}}|",
              "$dst {${mask}}, $src1, $src2}"),
              [(set _.RC:$dst, (_.VT (X86selects _.KRCWM:$mask,
-                                     (_.VT (OpNode _.RC:$src1,
-                                            (scalar_to_vector _.FRC:$src2))),
+                                     (_.VT (OpNode _.RC:$src1, _.RC:$src2)),
                                      (_.VT _.RC:$src0))))],
              _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V, EVEX_K;
   let canFoldAsLoad = 1, isReMaterializable = 1 in
@@ -3394,7 +3391,8 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
           (!cast<Instruction>(InstrStr#rrk)
                         (COPY_TO_REGCLASS _.FRC:$src2, _.RC),
                         (COPY_TO_REGCLASS GR32:$mask, VK1WM),
-                        (_.VT _.RC:$src0), _.FRC:$src1)>;
+                        (_.VT _.RC:$src0),
+                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
 
 def : Pat<(_.VT (OpNode _.RC:$src0,
                  (_.VT (scalar_to_vector
@@ -3403,7 +3401,8 @@ def : Pat<(_.VT (OpNode _.RC:$src0,
                         (_.EltVT ZeroFP))))))),
           (!cast<Instruction>(InstrStr#rrkz)
                         (COPY_TO_REGCLASS GR32:$mask, VK1WM),
-                        (_.VT _.RC:$src0), _.FRC:$src1)>;
+                        (_.VT _.RC:$src0),
+                        (COPY_TO_REGCLASS _.FRC:$src1, _.RC))>;
 }
 
 multiclass avx512_store_scalar_lowering<string InstrStr, AVX512VLVectorVTInfo _,
@@ -3515,11 +3514,13 @@ def : Pat<(f32 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
             (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
              (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
-             (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+             (v4f32 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR32X:$src1, VR128X)),
+             FR32X)>;
 
 def : Pat<(f32 (X86selects VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))),
           (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X),
-          VK1WM:$mask, (v4f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>;
+          VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),
+          (COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>;
 
 def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
                            (f64 FR64X:$src1), (f64 FR64X:$src2))),
@@ -3527,11 +3528,13 @@ def : Pat<(f64 (X86selects (scalar_to_vector (and GR8:$mask, (i8 1))),
             (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
              (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
-             (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+             (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)),
+             FR64X)>;
 
 def : Pat<(f64 (X86selects VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))),
           (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X),
-          VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>;
+          VK1WM:$mask, (v2f64 (IMPLICIT_DEF)),
+          (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>;
 
 def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
           (VMOVSSZmrk addr:$dst, (COPY_TO_REGCLASS (i32 (INSERT_SUBREG (IMPLICIT_DEF), GR8:$mask, sub_8bit)), VK1WM),
@@ -3539,7 +3542,7 @@ def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask),
 
 let hasSideEffects = 0 in {
   def VMOVSSZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
-                            (ins VR128X:$src1, FR32X:$src2),
+                            (ins VR128X:$src1, VR128X:$src2),
                             "vmovss.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], NoItinerary>, XS, EVEX_4V, VEX_LIG,
                             FoldGenData<"VMOVSSZrr">;
@@ -3547,21 +3550,21 @@ let hasSideEffects = 0 in {
   let Constraints = "$src0 = $dst" in
   def VMOVSSZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f32x_info.RC:$src0, f32x_info.KRCWM:$mask,
-                              VR128X:$src1, FR32X:$src2),
+                              VR128X:$src1, VR128X:$src2),
                              "vmovss.s\t{$src2, $src1, $dst {${mask}}|"#
                              "$dst {${mask}}, $src1, $src2}",
                              [], NoItinerary>, EVEX_K, XS, EVEX_4V, VEX_LIG,
                              FoldGenData<"VMOVSSZrrk">;
 
   def VMOVSSZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
-                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, FR32X:$src2),
+                              (ins f32x_info.KRCWM:$mask, VR128X:$src1, VR128X:$src2),
                               "vmovss.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                               "$dst {${mask}} {z}, $src1, $src2}",
                               [], NoItinerary>, EVEX_KZ, XS, EVEX_4V, VEX_LIG,
                               FoldGenData<"VMOVSSZrrkz">;
 
   def VMOVSDZrr_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
-                            (ins VR128X:$src1, FR64X:$src2),
+                            (ins VR128X:$src1, VR128X:$src2),
                             "vmovsd.s\t{$src2, $src1, $dst|$dst, $src1, $src2}",
                             [], NoItinerary>, XD, EVEX_4V, VEX_LIG, VEX_W,
                             FoldGenData<"VMOVSDZrr">;
@@ -3569,7 +3572,7 @@ let Constraints = "$src0 = $dst" in
   let Constraints = "$src0 = $dst" in
   def VMOVSDZrrk_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                              (ins f64x_info.RC:$src0, f64x_info.KRCWM:$mask,
-                              VR128X:$src1, FR64X:$src2),
+                              VR128X:$src1, VR128X:$src2),
                              "vmovsd.s\t{$src2, $src1, $dst {${mask}}|"#
                              "$dst {${mask}}, $src1, $src2}",
                              [], NoItinerary>, EVEX_K, XD, EVEX_4V, VEX_LIG,
@@ -3577,7 +3580,7 @@ let Constraints = "$src0 = $dst" in
 
   def VMOVSDZrrkz_REV: AVX512<0x11, MRMDestReg, (outs VR128X:$dst),
                               (ins f64x_info.KRCWM:$mask, VR128X:$src1,
-                               FR64X:$src2),
+                               VR128X:$src2),
                               "vmovsd.s\t{$src2, $src1, $dst {${mask}} {z}|"#
                               "$dst {${mask}} {z}, $src1, $src2}",
                               [], NoItinerary>, EVEX_KZ, XD, EVEX_4V, VEX_LIG,
@@ -3587,11 +3590,12 @@ let Constraints = "$src0 = $dst" in
 let Predicates = [HasAVX512] in {
   let AddedComplexity = 15 in {
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128X:$src))),
-            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+            (VMOVSSZrr (v4f32 (AVX512_128_SET0)), VR128X:$src)>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128X:$src))),
-            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), (COPY_TO_REGCLASS VR128X:$src, FR32X))>;
+            (VMOVSSZrr (v4i32 (AVX512_128_SET0)), VR128X:$src)>;
   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64X:$src)))),
-            (VMOVSDZrr (v2f64 (AVX512_128_SET0)), FR64X:$src)>;
+            (VMOVSDZrr (v2f64 (AVX512_128_SET0)),
+                       (COPY_TO_REGCLASS FR64X:$src, VR128))>;
   }
 
   // Move low f32 and clear high bits.
@@ -3697,22 +3701,23 @@ let Predicates = [HasAVX512] in {
 
   // Shuffle with VMOVSS
   def : Pat<(v4i32 (X86Movss VR128X:$src1, VR128X:$src2)),
-            (VMOVSSZrr (v4i32 VR128X:$src1),
-                       (COPY_TO_REGCLASS (v4i32 VR128X:$src2), FR32X))>;
-  def : Pat<(v4f32 (X86Movss VR128X:$src1, VR128X:$src2)),
-            (VMOVSSZrr (v4f32 VR128X:$src1),
-                       (COPY_TO_REGCLASS (v4f32 VR128X:$src2), FR32X))>;
+            (VMOVSSZrr (v4i32 VR128X:$src1), VR128X:$src2)>;
+
+  def : Pat<(v4f32 (X86Movss VR128X:$src1, (scalar_to_vector FR32X:$src2))),
+            (VMOVSSZrr VR128X:$src1,
+                       (COPY_TO_REGCLASS FR32X:$src2, VR128X))>;
 
   // Shuffle with VMOVSD
   def : Pat<(v2i64 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
-  def : Pat<(v2f64 (X86Movsd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
+
+  def : Pat<(v2f64 (X86Movsd VR128X:$src1, (scalar_to_vector FR64X:$src2))),
+            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS FR64X:$src2, VR128X))>;
 
   def : Pat<(v2f64 (X86Movlpd VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
   def : Pat<(v4f32 (X86Movlps VR128X:$src1, VR128X:$src2)),
-            (VMOVSDZrr VR128X:$src1, (COPY_TO_REGCLASS VR128X:$src2, FR64X))>;
+            (VMOVSDZrr VR128X:$src1, VR128X:$src2)>;
 }
 
 let AddedComplexity = 15 in
diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index d7a33f8507e..604ee1533fa 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -5189,18 +5189,8 @@ MachineInstr *X86InstrInfo::commuteInstructionImpl(MachineInstr &MI, bool NewMI,
     case X86::VMOVSSrr: Opc = X86::VBLENDPSrri; Mask = 0x0E; break;
     }
 
-    // MOVSD/MOVSS's 2nd operand is a FR64/FR32 reg class - we need to copy
-    // this over to a VR128 class like the 1st operand to use a BLENDPD/BLENDPS.
-    auto &MRI = MI.getParent()->getParent()->getRegInfo();
-    auto VR128RC = MRI.getRegClass(MI.getOperand(1).getReg());
-    unsigned VR128 = MRI.createVirtualRegister(VR128RC);
-    BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), get(TargetOpcode::COPY),
-            VR128)
-        .addReg(MI.getOperand(2).getReg());
-
     auto &WorkingMI = cloneIfNew(MI);
     WorkingMI.setDesc(get(Opc));
-    WorkingMI.getOperand(2).setReg(VR128);
     WorkingMI.addOperand(MachineOperand::CreateImm(Mask));
     return TargetInstrInfo::commuteInstructionImpl(WorkingMI, /*NewMI=*/false,
                                                    OpIdx1, OpIdx2);
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index e328c2fa6a4..92898d1bed9 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -384,22 +384,21 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
 // don't use movss/movsd for copies.
 //===----------------------------------------------------------------------===//
 
-multiclass sse12_move_rr<RegisterClass RC, SDNode OpNode, ValueType vt,
+multiclass sse12_move_rr<SDNode OpNode, ValueType vt,
                          X86MemOperand x86memop, string base_opc,
                          string asm_opr, Domain d = GenericDomain,
                          string Name> {
   let isCommutable = 1 in
   def rr : SI<0x10, MRMSrcReg, (outs VR128:$dst),
-              (ins VR128:$src1, RC:$src2),
+              (ins VR128:$src1, VR128:$src2),
               !strconcat(base_opc, asm_opr),
-              [(set VR128:$dst, (vt (OpNode VR128:$src1,
-                                     (scalar_to_vector RC:$src2))))],
+              [(set VR128:$dst, (vt (OpNode VR128:$src1, VR128:$src2)))],
               IIC_SSE_MOV_S_RR, d>, Sched<[WriteFShuffle]>;
 
   // For the disassembler
   let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in
   def rr_REV : SI<0x11, MRMDestReg, (outs VR128:$dst),
-                  (ins VR128:$src1, RC:$src2),
+                  (ins VR128:$src1, VR128:$src2),
                   !strconcat(base_opc, asm_opr), [],
                   IIC_SSE_MOV_S_RR>, Sched<[WriteFShuffle]>,
                   FoldGenData<Name#rr>;
@@ -409,7 +408,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                       X86MemOperand x86memop, string OpcodeStr,
                       Domain d = GenericDomain, string Name> {
   // AVX
-  defm V#NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+  defm V#NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                               "\t{$src2, $src1, $dst|$dst, $src1, $src2}", d,
                               "V"#Name>,
                               VEX_4V, VEX_LIG, VEX_WIG;
@@ -420,7 +419,7 @@ multiclass sse12_move<RegisterClass RC, SDNode OpNode, ValueType vt,
                   VEX, VEX_LIG, Sched<[WriteStore]>, VEX_WIG;
   // SSE1 & 2
   let Constraints = "$src1 = $dst" in {
-    defm NAME : sse12_move_rr<RC, OpNode, vt, x86memop, OpcodeStr,
+    defm NAME : sse12_move_rr<OpNode, vt, x86memop, OpcodeStr,
                               "\t{$src2, $dst|$dst, $src2}", d, Name>;
   }
 
@@ -506,30 +505,30 @@ let Predicates = [UseAVX] in {
 
   // Shuffle with VMOVSS
   def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
-            (VMOVSSrr (v4i32 VR128:$src1),
-                      (COPY_TO_REGCLASS (v4i32 VR128:$src2), FR32))>;
-  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
-            (VMOVSSrr (v4f32 VR128:$src1),
-                      (COPY_TO_REGCLASS (v4f32 VR128:$src2), FR32))>;
+            (VMOVSSrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+            (VMOVSSrr VR128:$src1, (COPY_TO_REGCLASS FR32:$src2, VR128))>;
 
   // Shuffle with VMOVSD
   def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
-            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
-  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
-            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (VMOVSDrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>;
 
   // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
   // is during lowering, where it's not possible to recognize the fold cause
   // it has two uses through a bitcast. One use disappears at isel time and the
   // fold opportunity reappears.
   def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
-            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (VMOVSDrr VR128:$src1, VR128:$src2)>;
   def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
-            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (VMOVSDrr VR128:$src1, VR128:$src2)>;
   def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
-            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (VMOVSDrr VR128:$src1, VR128:$src2)>;
   def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
-            (VMOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (VMOVSDrr VR128:$src1, VR128:$src2)>;
 }
 
 let Predicates = [UseSSE1] in {
@@ -537,9 +536,9 @@ let Predicates = [UseSSE1] in {
   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
   // MOVSS to the lower bits.
   def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
-            (MOVSSrr (v4f32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
+            (MOVSSrr (v4f32 (V_SET0)), VR128:$src)>;
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
-            (MOVSSrr (v4i32 (V_SET0)), (COPY_TO_REGCLASS VR128:$src, FR32))>;
+            (MOVSSrr (v4i32 (V_SET0)), VR128:$src)>;
   }
 
   let AddedComplexity = 20 in {
@@ -561,9 +560,10 @@ let Predicates = [UseSSE1] in {
 
   // Shuffle with MOVSS
   def : Pat<(v4i32 (X86Movss VR128:$src1, VR128:$src2)),
-            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
-  def : Pat<(v4f32 (X86Movss VR128:$src1, VR128:$src2)),
-            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR32))>;
+            (MOVSSrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(v4f32 (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2))),
+            (MOVSSrr VR128:$src1, (COPY_TO_REGCLASS FR32:$src2, VR128))>;
 }
@@ -571,7 +571,7 @@ let Predicates = [UseSSE2] in {
   // Move scalar to XMM zero-extended, zeroing a VR128 then do a
   // MOVSD to the lower bits.
   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-            (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+            (MOVSDrr (v2f64 (V_SET0)), (COPY_TO_REGCLASS FR64:$src, VR128))>;
 }
 
   let AddedComplexity = 20 in {
@@ -590,22 +590,23 @@ let Predicates = [UseSSE2] in {
 
   // Shuffle with MOVSD
   def : Pat<(v2i64 (X86Movsd VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
-  def : Pat<(v2f64 (X86Movsd VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (MOVSDrr VR128:$src1, VR128:$src2)>;
+
+  def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
+            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS FR64:$src2, VR128))>;
 
   // FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
   // is during lowering, where it's not possible to recognize the fold because
   // it has two uses through a bitcast. One use disappears at isel time and the
   // fold opportunity reappears.
   def : Pat<(v2f64 (X86Movlpd VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (MOVSDrr VR128:$src1, VR128:$src2)>;
   def : Pat<(v2i64 (X86Movlpd VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (MOVSDrr VR128:$src1, VR128:$src2)>;
   def : Pat<(v4f32 (X86Movlps VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (MOVSDrr VR128:$src1, VR128:$src2)>;
   def : Pat<(v4i32 (X86Movlps VR128:$src1, VR128:$src2)),
-            (MOVSDrr VR128:$src1, (COPY_TO_REGCLASS VR128:$src2, FR64))>;
+            (MOVSDrr VR128:$src1, VR128:$src2)>;
 }
 
 // Aliases to help the assembler pick two byte VEX encodings by swapping the
@@ -6722,7 +6723,7 @@ let Predicates = [UseAVX] in {
   def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
             (VPBLENDWrri (v4i32 (V_SET0)), VR128:$src, (i8 3))>;
   def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
-            (VMOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
+            (VMOVSDrr (v2f64 (V_SET0)), (COPY_TO_REGCLASS FR64:$src, VR128))>;
 
   // Move low f32 and clear high bits.
   def : Pat<(v8f32 (X86vzmovl (v8f32 VR256:$src))),
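
For context on the X86InstrInfo.cpp hunk: once $src2 lives in VR128 like $src1, commuting (V)MOVSS to a blend becomes a pure opcode/immediate rewrite, so the copy-materialization code could be deleted. The lane equivalence that the visible "Mask = 0x0E" case relies on can be checked with plain SSE4.1 intrinsics. The snippet below is an illustrative sketch, not part of the commit; it only demonstrates that movss on (a, b) and blendps on the commuted operands (b, a) with immediate 0b1110 select the same lanes.

// Sketch (not part of the commit): movss takes lane 0 from its second
// operand and lanes 1-3 from its first. blendps with mask bit i set takes
// lane i from its second operand, so blend(b, a, 0x0E) selects lanes 1-3
// from a and lane 0 from b -- the same result with the operands commuted.
// Compile with: -msse4.1
#include <smmintrin.h>  // SSE4.1: _mm_blend_ps
#include <cstdio>

int main() {
  __m128 a = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  __m128 b = _mm_setr_ps(10.0f, 20.0f, 30.0f, 40.0f);

  __m128 movss = _mm_move_ss(a, b);         // {b[0], a[1], a[2], a[3]}
  __m128 blend = _mm_blend_ps(b, a, 0x0E);  // commuted operands, mask 0b1110

  float m[4], c[4];
  _mm_storeu_ps(m, movss);
  _mm_storeu_ps(c, blend);
  for (int i = 0; i < 4; ++i)
    std::printf("lane %d: movss=%g blendps=%g\n", i, m[i], c[i]);
  return 0;
}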
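The .td hunks follow the same theme: the instruction definitions no longer accept an FR32/FR64 scalar directly, so patterns such as (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2)) now bridge the scalar into the vector class with COPY_TO_REGCLASS before the vector-vector move. A hedged analogue at the intrinsics level (again not part of the commit) is widening the scalar first and then doing the whole-vector insert:

// Sketch (not part of the commit): the C-level shape of
// (X86Movss VR128:$src1, (scalar_to_vector FR32:$src2)) -- widen the
// scalar into a 128-bit value, then let movss insert its low lane.
#include <xmmintrin.h>
#include <cstdio>

int main() {
  __m128 dst = _mm_setr_ps(1.0f, 2.0f, 3.0f, 4.0f);
  float scalar = 99.0f;

  __m128 widened = _mm_set_ss(scalar);         // scalar_to_vector: {99, 0, 0, 0}
  __m128 result  = _mm_move_ss(dst, widened);  // movss: {99, 2, 3, 4}

  float r[4];
  _mm_storeu_ps(r, result);
  std::printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);
  return 0;
}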

