diff options
22 files changed, 400 insertions, 500 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 1143056d008..f40c6b613ea 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3617,7 +3617,7 @@ def VMOVDI2PDIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR32:$src "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v4i32 (scalar_to_vector GR32:$src)))]>, - EVEX, Sched<[WriteMove]>; + EVEX, Sched<[WriteVecMoveFromGpr]>; def VMOVDI2PDIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, @@ -3627,7 +3627,7 @@ def VMOV64toPQIZrr : AVX512BI<0x6E, MRMSrcReg, (outs VR128X:$dst), (ins GR64:$sr "vmovq\t{$src, $dst|$dst, $src}", [(set VR128X:$dst, (v2i64 (scalar_to_vector GR64:$src)))]>, - EVEX, VEX_W, Sched<[WriteMove]>; + EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def VMOV64toPQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), (ins i64mem:$src), @@ -3637,7 +3637,7 @@ let isCodeGenOnly = 1 in { def VMOV64toSDZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR64X:$dst), (ins GR64:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64X:$dst, (bitconvert GR64:$src))]>, - EVEX, VEX_W, Sched<[WriteMove]>; + EVEX, VEX_W, Sched<[WriteVecMoveFromGpr]>; def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set FR64X:$dst, (bitconvert (loadi64 addr:$src)))]>, @@ -3645,7 +3645,7 @@ def VMOV64toSDZrm : AVX512XSI<0x7E, MRMSrcMem, (outs FR64X:$dst), (ins i64mem:$s def VMOVSDto64Zrr : AVX512BI<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64X:$src))]>, - EVEX, VEX_W, Sched<[WriteMove]>; + EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>; def VMOVSDto64Zmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(store 
(i64 (bitconvert FR64X:$src)), addr:$dst)]>, @@ -3660,7 +3660,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def VMOVDI2SSZrr : AVX512BI<0x6E, MRMSrcReg, (outs FR32X:$dst), (ins GR32:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set FR32X:$dst, (bitconvert GR32:$src))]>, - EVEX, Sched<[WriteMove]>; + EVEX, Sched<[WriteVecMoveFromGpr]>; def VMOVDI2SSZrm : AVX512BI<0x6E, MRMSrcMem, (outs FR32X:$dst), (ins i32mem:$src), "vmovd\t{$src, $dst|$dst, $src}", @@ -3675,7 +3675,7 @@ def VMOVPDI2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128X:$s "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (extractelt (v4i32 VR128X:$src), (iPTR 0)))]>, - EVEX, Sched<[WriteMove]>; + EVEX, Sched<[WriteVecMoveToGpr]>; def VMOVPDI2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128X:$src), "vmovd\t{$src, $dst|$dst, $src}", @@ -3691,7 +3691,7 @@ def VMOVPQIto64Zrr : I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128X:$src), "vmovq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128X:$src), (iPTR 0)))]>, - PD, EVEX, VEX_W, Sched<[WriteMove]>, + PD, EVEX, VEX_W, Sched<[WriteVecMoveToGpr]>, Requires<[HasAVX512, In64BitMode]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in @@ -3722,7 +3722,7 @@ def VMOVSS2DIZrr : AVX512BI<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32X:$src))]>, - EVEX, Sched<[WriteMove]>; + EVEX, Sched<[WriteVecMoveToGpr]>; def VMOVSS2DIZmr : AVX512BI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32X:$src), "vmovd\t{$src, $dst|$dst, $src}", @@ -9089,7 +9089,7 @@ multiclass cvt_by_vec_width<bits<8> opc, X86VectorVTInfo Vec, string OpcodeStr > def rr : AVX512XS8I<opc, MRMSrcReg, (outs Vec.RC:$dst), (ins Vec.KRC:$src), !strconcat(OpcodeStr##Vec.Suffix, "\t{$src, $dst|$dst, $src}"), [(set Vec.RC:$dst, (Vec.VT (sext Vec.KRC:$src)))]>, - EVEX, Sched<[WriteMove]>; + EVEX, Sched<[WriteMove]>; // TODO - WriteVecTrunc? 
} multiclass cvt_mask_by_elt_width<bits<8> opc, AVX512VLVectorVTInfo VTInfo, diff --git a/llvm/lib/Target/X86/X86InstrMMX.td b/llvm/lib/Target/X86/X86InstrMMX.td index 75f35c2481a..91901e8d4b6 100644 --- a/llvm/lib/Target/X86/X86InstrMMX.td +++ b/llvm/lib/Target/X86/X86InstrMMX.td @@ -165,7 +165,7 @@ def MMX_MOVD64rr : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (x86mmx (scalar_to_vector GR32:$src)))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveFromGpr]>; def MMX_MOVD64rm : MMXI<0x6E, MRMSrcMem, (outs VR64:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR64:$dst, @@ -193,13 +193,13 @@ def MMX_MOVD64grr : MMXI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR64:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (MMX_X86movd2w (x86mmx VR64:$src)))]>, - Sched<[WriteMove]>, FoldGenData<"MMX_MOVD64rr">; + Sched<[WriteVecMoveToGpr]>, FoldGenData<"MMX_MOVD64rr">; let isBitcast = 1 in def MMX_MOVD64to64rr : MMXRI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR64:$dst, (bitconvert GR64:$src))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveFromGpr]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst), @@ -209,20 +209,21 @@ def MMX_MOVD64to64rm : MMXRI<0x6E, MRMSrcMem, (outs VR64:$dst), // These are 64 bit moves, but since the OS X assembler doesn't // recognize a register-register movq, we write them as // movd. 
-let SchedRW = [WriteMove], isBitcast = 1 in { +let isBitcast = 1 in { def MMX_MOVD64from64rr : MMXRI<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", - [(set GR64:$dst, (bitconvert VR64:$src))]>; -let hasSideEffects = 0 in + [(set GR64:$dst, (bitconvert VR64:$src))]>, + Sched<[WriteVecMoveToGpr]>; +let SchedRW = [WriteVecMove], hasSideEffects = 0 in { def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>; -let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { +let isCodeGenOnly = 1, ForceDisassemble = 1 in def MMX_MOVQ64rr_REV : MMXI<0x7F, MRMDestReg, (outs VR64:$dst), (ins VR64:$src), "movq\t{$src, $dst|$dst, $src}", []>, FoldGenData<"MMX_MOVQ64rr">; -} -} // SchedRW +} // SchedRW, hasSideEffects +} // isBitcast let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayStore = 1 in def MMX_MOVD64from64rm : MMXRI<0x7E, MRMDestMem, diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index ee414790629..85cdcbe7005 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3965,7 +3965,7 @@ def VMOVDI2PDIrr : VS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))]>, - VEX, Sched<[WriteMove]>; + VEX, Sched<[WriteVecMoveFromGpr]>; def VMOVDI2PDIrm : VS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -3975,7 +3975,7 @@ def VMOV64toPQIrr : VRS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))]>, - VEX, Sched<[WriteMove]>; + VEX, Sched<[WriteVecMoveFromGpr]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def VMOV64toPQIrm : VRS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, 
$src}", []>, @@ -3984,13 +3984,13 @@ let isCodeGenOnly = 1 in def VMOV64toSDrr : VRS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))]>, - VEX, Sched<[WriteMove]>; + VEX, Sched<[WriteVecMoveFromGpr]>; def MOVDI2PDIrr : S2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v4i32 (scalar_to_vector GR32:$src)))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveFromGpr]>; def MOVDI2PDIrm : S2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", [(set VR128:$dst, @@ -4000,7 +4000,7 @@ def MOV64toPQIrr : RS2I<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set VR128:$dst, (v2i64 (scalar_to_vector GR64:$src)))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveFromGpr]>; let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0, mayLoad = 1 in def MOV64toPQIrm : RS2I<0x6E, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src), "movq\t{$src, $dst|$dst, $src}", []>, @@ -4009,7 +4009,7 @@ let isCodeGenOnly = 1 in def MOV64toSDrr : RS2I<0x6E, MRMSrcReg, (outs FR64:$dst), (ins GR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set FR64:$dst, (bitconvert GR64:$src))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveFromGpr]>; } // ExeDomain = SSEPackedInt //===---------------------------------------------------------------------===// @@ -4019,7 +4019,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def VMOVDI2SSrr : VS2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>, - VEX, Sched<[WriteMove]>; + VEX, Sched<[WriteVecMoveFromGpr]>; def VMOVDI2SSrm : VS2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4028,7 +4028,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def MOVDI2SSrr : S2I<0x6E, MRMSrcReg, (outs FR32:$dst), (ins GR32:$src), "movd\t{$src, 
$dst|$dst, $src}", [(set FR32:$dst, (bitconvert GR32:$src))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveFromGpr]>; def MOVDI2SSrm : S2I<0x6E, MRMSrcMem, (outs FR32:$dst), (ins i32mem:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4044,7 +4044,7 @@ def VMOVPDI2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (extractelt (v4i32 VR128:$src), (iPTR 0)))]>, VEX, - Sched<[WriteMove]>; + Sched<[WriteVecMoveToGpr]>; def VMOVPDI2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", @@ -4055,7 +4055,7 @@ def MOVPDI2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (extractelt (v4i32 VR128:$src), (iPTR 0)))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveToGpr]>; def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (extractelt (v4i32 VR128:$src), @@ -4067,7 +4067,7 @@ def MOVPDI2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, VR128:$src), // Move Packed Doubleword Int first element to Doubleword Int // let ExeDomain = SSEPackedInt in { -let SchedRW = [WriteMove] in { +let SchedRW = [WriteVecMoveToGpr] in { def VMOVPQIto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins VR128:$src), "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (extractelt (v2i64 VR128:$src), @@ -4103,7 +4103,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def VMOVSDto64rr : VRS2I<0x7E, MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))]>, - VEX, Sched<[WriteMove]>; + VEX, Sched<[WriteVecMoveToGpr]>; def VMOVSDto64mr : VRS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>, @@ -4116,7 +4116,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def MOVSDto64rr : RS2I<0x7E, 
MRMDestReg, (outs GR64:$dst), (ins FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(set GR64:$dst, (bitconvert FR64:$src))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveToGpr]>; def MOVSDto64mr : RS2I<0x7E, MRMDestMem, (outs), (ins i64mem:$dst, FR64:$src), "movq\t{$src, $dst|$dst, $src}", [(store (i64 (bitconvert FR64:$src)), addr:$dst)]>, @@ -4130,7 +4130,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def VMOVSS2DIrr : VS2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>, - VEX, Sched<[WriteMove]>; + VEX, Sched<[WriteVecMoveToGpr]>; def VMOVSS2DImr : VS2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, @@ -4138,7 +4138,7 @@ let ExeDomain = SSEPackedInt, isCodeGenOnly = 1 in { def MOVSS2DIrr : S2I<0x7E, MRMDestReg, (outs GR32:$dst), (ins FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(set GR32:$dst, (bitconvert FR32:$src))]>, - Sched<[WriteMove]>; + Sched<[WriteVecMoveToGpr]>; def MOVSS2DImr : S2I<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src), "movd\t{$src, $dst|$dst, $src}", [(store (i32 (bitconvert FR32:$src)), addr:$dst)]>, diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 568cef7c8eb..35ae1008899 100755 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -280,6 +280,9 @@ defm : X86WriteRes<WriteVecMaskedStoreY, [BWPort0,BWPort4,BWPort237,BWPort15], 5 defm : X86WriteRes<WriteVecMove, [BWPort015], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveX, [BWPort015], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveY, [BWPort015], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveToGpr, [BWPort0], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveFromGpr, [BWPort5], 1, [1], 1>; + defm : X86WriteRes<WriteEMMS, [BWPort01,BWPort15,BWPort015,BWPort0156], 31, [8,1,21,1], 31>; defm : BWWriteResPair<WriteVecALU, 
[BWPort15], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. @@ -508,11 +511,7 @@ def BWWriteResGroup1 : SchedWriteRes<[BWPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup1], (instregex "MMX_MOVD64from64rr", - "MMX_MOVD64grr", - "(V?)MOVPDI2DIrr", - "(V?)MOVPQIto64rr", - "VPSLLVQ(Y?)rr", +def: InstRW<[BWWriteResGroup1], (instregex "VPSLLVQ(Y?)rr", "VPSRLVQ(Y?)rr")>; def BWWriteResGroup2 : SchedWriteRes<[BWPort1]> { @@ -528,11 +527,7 @@ def BWWriteResGroup3 : SchedWriteRes<[BWPort5]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVD64rr", - "MMX_MOVD64to64rr", - "MMX_MOVQ2DQrr", - "(V?)MOV64toPQIrr", - "(V?)MOVDI2PDIrr")>; +def: InstRW<[BWWriteResGroup3], (instregex "MMX_MOVQ2DQrr")>; def BWWriteResGroup4 : SchedWriteRes<[BWPort6]> { let Latency = 1; @@ -578,8 +573,7 @@ def BWWriteResGroup8 : SchedWriteRes<[BWPort015]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[BWWriteResGroup8], (instregex "MMX_MOVQ64rr", - "VPBLENDD(Y?)rri")>; +def: InstRW<[BWWriteResGroup8], (instregex "VPBLENDD(Y?)rri")>; def BWWriteResGroup9 : SchedWriteRes<[BWPort0156]> { let Latency = 1; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 22af5ee0650..d400e977d7d 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -300,6 +300,8 @@ defm : X86WriteRes<WriteVecMaskedStoreY, [HWPort0,HWPort4,HWPort237,HWPort15], 5 defm : X86WriteRes<WriteVecMove, [HWPort015], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveX, [HWPort015], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveY, [HWPort015], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveToGpr, [HWPort0], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveFromGpr, [HWPort5], 1, [1], 1>; defm : HWWriteResPair<WriteVecLogic, [HWPort015], 1, [1], 1, 5>; defm : HWWriteResPair<WriteVecLogicX,[HWPort015], 1, [1], 1, 6>; @@ -794,11 +796,7 @@ def HWWriteResGroup2 : 
SchedWriteRes<[HWPort0]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup2], (instregex "MMX_MOVD64from64rr", - "MMX_MOVD64grr", - "(V?)MOVPDI2DIrr", - "(V?)MOVPQIto64rr", - "VPSLLVQ(Y?)rr", +def: InstRW<[HWWriteResGroup2], (instregex "VPSLLVQ(Y?)rr", "VPSRLVQ(Y?)rr")>; def HWWriteResGroup3 : SchedWriteRes<[HWPort1]> { @@ -814,11 +812,7 @@ def HWWriteResGroup4 : SchedWriteRes<[HWPort5]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVD64rr", - "MMX_MOVD64to64rr", - "MMX_MOVQ2DQrr", - "(V?)MOV64toPQIrr", - "(V?)MOVDI2PDIrr")>; +def: InstRW<[HWWriteResGroup4], (instregex "MMX_MOVQ2DQrr")>; def HWWriteResGroup5 : SchedWriteRes<[HWPort6]> { let Latency = 1; @@ -864,8 +858,7 @@ def HWWriteResGroup9 : SchedWriteRes<[HWPort015]> { let NumMicroOps = 1; let ResourceCycles = [1]; } -def: InstRW<[HWWriteResGroup9], (instregex "MMX_MOVQ64rr", - "VPBLENDD(Y?)rri")>; +def: InstRW<[HWWriteResGroup9], (instregex "VPBLENDD(Y?)rri")>; def HWWriteResGroup10 : SchedWriteRes<[HWPort0156]> { let Latency = 1; diff --git a/llvm/lib/Target/X86/X86SchedSandyBridge.td b/llvm/lib/Target/X86/X86SchedSandyBridge.td index 6e7e2be2b60..8b457f91254 100644 --- a/llvm/lib/Target/X86/X86SchedSandyBridge.td +++ b/llvm/lib/Target/X86/X86SchedSandyBridge.td @@ -290,6 +290,8 @@ defm : X86WriteRes<WriteVecMaskedStoreY, [SBPort4,SBPort01,SBPort23], 5, [1,1,1] defm : X86WriteRes<WriteVecMove, [SBPort05], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveX, [SBPort05], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveY, [SBPort05], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveToGpr, [SBPort0], 2, [1], 1>; +defm : X86WriteRes<WriteVecMoveFromGpr, [SBPort5], 1, [1], 1>; defm : SBWriteResPair<WriteVecLogic, [SBPort015], 1, [1], 1, 5>; defm : SBWriteResPair<WriteVecLogicX,[SBPort015], 1, [1], 1, 6>; @@ -497,8 +499,6 @@ def: InstRW<[SBWriteResGroup2], (instrs FDECSTP, FINCSTP, FFREE, FFREEP, FNOP, LD_Frr, ST_Frr, ST_FPrr)>; def: 
InstRW<[SBWriteResGroup2], (instrs LOOP, LOOPE, LOOPNE)>; // FIXME: This seems wrong compared to other Intel CPUs. def: InstRW<[SBWriteResGroup2], (instrs RETQ)>; -def: InstRW<[SBWriteResGroup2], (instregex "(V?)MOV64toPQIrr", - "(V?)MOVDI2PDIrr")>; def SBWriteResGroup4 : SchedWriteRes<[SBPort05]> { let Latency = 1; @@ -534,14 +534,6 @@ def SBWriteResGroup6 : SchedWriteRes<[SBPort015]> { def: InstRW<[SBWriteResGroup6], (instregex "MMX_MOVQ2DQrr", "MOVDQ(A|U)rr")>; // NOTE: Different port requirements to VEX equivalents -def SBWriteResGroup7 : SchedWriteRes<[SBPort0]> { - let Latency = 2; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SBWriteResGroup7], (instregex "(V?)MOVPDI2DIrr", - "(V?)MOVPQIto64rr")>; - def SBWriteResGroup9 : SchedWriteRes<[SBPort05]> { let Latency = 2; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 3d4e393d800..8d034aceab1 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -269,9 +269,11 @@ defm : X86WriteRes<WriteVecStoreNT, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreNTY, [SKLPort237,SKLPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStore, [SKLPort237,SKLPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStoreY, [SKLPort237,SKLPort0], 2, [1,1], 2>; -defm : X86WriteRes<WriteVecMove, [SKLPort015], 1, [1], 1>; +defm : X86WriteRes<WriteVecMove, [SKLPort05], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveX, [SKLPort015], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveY, [SKLPort015], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveToGpr, [SKLPort0], 2, [1], 1>; +defm : X86WriteRes<WriteVecMoveFromGpr, [SKLPort5], 1, [1], 1>; defm : SKLWriteResPair<WriteVecALU, [SKLPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. defm : SKLWriteResPair<WriteVecALUX, [SKLPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM). 
@@ -526,11 +528,7 @@ def SKLWriteResGroup3 : SchedWriteRes<[SKLPort5]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup3], (instregex "COM(P?)_FST0r", - "MMX_MOVD64rr", - "MMX_MOVD64to64rr", - "UCOM_F(P?)r", - "(V?)MOV64toPQIrr", - "(V?)MOVDI2PDIrr")>; + "UCOM_F(P?)r")>; def SKLWriteResGroup4 : SchedWriteRes<[SKLPort6]> { let Latency = 1; @@ -545,7 +543,6 @@ def SKLWriteResGroup6 : SchedWriteRes<[SKLPort05]> { let ResourceCycles = [1]; } def: InstRW<[SKLWriteResGroup6], (instrs FINCSTP, FNOP)>; -def: InstRW<[SKLWriteResGroup6], (instregex "MMX_MOVQ64rr")>; def SKLWriteResGroup7 : SchedWriteRes<[SKLPort06]> { let Latency = 1; @@ -605,16 +602,6 @@ def: InstRW<[SKLWriteResGroup11], (instregex "FBSTPm", "ST_FP(32|64|80)m", "VMPTRSTm")>; -def SKLWriteResGroup12 : SchedWriteRes<[SKLPort0]> { - let Latency = 2; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKLWriteResGroup12], (instregex "MMX_MOVD64from64rr", - "MMX_MOVD64grr", - "(V?)MOVPDI2DIrr", - "(V?)MOVPQIto64rr")>; - def SKLWriteResGroup13 : SchedWriteRes<[SKLPort5]> { let Latency = 2; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 36aa93b2e9e..fac38e7f91b 100755 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -269,9 +269,11 @@ defm : X86WriteRes<WriteVecStoreNT, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecStoreNTY, [SKXPort237,SKXPort4], 1, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStore, [SKXPort237,SKXPort0], 2, [1,1], 2>; defm : X86WriteRes<WriteVecMaskedStoreY, [SKXPort237,SKXPort0], 2, [1,1], 2>; -defm : X86WriteRes<WriteVecMove, [SKXPort015], 1, [1], 1>; +defm : X86WriteRes<WriteVecMove, [SKXPort05], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveX, [SKXPort015], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveY, [SKXPort015], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveToGpr, [SKXPort0], 2, [1], 1>; +defm : 
X86WriteRes<WriteVecMoveFromGpr, [SKXPort5], 1, [1], 1>; defm : SKXWriteResPair<WriteVecALU, [SKXPort05], 1, [1], 1, 5>; // Vector integer ALU op, no logicals. defm : SKXWriteResPair<WriteVecALUX, [SKXPort01], 1, [1], 1, 6>; // Vector integer ALU op, no logicals (XMM). @@ -538,13 +540,7 @@ def SKXWriteResGroup3 : SchedWriteRes<[SKXPort5]> { } def: InstRW<[SKXWriteResGroup3], (instregex "COM(P?)_FST0r", "KMOV(B|D|Q|W)kr", - "MMX_MOVD64rr", - "MMX_MOVD64to64rr", - "MOV64toPQIrr", - "MOVDI2PDIrr", - "UCOM_F(P?)r", - "VMOV64toPQI(Z?)rr", - "VMOVDI2PDI(Z?)rr")>; + "UCOM_F(P?)r")>; def SKXWriteResGroup4 : SchedWriteRes<[SKXPort6]> { let Latency = 1; @@ -559,7 +555,6 @@ def SKXWriteResGroup6 : SchedWriteRes<[SKXPort05]> { let ResourceCycles = [1]; } def: InstRW<[SKXWriteResGroup6], (instrs FINCSTP, FNOP)>; -def: InstRW<[SKXWriteResGroup6], (instregex "MMX_MOVQ64rr")>; def SKXWriteResGroup7 : SchedWriteRes<[SKXPort06]> { let Latency = 1; @@ -630,20 +625,6 @@ def: InstRW<[SKXWriteResGroup11], (instregex "FBSTPm", "ST_FP(32|64|80)m", "VMPTRSTm")>; -def SKXWriteResGroup12 : SchedWriteRes<[SKXPort0]> { - let Latency = 2; - let NumMicroOps = 1; - let ResourceCycles = [1]; -} -def: InstRW<[SKXWriteResGroup12], (instregex "MMX_MOVD64from64rr", - "MMX_MOVD64grr", - "MOVPDI2DIrr", - "MOVPQIto64rr", - "VMOVPDI2DIZrr", - "VMOVPDI2DIrr", - "VMOVPQIto64Zrr", - "VMOVPQIto64rr")>; - def SKXWriteResGroup13 : SchedWriteRes<[SKXPort5]> { let Latency = 2; let NumMicroOps = 2; diff --git a/llvm/lib/Target/X86/X86Schedule.td b/llvm/lib/Target/X86/X86Schedule.td index c8afbe688f9..b668f16a619 100644 --- a/llvm/lib/Target/X86/X86Schedule.td +++ b/llvm/lib/Target/X86/X86Schedule.td @@ -250,6 +250,8 @@ def WriteVecMaskedStoreY : SchedWrite; def WriteVecMove : SchedWrite; def WriteVecMoveX : SchedWrite; def WriteVecMoveY : SchedWrite; +def WriteVecMoveToGpr : SchedWrite; +def WriteVecMoveFromGpr : SchedWrite; defm WriteVecALU : X86SchedWritePair; // Vector integer ALU op, no logicals. 
defm WriteVecALUX : X86SchedWritePair; // Vector integer ALU op, no logicals (XMM). diff --git a/llvm/lib/Target/X86/X86ScheduleAtom.td b/llvm/lib/Target/X86/X86ScheduleAtom.td index 9549b7cfd71..427fad2f089 100644 --- a/llvm/lib/Target/X86/X86ScheduleAtom.td +++ b/llvm/lib/Target/X86/X86ScheduleAtom.td @@ -323,9 +323,11 @@ def : WriteRes<WriteVecStoreNTY, [AtomPort0]>; def : WriteRes<WriteVecMaskedStore, [AtomPort0]>; def : WriteRes<WriteVecMaskedStoreY, [AtomPort0]>; -def : WriteRes<WriteVecMove, [AtomPort01]>; +def : WriteRes<WriteVecMove, [AtomPort0]>; def : WriteRes<WriteVecMoveX, [AtomPort01]>; def : WriteRes<WriteVecMoveY, [AtomPort01]>; +defm : X86WriteRes<WriteVecMoveToGpr, [AtomPort0], 3, [3], 1>; +defm : X86WriteRes<WriteVecMoveFromGpr, [AtomPort0], 1, [1], 1>; defm : AtomWriteResPair<WriteVecALU, [AtomPort01], [AtomPort0], 1, 1>; defm : AtomWriteResPair<WriteVecALUX, [AtomPort01], [AtomPort0], 1, 1>; @@ -435,26 +437,12 @@ def AtomWrite0_1 : SchedWriteRes<[AtomPort0]> { } def : InstRW<[AtomWrite0_1], (instrs FXAM, LD_Frr, BSWAP32r, BSWAP64r, - MOVSX64rr32, - MMX_MOVD64rr, - MMX_MOVD64to64rr, - MOVDI2PDIrr, - MOVDI2SSrr, - MOV64toPQIrr, - MOV64toSDrr)>; + MOVSX64rr32)>; def : SchedAlias<WriteALURMW, AtomWrite0_1>; def : SchedAlias<WriteADCRMW, AtomWrite0_1>; def : InstRW<[AtomWrite0_1], (instregex "(RCL|RCR|ROL|ROR|SAR|SHL|SHR)(8|16|32|64)m", "MOV(S|Z)X(32|64)rr(8|8_NOREX|16)")>; -def AtomWrite0_3 : SchedWriteRes<[AtomPort0]> { - let Latency = 3; - let ResourceCycles = [3]; -} -def : InstRW<[AtomWrite0_3], (instrs MMX_MOVD64from64rr, MMX_MOVD64grr, - MOVPDI2DIrr, MOVPQIto64rr, - MOVSDto64rr, MOVSS2DIrr)>; - def AtomWrite0_5 : SchedWriteRes<[AtomPort0]> { let Latency = 5; let ResourceCycles = [5]; diff --git a/llvm/lib/Target/X86/X86ScheduleBtVer2.td b/llvm/lib/Target/X86/X86ScheduleBtVer2.td index 361ae95ce16..2d468299b8b 100644 --- a/llvm/lib/Target/X86/X86ScheduleBtVer2.td +++ b/llvm/lib/Target/X86/X86ScheduleBtVer2.td @@ -416,6 +416,8 @@ defm : 
X86WriteRes<WriteVecMaskedStoreY, [JSAGU, JFPU01, JVALU], 6, [2, 2, 4], defm : X86WriteRes<WriteVecMove, [JFPU01, JVALU], 1, [1, 1], 1>; defm : X86WriteRes<WriteVecMoveX, [JFPU01, JVALU], 1, [1, 1], 1>; defm : X86WriteRes<WriteVecMoveY, [JFPU01, JVALU], 1, [2, 2], 2>; +defm : X86WriteRes<WriteVecMoveToGpr, [JFPU0, JFPA, JALU0], 4, [1, 1, 1], 1>; +defm : X86WriteRes<WriteVecMoveFromGpr, [JFPU01, JFPX], 8, [1, 1], 2>; defm : JWriteResFpuPair<WriteVecALU, [JFPU01, JVALU], 1>; defm : JWriteResFpuPair<WriteVecALUX, [JFPU01, JVALU], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleSLM.td b/llvm/lib/Target/X86/X86ScheduleSLM.td index aaf62b11ef8..699479b791e 100644 --- a/llvm/lib/Target/X86/X86ScheduleSLM.td +++ b/llvm/lib/Target/X86/X86ScheduleSLM.td @@ -252,6 +252,8 @@ def : WriteRes<WriteVecMaskedStoreY, [SLM_MEC_RSV]>; def : WriteRes<WriteVecMove, [SLM_FPC_RSV01]>; def : WriteRes<WriteVecMoveX, [SLM_FPC_RSV01]>; def : WriteRes<WriteVecMoveY, [SLM_FPC_RSV01]>; +def : WriteRes<WriteVecMoveToGpr, [SLM_IEC_RSV01]>; +def : WriteRes<WriteVecMoveFromGpr, [SLM_IEC_RSV01]>; defm : SLMWriteResPair<WriteVecShift, [SLM_FPC_RSV0], 1>; defm : SLMWriteResPair<WriteVecShiftX, [SLM_FPC_RSV0], 1>; diff --git a/llvm/lib/Target/X86/X86ScheduleZnver1.td b/llvm/lib/Target/X86/X86ScheduleZnver1.td index 324a54dbc54..b5c840db757 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver1.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver1.td @@ -301,7 +301,9 @@ defm : X86WriteRes<WriteVecMaskedStore, [ZnAGU,ZnFPU01], 4, [1,1], 1>; defm : X86WriteRes<WriteVecMaskedStoreY, [ZnAGU,ZnFPU01], 5, [1,2], 2>; defm : X86WriteRes<WriteVecMove, [ZnFPU], 1, [1], 1>; defm : X86WriteRes<WriteVecMoveX, [ZnFPU], 1, [1], 1>; -defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 1, [1], 1>; +defm : X86WriteRes<WriteVecMoveY, [ZnFPU], 2, [1], 2>; +defm : X86WriteRes<WriteVecMoveToGpr, [ZnFPU2], 2, [1], 1>; +defm : X86WriteRes<WriteVecMoveFromGpr, [ZnFPU2], 3, [1], 1>; defm : X86WriteRes<WriteEMMS, [ZnFPU], 2, [1], 1>; defm : 
ZnWriteResFpuPair<WriteVecShift, [ZnFPU], 1>; @@ -922,50 +924,6 @@ def : InstRW<[WriteMicrocoded], (instrs FNCLEX)>; def : InstRW<[WriteMicrocoded], (instrs FNINIT)>; //=== Integer MMX and XMM Instructions ===// -//-- Move instructions --// - -// Moves from GPR to FPR incurs a penalty -def ZnWriteFPU2 : SchedWriteRes<[ZnFPU2]> { - let Latency = 3; -} - -// Move to ALU doesn't incur penalty -def ZnWriteToALU2 : SchedWriteRes<[ZnFPU2]> { - let Latency = 2; -} - -def ZnWriteFPU : SchedWriteRes<[ZnFPU]>; -def ZnWriteFPUY : SchedWriteRes<[ZnFPU]> { - let NumMicroOps = 2; - let Latency=2; -} - -// MOVD. -// r32/64 <- (x)mm. -def : InstRW<[ZnWriteToALU2], (instrs MMX_MOVD64grr, - MMX_MOVD64from64rr, - MOVPDI2DIrr, - VMOVPDI2DIrr)>; - -// (x)mm <- r32/64. -def : InstRW<[ZnWriteFPU2], (instrs MMX_MOVD64rr, - MMX_MOVD64to64rr, - MOVDI2PDIrr, - VMOVDI2PDIrr)>; - -// MOVQ. -// r64 <- (x)mm. -def : InstRW<[ZnWriteToALU2], (instrs VMOVPQIto64rr)>; - -// (x)mm <- r64. -def : InstRW<[ZnWriteFPU2], (instrs VMOV64toPQIrr)>; - -// (x)mm <- (x)mm. -def : InstRW<[ZnWriteFPU], (instregex "MMX_MOVQ64rr")>; - -// (V)MOVDQA/U. -// y <- y. -def : InstRW<[ZnWriteFPUY], (instregex "VMOVDQ(A|U)Yrr")>; // PACKSSWB/DW. // mm <- mm. 
diff --git a/llvm/test/CodeGen/X86/3dnow-schedule.ll b/llvm/test/CodeGen/X86/3dnow-schedule.ll index badc9256462..eb317ebbb2a 100644 --- a/llvm/test/CodeGen/X86/3dnow-schedule.ll +++ b/llvm/test/CodeGen/X86/3dnow-schedule.ll @@ -16,7 +16,7 @@ define i64 @test_pavgusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pavgusb %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pavgusb (%rdi), %mm0 # sched: [8:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pavgusb(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -31,7 +31,7 @@ define i64 @test_pf2id(x86_mmx* %a0) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pf2id (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pf2id %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 %2 = call x86_mmx @llvm.x86.3dnow.pf2id(x86_mmx %1) @@ -46,7 +46,7 @@ define i64 @test_pf2iw(x86_mmx* %a0) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pf2iw (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pf2iw %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 %2 = call x86_mmx @llvm.x86.3dnowa.pf2iw(x86_mmx %1) @@ -61,7 +61,7 @@ define i64 @test_pfacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfacc %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfacc (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfacc(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -76,7 +76,7 @@ define i64 
@test_pfadd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfadd %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfadd (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfadd(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -91,7 +91,7 @@ define i64 @test_pfcmpeq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpeq %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfcmpeq (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpeq(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -106,7 +106,7 @@ define i64 @test_pfcmpge(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpge %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfcmpge (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpge(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -121,7 +121,7 @@ define i64 @test_pfcmpgt(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfcmpgt %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfcmpgt (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfcmpgt(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -136,7 +136,7 @@ define i64 @test_pfmax(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfmax %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfmax (%rdi), %mm0 # 
sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmax(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -151,7 +151,7 @@ define i64 @test_pfmin(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfmin %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfmin (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmin(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -166,7 +166,7 @@ define i64 @test_pfmul(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfmul %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfmul (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfmul(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -181,7 +181,7 @@ define i64 @test_pfnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfnacc %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfnacc (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnowa.pfnacc(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -196,7 +196,7 @@ define i64 @test_pfpnacc(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfpnacc %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfpnacc (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx 
@llvm.x86.3dnowa.pfpnacc(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -211,7 +211,7 @@ define i64 @test_pfrcp(x86_mmx* %a0) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfrcp (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pfrcp %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 %2 = call x86_mmx @llvm.x86.3dnow.pfrcp(x86_mmx %1) @@ -226,7 +226,7 @@ define i64 @test_pfrcpit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfrcpit1 %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfrcpit1 (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit1(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -241,7 +241,7 @@ define i64 @test_pfrcpit2(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfrcpit2 %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfrcpit2 (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrcpit2(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -256,7 +256,7 @@ define i64 @test_pfrsqit1(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfrsqit1 %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfrsqit1 (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfrsqit1(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -271,7 +271,7 @@ define i64 @test_pfrsqrt(x86_mmx* %a0) optsize { ; CHECK: # %bb.0: ; 
CHECK-NEXT: pfrsqrt (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pfrsqrt %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 %2 = call x86_mmx @llvm.x86.3dnow.pfrsqrt(x86_mmx %1) @@ -286,7 +286,7 @@ define i64 @test_pfsub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfsub %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfsub (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsub(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -301,7 +301,7 @@ define i64 @test_pfsubr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pfsubr %mm1, %mm0 # sched: [3:1.00] ; CHECK-NEXT: pfsubr (%rdi), %mm0 # sched: [9:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pfsubr(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -316,7 +316,7 @@ define i64 @test_pi2fd(x86_mmx* %a0) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pi2fd (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pi2fd %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 %2 = call x86_mmx @llvm.x86.3dnow.pi2fd(x86_mmx %1) @@ -331,7 +331,7 @@ define i64 @test_pi2fw(x86_mmx* %a0) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pi2fw (%rdi), %mm0 # sched: [9:1.00] ; CHECK-NEXT: pi2fw %mm0, %mm0 # sched: [3:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, 
x86_mmx *%a0, align 8 %2 = call x86_mmx @llvm.x86.3dnowa.pi2fw(x86_mmx %1) @@ -346,7 +346,7 @@ define i64 @test_pmulhrw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pmulhrw %mm1, %mm0 # sched: [5:1.00] ; CHECK-NEXT: pmulhrw (%rdi), %mm0 # sched: [10:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = call x86_mmx @llvm.x86.3dnow.pmulhrw(x86_mmx %a0, x86_mmx %a1) %2 = load x86_mmx, x86_mmx *%a2, align 8 @@ -383,7 +383,7 @@ define i64 @test_pswapd(x86_mmx* %a0) optsize { ; CHECK: # %bb.0: ; CHECK-NEXT: pswapd (%rdi), %mm0 # mm0 = mem[1,0] sched: [6:1.00] ; CHECK-NEXT: pswapd %mm0, %mm0 # mm0 = mm0[1,0] sched: [1:1.00] -; CHECK-NEXT: movq %mm0, %rax # sched: [1:0.33] +; CHECK-NEXT: movq %mm0, %rax # sched: [2:1.00] ; CHECK-NEXT: retq # sched: [1:1.00] %1 = load x86_mmx, x86_mmx *%a0, align 8 %2 = call x86_mmx @llvm.x86.3dnowa.pswapd(x86_mmx %1) diff --git a/llvm/test/CodeGen/X86/avx512-schedule.ll b/llvm/test/CodeGen/X86/avx512-schedule.ll index 69e4ed9f2e4..aeabf2f7cea 100755 --- a/llvm/test/CodeGen/X86/avx512-schedule.ll +++ b/llvm/test/CodeGen/X86/avx512-schedule.ll @@ -1981,12 +1981,12 @@ entry: define double @long_to_double(i64 %x) { ; GENERIC-LABEL: long_to_double: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: long_to_double: ; SKX: # %bb.0: -; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vmovq %rdi, %xmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = bitcast i64 %x to double ret double %res @@ -1995,12 +1995,12 @@ define double @long_to_double(i64 %x) { define i64 @double_to_long(double %x) { ; GENERIC-LABEL: double_to_long: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # 
sched: [1:1.00] ; ; SKX-LABEL: double_to_long: ; SKX: # %bb.0: -; SKX-NEXT: vmovq %xmm0, %rax # sched: [1:0.25] +; SKX-NEXT: vmovq %xmm0, %rax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = bitcast double %x to i64 ret i64 %res @@ -2009,12 +2009,12 @@ define i64 @double_to_long(double %x) { define float @int_to_float(i32 %x) { ; GENERIC-LABEL: int_to_float: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:0.33] +; GENERIC-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: int_to_float: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:0.25] +; SKX-NEXT: vmovd %edi, %xmm0 # sched: [1:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = bitcast i32 %x to float ret float %res @@ -2023,12 +2023,12 @@ define float @int_to_float(i32 %x) { define i32 @float_to_int(float %x) { ; GENERIC-LABEL: float_to_int: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [1:0.33] +; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: float_to_int: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %xmm0, %eax # sched: [1:0.25] +; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = bitcast float %x to i32 ret i32 %res @@ -5877,12 +5877,12 @@ entry: define i32 @mov_test1(float %x) { ; GENERIC-LABEL: mov_test1: ; GENERIC: # %bb.0: -; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [1:0.33] +; GENERIC-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; SKX-LABEL: mov_test1: ; SKX: # %bb.0: -; SKX-NEXT: vmovd %xmm0, %eax # sched: [1:0.25] +; SKX-NEXT: vmovd %xmm0, %eax # sched: [2:1.00] ; SKX-NEXT: retq # sched: [7:1.00] %res = bitcast float %x to i32 ret i32 %res diff --git a/llvm/test/CodeGen/X86/mmx-schedule.ll b/llvm/test/CodeGen/X86/mmx-schedule.ll index 275b3fe16b5..b903a57040c 100644 --- a/llvm/test/CodeGen/X86/mmx-schedule.ll +++ b/llvm/test/CodeGen/X86/mmx-schedule.ll @@ -17,7 +17,7 @@ define 
i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; GENERIC-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] ; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtpd2pi: @@ -41,7 +41,7 @@ define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; SANDY-NEXT: cvtpd2pi (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: cvtpd2pi %xmm0, %mm1 # sched: [4:1.00] ; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtpd2pi: @@ -81,7 +81,7 @@ define i64 @test_cvtpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; BTVER2-NEXT: cvtpd2pi (%rdi), %mm1 # sched: [8:1.00] ; BTVER2-NEXT: cvtpd2pi %xmm0, %mm0 # sched: [3:1.00] ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cvtpd2pi: @@ -262,7 +262,7 @@ define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; GENERIC-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] ; GENERIC-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm1, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvtps2pi: @@ -286,7 +286,7 @@ define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; SANDY-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] ; SANDY-NEXT: cvtps2pi (%rdi), %mm1 # sched: [9:1.00] ; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; SANDY-NEXT: movq %mm1, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00] 
; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvtps2pi: @@ -326,7 +326,7 @@ define i64 @test_cvtps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; BTVER2-NEXT: cvtps2pi (%rdi), %mm1 # sched: [8:1.00] ; BTVER2-NEXT: cvtps2pi %xmm0, %mm0 # sched: [3:1.00] ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cvtps2pi: @@ -351,7 +351,7 @@ define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; GENERIC-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] ; GENERIC-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] ; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttpd2pi: @@ -375,7 +375,7 @@ define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; SANDY-NEXT: cvttpd2pi (%rdi), %mm0 # sched: [10:1.00] ; SANDY-NEXT: cvttpd2pi %xmm0, %mm1 # sched: [4:1.00] ; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttpd2pi: @@ -415,7 +415,7 @@ define i64 @test_cvttpd2pi(<2 x double> %a0, <2 x double>* %a1) optsize { ; BTVER2-NEXT: cvttpd2pi (%rdi), %mm1 # sched: [8:1.00] ; BTVER2-NEXT: cvttpd2pi %xmm0, %mm0 # sched: [3:1.00] ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cvttpd2pi: @@ -440,7 +440,7 @@ define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; GENERIC-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] ; GENERIC-NEXT: por %mm0, 
%mm1 # sched: [1:0.33] -; GENERIC-NEXT: movq %mm1, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm1, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_cvttps2pi: @@ -464,7 +464,7 @@ define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; SANDY-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] ; SANDY-NEXT: cvttps2pi (%rdi), %mm1 # sched: [9:1.00] ; SANDY-NEXT: por %mm0, %mm1 # sched: [1:0.33] -; SANDY-NEXT: movq %mm1, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm1, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_cvttps2pi: @@ -504,7 +504,7 @@ define i64 @test_cvttps2pi(<4 x float> %a0, <4 x float>* %a1) optsize { ; BTVER2-NEXT: cvttps2pi (%rdi), %mm1 # sched: [8:1.00] ; BTVER2-NEXT: cvttps2pi %xmm0, %mm0 # sched: [3:1.00] ; BTVER2-NEXT: por %mm0, %mm1 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm1, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm1, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_cvttps2pi: @@ -636,12 +636,12 @@ declare void @llvm.x86.mmx.maskmovq(x86_mmx, x86_mmx, i8*) nounwind define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; GENERIC-LABEL: test_movd: ; GENERIC: # %bb.0: -; GENERIC-NEXT: movd %edi, %mm1 # sched: [1:0.33] +; GENERIC-NEXT: movd %edi, %mm1 # sched: [1:1.00] ; GENERIC-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] ; GENERIC-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] ; GENERIC-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: movd %mm2, %ecx # sched: [1:0.33] -; GENERIC-NEXT: movd %mm0, %eax # sched: [1:0.33] +; GENERIC-NEXT: movd %mm2, %ecx # sched: [2:1.00] +; GENERIC-NEXT: movd %mm0, %eax # sched: [2:1.00] ; GENERIC-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; @@ -669,12 +669,12 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; ; SANDY-LABEL: test_movd: ; SANDY: # %bb.0: -; SANDY-NEXT: movd %edi, %mm1 # sched: [1:0.33] +; SANDY-NEXT: movd %edi, %mm1 # sched: 
[1:1.00] ; SANDY-NEXT: movd (%rsi), %mm2 # sched: [5:0.50] ; SANDY-NEXT: paddd %mm1, %mm2 # sched: [3:1.00] ; SANDY-NEXT: paddd %mm2, %mm0 # sched: [3:1.00] -; SANDY-NEXT: movd %mm2, %ecx # sched: [1:0.33] -; SANDY-NEXT: movd %mm0, %eax # sched: [1:0.33] +; SANDY-NEXT: movd %mm2, %ecx # sched: [2:1.00] +; SANDY-NEXT: movd %mm0, %eax # sched: [2:1.00] ; SANDY-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; @@ -724,12 +724,12 @@ define i32 @test_movd(x86_mmx %a0, i32 %a1, i32 *%a2) { ; ; BTVER2-LABEL: test_movd: ; BTVER2: # %bb.0: +; BTVER2-NEXT: movd %edi, %mm1 # sched: [8:0.50] ; BTVER2-NEXT: movd (%rsi), %mm2 # sched: [5:1.00] -; BTVER2-NEXT: movd %edi, %mm1 # sched: [1:0.50] ; BTVER2-NEXT: paddd %mm1, %mm2 # sched: [1:0.50] ; BTVER2-NEXT: paddd %mm2, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movd %mm2, %ecx # sched: [1:0.50] -; BTVER2-NEXT: movd %mm0, %eax # sched: [1:0.50] +; BTVER2-NEXT: movd %mm2, %ecx # sched: [4:1.00] +; BTVER2-NEXT: movd %mm0, %eax # sched: [4:1.00] ; BTVER2-NEXT: movl %ecx, (%rsi) # sched: [1:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; @@ -763,7 +763,7 @@ define i64 @test_movdq2q(<2 x i64> %a0) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] ; GENERIC-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_movdq2q: @@ -784,7 +784,7 @@ define i64 @test_movdq2q(<2 x i64> %a0) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: movdq2q %xmm0, %mm0 # sched: [2:1.00] ; SANDY-NEXT: paddd %mm0, %mm0 # sched: [3:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_movdq2q: @@ -819,7 +819,7 @@ define i64 @test_movdq2q(<2 x i64> %a0) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: movdq2q %xmm0, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddd 
%mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_movdq2q: @@ -1030,7 +1030,7 @@ define i64 @test_pabsb(x86_mmx *%a0) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] ; GENERIC-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pabsb: @@ -1051,7 +1051,7 @@ define i64 @test_pabsb(x86_mmx *%a0) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pabsb (%rdi), %mm0 # sched: [6:0.50] ; SANDY-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsb: @@ -1086,7 +1086,7 @@ define i64 @test_pabsb(x86_mmx *%a0) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pabsb (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: pabsb %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pabsb: @@ -1108,7 +1108,7 @@ define i64 @test_pabsd(x86_mmx *%a0) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] ; GENERIC-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pabsd: @@ -1129,7 +1129,7 @@ define i64 @test_pabsd(x86_mmx *%a0) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pabsd (%rdi), %mm0 # sched: [6:0.50] ; SANDY-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsd: @@ -1164,7 
+1164,7 @@ define i64 @test_pabsd(x86_mmx *%a0) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pabsd (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: pabsd %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pabsd: @@ -1186,7 +1186,7 @@ define i64 @test_pabsw(x86_mmx *%a0) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] ; GENERIC-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pabsw: @@ -1207,7 +1207,7 @@ define i64 @test_pabsw(x86_mmx *%a0) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pabsw (%rdi), %mm0 # sched: [6:0.50] ; SANDY-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pabsw: @@ -1242,7 +1242,7 @@ define i64 @test_pabsw(x86_mmx *%a0) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pabsw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: pabsw %mm0, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pabsw: @@ -1264,7 +1264,7 @@ define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_packssdw: @@ -1285,7 +1285,7 @@ define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: packssdw %mm1, %mm0 # sched: [1:1.00] ; 
SANDY-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packssdw: @@ -1320,7 +1320,7 @@ define i64 @test_packssdw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: packssdw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: packssdw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_packssdw: @@ -1342,7 +1342,7 @@ define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_packsswb: @@ -1363,7 +1363,7 @@ define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: packsswb %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packsswb: @@ -1398,7 +1398,7 @@ define i64 @test_packsswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: packsswb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: packsswb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_packsswb: @@ -1420,7 +1420,7 @@ define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: 
packuswb (%rdi), %mm0 # sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_packuswb: @@ -1441,7 +1441,7 @@ define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: packuswb %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_packuswb: @@ -1476,7 +1476,7 @@ define i64 @test_packuswb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: packuswb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: packuswb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_packuswb: @@ -1498,7 +1498,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddb: @@ -1519,7 +1519,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: paddb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddb: @@ -1554,7 +1554,7 @@ define i64 @test_paddb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: 
movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddb: @@ -1576,7 +1576,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddd: @@ -1597,7 +1597,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddd %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: paddd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddd: @@ -1632,7 +1632,7 @@ define i64 @test_paddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddd %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddd: @@ -1654,7 +1654,7 @@ define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] ; GENERIC-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddq: @@ -1675,7 +1675,7 @@ define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] ; SANDY-NEXT: paddq (%rdi), %mm0 # sched: [7:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: 
[2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddq: @@ -1710,7 +1710,7 @@ define i64 @test_paddq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddq %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddq (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddq: @@ -1732,7 +1732,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddsb: @@ -1753,7 +1753,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddsb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: paddsb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsb: @@ -1788,7 +1788,7 @@ define i64 @test_paddsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddsb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddsb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddsb: @@ -1810,7 +1810,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; 
ATOM-LABEL: test_paddsw: @@ -1831,7 +1831,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddsw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: paddsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddsw: @@ -1866,7 +1866,7 @@ define i64 @test_paddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddsw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddsw: @@ -1888,7 +1888,7 @@ define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddusb: @@ -1909,7 +1909,7 @@ define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddusb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: paddusb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddusb: @@ -1944,7 +1944,7 @@ define i64 @test_paddusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddusb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddusb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddusb: @@ -1966,7 +1966,7 @@ define 
i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddusw: @@ -1987,7 +1987,7 @@ define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddusw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: paddusw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddusw: @@ -2022,7 +2022,7 @@ define i64 @test_paddusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddusw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddusw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddusw: @@ -2044,7 +2044,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_paddw: @@ -2065,7 +2065,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: paddw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: paddw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_paddw: @@ -2100,7 +2100,7 @@ define i64 @test_paddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) 
optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: paddw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: paddw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_paddw: @@ -2122,7 +2122,7 @@ define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] ; GENERIC-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_palignr: @@ -2143,7 +2143,7 @@ define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] ; SANDY-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_palignr: @@ -2178,7 +2178,7 @@ define i64 @test_palignr(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: palignr $1, %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: palignr $1, (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_palignr: @@ -2200,7 +2200,7 @@ define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pand %mm1, %mm0 # sched: [1:0.33] ; GENERIC-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pand: @@ -2221,7 +2221,7 @@ define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; 
SANDY-NEXT: pand %mm1, %mm0 # sched: [1:0.33] ; SANDY-NEXT: pand (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pand: @@ -2256,7 +2256,7 @@ define i64 @test_pand(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pand %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pand (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pand: @@ -2278,7 +2278,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] ; GENERIC-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pandn: @@ -2299,7 +2299,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pandn %mm1, %mm0 # sched: [1:0.33] ; SANDY-NEXT: pandn (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pandn: @@ -2334,7 +2334,7 @@ define i64 @test_pandn(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pandn %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pandn (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pandn: @@ -2356,7 +2356,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pavgb (%rdi), %mm0 # 
sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pavgb: @@ -2377,7 +2377,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pavgb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pavgb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgb: @@ -2412,7 +2412,7 @@ define i64 @test_pavgb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pavgb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pavgb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pavgb: @@ -2434,7 +2434,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pavgw: @@ -2455,7 +2455,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pavgw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pavgw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pavgw: @@ -2490,7 +2490,7 @@ define i64 @test_pavgw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pavgw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pavgw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: 
movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pavgw: @@ -2512,7 +2512,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpeqb: @@ -2533,7 +2533,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpeqb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pcmpeqb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqb: @@ -2568,7 +2568,7 @@ define i64 @test_pcmpeqb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pcmpeqb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pcmpeqb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pcmpeqb: @@ -2590,7 +2590,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpeqd: @@ -2611,7 +2611,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpeqd %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pcmpeqd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; 
SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqd: @@ -2646,7 +2646,7 @@ define i64 @test_pcmpeqd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pcmpeqd %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pcmpeqd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pcmpeqd: @@ -2668,7 +2668,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpeqw: @@ -2689,7 +2689,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpeqw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pcmpeqw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpeqw: @@ -2724,7 +2724,7 @@ define i64 @test_pcmpeqw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pcmpeqw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pcmpeqw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pcmpeqw: @@ -2746,7 +2746,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: 
[1:1.00] ; ; ATOM-LABEL: test_pcmpgtb: @@ -2767,7 +2767,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpgtb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pcmpgtb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtb: @@ -2802,7 +2802,7 @@ define i64 @test_pcmpgtb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pcmpgtb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pcmpgtb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pcmpgtb: @@ -2824,7 +2824,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpgtd: @@ -2845,7 +2845,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpgtd %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pcmpgtd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtd: @@ -2880,7 +2880,7 @@ define i64 @test_pcmpgtd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pcmpgtd %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pcmpgtd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pcmpgtd: @@ 
-2902,7 +2902,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pcmpgtw: @@ -2923,7 +2923,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pcmpgtw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pcmpgtw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pcmpgtw: @@ -2958,7 +2958,7 @@ define i64 @test_pcmpgtw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pcmpgtw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pcmpgtw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pcmpgtw: @@ -3035,7 +3035,7 @@ define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] ; GENERIC-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phaddd: @@ -3056,7 +3056,7 @@ define i64 @test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: phaddd %mm1, %mm0 # sched: [3:1.50] ; SANDY-NEXT: phaddd (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddd: @@ -3091,7 +3091,7 @@ define i64 
@test_phaddd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: phaddd %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: phaddd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_phaddd: @@ -3113,7 +3113,7 @@ define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] ; GENERIC-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phaddsw: @@ -3134,7 +3134,7 @@ define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: phaddsw %mm1, %mm0 # sched: [3:1.50] ; SANDY-NEXT: phaddsw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddsw: @@ -3169,7 +3169,7 @@ define i64 @test_phaddsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: phaddsw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: phaddsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_phaddsw: @@ -3191,7 +3191,7 @@ define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] ; GENERIC-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phaddw: @@ -3212,7 +3212,7 @@ define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, 
x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: phaddw %mm1, %mm0 # sched: [3:1.50] ; SANDY-NEXT: phaddw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phaddw: @@ -3247,7 +3247,7 @@ define i64 @test_phaddw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: phaddw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: phaddw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_phaddw: @@ -3269,7 +3269,7 @@ define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] ; GENERIC-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phsubd: @@ -3290,7 +3290,7 @@ define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: phsubd %mm1, %mm0 # sched: [3:1.50] ; SANDY-NEXT: phsubd (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubd: @@ -3325,7 +3325,7 @@ define i64 @test_phsubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: phsubd %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: phsubd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_phsubd: @@ -3347,7 +3347,7 @@ define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: 
phsubsw %mm1, %mm0 # sched: [3:1.50] ; GENERIC-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phsubsw: @@ -3368,7 +3368,7 @@ define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: phsubsw %mm1, %mm0 # sched: [3:1.50] ; SANDY-NEXT: phsubsw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubsw: @@ -3403,7 +3403,7 @@ define i64 @test_phsubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: phsubsw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: phsubsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_phsubsw: @@ -3425,7 +3425,7 @@ define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] ; GENERIC-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_phsubw: @@ -3446,7 +3446,7 @@ define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: phsubw %mm1, %mm0 # sched: [3:1.50] ; SANDY-NEXT: phsubw (%rdi), %mm0 # sched: [8:1.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_phsubw: @@ -3481,7 +3481,7 @@ define i64 @test_phsubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: phsubw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: 
phsubw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_phsubw: @@ -3504,7 +3504,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { ; GENERIC-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] ; GENERIC-NEXT: movswl (%rsi), %eax # sched: [5:0.50] ; GENERIC-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pinsrw: @@ -3528,7 +3528,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { ; SANDY-NEXT: pinsrw $0, %edi, %mm0 # sched: [2:1.00] ; SANDY-NEXT: movswl (%rsi), %eax # sched: [5:0.50] ; SANDY-NEXT: pinsrw $1, %eax, %mm0 # sched: [2:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pinsrw: @@ -3568,7 +3568,7 @@ define i64 @test_pinsrw(x86_mmx %a0, i32 %a1, i16* %a2) optsize { ; BTVER2-NEXT: pinsrw $0, %edi, %mm0 # sched: [7:0.50] ; BTVER2-NEXT: movswl (%rsi), %eax # sched: [4:1.00] ; BTVER2-NEXT: pinsrw $1, %eax, %mm0 # sched: [7:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pinsrw: @@ -3592,7 +3592,7 @@ define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmaddwd %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaddwd: @@ -3613,7 +3613,7 @@ define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmaddwd 
%mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: pmaddwd (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddwd: @@ -3648,7 +3648,7 @@ define i64 @test_pmaddwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmaddwd %mm1, %mm0 # sched: [2:1.00] ; BTVER2-NEXT: pmaddwd (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmaddwd: @@ -3670,7 +3670,7 @@ define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaddubsw: @@ -3691,7 +3691,7 @@ define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmaddubsw %mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: pmaddubsw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaddubsw: @@ -3726,7 +3726,7 @@ define i64 @test_pmaddubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmaddubsw %mm1, %mm0 # sched: [2:1.00] ; BTVER2-NEXT: pmaddubsw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmaddubsw: @@ -3748,7 +3748,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmaxsw %mm1, %mm0 # 
sched: [3:1.00] ; GENERIC-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaxsw: @@ -3769,7 +3769,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmaxsw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pmaxsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxsw: @@ -3804,7 +3804,7 @@ define i64 @test_pmaxsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmaxsw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pmaxsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmaxsw: @@ -3826,7 +3826,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmaxub: @@ -3847,7 +3847,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmaxub %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pmaxub (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmaxub: @@ -3882,7 +3882,7 @@ define i64 @test_pmaxub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmaxub %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pmaxub (%rdi), %mm0 # sched: 
[6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmaxub: @@ -3904,7 +3904,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pminsw: @@ -3925,7 +3925,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pminsw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pminsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminsw: @@ -3960,7 +3960,7 @@ define i64 @test_pminsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pminsw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pminsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pminsw: @@ -3982,7 +3982,7 @@ define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pminub: @@ -4003,7 +4003,7 @@ define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pminub %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: pminub (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] 
+; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pminub: @@ -4038,7 +4038,7 @@ define i64 @test_pminub(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pminub %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pminub (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pminub: @@ -4115,7 +4115,7 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmulhrsw: @@ -4136,7 +4136,7 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmulhrsw %mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: pmulhrsw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhrsw: @@ -4171,7 +4171,7 @@ define i64 @test_pmulhrsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmulhrsw %mm1, %mm0 # sched: [2:1.00] ; BTVER2-NEXT: pmulhrsw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmulhrsw: @@ -4193,7 +4193,7 @@ define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, 
%rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmulhw: @@ -4214,7 +4214,7 @@ define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmulhw %mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: pmulhw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhw: @@ -4249,7 +4249,7 @@ define i64 @test_pmulhw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmulhw %mm1, %mm0 # sched: [2:1.00] ; BTVER2-NEXT: pmulhw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmulhw: @@ -4271,7 +4271,7 @@ define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmulhuw: @@ -4292,7 +4292,7 @@ define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmulhuw %mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: pmulhuw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmulhuw: @@ -4327,7 +4327,7 @@ define i64 @test_pmulhuw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmulhuw %mm1, %mm0 # sched: [2:1.00] ; BTVER2-NEXT: pmulhuw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: 
[4:1.00] ; ; ZNVER1-LABEL: test_pmulhuw: @@ -4349,7 +4349,7 @@ define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmullw: @@ -4370,7 +4370,7 @@ define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmullw %mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: pmullw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmullw: @@ -4405,7 +4405,7 @@ define i64 @test_pmullw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmullw %mm1, %mm0 # sched: [2:1.00] ; BTVER2-NEXT: pmullw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmullw: @@ -4427,7 +4427,7 @@ define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pmuludq: @@ -4448,7 +4448,7 @@ define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pmuludq %mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: pmuludq (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pmuludq: @@ 
-4483,7 +4483,7 @@ define i64 @test_pmuludq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pmuludq %mm1, %mm0 # sched: [2:1.00] ; BTVER2-NEXT: pmuludq (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pmuludq: @@ -4505,7 +4505,7 @@ define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; GENERIC-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_por: @@ -4526,7 +4526,7 @@ define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: por %mm1, %mm0 # sched: [1:0.33] ; SANDY-NEXT: por (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_por: @@ -4561,7 +4561,7 @@ define i64 @test_por(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: por %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: por (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_por: @@ -4583,7 +4583,7 @@ define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] ; GENERIC-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psadbw: @@ -4604,7 +4604,7 @@ define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) 
optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psadbw %mm1, %mm0 # sched: [5:1.00] ; SANDY-NEXT: psadbw (%rdi), %mm0 # sched: [10:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psadbw: @@ -4639,7 +4639,7 @@ define i64 @test_psadbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psadbw %mm1, %mm0 # sched: [2:0.50] ; BTVER2-NEXT: psadbw (%rdi), %mm0 # sched: [7:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psadbw: @@ -4661,7 +4661,7 @@ define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] ; GENERIC-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pshufb: @@ -4682,7 +4682,7 @@ define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pshufb %mm1, %mm0 # sched: [1:0.50] ; SANDY-NEXT: pshufb (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufb: @@ -4717,7 +4717,7 @@ define i64 @test_pshufb(x86_mmx %a0, x86_mmx %a1, x86_mmx *%a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pshufb %mm1, %mm0 # sched: [2:2.00] ; BTVER2-NEXT: pshufb (%rdi), %mm0 # sched: [7:2.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pshufb: @@ -4739,7 +4739,7 @@ define i64 @test_pshufw(x86_mmx *%a0) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] 
sched: [6:1.00] ; GENERIC-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pshufw: @@ -4760,7 +4760,7 @@ define i64 @test_pshufw(x86_mmx *%a0) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] ; SANDY-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pshufw: @@ -4795,7 +4795,7 @@ define i64 @test_pshufw(x86_mmx *%a0) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pshufw $0, (%rdi), %mm0 # mm0 = mem[0,0,0,0] sched: [6:1.00] ; BTVER2-NEXT: pshufw $0, %mm0, %mm0 # mm0 = mm0[0,0,0,0] sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pshufw: @@ -4817,7 +4817,7 @@ define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] ; GENERIC-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psignb: @@ -4838,7 +4838,7 @@ define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psignb %mm1, %mm0 # sched: [1:0.50] ; SANDY-NEXT: psignb (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignb: @@ -4873,7 +4873,7 @@ define i64 @test_psignb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psignb %mm1, %mm0 # 
sched: [1:0.50] ; BTVER2-NEXT: psignb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psignb: @@ -4895,7 +4895,7 @@ define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] ; GENERIC-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psignd: @@ -4916,7 +4916,7 @@ define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] ; SANDY-NEXT: psignd (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignd: @@ -4951,7 +4951,7 @@ define i64 @test_psignd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psignd %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psignd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psignd: @@ -4973,7 +4973,7 @@ define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] ; GENERIC-NEXT: psignw (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psignw: @@ -4994,7 +4994,7 @@ define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] ; SANDY-NEXT: psignw (%rdi), %mm0 # sched: 
[6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psignw: @@ -5029,7 +5029,7 @@ define i64 @test_psignw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psignw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psignw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psignw: @@ -5052,7 +5052,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pslld: @@ -5076,7 +5076,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: pslld %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: pslld $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pslld: @@ -5116,7 +5116,7 @@ define i64 @test_pslld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: pslld %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pslld (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: pslld $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pslld: @@ -5142,7 +5142,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: psllq (%rdi), %mm0 # sched: 
[6:1.00] ; GENERIC-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psllq: @@ -5166,7 +5166,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: psllq %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psllq $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllq: @@ -5206,7 +5206,7 @@ define i64 @test_psllq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: psllq %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psllq (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: psllq $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psllq: @@ -5232,7 +5232,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psllw: @@ -5256,7 +5256,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: psllw %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psllw $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psllw: @@ -5296,7 +5296,7 @@ define i64 @test_psllw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: psllw %mm1, %mm0 
# sched: [1:0.50] ; BTVER2-NEXT: psllw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: psllw $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psllw: @@ -5322,7 +5322,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrad: @@ -5346,7 +5346,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: psrad %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrad $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrad: @@ -5386,7 +5386,7 @@ define i64 @test_psrad(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: psrad %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psrad (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: psrad $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psrad: @@ -5412,7 +5412,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psraw: @@ -5436,7 +5436,7 @@ define i64 @test_psraw(x86_mmx %a0, 
x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: psraw %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psraw $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psraw: @@ -5476,7 +5476,7 @@ define i64 @test_psraw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: psraw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psraw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: psraw $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psraw: @@ -5502,7 +5502,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrld: @@ -5526,7 +5526,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: psrld %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrld $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrld: @@ -5566,7 +5566,7 @@ define i64 @test_psrld(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: psrld %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psrld (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: psrld $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: 
test_psrld: @@ -5592,7 +5592,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrlq: @@ -5616,7 +5616,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: psrlq %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrlq $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlq: @@ -5656,7 +5656,7 @@ define i64 @test_psrlq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: psrlq %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psrlq (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: psrlq $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psrlq: @@ -5682,7 +5682,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] ; GENERIC-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] ; GENERIC-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psrlw: @@ -5706,7 +5706,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY-NEXT: psrlw %mm1, %mm0 # sched: [1:1.00] ; SANDY-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] ; SANDY-NEXT: psrlw $7, %mm0 # sched: [1:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: 
[2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psrlw: @@ -5746,7 +5746,7 @@ define i64 @test_psrlw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2-NEXT: psrlw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psrlw (%rdi), %mm0 # sched: [6:1.00] ; BTVER2-NEXT: psrlw $7, %mm0 # sched: [1:0.50] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psrlw: @@ -5771,7 +5771,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubb: @@ -5792,7 +5792,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psubb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubb: @@ -5827,7 +5827,7 @@ define i64 @test_psubb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubb: @@ -5849,7 +5849,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: 
[1:1.00] ; ; ATOM-LABEL: test_psubd: @@ -5870,7 +5870,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psubd %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubd (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubd: @@ -5905,7 +5905,7 @@ define i64 @test_psubd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubd %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubd (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubd: @@ -5927,7 +5927,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubq: @@ -5948,7 +5948,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psubq %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubq (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubq: @@ -5983,7 +5983,7 @@ define i64 @test_psubq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubq %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubq (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubq: @@ -6005,7 +6005,7 @@ define i64 
@test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubsb: @@ -6026,7 +6026,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psubsb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubsb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsb: @@ -6061,7 +6061,7 @@ define i64 @test_psubsb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubsb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubsb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubsb: @@ -6083,7 +6083,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubsw: @@ -6104,7 +6104,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psubsw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubsw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubsw: @@ -6139,7 +6139,7 @@ define i64 @test_psubsw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize 
{ ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubsw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubsw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubsw: @@ -6161,7 +6161,7 @@ define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubusb: @@ -6182,7 +6182,7 @@ define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psubusb %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubusb (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusb: @@ -6217,7 +6217,7 @@ define i64 @test_psubusb(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubusb %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubusb (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubusb: @@ -6239,7 +6239,7 @@ define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubusw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubusw: @@ -6260,7 +6260,7 @@ define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: 
psubusw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubusw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubusw: @@ -6295,7 +6295,7 @@ define i64 @test_psubusw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubusw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubusw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubusw: @@ -6317,7 +6317,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] ; GENERIC-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_psubw: @@ -6338,7 +6338,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: psubw %mm1, %mm0 # sched: [3:1.00] ; SANDY-NEXT: psubw (%rdi), %mm0 # sched: [8:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_psubw: @@ -6373,7 +6373,7 @@ define i64 @test_psubw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: psubw %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: psubw (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_psubw: @@ -6395,7 +6395,7 @@ define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckhbw %mm1, %mm0 # mm0 = 
mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] ; GENERIC-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckhbw: @@ -6416,7 +6416,7 @@ define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:1.00] ; SANDY-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhbw: @@ -6451,7 +6451,7 @@ define i64 @test_punpckhbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: punpckhbw %mm1, %mm0 # mm0 = mm0[4],mm1[4],mm0[5],mm1[5],mm0[6],mm1[6],mm0[7],mm1[7] sched: [1:0.50] ; BTVER2-NEXT: punpckhbw (%rdi), %mm0 # mm0 = mm0[4],mem[4],mm0[5],mem[5],mm0[6],mem[6],mm0[7],mem[7] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_punpckhbw: @@ -6473,7 +6473,7 @@ define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] ; GENERIC-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckhdq: @@ -6494,7 +6494,7 @@ define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: punpckhdq %mm1, 
%mm0 # mm0 = mm0[1],mm1[1] sched: [1:1.00] ; SANDY-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhdq: @@ -6529,7 +6529,7 @@ define i64 @test_punpckhdq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: punpckhdq %mm1, %mm0 # mm0 = mm0[1],mm1[1] sched: [1:0.50] ; BTVER2-NEXT: punpckhdq (%rdi), %mm0 # mm0 = mm0[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_punpckhdq: @@ -6551,7 +6551,7 @@ define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] ; GENERIC-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckhwd: @@ -6572,7 +6572,7 @@ define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] ; SANDY-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckhwd: @@ -6607,7 +6607,7 @@ define i64 @test_punpckhwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: punpckhwd %mm1, %mm0 # mm0 = mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] ; BTVER2-NEXT: punpckhwd (%rdi), %mm0 # mm0 = mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: 
[1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_punpckhwd: @@ -6629,7 +6629,7 @@ define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] ; GENERIC-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpcklbw: @@ -6650,7 +6650,7 @@ define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:1.00] ; SANDY-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklbw: @@ -6685,7 +6685,7 @@ define i64 @test_punpcklbw(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: punpcklbw %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1],mm0[2],mm1[2],mm0[3],mm1[3] sched: [1:0.50] ; BTVER2-NEXT: punpcklbw (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1],mm0[2],mem[2],mm0[3],mem[3] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_punpcklbw: @@ -6707,7 +6707,7 @@ define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] ; GENERIC-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; GENERIC-NEXT: movq 
%mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpckldq: @@ -6728,7 +6728,7 @@ define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:1.00] ; SANDY-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpckldq: @@ -6763,7 +6763,7 @@ define i64 @test_punpckldq(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: punpckldq %mm1, %mm0 # mm0 = mm0[0],mm1[0] sched: [1:0.50] ; BTVER2-NEXT: punpckldq (%rdi), %mm0 # mm0 = mm0[0],mem[0] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_punpckldq: @@ -6785,7 +6785,7 @@ define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] ; GENERIC-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_punpcklwd: @@ -6806,7 +6806,7 @@ define i64 @test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:1.00] ; SANDY-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_punpcklwd: @@ -6841,7 +6841,7 @@ define i64 
@test_punpcklwd(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: punpcklwd %mm1, %mm0 # mm0 = mm0[0],mm1[0],mm0[1],mm1[1] sched: [1:0.50] ; BTVER2-NEXT: punpcklwd (%rdi), %mm0 # mm0 = mm0[0],mem[0],mm0[1],mem[1] sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_punpcklwd: @@ -6863,7 +6863,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; GENERIC: # %bb.0: ; GENERIC-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] ; GENERIC-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; GENERIC-NEXT: movq %mm0, %rax # sched: [1:0.33] +; GENERIC-NEXT: movq %mm0, %rax # sched: [2:1.00] ; GENERIC-NEXT: retq # sched: [1:1.00] ; ; ATOM-LABEL: test_pxor: @@ -6884,7 +6884,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; SANDY: # %bb.0: ; SANDY-NEXT: pxor %mm1, %mm0 # sched: [1:0.33] ; SANDY-NEXT: pxor (%rdi), %mm0 # sched: [6:0.50] -; SANDY-NEXT: movq %mm0, %rax # sched: [1:0.33] +; SANDY-NEXT: movq %mm0, %rax # sched: [2:1.00] ; SANDY-NEXT: retq # sched: [1:1.00] ; ; HASWELL-LABEL: test_pxor: @@ -6919,7 +6919,7 @@ define i64 @test_pxor(x86_mmx %a0, x86_mmx %a1, x86_mmx* %a2) optsize { ; BTVER2: # %bb.0: ; BTVER2-NEXT: pxor %mm1, %mm0 # sched: [1:0.50] ; BTVER2-NEXT: pxor (%rdi), %mm0 # sched: [6:1.00] -; BTVER2-NEXT: movq %mm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: movq %mm0, %rax # sched: [4:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-LABEL: test_pxor: diff --git a/llvm/test/CodeGen/X86/sse2-schedule.ll b/llvm/test/CodeGen/X86/sse2-schedule.ll index 639360df2ef..346b41a604f 100644 --- a/llvm/test/CodeGen/X86/sse2-schedule.ll +++ b/llvm/test/CodeGen/X86/sse2-schedule.ll @@ -4608,21 +4608,21 @@ define i32 @test_movd(<4 x i32> %a0, i32 %a1, i32 *%a2) { ; BTVER2-SSE-LABEL: test_movd: ; BTVER2-SSE: # %bb.0: ; BTVER2-SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: 
[5:1.00] -; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movd %edi, %xmm1 # sched: [8:0.50] +; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [4:1.00] ; BTVER2-SSE-NEXT: paddd %xmm0, %xmm1 # sched: [1:0.50] ; BTVER2-SSE-NEXT: movd %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: paddd %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movd %xmm2, %eax # sched: [1:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_movd: ; BTVER2: # %bb.0: +; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [8:0.50] ; BTVER2-NEXT: vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero sched: [5:1.00] -; BTVER2-NEXT: vmovd %edi, %xmm1 # sched: [1:0.50] ; BTVER2-NEXT: vpaddd %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] ; BTVER2-NEXT: vpaddd %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [1:0.50] +; BTVER2-NEXT: vmovd %xmm0, %eax # sched: [4:1.00] +; BTVER2-NEXT: vmovd %xmm1, (%rsi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_movd: @@ -4789,31 +4789,31 @@ define i64 @test_movd_64(<2 x i64> %a0, i64 %a1, i64 *%a2) { ; BTVER2-SSE-LABEL: test_movd_64: ; BTVER2-SSE: # %bb.0: ; BTVER2-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] -; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %rdi, %xmm1 # sched: [8:0.50] +; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] +; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [4:1.00] ; BTVER2-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.50] ; BTVER2-SSE-NEXT: movq %xmm1, (%rsi) # sched: [2:1.00] -; BTVER2-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.50] -; BTVER2-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.50] ; BTVER2-SSE-NEXT: retq # sched: [4:1.00] ; ; BTVER2-LABEL: test_movd_64: ; BTVER2: # %bb.0: +; BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [8:0.50] ; BTVER2-NEXT: vmovq {{.*#+}} xmm2 = mem[0],zero sched: [5:1.00] -; 
BTVER2-NEXT: vmovq %rdi, %xmm1 # sched: [1:0.50] ; BTVER2-NEXT: vpaddq %xmm1, %xmm0, %xmm1 # sched: [1:0.50] -; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00] ; BTVER2-NEXT: vpaddq %xmm2, %xmm0, %xmm0 # sched: [1:0.50] -; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [1:0.50] +; BTVER2-NEXT: vmovq %xmm0, %rax # sched: [4:1.00] +; BTVER2-NEXT: vmovq %xmm1, (%rsi) # sched: [2:1.00] ; BTVER2-NEXT: retq # sched: [4:1.00] ; ; ZNVER1-SSE-LABEL: test_movd_64: ; ZNVER1-SSE: # %bb.0: ; ZNVER1-SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero sched: [8:0.50] -; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %rdi, %xmm1 # sched: [3:1.00] ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm1 # sched: [1:0.25] ; ZNVER1-SSE-NEXT: movq %xmm1, (%rsi) # sched: [1:0.50] ; ZNVER1-SSE-NEXT: paddq %xmm0, %xmm2 # sched: [1:0.25] -; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [1:0.25] +; ZNVER1-SSE-NEXT: movq %xmm2, %rax # sched: [2:1.00] ; ZNVER1-SSE-NEXT: retq # sched: [1:0.50] ; ; ZNVER1-LABEL: test_movd_64: diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s index a1d01e4826e..11c5d28a974 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-avx1.s @@ -1252,9 +1252,9 @@ vzeroupper # CHECK-NEXT: 2 1 1.00 vmovaps %ymm0, %ymm2 # CHECK-NEXT: 1 1 1.00 * vmovaps %ymm0, (%rax) # CHECK-NEXT: 1 5 1.00 * vmovaps (%rax), %ymm2 -# CHECK-NEXT: 1 1 0.50 vmovd %eax, %xmm2 +# CHECK-NEXT: 2 8 0.50 vmovd %eax, %xmm2 # CHECK-NEXT: 1 5 1.00 * vmovd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 vmovd %xmm0, %ecx +# CHECK-NEXT: 1 4 1.00 vmovd %xmm0, %ecx # CHECK-NEXT: 1 2 1.00 * vmovd %xmm0, (%rax) # CHECK-NEXT: 1 1 0.50 vmovddup %xmm0, %xmm2 # CHECK-NEXT: 1 6 1.00 * vmovddup (%rax), %xmm2 @@ -1295,9 +1295,9 @@ vzeroupper # CHECK-NEXT: 1 3 1.00 * vmovntps %xmm0, (%rax) # CHECK-NEXT: 1 3 2.00 * vmovntps %ymm0, (%rax) # CHECK-NEXT: 1 1 0.50 vmovq %xmm0, %xmm2 -# CHECK-NEXT: 1 1 
0.50 vmovq %rax, %xmm2 +# CHECK-NEXT: 2 8 0.50 vmovq %rax, %xmm2 # CHECK-NEXT: 1 5 1.00 * vmovq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 vmovq %xmm0, %rcx +# CHECK-NEXT: 1 4 1.00 vmovq %xmm0, %rcx # CHECK-NEXT: 1 2 1.00 * vmovq %xmm0, (%rax) # CHECK-NEXT: 1 1 0.50 vmovsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 1 2 1.00 * vmovsd %xmm0, (%rax) @@ -1720,7 +1720,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 48.00 2.00 - 350.50 910.50 399.00 421.00 382.00 - 43.00 132.00 119.50 119.50 38.00 +# CHECK-NEXT: 48.00 - - 353.50 911.50 402.00 422.00 382.00 - 43.00 132.00 119.50 119.50 38.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -1958,9 +1958,9 @@ vzeroupper # CHECK-NEXT: - - - 1.00 1.00 1.00 1.00 - - - - - - - vmovaps %ymm0, %ymm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovaps %ymm0, (%rax) # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vmovaps (%rax), %ymm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - vmovd %eax, %xmm2 +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vmovd %eax, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vmovd (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - vmovd %xmm0, %ecx +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - vmovd %xmm0, %ecx # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovd %xmm0, (%rax) # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vmovddup %xmm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - vmovddup (%rax), %xmm2 @@ -2001,9 +2001,9 @@ vzeroupper # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovntps %xmm0, (%rax) # CHECK-NEXT: - - - - - - 2.00 - - 2.00 2.00 - - - vmovntps %ymm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - vmovq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - vmovq %rax, %xmm2 +# CHECK-NEXT: - - - 0.50 
0.50 0.50 0.50 - - - - - - - vmovq %rax, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - vmovq (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - vmovq %xmm0, %rcx +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - vmovq %xmm0, %rcx # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovq %xmm0, (%rax) # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - vmovsd %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - vmovsd %xmm0, (%rax) diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s index 73915e59320..9da712861c8 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-mmx.s @@ -165,13 +165,13 @@ pxor (%rax), %mm2 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 1 2 0.50 * * * emms -# CHECK-NEXT: 1 1 0.50 movd %eax, %mm2 +# CHECK-NEXT: 2 8 0.50 movd %eax, %mm2 # CHECK-NEXT: 1 5 1.00 * movd (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 movd %mm0, %ecx +# CHECK-NEXT: 1 4 1.00 movd %mm0, %ecx # CHECK-NEXT: 1 2 1.00 * * movd %mm0, (%rax) -# CHECK-NEXT: 1 1 0.50 movq %rax, %mm2 +# CHECK-NEXT: 2 8 0.50 movq %rax, %mm2 # CHECK-NEXT: 1 5 1.00 * movq (%rax), %mm2 -# CHECK-NEXT: 1 1 0.50 movq %mm0, %rcx +# CHECK-NEXT: 1 4 1.00 movq %mm0, %rcx # CHECK-NEXT: 1 2 1.00 * movq %mm0, (%rax) # CHECK-NEXT: 1 1 0.50 packsswb %mm0, %mm2 # CHECK-NEXT: 1 6 1.00 * packsswb (%rax), %mm2 @@ -288,18 +288,18 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 2.00 2.00 - 0.50 0.50 52.50 48.50 46.00 - 2.00 2.00 46.00 46.00 6.00 +# CHECK-NEXT: 2.00 - - 3.50 1.50 55.50 49.50 46.00 - 2.00 2.00 46.00 46.00 6.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - emms -# CHECK-NEXT: 0.50 0.50 
- - - - - - - - - - - - movd %eax, %mm2 +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movd %eax, %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - movd (%rax), %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movd %mm0, %ecx +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - movd %mm0, %ecx # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movd %mm0, (%rax) -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movq %rax, %mm2 +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movq %rax, %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - movq (%rax), %mm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movq %mm0, %rcx +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - movq %mm0, %rcx # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movq %mm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - packsswb %mm0, %mm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - packsswb (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s index 32fd929ad75..67c7a5b14ec 100644 --- a/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/BtVer2/resources-sse2.s @@ -472,9 +472,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 movapd %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * movapd %xmm0, (%rax) # CHECK-NEXT: 1 5 1.00 * movapd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 movd %eax, %xmm2 +# CHECK-NEXT: 2 8 0.50 movd %eax, %xmm2 # CHECK-NEXT: 1 5 1.00 * movd (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 movd %xmm0, %ecx +# CHECK-NEXT: 1 4 1.00 movd %xmm0, %ecx # CHECK-NEXT: 1 2 1.00 * movd %xmm0, (%rax) # CHECK-NEXT: 1 1 0.50 movdqa %xmm0, %xmm2 # CHECK-NEXT: 1 1 1.00 * movdqa %xmm0, (%rax) @@ -493,9 +493,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 2 1.00 * movntdq %xmm0, (%rax) # CHECK-NEXT: 1 3 1.00 * movntpd %xmm0, (%rax) # CHECK-NEXT: 1 1 0.50 movq %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.50 movq %rax, %xmm2 +# CHECK-NEXT: 2 8 0.50 movq %rax, 
%xmm2 # CHECK-NEXT: 1 5 1.00 * movq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.50 movq %xmm0, %rcx +# CHECK-NEXT: 1 4 1.00 movq %xmm0, %rcx # CHECK-NEXT: 1 2 1.00 * movq %xmm0, (%rax) # CHECK-NEXT: 1 1 0.50 movq2dq %mm0, %xmm2 # CHECK-NEXT: 1 1 0.50 movsd %xmm0, %xmm2 @@ -685,7 +685,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] -# CHECK-NEXT: 17.00 2.00 - 46.00 203.00 116.50 139.50 117.00 - 15.00 54.00 66.50 66.50 12.00 +# CHECK-NEXT: 17.00 - - 49.00 204.00 119.50 140.50 117.00 - 15.00 54.00 66.50 66.50 12.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] [12] [13] Instructions: @@ -759,9 +759,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movapd %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movapd %xmm0, (%rax) # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 1.00 - - - - - - movapd (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movd %eax, %xmm2 +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movd %eax, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - - - 0.50 0.50 - movd (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movd %xmm0, %ecx +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - movd %xmm0, %ecx # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movd %xmm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - movdqa %xmm0, %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movdqa %xmm0, (%rax) @@ -780,9 +780,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntdq %xmm0, (%rax) # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movntpd %xmm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - movq %xmm0, %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movq %rax, %xmm2 +# CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movq %rax, %xmm2 # CHECK-NEXT: - - - - - 0.50 0.50 1.00 - 
- - 0.50 0.50 - movq (%rax), %xmm2 -# CHECK-NEXT: 0.50 0.50 - - - - - - - - - - - - movq %xmm0, %rcx +# CHECK-NEXT: 1.00 - - 1.00 - 1.00 - - - - - - - - movq %xmm0, %rcx # CHECK-NEXT: - - - - - - 1.00 - - 1.00 1.00 - - - movq %xmm0, (%rax) # CHECK-NEXT: - - - - - 0.50 0.50 - - - - 0.50 0.50 - movq2dq %mm0, %xmm2 # CHECK-NEXT: - - - 0.50 0.50 0.50 0.50 - - - - - - - movsd %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s index 83816002578..f956f81db92 100644 --- a/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/SandyBridge/resources-mmx.s @@ -165,13 +165,13 @@ pxor (%rax), %mm2 # CHECK: [1] [2] [3] [4] [5] [6] Instructions: # CHECK-NEXT: 31 31 10.33 * * * emms -# CHECK-NEXT: 1 1 0.33 movd %eax, %mm2 +# CHECK-NEXT: 1 1 1.00 movd %eax, %mm2 # CHECK-NEXT: 1 5 0.50 * movd (%rax), %mm2 -# CHECK-NEXT: 1 1 0.33 movd %mm0, %ecx +# CHECK-NEXT: 1 2 1.00 movd %mm0, %ecx # CHECK-NEXT: 1 1 1.00 * * movd %mm0, (%rax) -# CHECK-NEXT: 1 1 0.33 movq %rax, %mm2 +# CHECK-NEXT: 1 1 1.00 movq %rax, %mm2 # CHECK-NEXT: 1 5 0.50 * movq (%rax), %mm2 -# CHECK-NEXT: 1 1 0.33 movq %mm0, %rcx +# CHECK-NEXT: 1 2 1.00 movq %mm0, %rcx # CHECK-NEXT: 1 1 1.00 * movq %mm0, (%rax) # CHECK-NEXT: 1 1 1.00 packsswb %mm0, %mm2 # CHECK-NEXT: 2 6 1.00 * packsswb (%rax), %mm2 @@ -282,18 +282,18 @@ pxor (%rax), %mm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] -# CHECK-NEXT: - - 20.33 54.33 2.00 56.33 24.00 24.00 +# CHECK-NEXT: - - 21.00 53.00 2.00 57.00 24.00 24.00 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6.0] [6.1] Instructions: # CHECK-NEXT: - - 10.33 10.33 - 10.33 - - emms -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movd %eax, %mm2 +# CHECK-NEXT: - - - - - 1.00 - - movd %eax, %mm2 # CHECK-NEXT: - - - - - - 0.50 0.50 movd (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movd %mm0, %ecx 
+# CHECK-NEXT: - - 1.00 - - - - - movd %mm0, %ecx # CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movd %mm0, (%rax) -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %rax, %mm2 +# CHECK-NEXT: - - - - - 1.00 - - movq %rax, %mm2 # CHECK-NEXT: - - - - - - 0.50 0.50 movq (%rax), %mm2 -# CHECK-NEXT: - - 0.33 0.33 - 0.33 - - movq %mm0, %rcx +# CHECK-NEXT: - - 1.00 - - - - - movq %mm0, %rcx # CHECK-NEXT: - - - - 1.00 - 0.50 0.50 movq %mm0, (%rax) # CHECK-NEXT: - - - - - 1.00 - - packsswb %mm0, %mm2 # CHECK-NEXT: - - - - - 1.00 0.50 0.50 packsswb (%rax), %mm2 diff --git a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s index f9ef3d366fa..a9cc858cb41 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver1/resources-sse2.s @@ -493,9 +493,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 1 1 0.50 * movntdq %xmm0, (%rax) # CHECK-NEXT: 1 1 0.50 * movntpd %xmm0, (%rax) # CHECK-NEXT: 1 1 0.25 movq %xmm0, %xmm2 -# CHECK-NEXT: 1 1 0.25 movq %rax, %xmm2 +# CHECK-NEXT: 1 3 1.00 movq %rax, %xmm2 # CHECK-NEXT: 1 8 0.50 * movq (%rax), %xmm2 -# CHECK-NEXT: 1 1 0.25 movq %xmm0, %rcx +# CHECK-NEXT: 1 2 1.00 movq %xmm0, %rcx # CHECK-NEXT: 1 1 0.50 * movq %xmm0, (%rax) # CHECK-NEXT: 1 1 0.25 movq2dq %mm0, %xmm2 # CHECK-NEXT: 1 1 0.50 movsd %xmm0, %xmm2 @@ -683,7 +683,7 @@ xorpd (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] -# CHECK-NEXT: 65.50 65.50 0.50 0.50 0.50 0.50 - 72.42 39.92 69.25 153.42 - +# CHECK-NEXT: 65.50 65.50 - - - - - 72.42 39.92 71.25 153.42 - # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [10] [11] Instructions: @@ -778,9 +778,9 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movntdq %xmm0, (%rax) # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movntpd %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - movq %xmm0, %xmm2 -# CHECK-NEXT: - - 
0.25 0.25 0.25 0.25 - - - - - - movq %rax, %xmm2 +# CHECK-NEXT: - - - - - - - - - 1.00 - - movq %rax, %xmm2 # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movq (%rax), %xmm2 -# CHECK-NEXT: - - 0.25 0.25 0.25 0.25 - - - - - - movq %xmm0, %rcx +# CHECK-NEXT: - - - - - - - - - 1.00 - - movq %xmm0, %rcx # CHECK-NEXT: 0.50 0.50 - - - - - - - - - - movq %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 0.25 0.25 0.25 0.25 - movq2dq %mm0, %xmm2 # CHECK-NEXT: - - - - - - - - 0.50 0.50 - - movsd %xmm0, %xmm2 |