summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 20
-rw-r--r-- llvm/test/CodeGen/AMDGPU/call-argument-types.ll | 22
-rw-r--r-- llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll | 36
-rw-r--r-- llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll | 3
-rw-r--r-- llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll | 6
-rw-r--r-- llvm/test/CodeGen/AMDGPU/inline-asm.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll | 2
-rw-r--r-- llvm/test/CodeGen/AMDGPU/misched-killflags.mir | 12
-rw-r--r-- llvm/test/CodeGen/AMDGPU/nested-calls.ll | 4
-rw-r--r-- llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll | 12
-rw-r--r-- llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll | 6
-rw-r--r-- llvm/test/CodeGen/ARM/Windows/chkstk.ll | 6
-rw-r--r-- llvm/test/CodeGen/ARM/Windows/memset.ll | 4
-rw-r--r-- llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll | 2
-rw-r--r-- llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll | 28
-rw-r--r-- llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll | 4
-rw-r--r-- llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll | 2
-rw-r--r-- llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll | 4
-rw-r--r-- llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll | 4
-rw-r--r-- llvm/test/CodeGen/ARM/fp16-instructions.ll | 4
-rw-r--r-- llvm/test/CodeGen/ARM/select.ll | 2
-rw-r--r-- llvm/test/CodeGen/ARM/twoaddrinstr.ll | 4
-rw-r--r-- llvm/test/CodeGen/ARM/vcombine.ll | 8
-rw-r--r-- llvm/test/CodeGen/ARM/vuzp.ll | 242
-rw-r--r-- llvm/test/CodeGen/SystemZ/misched-readadvances.mir | 31
-rw-r--r-- llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll | 4
-rw-r--r-- llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll | 4
-rw-r--r-- llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll | 8
-rw-r--r-- llvm/test/CodeGen/X86/memset.ll | 2
-rw-r--r-- llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll | 4
-rw-r--r-- llvm/test/CodeGen/X86/schedule-x86-64-shld.ll | 8
-rw-r--r-- llvm/test/CodeGen/X86/schedule-x86_32.ll | 10
32 files changed, 278 insertions, 234 deletions
diff --git a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
index 346f82ff95f..99406ed1496 100644
--- a/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -234,6 +234,11 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
// Ask the target if address-backscheduling is desirable, and if so how much.
const TargetSubtargetInfo &ST = MF.getSubtarget();
+ // Only use any non-zero latency for real defs/uses, in contrast to
+ // "fake" operands added by regalloc.
+ const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
+ bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
+ !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
Alias.isValid(); ++Alias) {
if (!Uses.contains(*Alias))
@@ -257,11 +262,18 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
Dep = SDep(SU, SDep::Data, *Alias);
RegUse = UseSU->getInstr();
}
- Dep.setLatency(
- SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse,
- UseOp));
+ const MCInstrDesc *UseMIDesc =
+ (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
+ bool ImplicitPseudoUse =
+ (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
+ !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
+ if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
+ Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
+ RegUse, UseOp));
+ ST.adjustSchedDependency(SU, UseSU, Dep);
+ } else
+ Dep.setLatency(0);
- ST.adjustSchedDependency(SU, UseSU, Dep);
UseSU->addPred(Dep);
}
}
diff --git a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
index 581df1c8527..84d327b6f37 100644
--- a/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-argument-types.ll
@@ -61,11 +61,11 @@ declare void @external_void_func_v16i8(<16 x i8>) #0
; MESA-DAG: s_mov_b64 s[0:1], s[36:37]
+; GCN: v_mov_b32_e32 v0, 1{{$}}
+; MESA-DAG: s_mov_b64 s[2:3], s[38:39]
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i1@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i1@rel32@hi+4
-; GCN-DAG: v_mov_b32_e32 v0, 1{{$}}
-; MESA-DAG: s_mov_b64 s[2:3], s[38:39]
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
; GCN-NEXT: s_endpgm
@@ -123,12 +123,12 @@ define amdgpu_kernel void @test_call_external_void_func_i1_zeroext(i32) #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i8_imm:
; MESA-DAG: s_mov_b32 s33, s3{{$}}
+; GCN: v_mov_b32_e32 v0, 0x7b
+; HSA-DAG: s_mov_b32 s4, s33{{$}}
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8@rel32@hi+4
-; GCN-NEXT: v_mov_b32_e32 v0, 0x7b
-; HSA-DAG: s_mov_b32 s4, s33{{$}}
; GCN-DAG: s_mov_b32 s32, s33{{$}}
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
@@ -144,11 +144,11 @@ define amdgpu_kernel void @test_call_external_void_func_i8_imm(i32) #0 {
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; GCN-DAG: buffer_load_sbyte v0
+; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_signext@rel32@hi+4
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s3
; GCN: s_waitcnt vmcnt(0)
@@ -165,11 +165,11 @@ define amdgpu_kernel void @test_call_external_void_func_i8_signext(i32) #0 {
; HSA-DAG: s_mov_b32 s33, s9{{$}}
; GCN-DAG: buffer_load_ubyte v0
+; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i8_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i8_zeroext@rel32@hi+4
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_waitcnt vmcnt(0)
@@ -197,11 +197,11 @@ define amdgpu_kernel void @test_call_external_void_func_i16_imm() #0 {
; MESA-DAG: s_mov_b32 s33, s3{{$}}
; GCN-DAG: buffer_load_sshort v0
+; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_signext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_signext@rel32@hi+4
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_waitcnt vmcnt(0)
@@ -218,11 +218,11 @@ define amdgpu_kernel void @test_call_external_void_func_i16_signext(i32) #0 {
; GCN-DAG: buffer_load_ushort v0
+; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i16_zeroext@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i16_zeroext@rel32@hi+4
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_waitcnt vmcnt(0)
@@ -237,11 +237,11 @@ define amdgpu_kernel void @test_call_external_void_func_i16_zeroext(i32) #0 {
; GCN-LABEL: {{^}}test_call_external_void_func_i32_imm:
; MESA-DAG: s_mov_b32 s33, s3{{$}}
+; GCN: v_mov_b32_e32 v0, 42
+; GCN: s_mov_b32 s4, s33
; GCN: s_getpc_b64 s{{\[}}[[PC_LO:[0-9]+]]:[[PC_HI:[0-9]+]]{{\]}}
; GCN-NEXT: s_add_u32 s[[PC_LO]], s[[PC_LO]], external_void_func_i32@rel32@lo+4
; GCN-NEXT: s_addc_u32 s[[PC_HI]], s[[PC_HI]], external_void_func_i32@rel32@hi+4
-; GCN: v_mov_b32_e32 v0, 42
-; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64 s[30:31], s{{\[}}[[PC_LO]]:[[PC_HI]]{{\]}}
@@ -481,10 +481,10 @@ define amdgpu_kernel void @test_call_external_void_func_v2i32_imm() #0 {
; HSA-DAG: s_mov_b32 s33, s9
; MESA-DAG: s_mov_b32 s33, s3{{$}}
+; GCN-NOT: v3
; GCN-DAG: v_mov_b32_e32 v0, 3
; GCN-DAG: v_mov_b32_e32 v1, 4
; GCN-DAG: v_mov_b32_e32 v2, 5
-; GCN-NOT: v3
; GCN: s_swappc_b64
define amdgpu_kernel void @test_call_external_void_func_v3i32_imm(i32) #0 {
diff --git a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
index 6d1e2467d08..57bc6171d7a 100644
--- a/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
+++ b/llvm/test/CodeGen/AMDGPU/call-preserved-registers.ll
@@ -6,10 +6,10 @@ declare void @external_void_func_void() #0
; GCN-LABEL: {{^}}test_kernel_call_external_void_func_void_clobber_s30_s31_call_external_void_func_void:
; GCN: s_mov_b32 s33, s7
-; GCN: s_getpc_b64 s[34:35]
+; GCN: s_mov_b32 s4, s33
+; GCN-NEXT: s_getpc_b64 s[34:35]
; GCN-NEXT: s_add_u32 s34, s34,
; GCN-NEXT: s_addc_u32 s35, s35,
-; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: s_mov_b32 s32, s33
; GCN: s_swappc_b64 s[30:31], s[34:35]
@@ -129,13 +129,13 @@ define amdgpu_kernel void @test_call_void_func_void_mayclobber_v31(i32 addrspace
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_s33:
; GCN: s_mov_b32 s34, s9
-; GCN: ; def s33
-; GCN-NEXT: #ASMEND
-; GCN: s_getpc_b64 s[6:7]
-; GCN-NEXT: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
-; GCN-NEXT: s_mov_b32 s4, s34
-; GCN-NEXT: s_mov_b32 s32, s34
+; GCN: s_mov_b32 s4, s34
+; GCN-DAG: s_mov_b32 s32, s34
+; GCN-DAG: ; def s33
+; GCN-DAG: #ASMEND
+; GCN-DAG: s_getpc_b64 s[6:7]
+; GCN-DAG: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
+; GCN-DAG: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use s33
@@ -150,13 +150,13 @@ define amdgpu_kernel void @test_call_void_func_void_preserves_s33(i32 addrspace(
; GCN-LABEL: {{^}}test_call_void_func_void_preserves_v32:
; GCN: s_mov_b32 s33, s9
-; GCN: ; def v32
-; GCN-NEXT: #ASMEND
-; GCN: s_getpc_b64 s[6:7]
-; GCN-NEXT: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
-; GCN-NEXT: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
-; GCN-NEXT: s_mov_b32 s4, s33
-; GCN-NEXT: s_mov_b32 s32, s33
+; GCN: s_mov_b32 s4, s33
+; GCN-DAG: s_mov_b32 s32, s33
+; GCN-DAG: ; def v32
+; GCN-DAG: #ASMEND
+; GCN-DAG: s_getpc_b64 s[6:7]
+; GCN-DAG: s_add_u32 s6, s6, external_void_func_void@rel32@lo+4
+; GCN-DAG: s_addc_u32 s7, s7, external_void_func_void@rel32@hi+4
; GCN-NEXT: s_swappc_b64 s[30:31], s[6:7]
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use v32
@@ -183,10 +183,10 @@ define void @void_func_void_clobber_s33() #2 {
; GCN-LABEL: {{^}}test_call_void_func_void_clobber_s33:
; GCN: s_mov_b32 s33, s7
-; GCN: s_getpc_b64
+; GCN: s_mov_b32 s4, s33
+; GCN-NEXT: s_getpc_b64
; GCN-NEXT: s_add_u32
; GCN-NEXT: s_addc_u32
-; GCN-NEXT: s_mov_b32 s4, s33
; GCN-NEXT: s_mov_b32 s32, s33
; GCN: s_swappc_b64
; GCN-NEXT: s_endpgm
diff --git a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
index 907575c1ba8..e5c18062708 100644
--- a/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
+++ b/llvm/test/CodeGen/AMDGPU/callee-special-input-sgprs.ll
@@ -558,7 +558,8 @@ define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32
-; GCN: s_add_u32 s32, s32, 0x400
+
+; GCN-DAG: s_add_u32 s32, s32, 0x400
; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
index 31199b47e20..8e02303377c 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-addressing-si.ll
@@ -386,9 +386,9 @@ bb2:
; GCN-DAG: v_mov_b32 [[INS0:v[0-9]+]], 62
; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT3:[0-9]+]], s[[S_ELT3]]
-; GCN: v_mov_b32_e32 v[[VEC_ELT2:[0-9]+]], s{{[0-9]+}}
-; GCN: v_mov_b32_e32 v[[VEC_ELT1:[0-9]+]], s{{[0-9]+}}
-; GCN: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
+; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT2:[0-9]+]], s{{[0-9]+}}
+; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT1:3]], s{{[0-9]+}}
+; GCN-DAG: v_mov_b32_e32 v[[VEC_ELT0:[0-9]+]], s[[S_ELT0]]
; GCN: [[LOOP0:BB[0-9]+_[0-9]+]]:
; GCN-NEXT: s_waitcnt vmcnt(0)
diff --git a/llvm/test/CodeGen/AMDGPU/inline-asm.ll b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
index 2856212bc89..a0563cdd319 100644
--- a/llvm/test/CodeGen/AMDGPU/inline-asm.ll
+++ b/llvm/test/CodeGen/AMDGPU/inline-asm.ll
@@ -186,8 +186,8 @@ entry:
; FIXME: Should not have intermediate sgprs
; CHECK-LABEL: {{^}}i64_imm_input_phys_vgpr:
-; CHECK: s_mov_b32 s1, 0
-; CHECK: s_mov_b32 s0, 0x1e240
+; CHECK-DAG: s_mov_b32 s1, 0
+; CHECK-DAG: s_mov_b32 s0, 0x1e240
; CHECK: v_mov_b32_e32 v0, s0
; CHECK: v_mov_b32_e32 v1, s1
; CHECK: use v[0:1]
diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
index a62ad820c89..692696ff730 100644
--- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.ll
@@ -352,7 +352,7 @@ endif:
; GCN-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000
; GCN-DAG: s_mov_b32 m0, [[SCALEDIDX]]
-; GCN: v_movreld_b32_e32 v{{[0-9]+}}, 0
+; GCN-DAG: v_movreld_b32_e32 v{{[0-9]+}}, 0
; Increment to next element folded into base register, but FileCheck
; can't do math expressions
diff --git a/llvm/test/CodeGen/AMDGPU/misched-killflags.mir b/llvm/test/CodeGen/AMDGPU/misched-killflags.mir
index 811ef0d1375..0c58042d508 100644
--- a/llvm/test/CodeGen/AMDGPU/misched-killflags.mir
+++ b/llvm/test/CodeGen/AMDGPU/misched-killflags.mir
@@ -26,20 +26,20 @@ body: |
S_ENDPGM
...
# CHECK-LABEL: name: func0
-# CHECK: $sgpr10 = S_MOV_B32 5
-# CHECK: $sgpr9 = S_MOV_B32 4
-# CHECK: $sgpr8 = S_MOV_B32 3
-# CHECK: $sgpr33 = S_MOV_B32 killed $sgpr7
+# CHECK-DAG: $sgpr10 = S_MOV_B32 5
+# CHECK-DAG: $sgpr9 = S_MOV_B32 4
+# CHECK-DAG: $sgpr8 = S_MOV_B32 3
+# CHECK-DAG: $sgpr33 = S_MOV_B32 killed $sgpr7
# CHECK: $vgpr0 = V_MOV_B32_e32 $sgpr8, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $sgpr8_sgpr9_sgpr10_sgpr11
+# CHECK: $sgpr32 = S_MOV_B32 $sgpr33
# CHECK: BUNDLE implicit-def $sgpr6_sgpr7, implicit-def $sgpr6, implicit-def $sgpr7, implicit-def $scc {
# CHECK: $sgpr6_sgpr7 = S_GETPC_B64
# CHECK: $sgpr6 = S_ADD_U32 internal $sgpr6, 0, implicit-def $scc
# CHECK: $sgpr7 = S_ADDC_U32 internal $sgpr7, 0, implicit-def $scc, implicit internal $scc
# CHECK: }
-# CHECK: $sgpr4 = S_MOV_B32 $sgpr33
+# CHECK: $sgpr4 = S_MOV_B32 killed $sgpr33
# CHECK: $vgpr1 = V_MOV_B32_e32 $sgpr9, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
# CHECK: $vgpr2 = V_MOV_B32_e32 $sgpr10, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11
# CHECK: $vgpr3 = V_MOV_B32_e32 killed $sgpr11, implicit $exec, implicit $sgpr8_sgpr9_sgpr10_sgpr11, implicit $exec
-# CHECK: $sgpr32 = S_MOV_B32 killed $sgpr33
# CHECK: S_NOP 0, implicit killed $sgpr6_sgpr7, implicit $sgpr0_sgpr1_sgpr2_sgpr3, implicit $sgpr4, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
# CHECK: S_ENDPGM
diff --git a/llvm/test/CodeGen/AMDGPU/nested-calls.ll b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
index 462274c65e7..7fbcb9706a8 100644
--- a/llvm/test/CodeGen/AMDGPU/nested-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/nested-calls.ll
@@ -33,8 +33,8 @@ define void @test_func_call_external_void_func_i32_imm() #0 {
; GCN-LABEL: {{^}}test_func_call_external_void_func_i32_imm_stack_use:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
-; GCN: s_add_u32 s32, s32, 0x1400{{$}}
-; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset
+; GCN-DAG: s_add_u32 s32, s32, 0x1400{{$}}
+; GCN-DAG: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset
; GCN: s_swappc_b64
; GCN: s_sub_u32 s32, s32, 0x1400{{$}}
; GCN: s_setpc_b64
diff --git a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
index d4c05fb5682..0d6bb661797 100644
--- a/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
+++ b/llvm/test/CodeGen/AMDGPU/undefined-subreg-liverange.ll
@@ -57,18 +57,18 @@ bb11: ; preds = %bb9
; CHECK-LABEL: {{^}}partially_undef_copy:
; CHECK: v_mov_b32_e32 v5, 5
-; CHECK: v_mov_b32_e32 v6, 6
+; CHECK-DAG: v_mov_b32_e32 v6, 6
-; CHECK: v_mov_b32_e32 v[[OUTPUT_LO:[0-9]+]], v5
+; CHECK-DAG: v_mov_b32_e32 v[[OUTPUT_LO:[0-9]+]], v5
; Undef copy
-; CHECK: v_mov_b32_e32 v1, v6
+; CHECK-DAG: v_mov_b32_e32 v1, v6
; undef copy
-; CHECK: v_mov_b32_e32 v2, v7
+; CHECK-DAG: v_mov_b32_e32 v2, v7
-; CHECK: v_mov_b32_e32 v[[OUTPUT_HI:[0-9]+]], v8
-; CHECK: v_mov_b32_e32 v[[OUTPUT_LO]], v6
+; CHECK-DAG: v_mov_b32_e32 v[[OUTPUT_HI:[0-9]+]], v8
+; CHECK-DAG: v_mov_b32_e32 v[[OUTPUT_LO]], v6
; CHECK: buffer_store_dwordx4 v{{\[}}[[OUTPUT_LO]]:[[OUTPUT_HI]]{{\]}}
define amdgpu_kernel void @partially_undef_copy() #0 {
diff --git a/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll b/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
index 019298d2080..e75df160e00 100644
--- a/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
+++ b/llvm/test/CodeGen/ARM/Windows/chkstk-movw-movt-isel.ll
@@ -19,9 +19,9 @@ entry:
; CHECK-LABEL: isel
; CHECK: push {r4, r5, r6, lr}
-; CHECK: movw r12, #0
-; CHECK: movt r12, #0
-; CHECK: movw r4, #{{\d*}}
+; CHECK-DAG: movw r12, #0
+; CHECK-DAG: movt r12, #0
+; CHECK-DAG: movw r4, #{{\d*}}
; CHECK: blx r12
; CHECK: sub.w sp, sp, r4
diff --git a/llvm/test/CodeGen/ARM/Windows/chkstk.ll b/llvm/test/CodeGen/ARM/Windows/chkstk.ll
index 330c1f45850..8fd41461459 100644
--- a/llvm/test/CodeGen/ARM/Windows/chkstk.ll
+++ b/llvm/test/CodeGen/ARM/Windows/chkstk.ll
@@ -16,9 +16,9 @@ entry:
; CHECK-DEFAULT-CODE-MODEL: sub.w sp, sp, r4
; CHECK-LARGE-CODE-MODEL: check_watermark:
-; CHECK-LARGE-CODE-MODEL: movw r12, :lower16:__chkstk
-; CHECK-LARGE-CODE-MODEL: movt r12, :upper16:__chkstk
-; CHECK-LARGE-CODE-MODEL: movw r4, #1024
+; CHECK-LARGE-CODE-MODEL-DAG: movw r12, :lower16:__chkstk
+; CHECK-LARGE-CODE-MODEL-DAG: movt r12, :upper16:__chkstk
+; CHECK-LARGE-CODE-MODEL-DAG: movw r4, #1024
; CHECK-LARGE-CODE-MODEL: blx r12
; CHECK-LARGE-CODE-MODEL: sub.w sp, sp, r4
diff --git a/llvm/test/CodeGen/ARM/Windows/memset.ll b/llvm/test/CodeGen/ARM/Windows/memset.ll
index c9b22f47a15..8cb257c1566 100644
--- a/llvm/test/CodeGen/ARM/Windows/memset.ll
+++ b/llvm/test/CodeGen/ARM/Windows/memset.ll
@@ -10,9 +10,9 @@ entry:
unreachable
}
-; CHECK: movw r0, :lower16:source
-; CHECK: movt r0, :upper16:source
; CHECK: movs r1, #0
; CHECK: mov.w r2, #512
+; CHECK: movw r0, :lower16:source
+; CHECK: movt r0, :upper16:source
; CHECK: memset
diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 8e38f18c069..b81cf443e53 100644
--- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -162,8 +162,8 @@ define i32 @test_tst_assessment(i32 %a, i32 %b) {
;
; T2-LABEL: test_tst_assessment:
; T2: @ %bb.0:
-; T2-NEXT: lsls r1, r1, #31
; T2-NEXT: and r0, r0, #1
+; T2-NEXT: lsls r1, r1, #31
; T2-NEXT: it ne
; T2-NEXT: subne r0, #1
; T2-NEXT: bx lr
diff --git a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
index c943f60c56d..bf4f1bd0d0c 100644
--- a/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
+++ b/llvm/test/CodeGen/ARM/arm-shrink-wrapping.ll
@@ -104,10 +104,10 @@ declare i32 @doSomething(i32, i32*)
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: subs [[IV]], [[IV]], #1
-; THUMB: subs [[IV]], #1
-; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
-; THUMB-NEXT: add [[SUM]], [[TMP]]
+; ARM: add [[SUM]], [[TMP]], [[SUM]]
+; THUMB: add [[SUM]], [[TMP]]
+; ARM-NEXT: subs [[IV]], [[IV]], #1
+; THUMB-NEXT: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP]]
;
; Next BB.
@@ -169,10 +169,10 @@ declare i32 @something(...)
; Next BB.
; CHECK: [[LOOP_LABEL:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: subs [[IV]], [[IV]], #1
-; THUMB: subs [[IV]], #1
; ARM: add [[SUM]], [[TMP]], [[SUM]]
; THUMB: add [[SUM]], [[TMP]]
+; ARM: subs [[IV]], [[IV]], #1
+; THUMB: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP_LABEL]]
; Next BB.
; CHECK: @ %for.exit
@@ -228,10 +228,10 @@ for.end: ; preds = %for.body
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: subs [[IV]], [[IV]], #1
-; THUMB: subs [[IV]], #1
-; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
-; THUMB-NEXT: add [[SUM]], [[TMP]]
+; ARM: add [[SUM]], [[TMP]], [[SUM]]
+; THUMB: add [[SUM]], [[TMP]]
+; ARM-NEXT: subs [[IV]], [[IV]], #1
+; THUMB-NEXT: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP]]
;
; Next BB.
@@ -307,10 +307,10 @@ declare void @somethingElse(...)
; Next BB.
; CHECK: [[LOOP:LBB[0-9_]+]]: @ %for.body
; CHECK: mov{{(\.w)?}} [[TMP:r[0-9]+]], #1
-; ARM: subs [[IV]], [[IV]], #1
-; THUMB: subs [[IV]], #1
-; ARM-NEXT: add [[SUM]], [[TMP]], [[SUM]]
-; THUMB-NEXT: add [[SUM]], [[TMP]]
+; ARM: add [[SUM]], [[TMP]], [[SUM]]
+; THUMB: add [[SUM]], [[TMP]]
+; ARM-NEXT: subs [[IV]], [[IV]], #1
+; THUMB-NEXT: subs [[IV]], #1
; CHECK-NEXT: bne [[LOOP]]
;
; Next BB.
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll
index 0ae2d5f6f2f..2c0aa98eae0 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm-wrback.ll
@@ -18,9 +18,9 @@
; CHECK-NEXT: Data
; CHECK-SAME: Latency=3
; CHECK-NEXT: Data
-; CHECK-SAME: Latency=3
+; CHECK-SAME: Latency=0
; CHECK-NEXT: Data
-; CHECK-SAME: Latency=4
+; CHECK-SAME: Latency=0
define i32 @bar(i32 %a1, i32 %b1, i32 %c1) minsize optsize {
%1 = load i32, i32* @a, align 4
%2 = load i32, i32* @b, align 4
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll
index bc7a14b1028..02d1c2f55f9 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-ldm.ll
@@ -11,7 +11,7 @@
; CHECK: Data
; CHECK-SAME: Latency=3
; CHECK-NEXT: Data
-; CHECK-SAME: Latency=3
+; CHECK-SAME: Latency=0
define i32 @foo(i32* %a) nounwind optsize {
entry:
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
index b5edcc30422..1baf472ca49 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm-wrback.ll
@@ -20,9 +20,9 @@
; CHECK-NEXT: Data
; CHECK-SAME: Latency=5
; CHECK-NEXT: Data
-; CHECK-SAME: Latency=5
+; CHECK-SAME: Latency=0
; CHECK-NEXT: Data
-; CHECK-SAME: Latency=6
+; CHECK-SAME: Latency=0
define i32 @bar(i32* %iptr) minsize optsize {
%1 = load double, double* @a, align 8
%2 = load double, double* @b, align 8
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll
index 12c7b3270c3..8da133e806e 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vldm.ll
@@ -11,9 +11,9 @@
; CHECK: Data
; CHECK-SAME: Latency=5
; CHECK-NEXT: Data
-; CHECK-SAME: Latency=5
+; CHECK-SAME: Latency=0
; CHECK-NEXT: Data
-; CHECK-SAME: Latency=6
+; CHECK-SAME: Latency=0
define double @foo(double* %a) nounwind optsize {
entry:
diff --git a/llvm/test/CodeGen/ARM/fp16-instructions.ll b/llvm/test/CodeGen/ARM/fp16-instructions.ll
index 6505d2bf673..670fcf58b1e 100644
--- a/llvm/test/CodeGen/ARM/fp16-instructions.ll
+++ b/llvm/test/CodeGen/ARM/fp16-instructions.ll
@@ -935,9 +935,9 @@ entry:
; CHECK-SOFTFP-FP16-T32: vmov [[S6:s[0-9]]], r0
; CHECK-SOFTFP-FP16-T32: vldr s0, .LCP{{.*}}
; CHECK-SOFTFP-FP16-T32: vcvtb.f32.f16 [[S6]], [[S6]]
-; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
-; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
; CHECK-SOFTFP-FP16-T32: vldr [[S4:s[0-9]]], .LCPI{{.*}}
+; CHECK-SOFTFP-FP16-T32: vcmp.f32 [[S6]], s0
+; CHECK-SOFTFP-FP16-T32: vmov.f32 [[S2:s[0-9]]], #-2.000000e+00
; CHECK-SOFTFP-FP16-T32: vmrs APSR_nzcv, fpscr
; CHECK-SOFTFP-FP16-T32: it eq
; CHECK-SOFTFP-FP16-T32: vmoveq.f32 [[S4]], [[S2]]
diff --git a/llvm/test/CodeGen/ARM/select.ll b/llvm/test/CodeGen/ARM/select.ll
index e9394a72073..639b88183cc 100644
--- a/llvm/test/CodeGen/ARM/select.ll
+++ b/llvm/test/CodeGen/ARM/select.ll
@@ -80,8 +80,8 @@ define double @f7(double %a, double %b) {
; block generated, odds are good that we have close to the ideal code for this:
;
; CHECK-NEON-LABEL: f8:
-; CHECK-NEON: movw [[R3:r[0-9]+]], #1123
; CHECK-NEON: adr [[R2:r[0-9]+]], LCPI7_0
+; CHECK-NEON: movw [[R3:r[0-9]+]], #1123
; CHECK-NEON-NEXT: cmp r0, [[R3]]
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: addeq{{.*}} [[R2]], #4
diff --git a/llvm/test/CodeGen/ARM/twoaddrinstr.ll b/llvm/test/CodeGen/ARM/twoaddrinstr.ll
index f0a95c833c6..e8c52e1b58d 100644
--- a/llvm/test/CodeGen/ARM/twoaddrinstr.ll
+++ b/llvm/test/CodeGen/ARM/twoaddrinstr.ll
@@ -4,8 +4,8 @@
define void @PR13378() nounwind {
; This was originally a crasher trying to schedule the instructions.
; CHECK-LABEL: PR13378:
-; CHECK: vld1.32
-; CHECK-NEXT: vmov.i32
+; CHECK: vmov.i32
+; CHECK-NEXT: vld1.32
; CHECK-NEXT: vst1.32
; CHECK-NEXT: vst1.32
; CHECK-NEXT: vmov.f32
diff --git a/llvm/test/CodeGen/ARM/vcombine.ll b/llvm/test/CodeGen/ARM/vcombine.ll
index c08ed81d042..de234b6879e 100644
--- a/llvm/test/CodeGen/ARM/vcombine.ll
+++ b/llvm/test/CodeGen/ARM/vcombine.ll
@@ -39,8 +39,8 @@ define <4 x i32> @vcombine32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
-; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]
+; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
@@ -56,8 +56,8 @@ define <4 x float> @vcombinefloat(<2 x float>* %A, <2 x float>* %B) nounwind {
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
-; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]
+; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-BE: vmov r1, r0, d16
; CHECK-BE: vmov r3, r2, d17
@@ -72,11 +72,11 @@ define <2 x i64> @vcombine64(<1 x i64>* %A, <1 x i64>* %B) nounwind {
; CHECK-DAG: vldr [[LD0:d[0-9]+]], [r0]
; CHECK-DAG: vldr [[LD1:d[0-9]+]], [r1]
-; CHECK-LE: vmov r0, r1, [[LD0]]
; CHECK-LE: vmov r2, r3, [[LD1]]
+; CHECK-LE: vmov r0, r1, [[LD0]]
-; CHECK-BE: vmov r1, r0, [[LD0]]
; CHECK-BE: vmov r3, r2, [[LD1]]
+; CHECK-BE: vmov r1, r0, [[LD0]]
%tmp1 = load <1 x i64>, <1 x i64>* %A
%tmp2 = load <1 x i64>, <1 x i64>* %B
%tmp3 = shufflevector <1 x i64> %tmp1, <1 x i64> %tmp2, <2 x i32> <i32 0, i32 1>
diff --git a/llvm/test/CodeGen/ARM/vuzp.ll b/llvm/test/CodeGen/ARM/vuzp.ll
index 96cafdec7bf..281fe2537a4 100644
--- a/llvm/test/CodeGen/ARM/vuzp.ll
+++ b/llvm/test/CodeGen/ARM/vuzp.ll
@@ -324,23 +324,23 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
; truncate from i32 to i16 and one vmovn.i16 to perform the final truncation for i8.
; CHECK-LABEL: cmpsel_trunc:
; CHECK: @ %bb.0:
-; CHECK-NEXT: add r12, sp, #16
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: add r12, sp, #48
-; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
-; CHECK-NEXT: add r12, sp, #32
-; CHECK-NEXT: vcgt.u32 q8, q10, q8
-; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
-; CHECK-NEXT: vcgt.u32 q9, q10, q9
-; CHECK-NEXT: vmov d20, r2, r3
-; CHECK-NEXT: vmovn.i32 d17, q8
-; CHECK-NEXT: vmovn.i32 d16, q9
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vmovn.i16 d16, q8
-; CHECK-NEXT: vbsl d16, d18, d20
-; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: add r12, sp, #16
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: add r12, sp, #48
+; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
+; CHECK-NEXT: add r12, sp, #32
+; CHECK-NEXT: vcgt.u32 q8, q10, q8
+; CHECK-NEXT: vld1.64 {d20, d21}, [r12]
+; CHECK-NEXT: vcgt.u32 q9, q10, q9
+; CHECK-NEXT: vmov d20, r2, r3
+; CHECK-NEXT: vmovn.i32 d17, q8
+; CHECK-NEXT: vmovn.i32 d16, q9
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vmovn.i16 d16, q8
+; CHECK-NEXT: vbsl d16, d18, d20
+; CHECK-NEXT: vmov r0, r1, d16
; CHECK-NEXT: mov pc, lr
%c = icmp ult <8 x i32> %cmp0, %cmp1
%res = select <8 x i1> %c, <8 x i8> %in0, <8 x i8> %in1
@@ -353,28 +353,28 @@ define <8 x i8> @cmpsel_trunc(<8 x i8> %in0, <8 x i8> %in1, <8 x i32> %cmp0, <8
define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r11, lr}
-; CHECK-NEXT: push {r11, lr}
-; CHECK-NEXT: add r12, sp, #8
-; CHECK-NEXT: add lr, sp, #24
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: ldr r12, [sp, #40]
-; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
-; CHECK-NEXT: vmov.i8 d19, #0x7
-; CHECK-NEXT: vmovl.u8 q10, d18
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vneg.s8 d17, d19
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vuzp.8 d16, d20
-; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vbsl d16, d17, d18
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: pop {r11, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .save {r11, lr}
+; CHECK-NEXT: push {r11, lr}
+; CHECK-NEXT: add r12, sp, #8
+; CHECK-NEXT: add lr, sp, #24
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: ldr r12, [sp, #40]
+; CHECK-NEXT: vld1.64 {d18, d19}, [lr]
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vld1.32 {d18[0]}, [r12:32]
+; CHECK-NEXT: vmov.i8 d19, #0x7
+; CHECK-NEXT: vmovl.u8 q10, d18
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vneg.s8 d17, d19
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vuzp.8 d16, d20
+; CHECK-NEXT: vshl.i8 d16, d16, #7
+; CHECK-NEXT: vshl.s8 d16, d16, d17
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: pop {r11, lr}
+; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@@ -389,22 +389,22 @@ define <8 x i8> @vuzp_trunc_and_shuffle(<8 x i8> %tr0, <8 x i8> %tr1,
define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_right:
; CHECK: @ %bb.0:
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: add r12, sp, #16
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vmov.i8 d18, #0x7
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vuzp.8 d16, d17
-; CHECK-NEXT: vneg.s8 d17, d18
-; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vbsl d16, d17, d18
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: add r12, sp, #16
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vmov.i8 d18, #0x7
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vuzp.8 d16, d17
+; CHECK-NEXT: vneg.s8 d17, d18
+; CHECK-NEXT: vshl.i8 d16, d16, #7
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vshl.s8 d16, d16, d17
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
<4 x i32> %cmp0, <4 x i32> %cmp1, <4 x i8> *%cmp2_ptr) {
%cmp2_load = load <4 x i8>, <4 x i8> * %cmp2_ptr, align 4
%cmp2 = trunc <4 x i8> %cmp2_load to <4 x i1>
@@ -417,23 +417,23 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_right(<8 x i8> %tr0, <8 x i8> %tr1
define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
; CHECK-LABEL: vuzp_trunc_and_shuffle_undef_left:
; CHECK: @ %bb.0:
-; CHECK-NEXT: mov r12, sp
-; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
-; CHECK-NEXT: add r12, sp, #16
-; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vldr d18, .LCPI22_0
-; CHECK-NEXT: vmov.i8 d19, #0x7
-; CHECK-NEXT: vmovn.i32 d16, q8
-; CHECK-NEXT: vtbl.8 d16, {d16}, d18
-; CHECK-NEXT: vneg.s8 d17, d19
-; CHECK-NEXT: vmov d18, r2, r3
-; CHECK-NEXT: vshl.i8 d16, d16, #7
-; CHECK-NEXT: vshl.s8 d16, d16, d17
-; CHECK-NEXT: vmov d17, r0, r1
-; CHECK-NEXT: vbsl d16, d17, d18
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: mov r12, sp
+; CHECK-NEXT: vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT: add r12, sp, #16
+; CHECK-NEXT: vld1.64 {d18, d19}, [r12]
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vldr d18, .LCPI22_0
+; CHECK-NEXT: vmov.i8 d19, #0x7
+; CHECK-NEXT: vmovn.i32 d16, q8
+; CHECK-NEXT: vtbl.8 d16, {d16}, d18
+; CHECK-NEXT: vneg.s8 d17, d19
+; CHECK-NEXT: vmov d18, r2, r3
+; CHECK-NEXT: vshl.i8 d16, d16, #7
+; CHECK-NEXT: vshl.s8 d16, d16, d17
+; CHECK-NEXT: vmov d17, r0, r1
+; CHECK-NEXT: vbsl d16, d17, d18
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI22_0:
@@ -459,55 +459,55 @@ define <8 x i8> @vuzp_trunc_and_shuffle_undef_left(<8 x i8> %tr0, <8 x i8> %tr1,
define <10 x i8> @vuzp_wide_type(<10 x i8> %tr0, <10 x i8> %tr1,
; CHECK-LABEL: vuzp_wide_type:
; CHECK: @ %bb.0:
-; CHECK-NEXT: .save {r4, lr}
-; CHECK-NEXT: push {r4, lr}
-; CHECK-NEXT: add r12, sp, #32
-; CHECK-NEXT: add lr, sp, #48
-; CHECK-NEXT: vld1.32 {d17[0]}, [r12:32]
-; CHECK-NEXT: add r12, sp, #24
-; CHECK-NEXT: vld1.32 {d16[0]}, [r12:32]
-; CHECK-NEXT: add r12, sp, #56
-; CHECK-NEXT: vld1.32 {d19[0]}, [r12:32]
-; CHECK-NEXT: ldr r12, [sp, #68]
-; CHECK-NEXT: vld1.32 {d18[0]}, [lr:32]
-; CHECK-NEXT: add lr, sp, #40
-; CHECK-NEXT: vld1.32 {d20[0]}, [lr:32]
-; CHECK-NEXT: ldr r4, [r12]
-; CHECK-NEXT: vmov.32 d23[0], r4
-; CHECK-NEXT: add r4, sp, #64
-; CHECK-NEXT: vld1.32 {d24[0]}, [r4:32]
-; CHECK-NEXT: add r4, sp, #36
-; CHECK-NEXT: vld1.32 {d17[1]}, [r4:32]
-; CHECK-NEXT: add r4, sp, #28
-; CHECK-NEXT: vcgt.u32 q10, q12, q10
-; CHECK-NEXT: vmov.u8 lr, d23[3]
-; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
-; CHECK-NEXT: add r4, sp, #60
-; CHECK-NEXT: vld1.32 {d19[1]}, [r4:32]
-; CHECK-NEXT: add r4, sp, #52
-; CHECK-NEXT: vld1.32 {d18[1]}, [r4:32]
-; CHECK-NEXT: add r4, r12, #4
-; CHECK-NEXT: vcgt.u32 q8, q9, q8
-; CHECK-NEXT: vmovn.i32 d19, q10
-; CHECK-NEXT: vldr d20, .LCPI23_0
-; CHECK-NEXT: vmovn.i32 d18, q8
-; CHECK-NEXT: vmovn.i16 d22, q9
-; CHECK-NEXT: vmov.i8 q9, #0x7
-; CHECK-NEXT: vmov.8 d17[0], lr
-; CHECK-NEXT: vneg.s8 q9, q9
-; CHECK-NEXT: vtbl.8 d16, {d22, d23}, d20
-; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
-; CHECK-NEXT: add r4, sp, #8
-; CHECK-NEXT: vshl.i8 q8, q8, #7
-; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
-; CHECK-NEXT: vshl.s8 q8, q8, q9
-; CHECK-NEXT: vmov d19, r2, r3
-; CHECK-NEXT: vmov d18, r0, r1
-; CHECK-NEXT: vbsl q8, q9, q10
-; CHECK-NEXT: vmov r0, r1, d16
-; CHECK-NEXT: vmov r2, r3, d17
-; CHECK-NEXT: pop {r4, lr}
-; CHECK-NEXT: mov pc, lr
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
+; CHECK-NEXT: add r12, sp, #32
+; CHECK-NEXT: add lr, sp, #48
+; CHECK-NEXT: vld1.32 {d17[0]}, [r12:32]
+; CHECK-NEXT: add r12, sp, #24
+; CHECK-NEXT: vld1.32 {d16[0]}, [r12:32]
+; CHECK-NEXT: add r12, sp, #56
+; CHECK-NEXT: vld1.32 {d19[0]}, [r12:32]
+; CHECK-NEXT: vld1.32 {d18[0]}, [lr:32]
+; CHECK-NEXT: add lr, sp, #40
+; CHECK-NEXT: vld1.32 {d20[0]}, [lr:32]
+; CHECK-NEXT: ldr r12, [sp, #68]
+; CHECK-NEXT: ldr r4, [r12]
+; CHECK-NEXT: vmov.32 d23[0], r4
+; CHECK-NEXT: add r4, sp, #64
+; CHECK-NEXT: vld1.32 {d24[0]}, [r4:32]
+; CHECK-NEXT: add r4, sp, #36
+; CHECK-NEXT: vcgt.u32 q10, q12, q10
+; CHECK-NEXT: vld1.32 {d17[1]}, [r4:32]
+; CHECK-NEXT: add r4, sp, #28
+; CHECK-NEXT: vld1.32 {d16[1]}, [r4:32]
+; CHECK-NEXT: add r4, sp, #60
+; CHECK-NEXT: vld1.32 {d19[1]}, [r4:32]
+; CHECK-NEXT: add r4, sp, #52
+; CHECK-NEXT: vld1.32 {d18[1]}, [r4:32]
+; CHECK-NEXT: add r4, r12, #4
+; CHECK-NEXT: vcgt.u32 q8, q9, q8
+; CHECK-NEXT: vmovn.i32 d19, q10
+; CHECK-NEXT: vmov.u8 lr, d23[3]
+; CHECK-NEXT: vldr d20, .LCPI23_0
+; CHECK-NEXT: vmovn.i32 d18, q8
+; CHECK-NEXT: vmovn.i16 d22, q9
+; CHECK-NEXT: vmov.i8 q9, #0x7
+; CHECK-NEXT: vneg.s8 q9, q9
+; CHECK-NEXT: vmov.8 d17[0], lr
+; CHECK-NEXT: vtbl.8 d16, {d22, d23}, d20
+; CHECK-NEXT: vld1.8 {d17[1]}, [r4]
+; CHECK-NEXT: add r4, sp, #8
+; CHECK-NEXT: vshl.i8 q8, q8, #7
+; CHECK-NEXT: vld1.64 {d20, d21}, [r4]
+; CHECK-NEXT: vshl.s8 q8, q8, q9
+; CHECK-NEXT: vmov d19, r2, r3
+; CHECK-NEXT: vmov d18, r0, r1
+; CHECK-NEXT: vbsl q8, q9, q10
+; CHECK-NEXT: vmov r0, r1, d16
+; CHECK-NEXT: vmov r2, r3, d17
+; CHECK-NEXT: pop {r4, lr}
+; CHECK-NEXT: mov pc, lr
; CHECK-NEXT: .p2align 3
; CHECK-NEXT: @ %bb.1:
; CHECK-NEXT: .LCPI23_0:
diff --git a/llvm/test/CodeGen/SystemZ/misched-readadvances.mir b/llvm/test/CodeGen/SystemZ/misched-readadvances.mir
new file mode 100644
index 00000000000..df8ca2f5f95
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/misched-readadvances.mir
@@ -0,0 +1,31 @@
+# Check that the extra operand for the full register added by RegAlloc does
+# not have a latency that interferes with the latency adjustment
+# (ReadAdvance) for the MSY register operand.
+
+# RUN: llc %s -mtriple=s390x-linux-gnu -mcpu=z13 -start-before=machine-scheduler \
+# RUN: -debug-only=machine-scheduler -o - 2>&1 | FileCheck %s
+# REQUIRES: asserts
+
+# CHECK: ScheduleDAGMI::schedule starting
+# CHECK: SU(4): renamable $r2l = MSR renamable $r2l(tied-def 0), renamable $r2l
+# CHECK: Latency : 6
+# CHECK: SU(5): renamable $r2l = MSY renamable $r2l(tied-def 0), renamable $r1d, -4, $noreg, implicit $r2d
+# CHECK: Predecessors:
+# CHECK: SU(4): Data Latency=2 Reg=$r2l
+# CHECK: SU(4): Data Latency=0 Reg=$r2d
+
+---
+name: Perl_do_sv_dump
+alignment: 4
+tracksRegLiveness: true
+body: |
+ bb.0 :
+ %1:addr64bit = IMPLICIT_DEF
+ %2:addr64bit = IMPLICIT_DEF
+ %3:vr64bit = IMPLICIT_DEF
+
+ bb.1 :
+ %2:addr64bit = ALGFI %2, 4294967291, implicit-def dead $cc
+ %2.subreg_l32:addr64bit = MSR %2.subreg_l32, %2.subreg_l32
+ %2.subreg_l32:addr64bit = MSY %2.subreg_l32, %1, -4, $noreg
+...
diff --git a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
index d639b7acbbc..5300bed0de8 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-128-legalisation-lowering.ll
@@ -88,15 +88,15 @@ define { i128, i8 } @muloti_test(i128 %l, i128 %r) unnamed_addr #0 {
; THUMBV7-NEXT: orrs r3, r2
; THUMBV7-NEXT: ldr r2, [sp, #80]
; THUMBV7-NEXT: orr.w r1, r1, r4
+; THUMBV7-NEXT: orr.w r1, r1, r10
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r3, #1
-; THUMBV7-NEXT: orr.w r1, r1, r10
; THUMBV7-NEXT: orrs.w r7, r2, r11
; THUMBV7-NEXT: orr.w r1, r1, r9
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r7, #1
-; THUMBV7-NEXT: orr.w r0, r0, r12
; THUMBV7-NEXT: ands r3, r7
+; THUMBV7-NEXT: orr.w r0, r0, r12
; THUMBV7-NEXT: orrs r1, r3
; THUMBV7-NEXT: orrs r0, r1
; THUMBV7-NEXT: orr.w r0, r0, r8
diff --git a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
index e47e88a6832..161adf7e7d7 100644
--- a/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
+++ b/llvm/test/CodeGen/Thumb2/umulo-64-legalisation-lowering.ll
@@ -20,11 +20,11 @@ define { i64, i8 } @mulodi_test(i64 %l, i64 %r) unnamed_addr #0 {
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r1, #1
; THUMBV7-NEXT: cmp r5, #0
+; THUMBV7-NEXT: and.w r1, r1, r3
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne r5, #1
-; THUMBV7-NEXT: ands r1, r3
+; THUMBV7-NEXT: orrs r1, r5
; THUMBV7-NEXT: cmp.w lr, #0
-; THUMBV7-NEXT: orr.w r1, r1, r5
; THUMBV7-NEXT: it ne
; THUMBV7-NEXT: movne.w lr, #1
; THUMBV7-NEXT: orr.w r1, r1, lr
diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
index 0b5ce8a4ffb..7a266235109 100644
--- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -97,8 +97,8 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: pushq %r14
; ATOM-NEXT: pushq %rbx
; ATOM-NEXT: ## kill: def $ecx killed $ecx def $rcx
-; ATOM-NEXT: movl 4(%rdx), %eax
; ATOM-NEXT: movl (%rdx), %r15d
+; ATOM-NEXT: movl 4(%rdx), %eax
; ATOM-NEXT: leaq 20(%rdx), %r14
; ATOM-NEXT: movq _Te0@{{.*}}(%rip), %r9
; ATOM-NEXT: movq _Te1@{{.*}}(%rip), %r8
@@ -116,8 +116,8 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: movzbl %bl, %eax
; ATOM-NEXT: movl (%r10,%rax,4), %eax
; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d
-; ATOM-NEXT: xorl -4(%r14), %r15d
; ATOM-NEXT: xorl (%r9,%rdi,4), %eax
+; ATOM-NEXT: xorl -4(%r14), %r15d
; ATOM-NEXT: xorl (%r14), %eax
; ATOM-NEXT: addq $16, %r14
; ATOM-NEXT: LBB0_1: ## %bb
@@ -130,14 +130,14 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: movzbl %dil, %edi
; ATOM-NEXT: movl (%r8,%rdi,4), %ebx
; ATOM-NEXT: movzbl %r15b, %edi
-; ATOM-NEXT: movl (%r10,%rdi,4), %edi
; ATOM-NEXT: xorl (%r9,%rbp,4), %ebx
+; ATOM-NEXT: movl (%r10,%rdi,4), %edi
; ATOM-NEXT: xorl -12(%r14), %ebx
; ATOM-NEXT: xorl (%r9,%rax,4), %edi
; ATOM-NEXT: movl %ebx, %eax
+; ATOM-NEXT: xorl -8(%r14), %edi
; ATOM-NEXT: shrl $24, %eax
; ATOM-NEXT: movl (%r9,%rax,4), %r15d
-; ATOM-NEXT: xorl -8(%r14), %edi
; ATOM-NEXT: testq %r11, %r11
; ATOM-NEXT: movl %edi, %eax
; ATOM-NEXT: jne LBB0_2
diff --git a/llvm/test/CodeGen/X86/memset.ll b/llvm/test/CodeGen/X86/memset.ll
index 02fd8806254..02dfb34e100 100644
--- a/llvm/test/CodeGen/X86/memset.ll
+++ b/llvm/test/CodeGen/X86/memset.ll
@@ -41,8 +41,8 @@ define void @t() nounwind {
; YMM-NEXT: movl %esp, %ebp
; YMM-NEXT: andl $-32, %esp
; YMM-NEXT: subl $96, %esp
-; YMM-NEXT: vxorps %xmm0, %xmm0, %xmm0
; YMM-NEXT: leal {{[0-9]+}}(%esp), %eax
+; YMM-NEXT: vxorps %xmm0, %xmm0, %xmm0
; YMM-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp)
; YMM-NEXT: movl %eax, (%esp)
; YMM-NEXT: vzeroupper
diff --git a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
index a5453b9e1f8..2a129bc643b 100644
--- a/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
+++ b/llvm/test/CodeGen/X86/phys-reg-local-regalloc.ll
@@ -20,9 +20,9 @@ entry:
; On Intel Atom the scheduler moves a movl instruction
; used for the printf call to follow movl 24(%esp), %eax
; ATOM: movl 24(%esp), %eax
-; ATOM: movl
-; ATOM: movl %eax, 36(%esp)
; ATOM-NOT: movl
+; ATOM: movl %eax, 36(%esp)
+; ATOM: movl
; ATOM: movl 28(%esp), %ebx
; ATOM-NOT: movl
; ATOM: movl %ebx, 40(%esp)
diff --git a/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll b/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll
index 315a497bc3c..0e66329f7b4 100644
--- a/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll
+++ b/llvm/test/CodeGen/X86/schedule-x86-64-shld.ll
@@ -135,16 +135,16 @@ define i64 @lshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
;
; BDVER12-LABEL: lshift_cl_optsize:
; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
; BDVER12-NEXT: retq # sched: [5:1.00]
;
; BTVER2-LABEL: lshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shldq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
@@ -211,16 +211,16 @@ define i64 @rshift_cl_optsize(i64 %a, i64 %b, i64 %c) nounwind readnone optsize
;
; BDVER12-LABEL: rshift_cl_optsize:
; BDVER12: # %bb.0: # %entry
-; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; BDVER12-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BDVER12-NEXT: # kill: def $cl killed $cl killed $rcx
; BDVER12-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
; BDVER12-NEXT: retq # sched: [5:1.00]
;
; BTVER2-LABEL: rshift_cl_optsize:
; BTVER2: # %bb.0: # %entry
-; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: movq %rdi, %rax # sched: [1:0.50]
+; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50]
; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx
; BTVER2-NEXT: shrdq %cl, %rsi, %rax # sched: [4:4.00]
; BTVER2-NEXT: retq # sched: [4:1.00]
diff --git a/llvm/test/CodeGen/X86/schedule-x86_32.ll b/llvm/test/CodeGen/X86/schedule-x86_32.ll
index 757a022839b..6b8ad906fec 100644
--- a/llvm/test/CodeGen/X86/schedule-x86_32.ll
+++ b/llvm/test/CodeGen/X86/schedule-x86_32.ll
@@ -495,8 +495,8 @@ define void @test_arpl(i16 %a0, i16 *%a1) optsize {
;
; ZNVER1-LABEL: test_arpl:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
+; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: arpl %ax, (%ecx) # sched: [100:0.25]
; ZNVER1-NEXT: #NO_APP
@@ -681,10 +681,10 @@ define void @test_bound(i16 %a0, i16 *%a1, i32 %a2, i32 *%a3) optsize {
; ZNVER1-NEXT: pushl %esi # sched: [1:0.50]
; ZNVER1-NEXT: .cfi_def_cfa_offset 8
; ZNVER1-NEXT: .cfi_offset %esi, -8
+; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %edx # sched: [8:0.50]
; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %esi # sched: [8:0.50]
-; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: bound %ax, (%esi) # sched: [100:0.25]
; ZNVER1-NEXT: bound %ecx, (%edx) # sched: [100:0.25]
@@ -985,8 +985,8 @@ define void @test_dec16(i16 %a0, i16* %a1) optsize {
;
; ZNVER1-LABEL: test_dec16:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
+; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: decw %ax # sched: [1:0.25]
; ZNVER1-NEXT: decw (%ecx) # sched: [5:0.50]
@@ -1212,8 +1212,8 @@ define void @test_inc16(i16 %a0, i16* %a1) optsize {
;
; ZNVER1-LABEL: test_inc16:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
+; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: incw %ax # sched: [1:0.25]
; ZNVER1-NEXT: incw (%ecx) # sched: [5:0.50]
@@ -1949,8 +1949,8 @@ define i16 @test_pop_push_16(i16 %a0, i16 *%a1) optsize {
;
; ZNVER1-LABEL: test_pop_push_16:
; ZNVER1: # %bb.0:
-; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: movzwl {{[0-9]+}}(%esp), %eax # sched: [8:0.50]
+; ZNVER1-NEXT: movl {{[0-9]+}}(%esp), %ecx # sched: [8:0.50]
; ZNVER1-NEXT: #APP
; ZNVER1-NEXT: popw %ax # sched: [8:0.50]
; ZNVER1-NEXT: popw (%ecx) # sched: [5:0.50]
OpenPOWER on IntegriCloud