diff options
Diffstat (limited to 'llvm/test/CodeGen/AArch64/misched-basic-A53.ll')
| -rw-r--r-- | llvm/test/CodeGen/AArch64/misched-basic-A53.ll | 31 |
1 files changed, 30 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AArch64/misched-basic-A53.ll b/llvm/test/CodeGen/AArch64/misched-basic-A53.ll index 0d5534eca54..1555c4868e1 100644 --- a/llvm/test/CodeGen/AArch64/misched-basic-A53.ll +++ b/llvm/test/CodeGen/AArch64/misched-basic-A53.ll @@ -4,13 +4,15 @@ ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled ; much higher than the ADD instructions in order to hide latency. When not ; specifying a subtarget, the MADD will remain near the end of the block. +; +; CHECK: ********** MI Scheduling ********** ; CHECK: main ; CHECK: *** Final schedule for BB#2 *** ; CHECK: SU(13) ; CHECK: MADDwwww ; CHECK: SU(4) ; CHECK: ADDwwi_lsl0_s -; CHECK: ********** MI Scheduling ********** +; CHECK: ********** INTERVALS ********** @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4 @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4 @@ -76,6 +78,33 @@ for.end: ; preds = %for.cond ret i32 %add6 } + +; The Cortex-A53 machine model will cause the FDIVvvv_4S to be raised to +; hide latency. Whereas normally there would only be a single FADDvvv_4S +; after it, this test checks to make sure there is more than one. 
+; +; CHECK: ********** MI Scheduling ********** +; CHECK: neon4xfloat:BB#0 +; CHECK: *** Final schedule for BB#0 *** +; CHECK: FDIVvvv_4S +; CHECK: FADDvvv_4S +; CHECK: FADDvvv_4S +; CHECK: ********** INTERVALS ********** +define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) { + %tmp1 = fadd <4 x float> %A, %B; + %tmp2 = fadd <4 x float> %A, %tmp1; + %tmp3 = fadd <4 x float> %A, %tmp2; + %tmp4 = fadd <4 x float> %A, %tmp3; + %tmp5 = fadd <4 x float> %A, %tmp4; + %tmp6 = fadd <4 x float> %A, %tmp5; + %tmp7 = fadd <4 x float> %A, %tmp6; + %tmp8 = fadd <4 x float> %A, %tmp7; + %tmp9 = fdiv <4 x float> %A, %B; + %tmp10 = fadd <4 x float> %tmp8, %tmp9; + + ret <4 x float> %tmp10 +} + ; Function Attrs: nounwind declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1 |

