1 files changed, 30 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/AArch64/misched-basic-A53.ll b/llvm/test/CodeGen/AArch64/misched-basic-A53.ll
index 0d5534eca54..1555c4868e1 100644
--- a/llvm/test/CodeGen/AArch64/misched-basic-A53.ll
+++ b/llvm/test/CodeGen/AArch64/misched-basic-A53.ll
@@ -4,13 +4,15 @@
 ; The Cortex-A53 machine model will cause the MADD instruction to be scheduled
 ; much higher than the ADD instructions in order to hide latency. When not
 ; specifying a subtarget, the MADD will remain near the end of the block.
+;
+; CHECK: ********** MI Scheduling **********
 ; CHECK: main
 ; CHECK: *** Final schedule for BB#2 ***
 ; CHECK: SU(13)
 ; CHECK: MADDwwww
 ; CHECK: SU(4)
 ; CHECK: ADDwwi_lsl0_s
-; CHECK: ********** MI Scheduling **********
+; CHECK: ********** INTERVALS **********
 @main.x = private unnamed_addr constant [8 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 4
 @main.y = private unnamed_addr constant [8 x i32] [i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2], align 4
 
@@ -76,6 +78,33 @@ for.end:                                          ; preds = %for.cond
   ret i32 %add6
 }
 
+
+; The Cortex-A53 machine model will cause the FDIVvvv_42 to be raised to
+; hide latency. Whereas normally there would only be a single FADDvvv_4s
+; after it, this test checks to make sure there are more than one.
+;
+; CHECK: ********** MI Scheduling **********
+; CHECK: neon4xfloat:BB#0
+; CHECK: *** Final schedule for BB#0 ***
+; CHECK: FDIVvvv_4S
+; CHECK: FADDvvv_4S
+; CHECK: FADDvvv_4S
+; CHECK: ********** INTERVALS **********
+define <4 x float> @neon4xfloat(<4 x float> %A, <4 x float> %B) {
+        %tmp1 = fadd <4 x float> %A, %B;
+        %tmp2 = fadd <4 x float> %A, %tmp1;
+        %tmp3 = fadd <4 x float> %A, %tmp2;
+        %tmp4 = fadd <4 x float> %A, %tmp3;
+        %tmp5 = fadd <4 x float> %A, %tmp4;
+        %tmp6 = fadd <4 x float> %A, %tmp5;
+        %tmp7 = fadd <4 x float> %A, %tmp6;
+        %tmp8 = fadd <4 x float> %A, %tmp7;
+        %tmp9 = fdiv <4 x float> %A, %B;
+        %tmp10 = fadd <4 x float> %tmp8, %tmp9;
+
+        ret <4 x float> %tmp10
+}
+
 ; Function Attrs: nounwind
 declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1