summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll91
1 files changed, 86 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
index a9223e1e2a9..5f914323861 100644
--- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
+++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll
@@ -1,5 +1,6 @@
; REQUIRES: asserts
-; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT
+; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST
; Check latencies of vmul/vfma accumulate chains.
define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
@@ -14,7 +15,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
; > VMULS read-advanced latency to VMLAS = 0
; CHECK-SAME: Latency=0
-; CHECK: VMLAS
+; CHECK-DEFAULT: VMLAS
+; CHECK-FAST: VFMAS
; > VMLAS common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
@@ -22,7 +24,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float
; > VMLAS read-advanced latency to the next VMLAS = 4
; CHECK-SAME: Latency=4
-; CHECK: VMLAS
+; CHECK-DEFAULT: VMLAS
+; CHECK-FAST: VFMAS
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: data
@@ -51,7 +54,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
; VMULfd read-advanced latency to VMLAfd = 0
; CHECK-SAME: Latency=0
-; CHECK: VMLAfd
+; CHECK-DEFAULT: VMLAfd
+; CHECK-FAST: VFMAfd
; > VMLAfd common latency = 9
; CHECK: Latency : 9
; CHECK: Successors:
@@ -59,7 +63,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
; > VMLAfd read-advanced latency to the next VMLAfd = 4
; CHECK-SAME: Latency=4
-; CHECK: VMLAfd
+; CHECK-DEFAULT: VMLAfd
+; CHECK-FAST: VFMAfd
; CHECK: Latency : 9
; CHECK: Successors:
; CHECK: data
@@ -75,3 +80,79 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2
ret <2 x float> %add2
}
+define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) {
+; CHECK: ********** MI Scheduling **********
+; CHECK: Test3:BB#0
+
+; CHECK: VMULS
+; > VMULS common latency = 5
+; CHECK: Latency : 5
+; CHECK: Successors:
+; CHECK: data
+; > VMULS read-advanced latency to VMLSS = 0
+; CHECK-SAME: Latency=0
+
+; CHECK-DEFAULT: VMLSS
+; CHECK-FAST: VFMSS
+; > VMLSS common latency = 9
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSS read-advanced latency to the next VMLSS = 4
+; CHECK-SAME: Latency=4
+
+; CHECK-DEFAULT: VMLSS
+; CHECK-FAST: VFMSS
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSS not-optimized latency to VMOVRS = 9
+; CHECK-SAME: Latency=9
+
+; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS
+ %mul1 = fmul float %f1, %f2
+ %mul2 = fmul float %f3, %f4
+ %mul3 = fmul float %f5, %f6
+ %sub1 = fsub float %mul1, %mul2
+ %sub2 = fsub float %sub1, %mul3
+ ret float %sub2
+}
+
+; ASIMD form
+define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) {
+; CHECK: ********** MI Scheduling **********
+; CHECK: Test4:BB#0
+
+; CHECK: VMULfd
+; > VMULfd common latency = 5
+; CHECK: Latency : 5
+; CHECK: Successors:
+; CHECK: data
+; VMULfd read-advanced latency to VMLSfd = 0
+; CHECK-SAME: Latency=0
+
+; CHECK-DEFAULT: VMLSfd
+; CHECK-FAST: VFMSfd
+; > VMLSfd common latency = 9
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSfd read-advanced latency to the next VMLSfd = 4
+; CHECK-SAME: Latency=4
+
+; CHECK-DEFAULT: VMLSfd
+; CHECK-FAST: VFMSfd
+; CHECK: Latency : 9
+; CHECK: Successors:
+; CHECK: data
+; > VMLSfd not-optimized latency to VMOVRRD = 9
+; CHECK-SAME: Latency=9
+
+; f1 * f2 + f3 * f4 + f5 * f6 ==> VMULS, VMLSS, VMLSS
+ %mul1 = fmul <2 x float> %f1, %f2
+ %mul2 = fmul <2 x float> %f3, %f4
+ %mul3 = fmul <2 x float> %f5, %f6
+ %sub1 = fsub <2 x float> %mul1, %mul2
+ %sub2 = fsub <2 x float> %sub1, %mul3
+ ret <2 x float> %sub2
+}
OpenPOWER on IntegriCloud