diff options
| author | Oliver Stannard <oliver.stannard@arm.com> | 2017-06-09 09:19:09 +0000 |
|---|---|---|
| committer | Oliver Stannard <oliver.stannard@arm.com> | 2017-06-09 09:19:09 +0000 |
| commit | ad0973557c4d782e0076cc2dd8dbd15ca4d08f08 (patch) | |
| tree | 697acf86a1e7c1444487a8dbdcecbffa9e3d15df /llvm/test | |
| parent | 1b47ff7ee89df87700387615a2435b3e69ab98a0 (diff) | |
| download | bcm5719-llvm-ad0973557c4d782e0076cc2dd8dbd15ca4d08f08.tar.gz bcm5719-llvm-ad0973557c4d782e0076cc2dd8dbd15ca4d08f08.zip | |
[ARM] Add scheduling info for VFMS
The scalar VFMS instructions did not have scheduling information attached (but
VFMA did), which was causing assertion failures with the Cortex-A57 scheduling
model and -fp-contract=fast.
Differential Revision: https://reviews.llvm.org/D34040
llvm-svn: 305064
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll | 91 |
1 files changed, 86 insertions, 5 deletions
diff --git a/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll b/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll index a9223e1e2a9..5f914323861 100644 --- a/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll +++ b/llvm/test/CodeGen/ARM/cortex-a57-misched-vfma.ll @@ -1,5 +1,6 @@ ; REQUIRES: asserts -; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-DEFAULT +; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-a57 -enable-misched -verify-misched -debug-only=machine-scheduler -o - 2>&1 > /dev/null -fp-contract=fast | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FAST ; Check latencies of vmul/vfma accumulate chains. define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { @@ -14,7 +15,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float ; > VMULS read-advanced latency to VMLAS = 0 ; CHECK-SAME: Latency=0 -; CHECK: VMLAS +; CHECK-DEFAULT: VMLAS +; CHECK-FAST: VFMAS ; > VMLAS common latency = 9 ; CHECK: Latency : 9 ; CHECK: Successors: @@ -22,7 +24,8 @@ define float @Test1(float %f1, float %f2, float %f3, float %f4, float %f5, float ; > VMLAS read-advanced latency to the next VMLAS = 4 ; CHECK-SAME: Latency=4 -; CHECK: VMLAS +; CHECK-DEFAULT: VMLAS +; CHECK-FAST: VFMAS ; CHECK: Latency : 9 ; CHECK: Successors: ; CHECK: data @@ -51,7 +54,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 ; VMULfd read-advanced latency to VMLAfd = 0 ; CHECK-SAME: Latency=0 -; CHECK: VMLAfd +; CHECK-DEFAULT: VMLAfd +; CHECK-FAST: VFMAfd ; > VMLAfd common latency = 9 ; CHECK: Latency : 9 ; CHECK: Successors: @@ -59,7 +63,8 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> 
%f3, <2 ; > VMLAfd read-advanced latency to the next VMLAfd = 4 ; CHECK-SAME: Latency=4 -; CHECK: VMLAfd +; CHECK-DEFAULT: VMLAfd +; CHECK-FAST: VFMAfd ; CHECK: Latency : 9 ; CHECK: Successors: ; CHECK: data @@ -75,3 +80,79 @@ define <2 x float> @Test2(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 ret <2 x float> %add2 } +define float @Test3(float %f1, float %f2, float %f3, float %f4, float %f5, float %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test3:BB#0 + +; CHECK: VMULS +; > VMULS common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; > VMULS read-advanced latency to VMLSS = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLSS +; CHECK-FAST: VFMSS +; > VMLSS common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSS read-advanced latency to the next VMLSS = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLSS +; CHECK-FAST: VFMSS +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSS not-optimized latency to VMOVRS = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 - f3 * f4 - f5 * f6 ==> VMULS, VMLSS, VMLSS + %mul1 = fmul float %f1, %f2 + %mul2 = fmul float %f3, %f4 + %mul3 = fmul float %f5, %f6 + %sub1 = fsub float %mul1, %mul2 + %sub2 = fsub float %sub1, %mul3 + ret float %sub2 +} + +; ASIMD form +define <2 x float> @Test4(<2 x float> %f1, <2 x float> %f2, <2 x float> %f3, <2 x float> %f4, <2 x float> %f5, <2 x float> %f6) { +; CHECK: ********** MI Scheduling ********** +; CHECK: Test4:BB#0 + +; CHECK: VMULfd +; > VMULfd common latency = 5 +; CHECK: Latency : 5 +; CHECK: Successors: +; CHECK: data +; VMULfd read-advanced latency to VMLSfd = 0 +; CHECK-SAME: Latency=0 + +; CHECK-DEFAULT: VMLSfd +; CHECK-FAST: VFMSfd +; > VMLSfd common latency = 9 +; CHECK: Latency : 9 +; CHECK: Successors: +; CHECK: data +; > VMLSfd read-advanced latency to the next VMLSfd = 4 +; CHECK-SAME: Latency=4 + +; CHECK-DEFAULT: VMLSfd +; CHECK-FAST: VFMSfd +; CHECK: Latency : 9 +; CHECK: 
Successors: +; CHECK: data +; > VMLSfd not-optimized latency to VMOVRRD = 9 +; CHECK-SAME: Latency=9 + +; f1 * f2 - f3 * f4 - f5 * f6 ==> VMULfd, VMLSfd, VMLSfd + %mul1 = fmul <2 x float> %f1, %f2 + %mul2 = fmul <2 x float> %f3, %f4 + %mul3 = fmul <2 x float> %f5, %f6 + %sub1 = fsub <2 x float> %mul1, %mul2 + %sub2 = fsub <2 x float> %sub1, %mul3 + ret <2 x float> %sub2 +} |

