| author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2019-07-16 15:55:45 +0000 |
|---|---|---|
| committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2019-07-16 15:55:45 +0000 |
| commit | 450c62e33ea5310481b06d3fd59df911f5451ff2 (patch) | |
| tree | 19ff49028b591c07b394160a36a5abf0562e6bed /llvm/test/CodeGen/SystemZ | |
| parent | 63a0c2bce8e5731f90fc99b75d59f2e85283cb3b (diff) | |
[Strict FP] Allow more relaxed scheduling
Reimplement scheduling constraints for strict FP instructions in
ScheduleDAGInstrs::buildSchedGraph to allow for more relaxed
scheduling. Specifically, allow one strict FP instruction to
be scheduled across another, as long as it is not moved across
any global barrier.
Differential Revision: https://reviews.llvm.org/D64412
Reviewed By: cameron.mcinally
llvm-svn: 366222
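
As a rough mental model of the relaxed constraint, consider the sketch below. This is illustrative only; the `Inst` type and all names are invented here and are not LLVM's `ScheduleDAGInstrs` API. The idea it demonstrates is the one the commit message describes: each strict FP instruction is ordered after the last global barrier and before the next one, but no ordering edges are added between strict FP instructions themselves.

```cpp
#include <cstdio>
#include <string>
#include <vector>

// Toy dependence builder illustrating the relaxed rule: strict FP
// instructions are pinned between global barriers, but get no edges
// among themselves.
struct Inst {
  std::string Name;
  bool StrictFP; // may access the FP exception status
  bool Barrier;  // global scheduling barrier, e.g. a call to sfpc
};

int main() {
  std::vector<Inst> Prog = {{"sqrt1", true, false},
                            {"sqrt2", true, false},
                            {"sfpc", false, true},
                            {"sqrt3", true, false}};

  std::vector<size_t> PendingStrict; // strict FP ops since the last barrier
  int LastBarrier = -1;
  for (size_t I = 0; I < Prog.size(); ++I) {
    if (Prog[I].Barrier) {
      // Nothing may cross the barrier: order all pending strict FP ops
      // before it, then open a new scheduling region.
      for (size_t P : PendingStrict)
        std::printf("edge: %s -> %s\n", Prog[P].Name.c_str(),
                    Prog[I].Name.c_str());
      PendingStrict.clear();
      LastBarrier = static_cast<int>(I);
    } else if (Prog[I].StrictFP) {
      // Order after the last barrier only; deliberately no edge between
      // sqrt1 and sqrt2, so the scheduler may interleave them.
      if (LastBarrier >= 0)
        std::printf("edge: %s -> %s\n", Prog[LastBarrier].Name.c_str(),
                    Prog[I].Name.c_str());
      PendingStrict.push_back(I);
    }
  }
  // Prints: sqrt1 -> sfpc, sqrt2 -> sfpc, sfpc -> sqrt3.
}
```

Under the previous, stricter scheme, sqrt1 and sqrt2 would also have been chained to each other; the updated SystemZ tests below check that this extra ordering is gone.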
Diffstat (limited to 'llvm/test/CodeGen/SystemZ')
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/fp-strict-alias.ll | 222 |
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll | 30 |
2 files changed, 165 insertions, 87 deletions
```diff
diff --git a/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll b/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll
index d8ee018b9e5..fe27b61c20b 100644
--- a/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-strict-alias.ll
@@ -2,138 +2,216 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
-declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
-declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
 declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
 declare float @llvm.sqrt.f32(float)
 declare void @llvm.s390.sfpc(i32)
 
-; For non-strict operations, we expect the post-RA scheduler to
-; separate the two square root instructions on z13.
-define void @f1(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+; The basic assumption of all following tests is that on z13, we never
+; want to see two square root instructions directly in a row, so the
+; post-RA scheduler will always schedule something else in between
+; whenever possible.
+
+; We can move any FP operation across a (normal) store.
+
+define void @f1(float %f1, float %f2, float *%ptr1, float *%ptr2) {
 ; CHECK-LABEL: f1:
 ; CHECK: sqebr
-; CHECK: {{aebr|sebr}}
+; CHECK: ste
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = fadd float %f1, %f2
-  %sub = fsub float %f3, %f4
-  %sqrt1 = call float @llvm.sqrt.f32(float %f2)
-  %sqrt2 = call float @llvm.sqrt.f32(float %f4)
-
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  %sqrt1 = call float @llvm.sqrt.f32(float %f1)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f2)
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
   ret void
 }
 
-; But for strict operations, this must not happen.
-define void @f2(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
+define void @f2(float %f1, float %f2, float *%ptr1, float *%ptr2) {
 ; CHECK-LABEL: f2:
-; CHECK: {{aebr|sebr}}
-; CHECK: {{aebr|sebr}}
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = call float @llvm.experimental.constrained.fadd.f32(
-                        float %f1, float %f2,
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
                         metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
-  %sub = call float @llvm.experimental.constrained.fsub.f32(
-                        float %f3, float %f4,
+                        metadata !"fpexcept.ignore")
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f2,
                         metadata !"round.dynamic",
-                        metadata !"fpexcept.strict")
+                        metadata !"fpexcept.ignore")
+
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+define void @f3(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f3:
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: br %r14
+
   %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
-                        float %f2,
+                        float %f1,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.strict")
   %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
-                        float %f4,
+                        float %f2,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.strict")
 
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  ret void
+}
+
+
+; We can move a non-strict FP operation or a fpexcept.ignore
+; operation even across a volatile store, but not a fpexcept.strict
+; operation.
+
+define void @f4(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f4:
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: br %r14
+
+  %sqrt1 = call float @llvm.sqrt.f32(float %f1)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f2)
+
+  store volatile float %sqrt1, float *%ptr1
+  store volatile float %sqrt2, float *%ptr2
 
   ret void
 }
 
-; On the other hand, strict operations that use the fpexcept.ignore
-; exception behaviour should be scheduled freely.
-define void @f3(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
-; CHECK-LABEL: f3:
+define void @f5(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f5:
 ; CHECK: sqebr
-; CHECK: {{aebr|sebr}}
+; CHECK: ste
 ; CHECK: sqebr
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = call float @llvm.experimental.constrained.fadd.f32(
-                        float %f1, float %f2,
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.ignore")
-  %sub = call float @llvm.experimental.constrained.fsub.f32(
-                        float %f3, float %f4,
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f2,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.ignore")
+
+  store volatile float %sqrt1, float *%ptr1
+  store volatile float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+define void @f6(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f6:
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
+; CHECK: br %r14
+
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
                         float %f2,
                         metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+
+  store volatile float %sqrt1, float *%ptr1
+  store volatile float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+
+; No variant of FP operations can be scheduled across an SFPC.
+
+define void @f7(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f7:
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
+; CHECK: br %r14
+
+  %sqrt1 = call float @llvm.sqrt.f32(float %f1)
+  %sqrt2 = call float @llvm.sqrt.f32(float %f2)
+
+  call void @llvm.s390.sfpc(i32 0)
+
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
+
+  ret void
+}
+
+define void @f8(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f8:
+; CHECK: sqebr
+; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
+; CHECK: br %r14
+
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.ignore")
   %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
-                        float %f4,
+                        float %f2,
                         metadata !"round.dynamic",
                         metadata !"fpexcept.ignore")
 
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  call void @llvm.s390.sfpc(i32 0)
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
   ret void
 }
 
-; However, even non-strict operations must not be scheduled across an SFPC.
-define void @f4(float %f1, float %f2, float %f3, float %f4, float *%ptr0) {
-; CHECK-LABEL: f4:
-; CHECK: {{aebr|sebr}}
-; CHECK: {{aebr|sebr}}
-; CHECK: sfpc
+define void @f9(float %f1, float %f2, float *%ptr1, float *%ptr2) {
+; CHECK-LABEL: f9:
 ; CHECK: sqebr
 ; CHECK: sqebr
+; CHECK: ste
+; CHECK: ste
 ; CHECK: br %r14
 
-  %add = fadd float %f1, %f2
-  %sub = fsub float %f3, %f4
-  call void @llvm.s390.sfpc(i32 0)
-  %sqrt1 = call float @llvm.sqrt.f32(float %f2)
-  %sqrt2 = call float @llvm.sqrt.f32(float %f4)
+  %sqrt1 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f1,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
+  %sqrt2 = call float @llvm.experimental.constrained.sqrt.f32(
+                        float %f2,
+                        metadata !"round.dynamic",
+                        metadata !"fpexcept.strict")
 
-  %ptr1 = getelementptr float, float *%ptr0, i64 1
-  %ptr2 = getelementptr float, float *%ptr0, i64 2
-  %ptr3 = getelementptr float, float *%ptr0, i64 3
+  call void @llvm.s390.sfpc(i32 0)
 
-  store float %add, float *%ptr0
-  store float %sub, float *%ptr1
-  store float %sqrt1, float *%ptr2
-  store float %sqrt2, float *%ptr3
+  store float %sqrt1, float *%ptr1
+  store float %sqrt2, float *%ptr2
 
   ret void
 }
diff --git a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
index ec1e6746a61..8ab4c6db255 100644
--- a/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/SystemZ/vector-constrained-fp-intrinsics.ll
@@ -108,8 +108,8 @@ define void @constrained_vector_fdiv_v3f64(<3 x double>* %a) {
 ; S390X-NEXT:    ldeb %f3, 0(%r1)
 ; S390X-NEXT:    larl %r1, .LCPI3_2
 ; S390X-NEXT:    ldeb %f4, 0(%r1)
-; S390X-NEXT:    ddb %f2, 0(%r2)
 ; S390X-NEXT:    ddbr %f3, %f1
+; S390X-NEXT:    ddb %f2, 0(%r2)
 ; S390X-NEXT:    ddbr %f4, %f0
 ; S390X-NEXT:    std %f4, 16(%r2)
 ; S390X-NEXT:    std %f3, 8(%r2)
@@ -659,16 +659,16 @@ entry:
 define void @constrained_vector_fmul_v3f64(<3 x double>* %a) {
 ; S390X-LABEL: constrained_vector_fmul_v3f64:
 ; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 8(%r2)
 ; S390X-NEXT:    larl %r1, .LCPI13_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ld %f1, 8(%r2)
+; S390X-NEXT:    ld %f1, 0(%r1)
 ; S390X-NEXT:    ld %f2, 16(%r2)
-; S390X-NEXT:    ldr %f3, %f0
+; S390X-NEXT:    mdbr %f0, %f1
+; S390X-NEXT:    ldr %f3, %f1
 ; S390X-NEXT:    mdb %f3, 0(%r2)
-; S390X-NEXT:    mdbr %f1, %f0
-; S390X-NEXT:    mdbr %f2, %f0
+; S390X-NEXT:    mdbr %f2, %f1
 ; S390X-NEXT:    std %f2, 16(%r2)
-; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f0, 8(%r2)
 ; S390X-NEXT:    std %f3, 0(%r2)
 ; S390X-NEXT:    br %r14
 ;
@@ -832,16 +832,16 @@ entry:
 define void @constrained_vector_fadd_v3f64(<3 x double>* %a) {
 ; S390X-LABEL: constrained_vector_fadd_v3f64:
 ; S390X:       # %bb.0: # %entry
+; S390X-NEXT:    ld %f0, 8(%r2)
 ; S390X-NEXT:    larl %r1, .LCPI18_0
-; S390X-NEXT:    ld %f0, 0(%r1)
-; S390X-NEXT:    ld %f1, 8(%r2)
+; S390X-NEXT:    ld %f1, 0(%r1)
 ; S390X-NEXT:    ld %f2, 16(%r2)
-; S390X-NEXT:    ldr %f3, %f0
+; S390X-NEXT:    adbr %f0, %f1
+; S390X-NEXT:    ldr %f3, %f1
 ; S390X-NEXT:    adb %f3, 0(%r2)
-; S390X-NEXT:    adbr %f1, %f0
-; S390X-NEXT:    adbr %f2, %f0
+; S390X-NEXT:    adbr %f2, %f1
 ; S390X-NEXT:    std %f2, 16(%r2)
-; S390X-NEXT:    std %f1, 8(%r2)
+; S390X-NEXT:    std %f0, 8(%r2)
 ; S390X-NEXT:    std %f3, 0(%r2)
 ; S390X-NEXT:    br %r14
 ;
@@ -969,14 +969,14 @@ define <3 x float> @constrained_vector_fsub_v3f32() {
 ; S390X:       # %bb.0: # %entry
 ; S390X-NEXT:    larl %r1, .LCPI22_0
 ; S390X-NEXT:    le %f0, 0(%r1)
-; S390X-NEXT:    lzer %f1
 ; S390X-NEXT:    ler %f4, %f0
-; S390X-NEXT:    sebr %f4, %f1
 ; S390X-NEXT:    larl %r1, .LCPI22_1
 ; S390X-NEXT:    ler %f2, %f0
 ; S390X-NEXT:    seb %f2, 0(%r1)
 ; S390X-NEXT:    larl %r1, .LCPI22_2
 ; S390X-NEXT:    seb %f0, 0(%r1)
+; S390X-NEXT:    lzer %f1
+; S390X-NEXT:    sebr %f4, %f1
 ; S390X-NEXT:    br %r14
 ;
 ; SZ13-LABEL: constrained_vector_fsub_v3f32:
```
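
Read together, functions f1 through f9 of fp-strict-alias.ll pin down a small decision table: any FP operation may move across a normal store; everything except a `fpexcept.strict` operation may move across a volatile store; and nothing may move across an SFPC. The sketch below restates those CHECK expectations as a tiny C++ predicate (the enum and function names are invented here for illustration and correspond to no LLVM API).

```cpp
#include <cassert>

// Decision table distilled from the CHECK patterns of f1-f9 above:
// may an FP operation of the given kind move across the given event?
enum class FPKind { Normal, StrictIgnore, StrictExcept };
enum class Event { Store, VolatileStore, SFPC };

bool mayMoveAcross(FPKind K, Event E) {
  switch (E) {
  case Event::Store: // f1-f3: any FP op may cross a normal store
    return true;
  case Event::VolatileStore: // f4-f6: all but fpexcept.strict may cross
    return K != FPKind::StrictExcept;
  case Event::SFPC: // f7-f9: nothing crosses an FPC update
    return false;
  }
  return false;
}

int main() {
  assert(mayMoveAcross(FPKind::StrictExcept, Event::Store));           // f3
  assert(mayMoveAcross(FPKind::StrictIgnore, Event::VolatileStore));   // f5
  assert(!mayMoveAcross(FPKind::StrictExcept, Event::VolatileStore));  // f6
  assert(!mayMoveAcross(FPKind::Normal, Event::SFPC));                 // f7
}
```

The reordered instruction sequences in vector-constrained-fp-intrinsics.ll are the same relaxation at work: with strict FP operations no longer chained to one another, the post-RA scheduler interleaves loads and arithmetic more freely.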

