Diffstat (limited to 'llvm/test/CodeGen/SystemZ')
32 files changed, 841 insertions, 57 deletions
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
index d14a92acae8..3b143d93315 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-01.ll
@@ -1,6 +1,7 @@
 ; Test floating-point absolute.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)
diff --git a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
index deec8c32b4a..e831ddb86fe 100644
--- a/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-abs-02.ll
@@ -1,6 +1,7 @@
 ; Test negated floating-point absolute.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32.
 declare float @llvm.fabs.f32(float %f)
diff --git a/llvm/test/CodeGen/SystemZ/fp-add-02.ll b/llvm/test/CodeGen/SystemZ/fp-add-02.ll
index 07c7462020f..5be1ad79d45 100644
--- a/llvm/test/CodeGen/SystemZ/fp-add-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-add-02.ll
@@ -1,7 +1,8 @@
 ; Test 64-bit floating-point addition.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 declare double @foo()
 
 ; Check register addition.
@@ -76,7 +77,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: adb %f0, 160(%r15)
+; CHECK-SCALAR: adb %f0, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll
index 95af309e795..94a256777c7 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-02.ll
@@ -1,7 +1,10 @@
 ; Test 64-bit floating-point comparison.  The tests assume a z10 implementation
 ; of select, using conditional branches rather than LOCGR.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 declare double @foo()
 
@@ -9,8 +12,9 @@ declare double @foo()
 define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
 ; CHECK-LABEL: f1:
 ; CHECK: cdbr %f0, %f2
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %cond = fcmp oeq double %f1, %f2
   %res = select i1 %cond, i64 %a, i64 %b
@@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
 define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
 ; CHECK-LABEL: f2:
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %f2 = load double , double *%ptr
   %cond = fcmp oeq double %f1, %f2
@@ -34,8 +39,9 @@ define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
 ; CHECK-LABEL: f3:
 ; CHECK: cdb %f0, 4088(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 511
   %f2 = load double , double *%ptr
@@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, double %f1, double *%base) {
 ; CHECK-LABEL: f4:
 ; CHECK: aghi %r4, 4096
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 512
   %f2 = load double , double *%ptr
@@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, double %f1, double *%base) {
 ; CHECK-LABEL: f5:
 ; CHECK: aghi %r4, -8
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr = getelementptr double, double *%base, i64 -1
   %f2 = load double , double *%ptr
@@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
 ; CHECK-LABEL: f6:
 ; CHECK: sllg %r1, %r5, 3
 ; CHECK: cdb %f0, 800(%r1,%r4)
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%base, i64 %index
   %ptr2 = getelementptr double, double *%ptr1, i64 100
@@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: cdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4
@@ -152,9 +161,12 @@ define double @f7(double *%ptr0) {
 ; Check comparison with zero.
 define i64 @f8(i64 %a, i64 %b, double %f) {
 ; CHECK-LABEL: f8:
-; CHECK: ltdbr %f0, %f0
-; CHECK-NEXT: je
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR: ltdbr %f0, %f0
+; CHECK-SCALAR-NEXT: je
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR: lzdr %f1
+; CHECK-VECTOR-NEXT: cdbr %f0, %f1
+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
 ; CHECK: br %r14
   %cond = fcmp oeq double %f, 0.0
   %res = select i1 %cond, i64 %a, i64 %b
@@ -165,8 +177,9 @@ define i64 @f8(i64 %a, i64 %b, double %f) {
 define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) {
 ; CHECK-LABEL: f9:
 ; CHECK: cdb %f0, 0(%r4)
-; CHECK-NEXT: jl {{\.L.*}}
-; CHECK: lgr %r2, %r3
+; CHECK-SCALAR-NEXT: jl
+; CHECK-SCALAR: lgr %r2, %r3
+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
 ; CHECK: br %r14
   %f1 = load double , double *%ptr
   %cond = fcmp ogt double %f1, %f2
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-01.ll b/llvm/test/CodeGen/SystemZ/fp-conv-01.ll
index ebc174afada..06740ed4b4a 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-01.ll
@@ -1,11 +1,15 @@
 ; Test floating-point truncations.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 ; Test f64->f32.
 define float @f1(double %d1, double %d2) {
 ; CHECK-LABEL: f1:
-; CHECK: ledbr %f0, %f2
+; CHECK-SCALAR: ledbr %f0, %f2
+; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
 ; CHECK: br %r14
   %res = fptrunc double %d2 to float
   ret float %res
@@ -50,8 +54,10 @@ define double @f4(fp128 *%ptr) {
 define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
 ; CHECK-LABEL: f5:
 ; CHECK: ldxbr %f1, %f1
-; CHECK: adbr %f1, %f2
-; CHECK: std %f1, 0(%r2)
+; CHECK-SCALAR: adbr %f1, %f2
+; CHECK-SCALAR: std %f1, 0(%r2)
+; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
+; CHECK-VECTOR: std [[REG]], 0(%r2)
 ; CHECK: br %r14
   %val = load fp128 , fp128 *%ptr
   %conv = fptrunc fp128 %val to double
diff --git a/llvm/test/CodeGen/SystemZ/fp-conv-02.ll b/llvm/test/CodeGen/SystemZ/fp-conv-02.ll
index e9376ba6973..be32bfe7ba9 100644
--- a/llvm/test/CodeGen/SystemZ/fp-conv-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-conv-02.ll
@@ -1,6 +1,8 @@
 ; Test extensions of f32 to f64.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Check register extension.
 define double @f1(float %val) {
@@ -74,7 +76,7 @@ define double @f6(float *%base, i64 %index) {
 ; to use LDEB if possible.
 define void @f7(double *%ptr1, float *%ptr2) {
 ; CHECK-LABEL: f7:
-; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
+; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
 ; CHECK: br %r14
   %val0 = load volatile float , float *%ptr2
   %val1 = load volatile float , float *%ptr2
diff --git a/llvm/test/CodeGen/SystemZ/fp-div-02.ll b/llvm/test/CodeGen/SystemZ/fp-div-02.ll
index 82eeb480602..f120e7c923d 100644
--- a/llvm/test/CodeGen/SystemZ/fp-div-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-div-02.ll
@@ -1,6 +1,8 @@
 ; Test 64-bit floating-point division.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @foo()
 
@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: ddb %f0, 160(%r15)
+; CHECK-SCALAR: ddb %f0, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-01.ll b/llvm/test/CodeGen/SystemZ/fp-move-01.ll
index 31a8fc55d77..843b1b6a6e6 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-01.ll
@@ -1,11 +1,13 @@
 ; Test moves between FPRs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32 moves.
 define float @f1(float %a, float %b) {
 ; CHECK-LABEL: f1:
 ; CHECK: ler %f0, %f2
+; CHECK: br %r14
   ret float %b
 }
 
@@ -13,6 +15,7 @@ define float @f1(float %a, float %b) {
 define double @f2(double %a, double %b) {
 ; CHECK-LABEL: f2:
 ; CHECK: ldr %f0, %f2
+; CHECK: br %r14
   ret double %b
 }
 
@@ -22,6 +25,7 @@ define double @f2(double %a, double %b) {
 define void @f3(fp128 *%x) {
 ; CHECK-LABEL: f3:
 ; CHECK: lxr
 ; CHECK: axbr
+; CHECK: br %r14
   %val = load volatile fp128 , fp128 *%x
   %sum = fadd fp128 %val, %val
   store volatile fp128 %sum, fp128 *%x
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-04.ll b/llvm/test/CodeGen/SystemZ/fp-move-04.ll
index d3728d0e585..6650419b2c3 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-04.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-04.ll
@@ -1,6 +1,7 @@
 ; Test 64-bit floating-point loads.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test the low end of the LD range.
 define double @f1(double *%src) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-07.ll b/llvm/test/CodeGen/SystemZ/fp-move-07.ll
index c3ad2a59f66..5361002a97e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-move-07.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-move-07.ll
@@ -1,6 +1,7 @@
 ; Test 64-bit floating-point stores.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test the low end of the STD range.
 define void @f1(double *%src, double %val) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-move-11.ll b/llvm/test/CodeGen/SystemZ/fp-move-11.ll
new file mode 100644
index 00000000000..ce45019425c
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/fp-move-11.ll
@@ -0,0 +1,110 @@
+; Test 32-bit floating-point loads for z13.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; Test that we use LDE instead of LE - low end of the LE range.
+define float @f1(float *%src) {
+; CHECK-LABEL: f1:
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+  %val = load float, float *%src
+  ret float %val
+}
+
+; Test that we use LDE instead of LE - high end of the LE range.
+define float @f2(float *%src) {
+; CHECK-LABEL: f2:
+; CHECK: lde %f0, 4092(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%src, i64 1023
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check the next word up, which should use LEY instead of LDE.
+define float @f3(float *%src) {
+; CHECK-LABEL: f3:
+; CHECK: ley %f0, 4096(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%src, i64 1024
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check the high end of the aligned LEY range.
+define float @f4(float *%src) {
+; CHECK-LABEL: f4:
+; CHECK: ley %f0, 524284(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%src, i64 131071
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check the next word up, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f5(float *%src) {
+; CHECK-LABEL: f5:
+; CHECK: agfi %r2, 524288
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%src, i64 131072
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check the high end of the negative aligned LEY range.
+define float @f6(float *%src) {
+; CHECK-LABEL: f6:
+; CHECK: ley %f0, -4(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%src, i64 -1
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check the low end of the LEY range.
+define float @f7(float *%src) {
+; CHECK-LABEL: f7:
+; CHECK: ley %f0, -524288(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%src, i64 -131072
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check the next word down, which needs separate address logic.
+; Other sequences besides this one would be OK.
+define float @f8(float *%src) {
+; CHECK-LABEL: f8:
+; CHECK: agfi %r2, -524292
+; CHECK: lde %f0, 0(%r2)
+; CHECK: br %r14
+  %ptr = getelementptr float, float *%src, i64 -131073
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check that LDE allows an index.
+define float @f9(i64 %src, i64 %index) {
+; CHECK-LABEL: f9:
+; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4092
+  %ptr = inttoptr i64 %add2 to float *
+  %val = load float, float *%ptr
+  ret float %val
+}
+
+; Check that LEY allows an index.
+define float @f10(i64 %src, i64 %index) {
+; CHECK-LABEL: f10:
+; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
+; CHECK: br %r14
+  %add1 = add i64 %src, %index
+  %add2 = add i64 %add1, 4096
+  %ptr = inttoptr i64 %add2 to float *
+  %val = load float, float *%ptr
+  ret float %val
+}
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-03.ll b/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
index 701304ef3ee..0d52121f41c 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-03.ll
@@ -1,6 +1,8 @@
 ; Test multiplication of two f64s, producing an f64 result.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @foo()
 
@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: mdb %f0, 160(%r15)
+; CHECK-SCALAR: mdb %f0, 160(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-07.ll b/llvm/test/CodeGen/SystemZ/fp-mul-07.ll
index b1d0ae3c520..e0b4a5c5d78 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-07.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-07.ll
@@ -1,11 +1,15 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
 
 define double @f1(double %f1, double %f2, double %acc) {
 ; CHECK-LABEL: f1:
-; CHECK: madbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: madbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
 ; CHECK: br %r14
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-mul-09.ll b/llvm/test/CodeGen/SystemZ/fp-mul-09.ll
index f2eadf55ff3..927a8064823 100644
--- a/llvm/test/CodeGen/SystemZ/fp-mul-09.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-mul-09.ll
@@ -1,11 +1,15 @@
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
 
 define double @f1(double %f1, double %f2, double %acc) {
 ; CHECK-LABEL: f1:
-; CHECK: msdbr %f4, %f0, %f2
-; CHECK: ldr %f0, %f4
+; CHECK-SCALAR: msdbr %f4, %f0, %f2
+; CHECK-SCALAR: ldr %f0, %f4
+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
 ; CHECK: br %r14
   %negacc = fsub double -0.0, %acc
   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
diff --git a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
index 927bcd44d02..fe2e5f67cf5 100644
--- a/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-neg-01.ll
@@ -1,6 +1,7 @@
 ; Test floating-point negation.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Test f32.
 define float @f1(float %f) {
diff --git a/llvm/test/CodeGen/SystemZ/fp-round-02.ll b/llvm/test/CodeGen/SystemZ/fp-round-02.ll
index bd5419dad1d..428261478dc 100644
--- a/llvm/test/CodeGen/SystemZ/fp-round-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-round-02.ll
@@ -1,6 +1,9 @@
 ; Test rounding functions for z196 and above.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
 
 ; Test rint for f32.
 declare float @llvm.rint.f32(float %f)
@@ -16,7 +19,8 @@ define float @f1(float %f) {
 declare double @llvm.rint.f64(double %f)
 define double @f2(double %f) {
 ; CHECK-LABEL: f2:
-; CHECK: fidbr %f0, 0, %f0
+; CHECK-SCALAR: fidbr %f0, 0, %f0
+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
 ; CHECK: br %r14
   %res = call double @llvm.rint.f64(double %f)
   ret double %res
diff --git a/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll b/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll
index a6d987b0d76..a162466064e 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sqrt-02.ll
@@ -1,6 +1,8 @@
 ; Test 64-bit square root.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @llvm.sqrt.f64(double %f)
 declare double @sqrt(double)
@@ -77,7 +79,7 @@ define double @f6(double *%base, i64 %index) {
 ; to use SQDB if possible.
 define void @f7(double *%ptr) {
 ; CHECK-LABEL: f7:
-; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
+; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15)
 ; CHECK: br %r14
   %val0 = load volatile double , double *%ptr
   %val1 = load volatile double , double *%ptr
diff --git a/llvm/test/CodeGen/SystemZ/fp-sub-02.ll b/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
index f59ec0a31d7..143baac23e1 100644
--- a/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-sub-02.ll
@@ -1,6 +1,8 @@
 ; Test 64-bit floating-point subtraction.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 declare double @foo()
 
@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%base, i64 %index) {
 define double @f7(double *%ptr0) {
 ; CHECK-LABEL: f7:
 ; CHECK: brasl %r14, foo@PLT
-; CHECK: sdb %f0, 16{{[04]}}(%r15)
+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
 ; CHECK: br %r14
   %ptr1 = getelementptr double, double *%ptr0, i64 2
   %ptr2 = getelementptr double, double *%ptr0, i64 4
diff --git a/llvm/test/CodeGen/SystemZ/frame-03.ll b/llvm/test/CodeGen/SystemZ/frame-03.ll
index 029c6d6d37d..21b8fdb0d67 100644
--- a/llvm/test/CodeGen/SystemZ/frame-03.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-03.ll
@@ -2,7 +2,7 @@
 ; uses a different register class, but the set of saved and restored
 ; registers should be the same.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; This function should require all FPRs, but no other spill slots.
 ; We need to save and restore 8 of the 16 FPRs, so the frame size
diff --git a/llvm/test/CodeGen/SystemZ/frame-07.ll b/llvm/test/CodeGen/SystemZ/frame-07.ll
index 253bbc26c1f..dd810142962 100644
--- a/llvm/test/CodeGen/SystemZ/frame-07.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-07.ll
@@ -1,7 +1,7 @@
 ; Test the saving and restoring of FPRs in large frames.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
 
 ; Test a frame size that requires some FPRs to be saved and loaded using
 ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
diff --git a/llvm/test/CodeGen/SystemZ/frame-17.ll b/llvm/test/CodeGen/SystemZ/frame-17.ll
index 485297a2b21..502e541bafc 100644
--- a/llvm/test/CodeGen/SystemZ/frame-17.ll
+++ b/llvm/test/CodeGen/SystemZ/frame-17.ll
@@ -1,6 +1,6 @@
 ; Test spilling of FPRs.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
 
 ; We need to save and restore 8 of the 16 FPRs and allocate an additional
 ; 4-byte spill slot, rounded to 8 bytes.  The frame size should be exactly
diff --git a/llvm/test/CodeGen/SystemZ/frame-20.ll b/llvm/test/CodeGen/SystemZ/frame-20.ll
new file mode 100644
index 00000000000..8d601c6f6d5
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/frame-20.ll
@@ -0,0 +1,445 @@
+; Like frame-03.ll, but for z13.  In this case we have 16 more registers
+; available.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+; This function should require all FPRs, but no other spill slots.
+; We need to save and restore 8 of the 16 FPRs, so the frame size
+; should be exactly 160 + 8 * 8 = 224.  The CFA offset is 160
+; (the caller-allocated part of the frame) + 224.
+define void @f1(double *%ptr) {
+; CHECK-LABEL: f1:
+; CHECK: aghi %r15, -224
+; CHECK: .cfi_def_cfa_offset 384
+; CHECK: std %f8, 216(%r15)
+; CHECK: std %f9, 208(%r15)
+; CHECK: std %f10, 200(%r15)
+; CHECK: std %f11, 192(%r15)
+; CHECK: std %f12, 184(%r15)
+; CHECK: std %f13, 176(%r15)
+; CHECK: std %f14, 168(%r15)
+; CHECK: std %f15, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK: .cfi_offset %f15, -224
+; CHECK-DAG: ld %f0, 0(%r2)
+; CHECK-DAG: ld %f7, 0(%r2)
+; CHECK-DAG: ld %f8, 0(%r2)
+; CHECK-DAG: ld %f15, 0(%r2)
+; CHECK-DAG: vlrepg %v16, 0(%r2)
+; CHECK-DAG: vlrepg %v23, 0(%r2)
+; CHECK-DAG: vlrepg %v24, 0(%r2)
+; CHECK-DAG: vlrepg %v31, 0(%r2)
+; CHECK: ld %f8, 216(%r15)
+; CHECK: ld %f9, 208(%r15)
+; CHECK: ld %f10, 200(%r15)
+; CHECK: ld %f11, 192(%r15)
+; CHECK: ld %f12, 184(%r15)
+; CHECK: ld %f13, 176(%r15)
+; CHECK: ld %f14, 168(%r15)
+; CHECK: ld %f15, 160(%r15)
+; CHECK: aghi %r15, 224
+; CHECK: br %r14
+  %l0 = load volatile double, double *%ptr
+  %l1 = load volatile double, double *%ptr
+  %l2 = load volatile double, double *%ptr
+  %l3 = load volatile double, double *%ptr
+  %l4 = load volatile double, double *%ptr
+  %l5 = load volatile double, double *%ptr
+  %l6 = load volatile double, double *%ptr
+  %l7 = load volatile double, double *%ptr
+  %l8 = load volatile double, double *%ptr
+  %l9 = load volatile double, double *%ptr
+  %l10 = load volatile double, double *%ptr
+  %l11 = load volatile double, double *%ptr
+  %l12 = load volatile double, double *%ptr
+  %l13 = load volatile double, double *%ptr
+  %l14 = load volatile double, double *%ptr
+  %l15 = load volatile double, double *%ptr
+  %l16 = load volatile double, double *%ptr
+  %l17 = load volatile double, double *%ptr
+  %l18 = load volatile double, double *%ptr
+  %l19 = load volatile double, double *%ptr
+  %l20 = load volatile double, double *%ptr
+  %l21 = load volatile double, double *%ptr
+  %l22 = load volatile double, double *%ptr
+  %l23 = load volatile double, double *%ptr
+  %l24 = load volatile double, double *%ptr
+  %l25 = load volatile double, double *%ptr
+  %l26 = load volatile double, double *%ptr
+  %l27 = load volatile double, double *%ptr
+  %l28 = load volatile double, double *%ptr
+  %l29 = load volatile double, double *%ptr
+  %l30 = load volatile double, double *%ptr
+  %l31 = load volatile double, double *%ptr
+  %acc0 = fsub double %l0, %l0
+  %acc1 = fsub double %l1, %acc0
+  %acc2 = fsub double %l2, %acc1
+  %acc3 = fsub double %l3, %acc2
+  %acc4 = fsub double %l4, %acc3
+  %acc5 = fsub double %l5, %acc4
+  %acc6 = fsub double %l6, %acc5
+  %acc7 = fsub double %l7, %acc6
+  %acc8 = fsub double %l8, %acc7
+  %acc9 = fsub double %l9, %acc8
+  %acc10 = fsub double %l10, %acc9
+  %acc11 = fsub double %l11, %acc10
+  %acc12 = fsub double %l12, %acc11
+  %acc13 = fsub double %l13, %acc12
+  %acc14 = fsub double %l14, %acc13
+  %acc15 = fsub double %l15, %acc14
+  %acc16 = fsub double %l16, %acc15
+  %acc17 = fsub double %l17, %acc16
+  %acc18 = fsub double %l18, %acc17
+  %acc19 = fsub double %l19, %acc18
+  %acc20 = fsub double %l20, %acc19
+  %acc21 = fsub double %l21, %acc20
+  %acc22 = fsub double %l22, %acc21
+  %acc23 = fsub double %l23, %acc22
+  %acc24 = fsub double %l24, %acc23
+  %acc25 = fsub double %l25, %acc24
+  %acc26 = fsub double %l26, %acc25
+  %acc27 = fsub double %l27, %acc26
+  %acc28 = fsub double %l28, %acc27
+  %acc29 = fsub double %l29, %acc28
+  %acc30 = fsub double %l30, %acc29
+  %acc31 = fsub double %l31, %acc30
+  store volatile double %acc0, double *%ptr
+  store volatile double %acc1, double *%ptr
+  store volatile double %acc2, double *%ptr
+  store volatile double %acc3, double *%ptr
+  store volatile double %acc4, double *%ptr
+  store volatile double %acc5, double *%ptr
+  store volatile double %acc6, double *%ptr
+  store volatile double %acc7, double *%ptr
+  store volatile double %acc8, double *%ptr
+  store volatile double %acc9, double *%ptr
+  store volatile double %acc10, double *%ptr
+  store volatile double %acc11, double *%ptr
+  store volatile double %acc12, double *%ptr
+  store volatile double %acc13, double *%ptr
+  store volatile double %acc14, double *%ptr
+  store volatile double %acc15, double *%ptr
+  store volatile double %acc16, double *%ptr
+  store volatile double %acc17, double *%ptr
+  store volatile double %acc18, double *%ptr
+  store volatile double %acc19, double *%ptr
+  store volatile double %acc20, double *%ptr
+  store volatile double %acc21, double *%ptr
+  store volatile double %acc22, double *%ptr
+  store volatile double %acc23, double *%ptr
+  store volatile double %acc24, double *%ptr
+  store volatile double %acc25, double *%ptr
+  store volatile double %acc26, double *%ptr
+  store volatile double %acc27, double *%ptr
+  store volatile double %acc28, double *%ptr
+  store volatile double %acc29, double *%ptr
+  store volatile double %acc30, double *%ptr
+  store volatile double %acc31, double *%ptr
+  ret void
+}
+
+; Like f1, but requires one fewer FPR.  We allocate in numerical order,
+; so %f15 is the one that gets dropped.
+define void @f2(double *%ptr) {
+; CHECK-LABEL: f2:
+; CHECK: aghi %r15, -216
+; CHECK: .cfi_def_cfa_offset 376
+; CHECK: std %f8, 208(%r15)
+; CHECK: std %f9, 200(%r15)
+; CHECK: std %f10, 192(%r15)
+; CHECK: std %f11, 184(%r15)
+; CHECK: std %f12, 176(%r15)
+; CHECK: std %f13, 168(%r15)
+; CHECK: std %f14, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK: .cfi_offset %f9, -176
+; CHECK: .cfi_offset %f10, -184
+; CHECK: .cfi_offset %f11, -192
+; CHECK: .cfi_offset %f12, -200
+; CHECK: .cfi_offset %f13, -208
+; CHECK: .cfi_offset %f14, -216
+; CHECK-NOT: %v15
+; CHECK-NOT: %f15
+; CHECK: ld %f8, 208(%r15)
+; CHECK: ld %f9, 200(%r15)
+; CHECK: ld %f10, 192(%r15)
+; CHECK: ld %f11, 184(%r15)
+; CHECK: ld %f12, 176(%r15)
+; CHECK: ld %f13, 168(%r15)
+; CHECK: ld %f14, 160(%r15)
+; CHECK: aghi %r15, 216
+; CHECK: br %r14
+  %l0 = load volatile double, double *%ptr
+  %l1 = load volatile double, double *%ptr
+  %l2 = load volatile double, double *%ptr
+  %l3 = load volatile double, double *%ptr
+  %l4 = load volatile double, double *%ptr
+  %l5 = load volatile double, double *%ptr
+  %l6 = load volatile double, double *%ptr
+  %l7 = load volatile double, double *%ptr
+  %l8 = load volatile double, double *%ptr
+  %l9 = load volatile double, double *%ptr
+  %l10 = load volatile double, double *%ptr
+  %l11 = load volatile double, double *%ptr
+  %l12 = load volatile double, double *%ptr
+  %l13 = load volatile double, double *%ptr
+  %l14 = load volatile double, double *%ptr
+  %l16 = load volatile double, double *%ptr
+  %l17 = load volatile double, double *%ptr
+  %l18 = load volatile double, double *%ptr
+  %l19 = load volatile double, double *%ptr
+  %l20 = load volatile double, double *%ptr
+  %l21 = load volatile double, double *%ptr
+  %l22 = load volatile double, double *%ptr
+  %l23 = load volatile double, double *%ptr
+  %l24 = load volatile double, double *%ptr
+  %l25 = load volatile double, double *%ptr
+  %l26 = load volatile double, double *%ptr
+  %l27 = load volatile double, double *%ptr
+  %l28 = load volatile double, double *%ptr
+  %l29 = load volatile double, double *%ptr
+  %l30 = load volatile double, double *%ptr
+  %l31 = load volatile double, double *%ptr
+  %acc0 = fsub double %l0, %l0
+  %acc1 = fsub double %l1, %acc0
+  %acc2 = fsub double %l2, %acc1
+  %acc3 = fsub double %l3, %acc2
+  %acc4 = fsub double %l4, %acc3
+  %acc5 = fsub double %l5, %acc4
+  %acc6 = fsub double %l6, %acc5
+  %acc7 = fsub double %l7, %acc6
+  %acc8 = fsub double %l8, %acc7
+  %acc9 = fsub double %l9, %acc8
+  %acc10 = fsub double %l10, %acc9
+  %acc11 = fsub double %l11, %acc10
+  %acc12 = fsub double %l12, %acc11
+  %acc13 = fsub double %l13, %acc12
+  %acc14 = fsub double %l14, %acc13
+  %acc16 = fsub double %l16, %acc14
+  %acc17 = fsub double %l17, %acc16
+  %acc18 = fsub double %l18, %acc17
+  %acc19 = fsub double %l19, %acc18
+  %acc20 = fsub double %l20, %acc19
+  %acc21 = fsub double %l21, %acc20
+  %acc22 = fsub double %l22, %acc21
+  %acc23 = fsub double %l23, %acc22
+  %acc24 = fsub double %l24, %acc23
+  %acc25 = fsub double %l25, %acc24
+  %acc26 = fsub double %l26, %acc25
+  %acc27 = fsub double %l27, %acc26
+  %acc28 = fsub double %l28, %acc27
+  %acc29 = fsub double %l29, %acc28
+  %acc30 = fsub double %l30, %acc29
+  %acc31 = fsub double %l31, %acc30
+  store volatile double %acc0, double *%ptr
+  store volatile double %acc1, double *%ptr
+  store volatile double %acc2, double *%ptr
+  store volatile double %acc3, double *%ptr
+  store volatile double %acc4, double *%ptr
+  store volatile double %acc5, double *%ptr
+  store volatile double %acc6, double *%ptr
+  store volatile double %acc7, double *%ptr
+  store volatile double %acc8, double *%ptr
+  store volatile double %acc9, double *%ptr
+  store volatile double %acc10, double *%ptr
+  store volatile double %acc11, double *%ptr
+  store volatile double %acc12, double *%ptr
+  store volatile double %acc13, double *%ptr
+  store volatile double %acc14, double *%ptr
+  store volatile double %acc16, double *%ptr
+  store volatile double %acc17, double *%ptr
+  store volatile double %acc18, double *%ptr
+  store volatile double %acc19, double *%ptr
+  store volatile double %acc20, double *%ptr
+  store volatile double %acc21, double *%ptr
+  store volatile double %acc22, double *%ptr
+  store volatile double %acc23, double *%ptr
+  store volatile double %acc24, double *%ptr
+  store volatile double %acc25, double *%ptr
+  store volatile double %acc26, double *%ptr
+  store volatile double %acc27, double *%ptr
+  store volatile double %acc28, double *%ptr
+  store volatile double %acc29, double *%ptr
+  store volatile double %acc30, double *%ptr
+  store volatile double %acc31, double *%ptr
+  ret void
+}
+
+; Like f1, but should require only one call-saved FPR.
+define void @f3(double *%ptr) {
+; CHECK-LABEL: f3:
+; CHECK: aghi %r15, -168
+; CHECK: .cfi_def_cfa_offset 328
+; CHECK: std %f8, 160(%r15)
+; CHECK: .cfi_offset %f8, -168
+; CHECK-NOT: {{%[fv]9}}
+; CHECK-NOT: {{%[fv]1[0-5]}}
+; CHECK: ld %f8, 160(%r15)
+; CHECK: aghi %r15, 168
+; CHECK: br %r14
+  %l0 = load volatile double, double *%ptr
+  %l1 = load volatile double, double *%ptr
+  %l2 = load volatile double, double *%ptr
+  %l3 = load volatile double, double *%ptr
+  %l4 = load volatile double, double *%ptr
+  %l5 = load volatile double, double *%ptr
+  %l6 = load volatile double, double *%ptr
+  %l7 = load volatile double, double *%ptr
+  %l8 = load volatile double, double *%ptr
+  %l16 = load volatile double, double *%ptr
+  %l17 = load volatile double, double *%ptr
+  %l18 = load volatile double, double *%ptr
+  %l19 = load volatile double, double *%ptr
+  %l20 = load volatile double, double *%ptr
+  %l21 = load volatile double, double *%ptr
+  %l22 = load volatile double, double *%ptr
+  %l23 = load volatile double, double *%ptr
+  %l24 = load volatile double, double *%ptr
+  %l25 = load volatile double, double *%ptr
+  %l26 = load volatile double, double *%ptr
+  %l27 = load volatile double, double *%ptr
+  %l28 = load volatile double, double *%ptr
+  %l29 = load volatile double, double *%ptr
+  %l30 = load volatile double, double *%ptr
+  %l31 = load volatile double, double *%ptr
+  %acc0 = fsub double %l0, %l0
+  %acc1 = fsub double %l1, %acc0
+  %acc2 = fsub double %l2, %acc1
+  %acc3 = fsub double %l3, %acc2
+  %acc4 = fsub double %l4, %acc3
+  %acc5 = fsub double %l5, %acc4
+  %acc6 = fsub double %l6, %acc5
+  %acc7 = fsub double %l7, %acc6
+  %acc8 = fsub double %l8, %acc7
+  %acc16 = fsub double %l16, %acc8
+  %acc17 = fsub double %l17, %acc16
+  %acc18 = fsub double %l18, %acc17
+  %acc19 = fsub double %l19, %acc18
+  %acc20 = fsub double %l20, %acc19
+  %acc21 = fsub double %l21, %acc20
+  %acc22 = fsub double %l22, %acc21
+  %acc23 = fsub double %l23, %acc22
+  %acc24 = fsub double %l24, %acc23
+  %acc25 = fsub double %l25, %acc24
+  %acc26 = fsub double %l26, %acc25
+  %acc27 = fsub double %l27, %acc26
+  %acc28 = fsub double %l28, %acc27
+  %acc29 = fsub double %l29, %acc28
+  %acc30 = fsub double %l30, %acc29
+  %acc31 = fsub double %l31, %acc30
+  store volatile double %acc0, double *%ptr
+  store volatile double %acc1, double *%ptr
+  store volatile double %acc2, double *%ptr
+  store volatile double %acc3, double *%ptr
+  store volatile double %acc4, double *%ptr
+  store volatile double %acc5, double *%ptr
+  store volatile double %acc6, double *%ptr
+  store volatile double %acc7, double *%ptr
+  store volatile double %acc8, double *%ptr
+  store volatile double %acc16, double *%ptr
+  store volatile double %acc17, double *%ptr
+  store volatile double %acc18, double *%ptr
+  store volatile double %acc19, double *%ptr
+  store volatile double %acc20, double *%ptr
+  store volatile double %acc21, double *%ptr
+  store volatile double %acc22, double *%ptr
+  store volatile double %acc23, double *%ptr
+  store volatile double %acc24, double *%ptr
+  store volatile double %acc25, double *%ptr
+  store volatile double %acc26, double *%ptr
+  store volatile double %acc27, double *%ptr
+  store volatile double %acc28, double *%ptr
+  store volatile double %acc29, double *%ptr
+  store volatile double %acc30, double *%ptr
+  store volatile double %acc31, double *%ptr
+  ret void
+}
+
+; This function should use all call-clobbered FPRs and vector registers
+; but no call-saved ones.  It shouldn't need to create a frame.
+define void @f4(double *%ptr) {
+; CHECK-LABEL: f4:
+; CHECK-NOT: %r15
+; CHECK-NOT: {{%[fv][89]}}
+; CHECK-NOT: {{%[fv]1[0-5]}}
+; CHECK: br %r14
+  %l0 = load volatile double, double *%ptr
+  %l1 = load volatile double, double *%ptr
+  %l2 = load volatile double, double *%ptr
+  %l3 = load volatile double, double *%ptr
+  %l4 = load volatile double, double *%ptr
+  %l5 = load volatile double, double *%ptr
+  %l6 = load volatile double, double *%ptr
+  %l7 = load volatile double, double *%ptr
+  %l16 = load volatile double, double *%ptr
+  %l17 = load volatile double, double *%ptr
+  %l18 = load volatile double, double *%ptr
+  %l19 = load volatile double, double *%ptr
+  %l20 = load volatile double, double *%ptr
+  %l21 = load volatile double, double *%ptr
+  %l22 = load volatile double, double *%ptr
+  %l23 = load volatile double, double *%ptr
+  %l24 = load volatile double, double *%ptr
+  %l25 = load volatile double, double *%ptr
+  %l26 = load volatile double, double *%ptr
+  %l27 = load volatile double, double *%ptr
+  %l28 = load volatile double, double *%ptr
+  %l29 = load volatile double, double *%ptr
+  %l30 = load volatile double, double *%ptr
+  %l31 = load volatile double, double *%ptr
+  %acc0 = fsub double %l0, %l0
+  %acc1 = fsub double %l1, %acc0
+  %acc2 = fsub double %l2, %acc1
+  %acc3 = fsub double %l3, %acc2
+  %acc4 = fsub double %l4, %acc3
+  %acc5 = fsub double %l5, %acc4
+  %acc6 = fsub double %l6, %acc5
+  %acc7 = fsub double %l7, %acc6
+  %acc16 = fsub double %l16, %acc7
+  %acc17 = fsub double %l17, %acc16
+  %acc18 = fsub double %l18, %acc17
+  %acc19 = fsub double %l19, %acc18
+  %acc20 = fsub double %l20, %acc19
+  %acc21 = fsub double %l21, %acc20
+  %acc22 = fsub double %l22, %acc21
+  %acc23 = fsub double %l23, %acc22
+  %acc24 = fsub double %l24, %acc23
+  %acc25 = fsub double %l25, %acc24
+  %acc26 = fsub double %l26, %acc25
+  %acc27 = fsub double %l27, %acc26
+  %acc28 = fsub double %l28, %acc27
+  %acc29 = fsub double %l29, %acc28
+  %acc30 = fsub double %l30, %acc29
+  %acc31 = fsub double %l31, %acc30
+  store volatile double %acc0, double *%ptr
+  store volatile double %acc1, double *%ptr
+  store volatile double %acc2, double *%ptr
+  store volatile double %acc3, double *%ptr
+  store volatile double %acc4, double *%ptr
+  store volatile double %acc5, double *%ptr
+  store volatile double %acc6, double *%ptr
+  store volatile double %acc7, double *%ptr
+  store volatile double %acc16, double *%ptr
+  store volatile double %acc17, double *%ptr
+  store volatile double %acc18, double *%ptr
+  store volatile double %acc19, double *%ptr
+  store volatile double %acc20, double *%ptr
+  store volatile double %acc21, double *%ptr
+  store volatile double %acc22, double *%ptr
+  store volatile double %acc23, double *%ptr
+  store volatile double %acc24, double *%ptr
+  store volatile double %acc25, double *%ptr
+  store volatile double %acc26, double *%ptr
+  store volatile double %acc27, double *%ptr
+  store volatile double %acc28, double *%ptr
+  store volatile double %acc29, double *%ptr
+  store volatile double %acc30, double *%ptr
+  store volatile double %acc31, double *%ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-abs-05.ll b/llvm/test/CodeGen/SystemZ/vec-abs-05.ll
index 89142b21854..63210f87b94 100644
--- a/llvm/test/CodeGen/SystemZ/vec-abs-05.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-abs-05.ll
@@ -1,7 +1,8 @@
-; Test v2f64 absolute.
+; Test f64 and v2f64 absolute.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+declare double @llvm.fabs.f64(double)
 declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
 
 ; Test a plain absolute.
@@ -22,3 +23,24 @@ define <2 x double> @f2(<2 x double> %val) {
   %ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
   ret <2 x double> %ret
 }
+
+; Test an f64 absolute that uses vector registers.
+define double @f3(<2 x double> %val) {
+; CHECK-LABEL: f3:
+; CHECK: wflpdb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %ret = call double @llvm.fabs.f64(double %scalar)
+  ret double %ret
+}
+
+; Test an f64 negative absolute that uses vector registers.
+define double @f4(<2 x double> %val) {
+; CHECK-LABEL: f4:
+; CHECK: wflndb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %abs = call double @llvm.fabs.f64(double %scalar)
+  %ret = fsub double -0.0, %abs
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-add-01.ll b/llvm/test/CodeGen/SystemZ/vec-add-01.ll
index 1de2aa2a1b9..31703437767 100644
--- a/llvm/test/CodeGen/SystemZ/vec-add-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-add-01.ll
@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
   %ret = fadd <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 addition that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfadb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fadd double %scalar1, %scalar2
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-cmp-06.ll b/llvm/test/CodeGen/SystemZ/vec-cmp-06.ll
index bdb8744631a..eef57555b48 100644
--- a/llvm/test/CodeGen/SystemZ/vec-cmp-06.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-cmp-06.ll
@@ -1,4 +1,4 @@
-; Test v2f64 comparisons.
+; Test f64 and v2f64 comparisons.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
@@ -335,3 +335,15 @@ define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
   %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
   ret <2 x double> %ret
 }
+
+; Test an f64 comparison that uses vector registers.
+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) {
+; CHECK-LABEL: f29:
+; CHECK: wfcdb %f0, %v24
+; CHECK-NEXT: locgrne %r2, %r3
+; CHECK: br %r14
+  %f2 = extractelement <2 x double> %vec, i32 0
+  %cond = fcmp oeq double %f1, %f2
+  %res = select i1 %cond, i64 %a, i64 %b
+  ret i64 %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-conv-02.ll b/llvm/test/CodeGen/SystemZ/vec-conv-02.ll
index ceccfc60b37..ab84389f3c8 100644
--- a/llvm/test/CodeGen/SystemZ/vec-conv-02.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-conv-02.ll
@@ -11,3 +11,23 @@ define void @f1(<2 x double> %val, <2 x float> *%ptr) {
   store <2 x float> %res, <2 x float> *%ptr
   ret void
 }
+
+; Test conversion of an f64 in a vector register to an f32.
+define float @f2(<2 x double> %vec) {
+; CHECK-LABEL: f2:
+; CHECK: wledb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %vec, i32 0
+  %ret = fptrunc double %scalar to float
+  ret float %ret
+}
+
+; Test conversion of an f32 in a vector register to an f64.
+define double @f3(<4 x float> %vec) {
+; CHECK-LABEL: f3:
+; CHECK: wldeb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <4 x float> %vec, i32 0
+  %ret = fpext float %scalar to double
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-div-01.ll b/llvm/test/CodeGen/SystemZ/vec-div-01.ll
index 5666444e9da..506d40861d3 100644
--- a/llvm/test/CodeGen/SystemZ/vec-div-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-div-01.ll
@@ -70,3 +70,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
   %ret = fdiv <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 division that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfddb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fdiv double %scalar1, %scalar2
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-mul-01.ll b/llvm/test/CodeGen/SystemZ/vec-mul-01.ll
index d0018fa1f8c..5ecc30d4427 100644
--- a/llvm/test/CodeGen/SystemZ/vec-mul-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-mul-01.ll
@@ -47,3 +47,14 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
   %ret = fmul <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 multiplication that uses vector registers.
+define double @f6(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f6:
+; CHECK: wfmdb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fmul double %scalar1, %scalar2
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-neg-01.ll b/llvm/test/CodeGen/SystemZ/vec-neg-01.ll
index 491e24bb34f..b1389ce4d6d 100644
--- a/llvm/test/CodeGen/SystemZ/vec-neg-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-neg-01.ll
@@ -46,3 +46,13 @@ define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
   %ret = fsub <2 x double> <double -0.0, double -0.0>, %val
   ret <2 x double> %ret
 }
+
+; Test an f64 negation that uses vector registers.
+define double @f6(<2 x double> %val) {
+; CHECK-LABEL: f6:
+; CHECK: wflcdb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %ret = fsub double -0.0, %scalar
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-round-01.ll b/llvm/test/CodeGen/SystemZ/vec-round-01.ll
index 284b83e96f7..82718276bb0 100644
--- a/llvm/test/CodeGen/SystemZ/vec-round-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-round-01.ll
@@ -2,6 +2,12 @@
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+declare double @llvm.rint.f64(double)
+declare double @llvm.nearbyint.f64(double)
+declare double @llvm.floor.f64(double)
+declare double @llvm.ceil.f64(double)
+declare double @llvm.trunc.f64(double)
+declare double @llvm.round.f64(double)
 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
@@ -56,3 +62,57 @@ define <2 x double> @f6(<2 x double> %val) {
   %res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
   ret <2 x double> %res
 }
+
+define double @f7(<2 x double> %val) {
+; CHECK-LABEL: f7:
+; CHECK: wfidb %f0, %v24, 0, 0
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.rint.f64(double %scalar)
+  ret double %res
+}
+
+define double @f8(<2 x double> %val) {
+; CHECK-LABEL: f8:
+; CHECK: wfidb %f0, %v24, 4, 0
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.nearbyint.f64(double %scalar)
+  ret double %res
+}
+
+define double @f9(<2 x double> %val) {
+; CHECK-LABEL: f9:
+; CHECK: wfidb %f0, %v24, 4, 7
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.floor.f64(double %scalar)
+  ret double %res
+}
+
+define double @f10(<2 x double> %val) {
+; CHECK-LABEL: f10:
+; CHECK: wfidb %f0, %v24, 4, 6
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.ceil.f64(double %scalar)
+  ret double %res
+}
+
+define double @f11(<2 x double> %val) {
+; CHECK-LABEL: f11:
+; CHECK: wfidb %f0, %v24, 4, 5
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.trunc.f64(double %scalar)
+  ret double %res
+}
+
+define double @f12(<2 x double> %val) {
+; CHECK-LABEL: f12:
+; CHECK: wfidb %f0, %v24, 4, 1
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %res = call double @llvm.round.f64(double %scalar)
+  ret double %res
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-sqrt-01.ll b/llvm/test/CodeGen/SystemZ/vec-sqrt-01.ll
index 0160c24a749..5c3ffb3b064 100644
--- a/llvm/test/CodeGen/SystemZ/vec-sqrt-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-sqrt-01.ll
@@ -1,7 +1,8 @@
-; Test v2f64 square root.
+; Test f64 and v2f64 square root.
 ;
 ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
+declare double @llvm.sqrt.f64(double)
 declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
 
 define <2 x double> @f1(<2 x double> %val) {
@@ -11,3 +12,12 @@ define <2 x double> @f1(<2 x double> %val) {
   %ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
   ret <2 x double> %ret
 }
+
+define double @f2(<2 x double> %val) {
+; CHECK-LABEL: f2:
+; CHECK: wfsqdb %f0, %v24
+; CHECK: br %r14
+  %scalar = extractelement <2 x double> %val, i32 0
+  %ret = call double @llvm.sqrt.f64(double %scalar)
+  ret double %ret
+}
diff --git a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
index aabf1c9be4a..5620ebcb8c4 100644
--- a/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
+++ b/llvm/test/CodeGen/SystemZ/vec-sub-01.ll
@@ -74,3 +74,14 @@ define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
   %ret = fsub <2 x double> %val1, %val2
   ret <2 x double> %ret
 }
+
+; Test an f64 subtraction that uses vector registers.
+define double @f7(<2 x double> %val1, <2 x double> %val2) {
+; CHECK-LABEL: f7:
+; CHECK: wfsdb %f0, %v24, %v26
+; CHECK: br %r14
+  %scalar1 = extractelement <2 x double> %val1, i32 0
+  %scalar2 = extractelement <2 x double> %val2, i32 0
+  %ret = fsub double %scalar1, %scalar2
+  ret double %ret
+}
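
The RUN lines in these tests are self-contained llc/FileCheck invocations, so the new z13 coverage can be exercised directly. A minimal sketch, assuming an LLVM build with the SystemZ target enabled and tools under build/bin (illustrative paths, not part of the patch):

  # Run one updated test through lit.
  build/bin/llvm-lit -v llvm/test/CodeGen/SystemZ/fp-cmp-02.ll

  # Or expand a RUN line by hand, substituting the test file for %s.
  build/bin/llc < llvm/test/CodeGen/SystemZ/fp-cmp-02.ll -mtriple=s390x-linux-gnu -mcpu=z13 \
    | build/bin/FileCheck llvm/test/CodeGen/SystemZ/fp-cmp-02.ll \
        -check-prefix=CHECK -check-prefix=CHECK-VECTOR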