diff options
author | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2018-04-30 17:54:28 +0000 |
---|---|---|
committer | Ulrich Weigand <ulrich.weigand@de.ibm.com> | 2018-04-30 17:54:28 +0000 |
commit | c3ec80fea186b6dd956001467a1076c6c2ee4fb6 (patch) | |
tree | 18dd0151abdf2a9dbbd50abfc01123edc3d00a69 /llvm/test/CodeGen/SystemZ | |
parent | b32f3656d2bc376d223b5cf38c5067e3942d1471 (diff) | |
download | bcm5719-llvm-c3ec80fea186b6dd956001467a1076c6c2ee4fb6.tar.gz bcm5719-llvm-c3ec80fea186b6dd956001467a1076c6c2ee4fb6.zip |
[SystemZ] Handle SADDO et al. and ADD/SUBCARRY
This provides an optimized implementation of SADDO/SSUBO/UADDO/USUBO
as well as ADDCARRY/SUBCARRY on top of the new CC implementation.
In particular, multi-word arithmetic now uses UADDO/ADDCARRY instead
of the old ADDC/ADDE logic, which means we no longer need to use
"glue" links for those instructions. This also allows making full
use of the memory-based instructions like ALSI, which couldn't be
recognized due to limitations in the DAG matcher previously.
Also, the llvm.sadd.with.overflow et al. intrinsics now expand to
directly using the ADD instructions and checking for a CC 3 result.
llvm-svn: 331203
Diffstat (limited to 'llvm/test/CodeGen/SystemZ')
43 files changed, 10404 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/SystemZ/asm-18.ll b/llvm/test/CodeGen/SystemZ/asm-18.ll index f7be9b45eb6..16b4745d2cd 100644 --- a/llvm/test/CodeGen/SystemZ/asm-18.ll +++ b/llvm/test/CodeGen/SystemZ/asm-18.ll @@ -748,3 +748,78 @@ define void @f34(i32 *%ptr1, i32 *%ptr2) { store i32 %sel2, i32 *%ptr1 ret void } + +; Test immediate addition with overflow involving high registers. +define void @f35() { +; CHECK-LABEL: f35: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -32768 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepb [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 1 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepc [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 32767 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepd [[REG]], [[REGCC]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res1, i32 -32768) + %val1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res2 = call i32 asm "stepb $0, $2", "=h,h,d"(i32 %val1, i1 %obit1) + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res2, i32 1) + %val2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res3 = call i32 asm "stepc $0, $2", "=h,h,d"(i32 %val2, i1 %obit2) + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res3, i32 32767) + %val3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + call void asm sideeffect "stepd $0, $1", "h,d"(i32 %val3, i1 %obit3) + ret void +} + +; Test large immediate addition with overflow involving high registers. 
+define void @f36() { +; CHECK-LABEL: f36: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -2147483648 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepb [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 1 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepc [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 2147483647 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepd [[REG]], [[REGCC]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res1, i32 -2147483648) + %val1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res2 = call i32 asm "stepb $0, $2", "=h,h,d"(i32 %val1, i1 %obit1) + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res2, i32 1) + %val2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res3 = call i32 asm "stepc $0, $2", "=h,h,d"(i32 %val2, i1 %obit2) + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res3, i32 2147483647) + %val3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + call void asm sideeffect "stepd $0, $1", "h,d"(i32 %val3, i1 %obit3) + ret void +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-add-08.ll b/llvm/test/CodeGen/SystemZ/int-add-08.ll index 75b85d0888a..674fa902d21 100644 --- a/llvm/test/CodeGen/SystemZ/int-add-08.ll +++ b/llvm/test/CodeGen/SystemZ/int-add-08.ll @@ -123,12 +123,14 @@ define void @f8(i128 *%ptr0) { %ptr2 = getelementptr i128, i128 *%ptr0, i128 4 %ptr3 = getelementptr i128, i128 *%ptr0, i128 6 %ptr4 = getelementptr i128, i128 *%ptr0, i128 8 + %ptr5 = getelementptr i128, i128 *%ptr0, i128 10 %val0 = load i128 , i128 *%ptr0 %val1 = load i128 , i128 *%ptr1 %val2 = load i128 , i128 *%ptr2 %val3 = load i128 , i128 *%ptr3 
%val4 = load i128 , i128 *%ptr4 + %val5 = load i128 , i128 *%ptr5 %retptr = call i128 *@foo() @@ -138,7 +140,8 @@ define void @f8(i128 *%ptr0) { %add2 = add i128 %add1, %val2 %add3 = add i128 %add2, %val3 %add4 = add i128 %add3, %val4 - store i128 %add4, i128 *%retptr + %add5 = add i128 %add4, %val5 + store i128 %add5, i128 *%retptr ret void } diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-01.ll b/llvm/test/CodeGen/SystemZ/int-sadd-01.ll new file mode 100644 index 00000000000..57023b0a6c8 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-01.ll @@ -0,0 +1,325 @@ +; Test 32-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check AR. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ar %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ar %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: ar %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the A range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: a %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned A range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: a %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use AY instead of A. 
+define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: ay %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ay %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: a %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AY range. 
+define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: ay %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: ay %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: a %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that A allows an index. 
+define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: a %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: ay %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use A rather than AR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: a %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-02.ll b/llvm/test/CodeGen/SystemZ/int-sadd-02.ll new file mode 100644 index 00000000000..daa3b618412 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-02.ll @@ -0,0 +1,253 @@ +; Test 32-bit addition in which the second operand is a sign-extended +; i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check the low end of the AH range. +define zeroext i1 @f1(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ah %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AH range. 
+define zeroext i1 @f2(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ah %r3, 4094(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2047 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which should use AHY instead of AH. +define zeroext i1 @f3(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: ahy %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2048 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AHY range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: ahy %r3, 524286(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: ah %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AHY range. +define zeroext i1 @f6(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: ahy %r3, -2(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AHY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ahy %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f8(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: ah %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: ah %r4, 4094({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AHY allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: ahy %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: ah %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f12(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: ah %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-03.ll b/llvm/test/CodeGen/SystemZ/int-sadd-03.ll new file mode 100644 index 00000000000..3feb70e3a06 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-03.ll @@ -0,0 +1,269 @@ +; Test 64-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check AGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: agr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: agr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... 
and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check AG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: ag %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned AG range. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: ag %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: ag %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: ag %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AG range. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: ag %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: ag %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that AG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: ag %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use AG rather than AGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: ag %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-04.ll b/llvm/test/CodeGen/SystemZ/int-sadd-04.ll new file mode 100644 index 00000000000..30f2df37756 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-04.ll @@ -0,0 +1,312 @@ +; Test additions between an i64 and a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check AGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: agfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: agfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check AGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned AGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: agf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: agf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: agf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: agf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that AGF allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: agf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use AGF rather than AGFR. +define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: agf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32 , i32 *%ptr0 + %val1 = load i32 , i32 *%ptr1 + %val2 = load i32 , i32 *%ptr2 + %val3 = load i32 , i32 *%ptr3 + %val4 = load i32 , i32 *%ptr4 + %val5 = load i32 , i32 *%ptr5 + %val6 = load i32 , i32 *%ptr6 + %val7 = load i32 , i32 *%ptr7 + %val8 = load i32 , i32 *%ptr8 + %val9 = load i32 , i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 
%frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue 
{i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-05.ll b/llvm/test/CodeGen/SystemZ/int-sadd-05.ll new file mode 100644 index 00000000000..ea2f4895da4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-05.ll @@ -0,0 +1,186 @@ +; Test additions between an i64 and a sign-extended i16 on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare i64 @foo() + +; Check AGH with no displacement. +define zeroext i1 @f1(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: agh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned AGH range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agh %r3, 524286(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f5(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: agh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AGH range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agh %r3, -2(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGH range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: agh %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: agh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that AGH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agh %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f11(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: agh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: agh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-06.ll b/llvm/test/CodeGen/SystemZ/int-sadd-06.ll new file mode 100644 index 00000000000..f389762f2b8 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-06.ll @@ -0,0 +1,212 @@ +; Test 32-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check additions of 1. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ahi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the AHI range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ahi %r3, 32767 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which must use AFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: afi %r3, 32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the signed 32-bit range. 
+define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 2147483647) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which is treated as a negative value. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: afi %r3, -2147483648 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative AHI range. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: ahi %r3, -1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AHI range. 
+define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ahi %r3, -32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use AFI instead. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: afi %r3, -32769 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the signed 32-bit range. +define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: afi %r3, -2147483648 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which is treated as a positive value. 
+define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -2147483649) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: ahi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: ahi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-07.ll b/llvm/test/CodeGen/SystemZ/int-sadd-07.ll new file mode 100644 index 00000000000..d800eb00c91 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-07.ll @@ -0,0 +1,214 @@ +; Test 64-bit addition in which the second operand is constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check additions of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: aghi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit + +} + +; Check the high end of the AGHI range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: aghi %r3, 32767 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use AGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agfi %r3, 32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the AGFI range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 2147483647 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 2147483647) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: llilh [[REG1:%r[0-9]+]], 32768 +; CHECK: agr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative AGHI range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: aghi %r3, -1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGHI range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: aghi %r3, -32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use AGFI instead. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r3, -32769 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGFI range. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r3, -2147483648 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use register addition instead. 
+define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295 +; CHECK: agr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -2147483649) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: aghi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: aghi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-08.ll b/llvm/test/CodeGen/SystemZ/int-sadd-08.ll new file mode 100644 index 00000000000..e9be58b31ce --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-08.ll @@ -0,0 +1,490 @@ +; Test 32-bit additions of constants to memory. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: asi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: asi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. +define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], 128 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. 
+define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: asi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], -129 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ASI range. +define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: asi 524284(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: asi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ASI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: asi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: asi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ASI does not allow indices. 
+define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: asi 4(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use ASI. +define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: asi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val0, i32 127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val1, i32 127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val2, i32 127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} 
@llvm.sadd.with.overflow.i32(i32 %val3, i32 127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val4, i32 127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val5, i32 127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val6, i32 127) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val7, i32 127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val8, i32 127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val9, i32 127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val10, i32 127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val11, i32 127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val12, i32 127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val13, i32 127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + 
%res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val14, i32 127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val15, i32 127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; 
Check that adding -128 to a spilled value can use ASI. +define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: asi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val0, i32 -128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val1, i32 -128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val2, i32 -128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val3, i32 -128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val4, i32 -128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val5, i32 -128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} 
@llvm.sadd.with.overflow.i32(i32 %val6, i32 -128) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val7, i32 -128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val8, i32 -128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val9, i32 -128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val10, i32 -128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val11, i32 -128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val12, i32 -128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val13, i32 -128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val14, i32 -128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val15, i32 -128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, 
%add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: asi 0(%r2), 1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: asi 0(%r2), 1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-09.ll b/llvm/test/CodeGen/SystemZ/int-sadd-09.ll new file mode 100644 index 00000000000..1b26ddc277a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-09.ll @@ -0,0 +1,490 @@ +; Test 64-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: agsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: agsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], 128 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: agsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], -129 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned AGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: agsi 524280(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: agsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the AGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: agsi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: agsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that AGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: agsi 4(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use AGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: agsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val0, i64 127) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val1, i64 127) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val2, i64 127) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val3, i64 127) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val4, i64 127) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val5, i64 127) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val6, i64 127) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val7, i64 127) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val8, i64 127) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val9, i64 127) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val10, i64 127) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val11, i64 127) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val12, i64 127) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val13, i64 127) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val14, i64 127) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val15, i64 127) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check that adding -128 to a spilled value can use AGSI. 
+define zeroext i1 @f12(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f12: +; CHECK: agsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val0, i64 -128) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val1, i64 -128) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val2, i64 -128) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val3, i64 -128) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val4, i64 -128) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val5, i64 -128) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val6, i64 -128) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val7, i64 -128) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val8, i64 -128) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val9, i64 -128) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val10, i64 -128) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val11, i64 -128) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val12, i64 -128) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val13, i64 -128) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val14, i64 -128) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val15, i64 -128) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: agsi 0(%r2), 1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i64 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: agsi 0(%r2), 1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-01.ll b/llvm/test/CodeGen/SystemZ/int-ssub-01.ll new file mode 100644 index 00000000000..4ead06374f4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-01.ll @@ -0,0 +1,325 @@ +; Test 32-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check SR. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: sr %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: sr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: sr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the S range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: s %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned S range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: s %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use SY instead of S. 
+define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: sy %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: sy %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: s %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SY range. 
+define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: sy %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the SY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: sy %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: s %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that S allows an index. 
+define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: s %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: sy %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use S rather than SR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: s %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-02.ll b/llvm/test/CodeGen/SystemZ/int-ssub-02.ll new file mode 100644 index 00000000000..3af414674e9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-02.ll @@ -0,0 +1,253 @@ +; Test 32-bit subtraction in which the second operand is a sign-extended +; i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check the low end of the SH range. +define zeroext i1 @f1(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: sh %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SH range. 
+define zeroext i1 @f2(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: sh %r3, 4094(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2047 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which should use SHY instead of SH. +define zeroext i1 @f3(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: shy %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2048 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SHY range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: shy %r3, 524286(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: sh %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SHY range. +define zeroext i1 @f6(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: shy %r3, -2(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the SHY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: shy %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f8(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: sh %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: sh %r4, 4094({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SHY allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: shy %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: sh %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f12(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: sh %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-03.ll b/llvm/test/CodeGen/SystemZ/int-ssub-03.ll new file mode 100644 index 00000000000..71d7d7e400f --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-03.ll @@ -0,0 +1,269 @@ +; Test 64-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: sgr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: sgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... 
and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: sgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: sg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SG range. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: sg %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: sg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: sg %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SG range. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: sg %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: sg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: sg %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SG rather than SGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: sg %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-04.ll b/llvm/test/CodeGen/SystemZ/int-ssub-04.ll new file mode 100644 index 00000000000..201fb460669 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-04.ll @@ -0,0 +1,312 @@ +; Test subtractions between an i64 and a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: sgfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: sgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: sgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: sgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: sgf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: sgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: sgf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: sgf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: sgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SGF allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: sgf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SGF rather than SGFR. +define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: sgf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32 , i32 *%ptr0 + %val1 = load i32 , i32 *%ptr1 + %val2 = load i32 , i32 *%ptr2 + %val3 = load i32 , i32 *%ptr3 + %val4 = load i32 , i32 *%ptr4 + %val5 = load i32 , i32 *%ptr5 + %val6 = load i32 , i32 *%ptr6 + %val7 = load i32 , i32 *%ptr7 + %val8 = load i32 , i32 *%ptr8 + %val9 = load i32 , i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 
%frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue 
{i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-05.ll b/llvm/test/CodeGen/SystemZ/int-ssub-05.ll new file mode 100644 index 00000000000..d33f034a93c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-05.ll @@ -0,0 +1,186 @@ +; Test subtractions between an i64 and a sign-extended i16 on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare i64 @foo() + +; Check SGH with no displacement. +define zeroext i1 @f1(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: sgh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SGH range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: sgh %r3, 524286(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f5(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: sgh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SGH range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: sgh %r3, -2(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGH range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: sgh %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: sgh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SGH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: sgh %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f11(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: sgh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: sgh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-06.ll b/llvm/test/CodeGen/SystemZ/int-ssub-06.ll new file mode 100644 index 00000000000..5783c2bc838 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-06.ll @@ -0,0 +1,248 @@ +; Test 32-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check subtractions of 1. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ahi %r3, -1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the AHI range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ahi %r3, -32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which must use AFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: afi %r3, -32769 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the signed 32-bit range. 
+define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: afi %r3, -2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 2147483647) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which is treated as a negative value +; and must use a register. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: llilh [[REG1:%r[0-5]]], 32768 +; CHECK: sr %r3, [[REG1]] +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which is treated as a negative value, +; and can use AFI again. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 2147483649) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative AHI range. 
+define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ahi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AHI range. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: ahi %r3, 32767 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use AFI instead. +define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: afi %r3, 32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the signed 32-bit range. 
+define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -2147483647) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use a register. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: llilh [[REG1:%r[0-5]]], 32768 +; CHECK: sr %r3, [[REG1]] +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which is treated as a positive value. +define zeroext i1 @f12(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: afi %r3, -2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -2147483649) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f13(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: ahi %r3, -1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f14(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f14: +; CHECK: ahi %r3, -1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-07.ll b/llvm/test/CodeGen/SystemZ/int-ssub-07.ll new file mode 100644 index 00000000000..dd3b00a4bb0 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-07.ll @@ -0,0 +1,214 @@ +; Test 64-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check subtractions of 1. 
+define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: aghi %r3, -1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit + +} + +; Check the high end of the SGHI range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: aghi %r3, -32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use SGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agfi %r3, -32769 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the SGFI range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, -2147483648 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: llilf [[REG1:%r[0-9]+]], 2147483649 +; CHECK: sgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 2147483649) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative SGHI range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: aghi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGHI range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: aghi %r3, 32767 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use SGFI instead. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r3, 32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGFI range. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r3, 2147483647 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -2147483647) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use register subtraction instead. 
+define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: lgfi [[REG1:%r[0-9]+]], -2147483648 +; CHECK: sgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: aghi %r3, -1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: aghi %r3, -1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-08.ll b/llvm/test/CodeGen/SystemZ/int-ssub-08.ll new file mode 100644 index 00000000000..9ba91f03194 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-08.ll @@ -0,0 +1,490 @@ +; Test 32-bit subtractions of constants to memory. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: asi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: asi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an subtraction and a store. +define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], -129 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. 
+define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: asi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], 128 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ASI range. +define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: asi 524284(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: asi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ASI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: asi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: asi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ASI does not allow indices. 
+define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: asi 4(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 from a spilled value can use ASI. +define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: asi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val0, i32 128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val1, i32 128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val2, i32 128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, 
i1} @llvm.ssub.with.overflow.i32(i32 %val3, i32 128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val4, i32 128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val5, i32 128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val6, i32 128) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val7, i32 128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val8, i32 128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val9, i32 128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val10, i32 128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val11, i32 128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val12, i32 128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val13, i32 128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + 
%res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val14, i32 128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val15, i32 128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; 
Check that subtracting -127 from a spilled value can use ASI. +define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: asi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val0, i32 -127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val1, i32 -127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val2, i32 -127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val3, i32 -127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val4, i32 -127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val5, i32 -127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} 
@llvm.ssub.with.overflow.i32(i32 %val6, i32 -127) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val7, i32 -127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val8, i32 -127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val9, i32 -127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val10, i32 -127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val11, i32 -127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val12, i32 -127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val13, i32 -127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val14, i32 -127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val15, i32 -127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, 
%add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: asi 0(%r2), -1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: asi 0(%r2), -1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-09.ll b/llvm/test/CodeGen/SystemZ/int-ssub-09.ll new file mode 100644 index 00000000000..f5a4ee2ee12 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-09.ll @@ -0,0 +1,490 @@ +; Test 64-bit subtractions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: agsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: agsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an subtraction and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], -129 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: agsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], 128 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned AGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: agsi 524280(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: agsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the AGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: agsi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: agsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that AGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: agsi 4(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 to a spilled value can use AGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: agsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val0, i64 128) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val1, i64 128) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val2, i64 128) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val3, i64 128) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val4, i64 128) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val5, i64 128) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val6, i64 128) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val7, i64 128) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val8, i64 128) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val9, i64 128) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val10, i64 128) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val11, i64 128) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val12, i64 128) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val13, i64 128) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val14, i64 128) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val15, i64 128) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check that subtracting -127 from a spilled value can use AGSI. 
+define zeroext i1 @f12(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f12: +; CHECK: agsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val0, i64 -127) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val1, i64 -127) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val2, i64 -127) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val3, i64 -127) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val4, i64 -127) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val5, i64 -127) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val6, i64 -127) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val7, i64 -127) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val8, i64 -127) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val9, i64 -127) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val10, i64 -127) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val11, i64 -127) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val12, i64 -127) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val13, i64 -127) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val14, i64 -127) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val15, i64 -127) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: agsi 0(%r2), -1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i64 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: agsi 0(%r2), -1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sub-05.ll b/llvm/test/CodeGen/SystemZ/int-sub-05.ll index 9775298a7a2..f255dfc30da 100644 --- a/llvm/test/CodeGen/SystemZ/int-sub-05.ll +++ b/llvm/test/CodeGen/SystemZ/int-sub-05.ll @@ -132,12 +132,14 @@ define void @f8(i128 *%ptr0) { %ptr2 = getelementptr i128, i128 *%ptr0, i128 4 %ptr3 = getelementptr i128, i128 *%ptr0, i128 6 %ptr4 = getelementptr i128, i128 *%ptr0, i128 8 + %ptr5 = getelementptr i128, i128 *%ptr0, i128 10 %val0 = load i128 , i128 *%ptr0 %val1 = load i128 , i128 *%ptr1 %val2 = load i128 , i128 *%ptr2 %val3 = load i128 , i128 *%ptr3 %val4 = load i128 , i128 *%ptr4 + %val5 = load i128 , i128 *%ptr5 %retptr = call i128 *@foo() @@ -147,7 +149,8 @@ define void @f8(i128 *%ptr0) { %sub2 = sub i128 %sub1, %val2 %sub3 = sub i128 %sub2, %val3 %sub4 = sub i128 %sub3, %val4 - store i128 %sub4, i128 *%retptr + %sub5 = sub i128 %sub4, %val5 + store i128 %sub5, i128 *%retptr ret void } diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-01.ll b/llvm/test/CodeGen/SystemZ/int-uadd-01.ll new file mode 100644 index 00000000000..3871ba0cae4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-01.ll @@ -0,0 +1,314 @@ +; Test 32-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check ALR. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alr %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: alr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the AL range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: al %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AL range. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: al %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use ALY instead of AL. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: aly %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned ALY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: aly %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: al %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned ALY range. +define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: aly %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the ALY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: aly %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: al %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AL allows an index. +define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: al %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that ALY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: aly %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use AL rather than ALR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: al %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-02.ll b/llvm/test/CodeGen/SystemZ/int-uadd-02.ll new file mode 100644 index 00000000000..52b3af76113 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-02.ll @@ -0,0 +1,261 @@ +; Test 64-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check ALGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: algr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: algr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: algr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check ALG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: alg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned ALG range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: alg %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: alg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned ALG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: alg %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALG range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: alg %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: alg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that ALG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: alg %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use ALG rather than ALGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: alg %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call 
{i64, i1} @llvm.uadd.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-03.ll b/llvm/test/CodeGen/SystemZ/int-uadd-03.ll new file mode 100644 index 00000000000..d57f8a84411 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-03.ll @@ -0,0 +1,304 @@ +; Test additions between an i64 and a zero-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check ALGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: algfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: algfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: algfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check ALGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: algf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned ALGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: algf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: algf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned ALGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: algf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: algf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: algf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that ALGF allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: algf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use ALGF rather than ALGFR. 
+define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: algf %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = zext i32 %frob0 to i64 + %ext1 = zext i32 %frob1 to i64 + %ext2 = zext i32 %frob2 to i64 + %ext3 = zext i32 %frob3 to i64 + %ext4 = zext i32 %frob4 to i64 + %ext5 = zext i32 %frob5 to i64 + %ext6 = zext i32 %frob6 to i64 + %ext7 = zext i32 %frob7 to i64 + %ext8 = zext i32 %frob8 to i64 + %ext9 = zext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} 
%t0, 1 + %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-04.ll b/llvm/test/CodeGen/SystemZ/int-uadd-04.ll new file mode 100644 index 00000000000..ab686636368 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-04.ll @@ -0,0 +1,95 @@ 
+; Test 32-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alfi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the ALFI range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 4294967295) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that negative values are treated as unsigned +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: alfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: alfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: alfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-05.ll b/llvm/test/CodeGen/SystemZ/int-uadd-05.ll new file mode 100644 index 00000000000..15a5488d19f --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-05.ll @@ -0,0 +1,112 @@ +; Test 64-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: algfi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the ALGFI range. 
+define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: algfi %r3, 4294967295 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 4294967295) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: llihl [[REG1:%r[0-9]+]], 1 +; CHECK: algr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 4294967296) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Likewise for negative values. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: lghi [[REG1:%r[0-9]+]], -1 +; CHECK: algr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: algfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... 
and the same with the inverted direction. +define void @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: algfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-06.ll b/llvm/test/CodeGen/SystemZ/int-uadd-06.ll new file mode 100644 index 00000000000..2c1864de3a5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-06.ll @@ -0,0 +1,80 @@ +; Test the three-operand form of 32-bit addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo(i32, i32, i32) + +; Check ALRK. +define i32 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f1: +; CHECK: alrk %r2, %r3, %r4 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risblg [[REG2:%r[0-5]]], [[REG1]], 31, 159, 35 +; CHECK: st [[REG2]], 0(%r5) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +; Check using the overflow result for a branch. +define i32 @f2(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: alrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; ... and the same with the inverted direction. 
+define i32 @f3(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: alrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; Check that we can still use ALR in obvious cases. +define i32 @f4(i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f4: +; CHECK: alr %r2, %r3 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risblg [[REG2:%r[0-5]]], [[REG1]], 31, 159, 35 +; CHECK: st [[REG2]], 0(%r4) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-07.ll b/llvm/test/CodeGen/SystemZ/int-uadd-07.ll new file mode 100644 index 00000000000..85c81c634db --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-07.ll @@ -0,0 +1,80 @@ +; Test the three-operand form of 64-bit addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo(i64, i64, i64) + +; Check ALGRK. +define i64 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f1: +; CHECK: algrk %r2, %r3, %r4 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 35 +; CHECK: stg [[REG2]], 0(%r5) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +; Check using the overflow result for a branch. 
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: algrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; ... and the same with the inverted direction. +define i64 @f3(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: algrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; Check that we can still use ALGR in obvious cases. +define i64 @f4(i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f4: +; CHECK: algr %r2, %r3 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 35 +; CHECK: stg [[REG2]], 0(%r4) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-08.ll b/llvm/test/CodeGen/SystemZ/int-uadd-08.ll new file mode 100644 index 00000000000..5a069db6c25 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-08.ll @@ -0,0 +1,142 @@ +; Test 32-bit addition in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the ALHSIK range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which must use ALFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: alfi %r3, 32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative ALHSIK range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the ALHSIK range. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use ALFI instead. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: alfi %r3, 4294934527 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-09.ll b/llvm/test/CodeGen/SystemZ/int-uadd-09.ll new file mode 100644 index 00000000000..cf59fb21861 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-09.ll @@ -0,0 +1,140 @@ +; Test 64-bit addition in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() + +; Check additions of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the ALGHSIK range. 
+define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use ALGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: algfi %r3, 32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative ALGHSIK range. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALGHSIK range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Test the next value down, which cannot use either ALGHSIK or ALGFI. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK-NOT: alghsik +; CHECK-NOT: algfi +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-10.ll b/llvm/test/CodeGen/SystemZ/int-uadd-10.ll new file mode 100644 index 00000000000..ba328ea540d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-10.ll @@ -0,0 +1,480 @@ +; Test 32-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: alsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: alsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. 
+define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: alfi [[VAL]], 128 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: alsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: alfi [[VAL]], 4294967167 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ALSI range. 
+define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: alsi 524284(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: alsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ALSI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: alsi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: alsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ALSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: alsi 4(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use ALSI. 
+define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: alsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val0, i32 127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val1, i32 127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val2, i32 127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val3, i32 127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val4, i32 127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val5, i32 127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val6, i32 127) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val7, i32 127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val8, i32 127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val9, i32 127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val10, i32 127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val11, i32 127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val12, i32 127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val13, i32 127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val14, i32 127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val15, i32 127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check that adding -128 to a spilled value can use ALSI. 
+define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: alsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val0, i32 -128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val1, i32 -128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val2, i32 -128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val3, i32 -128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val4, i32 -128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val5, i32 -128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val6, i32 -128) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val7, i32 -128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val8, i32 -128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val9, i32 -128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val10, i32 -128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val11, i32 -128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val12, i32 -128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val13, i32 -128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val14, i32 -128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val15, i32 -128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: alsi 0(%r2), 1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: alsi 0(%r2), 1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-11.ll b/llvm/test/CodeGen/SystemZ/int-uadd-11.ll new file mode 100644 index 00000000000..5fb5b78852a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-11.ll @@ -0,0 +1,349 @@ +; Test 64-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: algsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: algsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: algfi [[VAL]], 128 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: algsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL1:%r[0-5]]], 0(%r3) +; CHECK: lghi [[VAL2:%r[0-9]+]], -129 +; CHECK: algr [[VAL2]], [[VAL1]] +; CHECK-DAG: stg [[VAL2]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ALGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: algsi 524280(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: algsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the ALGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: algsi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: algsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that ALGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: algsi 8(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 8 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use ALGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: algsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val0, i64 127) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val1, i64 127) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val2, i64 127) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val3, i64 127) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val4, i64 127) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val5, i64 127) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val6, i64 127) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val7, i64 127) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val8, i64 127) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val9, i64 127) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val10, i64 127) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val11, i64 127) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val12, i64 127) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val13, i64 127) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val14, i64 127) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val15, i64 127) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f12(i64 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: algsi 0(%r2), 1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: algsi 0(%r2), 1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-01.ll b/llvm/test/CodeGen/SystemZ/int-usub-01.ll new file mode 100644 index 00000000000..ba9de4adc94 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-01.ll @@ -0,0 +1,325 @@ +; Test 32-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check SLR. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: slr %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: slr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: slr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the SL range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: sl %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SL range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: sl %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use SLY instead of SL. 
+define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: sly %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SLY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: sly %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: sl %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SLY range. 
+define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: sly %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the SLY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: sly %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: sl %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SL allows an index. 
+define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: sl %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SLY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: sly %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SL rather than SLR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: sl %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-02.ll b/llvm/test/CodeGen/SystemZ/int-usub-02.ll new file mode 100644 index 00000000000..de46d655a8c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-02.ll @@ -0,0 +1,269 @@ +; Test 64-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SLGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: slgr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: slgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: slgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SLG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: slg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SLG range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: slg %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: slg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SLG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: slg %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SLG range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: slg %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: slg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SLG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: slg %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SLG rather than SLGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: slg %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call 
{i64, i1} @llvm.usub.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-03.ll b/llvm/test/CodeGen/SystemZ/int-usub-03.ll new file mode 100644 index 00000000000..4e5f99fcee2 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-03.ll @@ -0,0 +1,312 @@ +; Test subtraction of a zero-extended i32 from an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SLGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: slgfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: slgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: slgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SLGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: slgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SLGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: slgf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: slgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SLGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: slgf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SLGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: slgf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: slgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SLGF allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: slgf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SLGF rather than SLGFR. +define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: slgf %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 
*%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = zext i32 %frob0 to i64 + %ext1 = zext i32 %frob1 to i64 + %ext2 = zext i32 %frob2 to i64 + %ext3 = zext i32 %frob3 to i64 + %ext4 = zext i32 %frob4 to i64 + %ext5 = zext i32 %frob5 to i64 + %ext6 = zext i32 %frob6 to i64 + %ext7 = zext i32 %frob7 to i64 + %ext8 = zext i32 %frob8 to i64 + %ext9 = zext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} 
%t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-04.ll b/llvm/test/CodeGen/SystemZ/int-usub-04.ll new file mode 100644 index 00000000000..d704f62f501 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-04.ll @@ -0,0 +1,98 @@ +; Test 32-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: slfi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the SLFI range. 
+define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: slfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 4294967295) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that negative values are treated as unsigned +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: slfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: slfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: slfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-05.ll b/llvm/test/CodeGen/SystemZ/int-usub-05.ll new file mode 100644 index 00000000000..ffa1e90c843 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-05.ll @@ -0,0 +1,116 @@ +; Test 64-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: slgfi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the SLGFI range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: slgfi %r3, 4294967295 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 4294967295) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. 
+define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: llihl [[REG1:%r[0-9]+]], 1 +; CHECK: slgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 4294967296) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Likewise for negative values. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: lghi [[REG1:%r[0-9]+]], -1 +; CHECK: slgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: slgfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: slgfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-06.ll b/llvm/test/CodeGen/SystemZ/int-usub-06.ll new file mode 100644 index 00000000000..d7d47889f9e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-06.ll @@ -0,0 +1,82 @@ +; Test the three-operand form of 32-bit subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo(i32, i32, i32) + +; Check SLRK. +define i32 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f1: +; CHECK: slrk %r2, %r3, %r4 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: srl [[REG]], 31 +; CHECK: st [[REG]], 0(%r5) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +; Check using the overflow result for a branch. +define i32 @f2(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: slrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; ... and the same with the inverted direction. 
+define i32 @f3(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: slrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; Check that we can still use SLR in obvious cases. +define i32 @f4(i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f4: +; CHECK: slr %r2, %r3 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: srl [[REG]], 31 +; CHECK: st [[REG]], 0(%r4) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-07.ll b/llvm/test/CodeGen/SystemZ/int-usub-07.ll new file mode 100644 index 00000000000..85ceb8adef5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-07.ll @@ -0,0 +1,82 @@ +; Test the three-operand form of 64-bit subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo(i64, i64, i64) + +; Check SLGRK. +define i64 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f1: +; CHECK: slgrk %r2, %r3, %r4 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: afi [[REG1]], -536870912 +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 33 +; CHECK: stg [[REG2]], 0(%r5) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +; Check using the overflow result for a branch. 
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: slgrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; ... and the same with the inverted direction. +define i64 @f3(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: slgrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; Check that we can still use SLGR in obvious cases. +define i64 @f4(i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f4: +; CHECK: slgr %r2, %r3 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: afi [[REG1]], -536870912 +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 33 +; CHECK: stg [[REG2]], 0(%r4) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-08.ll b/llvm/test/CodeGen/SystemZ/int-usub-08.ll new file mode 100644 index 00000000000..d282404e840 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-08.ll @@ -0,0 +1,148 @@ +; Test 32-bit subtraction in which the second operand is constant and in which +; three-operand forms are available. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the ALHSIK range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use SLFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: slfi %r3, 32769 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative ALHSIK range. 
+define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the ALHSIK range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use SLFI instead. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: slfi %r3, 4294934528 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-09.ll b/llvm/test/CodeGen/SystemZ/int-usub-09.ll new file mode 100644 index 00000000000..ce5fafabe59 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-09.ll @@ -0,0 +1,145 @@ +; Test 64-bit subtraction in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() + +; Check subtraction of 1. 
+define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the ALGHSIK range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use SLGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: slgfi %r3, 32769 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative ALGHSIK range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALGHSIK range. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Test the next value down, which cannot use either ALGHSIK or SLGFI. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK-NOT: alghsik +; CHECK-NOT: slgfi +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-10.ll b/llvm/test/CodeGen/SystemZ/int-usub-10.ll new file mode 100644 index 00000000000..97f08777894 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-10.ll @@ -0,0 +1,490 @@ +; Test 32-bit subtractions of constants from memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: alsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: alsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use a subtraction and a store. 
+define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: slfi [[VAL]], 129 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: alsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: slfi [[VAL]], 4294967168 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ASI range. 
+define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: alsi 524284(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: alsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ALSI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: alsi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: alsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ALSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: alsi 4(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 from a spilled value can use ALSI. 
+define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: alsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val0, i32 128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val1, i32 128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val2, i32 128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val3, i32 128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val4, i32 128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val5, i32 128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val6, i32 128) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val7, i32 128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val8, i32 128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val9, i32 128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val10, i32 128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val11, i32 128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val12, i32 128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val13, i32 128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val14, i32 128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val15, i32 128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check that subtracting -127 from a spilled value can use ALSI. 
+define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: alsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val0, i32 -127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val1, i32 -127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val2, i32 -127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val3, i32 -127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val4, i32 -127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val5, i32 -127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val6, i32 -127) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val7, i32 -127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val8, i32 -127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val9, i32 -127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val10, i32 -127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val11, i32 -127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val12, i32 -127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val13, i32 -127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val14, i32 -127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val15, i32 -127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: alsi 0(%r2), -1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: alsi 0(%r2), -1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-11.ll b/llvm/test/CodeGen/SystemZ/int-usub-11.ll new file mode 100644 index 00000000000..cef5216b2c5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-11.ll @@ -0,0 +1,359 @@ +; Test 64-bit subtractions of constants from memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: algsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: algsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: slgfi [[VAL]], 129 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: algsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL1:%r[0-5]]], 0(%r3) +; CHECK: lghi [[VAL2:%r[0-9]+]], -128 +; CHECK: slgr [[VAL1]], [[VAL2]] +; CHECK-DAG: stg [[VAL1]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ALGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: algsi 524280(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: algsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the ALGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: algsi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: algsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that ALGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: algsi 8(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 8 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 from a spilled value can use ALGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: algsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val0, i64 128) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val1, i64 128) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val2, i64 128) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val3, i64 128) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val4, i64 128) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val5, i64 128) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val6, i64 128) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val7, i64 128) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val8, i64 128) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val9, i64 128) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val10, i64 128) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val11, i64 128) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val12, i64 128) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val13, i64 128) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val14, i64 128) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val15, i64 128) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f12(i64 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: algsi 0(%r2), -1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: algsi 0(%r2), -1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + |