diff options
Diffstat (limited to 'llvm/test/CodeGen/SystemZ')
43 files changed, 10404 insertions, 2 deletions
diff --git a/llvm/test/CodeGen/SystemZ/asm-18.ll b/llvm/test/CodeGen/SystemZ/asm-18.ll index f7be9b45eb6..16b4745d2cd 100644 --- a/llvm/test/CodeGen/SystemZ/asm-18.ll +++ b/llvm/test/CodeGen/SystemZ/asm-18.ll @@ -748,3 +748,78 @@ define void @f34(i32 *%ptr1, i32 *%ptr2) { store i32 %sel2, i32 *%ptr1 ret void } + +; Test immediate addition with overflow involving high registers. +define void @f35() { +; CHECK-LABEL: f35: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -32768 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepb [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 1 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepc [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 32767 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepd [[REG]], [[REGCC]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res1, i32 -32768) + %val1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res2 = call i32 asm "stepb $0, $2", "=h,h,d"(i32 %val1, i1 %obit1) + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res2, i32 1) + %val2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res3 = call i32 asm "stepc $0, $2", "=h,h,d"(i32 %val2, i1 %obit2) + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res3, i32 32767) + %val3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + call void asm sideeffect "stepd $0, $1", "h,d"(i32 %val3, i1 %obit3) + ret void +} + +; Test large immediate addition with overflow involving high registers. 
+define void @f36() { +; CHECK-LABEL: f36: +; CHECK: stepa [[REG:%r[0-5]]] +; CHECK: aih [[REG]], -2147483648 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepb [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 1 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepc [[REG]], [[REGCC]] +; CHECK: aih [[REG]], 2147483647 +; CHECK: ipm [[REGCC:%r[0-5]]] +; CHECK: afi [[REGCC]], 1342177280 +; CHECK: srl [[REGCC]], 31 +; CHECK: stepd [[REG]], [[REGCC]] +; CHECK: br %r14 + %res1 = call i32 asm "stepa $0", "=h"() + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res1, i32 -2147483648) + %val1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res2 = call i32 asm "stepb $0, $2", "=h,h,d"(i32 %val1, i1 %obit1) + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res2, i32 1) + %val2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res3 = call i32 asm "stepc $0, $2", "=h,h,d"(i32 %val2, i1 %obit2) + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %res3, i32 2147483647) + %val3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + call void asm sideeffect "stepd $0, $1", "h,d"(i32 %val3, i1 %obit3) + ret void +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-add-08.ll b/llvm/test/CodeGen/SystemZ/int-add-08.ll index 75b85d0888a..674fa902d21 100644 --- a/llvm/test/CodeGen/SystemZ/int-add-08.ll +++ b/llvm/test/CodeGen/SystemZ/int-add-08.ll @@ -123,12 +123,14 @@ define void @f8(i128 *%ptr0) { %ptr2 = getelementptr i128, i128 *%ptr0, i128 4 %ptr3 = getelementptr i128, i128 *%ptr0, i128 6 %ptr4 = getelementptr i128, i128 *%ptr0, i128 8 + %ptr5 = getelementptr i128, i128 *%ptr0, i128 10 %val0 = load i128 , i128 *%ptr0 %val1 = load i128 , i128 *%ptr1 %val2 = load i128 , i128 *%ptr2 %val3 = load i128 , i128 *%ptr3 
%val4 = load i128 , i128 *%ptr4 + %val5 = load i128 , i128 *%ptr5 %retptr = call i128 *@foo() @@ -138,7 +140,8 @@ define void @f8(i128 *%ptr0) { %add2 = add i128 %add1, %val2 %add3 = add i128 %add2, %val3 %add4 = add i128 %add3, %val4 - store i128 %add4, i128 *%retptr + %add5 = add i128 %add4, %val5 + store i128 %add5, i128 *%retptr ret void } diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-01.ll b/llvm/test/CodeGen/SystemZ/int-sadd-01.ll new file mode 100644 index 00000000000..57023b0a6c8 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-01.ll @@ -0,0 +1,325 @@ +; Test 32-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check AR. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ar %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ar %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: ar %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the A range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: a %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned A range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: a %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use AY instead of A. 
+define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: ay %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ay %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: a %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AY range. 
+define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: ay %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: ay %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: a %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that A allows an index. 
+define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: a %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: ay %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use A rather than AR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: a %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-02.ll b/llvm/test/CodeGen/SystemZ/int-sadd-02.ll new file mode 100644 index 00000000000..daa3b618412 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-02.ll @@ -0,0 +1,253 @@ +; Test 32-bit addition in which the second operand is a sign-extended +; i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check the low end of the AH range. +define zeroext i1 @f1(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ah %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AH range. 
+define zeroext i1 @f2(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ah %r3, 4094(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2047 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which should use AHY instead of AH. +define zeroext i1 @f3(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: ahy %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2048 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AHY range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: ahy %r3, 524286(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: ah %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AHY range. +define zeroext i1 @f6(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: ahy %r3, -2(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AHY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ahy %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f8(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: ah %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: ah %r4, 4094({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AHY allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: ahy %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: ah %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f12(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: ah %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-03.ll b/llvm/test/CodeGen/SystemZ/int-sadd-03.ll new file mode 100644 index 00000000000..3feb70e3a06 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-03.ll @@ -0,0 +1,269 @@ +; Test 64-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check AGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: agr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: agr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... 
and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check AG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: ag %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned AG range. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: ag %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: ag %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: ag %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AG range. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: ag %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: ag %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that AG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: ag %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use AG rather than AGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: ag %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-04.ll b/llvm/test/CodeGen/SystemZ/int-sadd-04.ll new file mode 100644 index 00000000000..30f2df37756 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-04.ll @@ -0,0 +1,312 @@ +; Test additions between an i64 and a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check AGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: agfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: agfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check AGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned AGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: agf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: agf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: agf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: agf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that AGF allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: agf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use AGF rather than AGFR. +define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: agf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32 , i32 *%ptr0 + %val1 = load i32 , i32 *%ptr1 + %val2 = load i32 , i32 *%ptr2 + %val3 = load i32 , i32 *%ptr3 + %val4 = load i32 , i32 *%ptr4 + %val5 = load i32 , i32 *%ptr5 + %val6 = load i32 , i32 *%ptr6 + %val7 = load i32 , i32 *%ptr7 + %val8 = load i32 , i32 *%ptr8 + %val9 = load i32 , i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 
%frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue 
{i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-05.ll b/llvm/test/CodeGen/SystemZ/int-sadd-05.ll new file mode 100644 index 00000000000..ea2f4895da4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-05.ll @@ -0,0 +1,186 @@ +; Test additions between an i64 and a sign-extended i16 on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare i64 @foo() + +; Check AGH with no displacement. +define zeroext i1 @f1(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: agh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned AGH range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agh %r3, 524286(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f5(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: agh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned AGH range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agh %r3, -2(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGH range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: agh %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: agh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that AGH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agh %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f11(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: agh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: agh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-06.ll b/llvm/test/CodeGen/SystemZ/int-sadd-06.ll new file mode 100644 index 00000000000..f389762f2b8 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-06.ll @@ -0,0 +1,212 @@ +; Test 32-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check additions of 1. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ahi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the AHI range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ahi %r3, 32767 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which must use AFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: afi %r3, 32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the signed 32-bit range. 
+define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 2147483647) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which is treated as a negative value. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: afi %r3, -2147483648 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative AHI range. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: ahi %r3, -1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AHI range. 
+define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ahi %r3, -32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use AFI instead. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: afi %r3, -32769 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the signed 32-bit range. +define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: afi %r3, -2147483648 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which is treated as a positive value. 
+define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -2147483649) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: ahi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: ahi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-07.ll b/llvm/test/CodeGen/SystemZ/int-sadd-07.ll new file mode 100644 index 00000000000..d800eb00c91 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-07.ll @@ -0,0 +1,214 @@ +; Test 64-bit addition in which the second operand is constant. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check additions of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: aghi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit + +} + +; Check the high end of the AGHI range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: aghi %r3, 32767 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use AGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agfi %r3, 32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the AGFI range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, 2147483647 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 2147483647) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: llilh [[REG1:%r[0-9]+]], 32768 +; CHECK: agr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative AGHI range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: aghi %r3, -1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGHI range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: aghi %r3, -32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use AGFI instead. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r3, -32769 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the AGFI range. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r3, -2147483648 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use register addition instead. 
+define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: llihf [[REG1:%r[0-9]+]], 4294967295 +; CHECK: agr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -2147483649) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: aghi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: aghi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-08.ll b/llvm/test/CodeGen/SystemZ/int-sadd-08.ll new file mode 100644 index 00000000000..e9be58b31ce --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-08.ll @@ -0,0 +1,490 @@ +; Test 32-bit additions of constants to memory. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: asi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: asi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. +define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], 128 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. 
+define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: asi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], -129 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 -129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ASI range. +define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: asi 524284(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: asi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ASI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: asi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: asi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ASI does not allow indices. 
+define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: asi 4(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use ASI. +define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: asi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val0, i32 127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val1, i32 127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val2, i32 127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} 
@llvm.sadd.with.overflow.i32(i32 %val3, i32 127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val4, i32 127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val5, i32 127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val6, i32 127) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val7, i32 127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val8, i32 127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val9, i32 127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val10, i32 127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val11, i32 127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val12, i32 127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val13, i32 127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + 
%res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val14, i32 127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val15, i32 127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; 
Check that adding -128 to a spilled value can use ASI. +define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: asi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val0, i32 -128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val1, i32 -128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val2, i32 -128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val3, i32 -128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val4, i32 -128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val5, i32 -128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} 
@llvm.sadd.with.overflow.i32(i32 %val6, i32 -128) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val7, i32 -128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val8, i32 -128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val9, i32 -128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val10, i32 -128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val11, i32 -128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val12, i32 -128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val13, i32 -128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val14, i32 -128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %val15, i32 -128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, 
%add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: asi 0(%r2), 1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: asi 0(%r2), 1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.sadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sadd-09.ll b/llvm/test/CodeGen/SystemZ/int-sadd-09.ll new file mode 100644 index 00000000000..1b26ddc277a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-sadd-09.ll @@ -0,0 +1,490 @@ +; Test 64-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: agsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: agsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], 128 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: agsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], -129 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 -129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned AGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: agsi 524280(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: agsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the AGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: agsi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: agsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that AGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: agsi 4(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use AGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: agsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val0, i64 127) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val1, i64 127) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val2, i64 127) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val3, i64 127) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val4, i64 127) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val5, i64 127) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val6, i64 127) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val7, i64 127) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val8, i64 127) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val9, i64 127) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val10, i64 127) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val11, i64 127) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val12, i64 127) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val13, i64 127) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val14, i64 127) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val15, i64 127) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check that adding -128 to a spilled value can use AGSI. 
+define zeroext i1 @f12(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f12: +; CHECK: agsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val0, i64 -128) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val1, i64 -128) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val2, i64 -128) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val3, i64 -128) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val4, i64 -128) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val5, i64 -128) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val6, i64 -128) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val7, i64 -128) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val8, i64 -128) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val9, i64 -128) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val10, i64 -128) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val11, i64 -128) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val12, i64 -128) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val13, i64 -128) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val14, i64 -128) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %val15, i64 -128) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: agsi 0(%r2), 1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i64 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: agsi 0(%r2), 1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.sadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-01.ll b/llvm/test/CodeGen/SystemZ/int-ssub-01.ll new file mode 100644 index 00000000000..4ead06374f4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-01.ll @@ -0,0 +1,325 @@ +; Test 32-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check SR. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: sr %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: sr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: sr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the S range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: s %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned S range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: s %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use SY instead of S. 
+define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: sy %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: sy %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: s %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SY range. 
+define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: sy %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the SY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: sy %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: s %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that S allows an index. 
+define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: s %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: sy %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use S rather than SR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: s %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-02.ll b/llvm/test/CodeGen/SystemZ/int-ssub-02.ll new file mode 100644 index 00000000000..3af414674e9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-02.ll @@ -0,0 +1,253 @@ +; Test 32-bit subtraction in which the second operand is a sign-extended +; i16 memory value. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check the low end of the SH range. +define zeroext i1 @f1(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: sh %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SH range. 
+define zeroext i1 @f2(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: sh %r3, 4094(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2047 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which should use SHY instead of SH. +define zeroext i1 @f3(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: shy %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 2048 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SHY range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: shy %r3, 524286(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: sh %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SHY range. +define zeroext i1 @f6(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: shy %r3, -2(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the SHY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: shy %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f8(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: sh %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: sh %r4, 4094({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4094 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SHY allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: shy %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: sh %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f12(i32 %dummy, i32 %a, i16 *%src, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: sh %r3, 0(%r4) +; CHECK: st %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i32 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-03.ll b/llvm/test/CodeGen/SystemZ/int-ssub-03.ll new file mode 100644 index 00000000000..71d7d7e400f --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-03.ll @@ -0,0 +1,269 @@ +; Test 64-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: sgr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: sgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... 
and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: sgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: sg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SG range. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: sg %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: sg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: sg %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SG range. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: sg %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: sg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: sg %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SG rather than SGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: sg %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-04.ll b/llvm/test/CodeGen/SystemZ/int-ssub-04.ll new file mode 100644 index 00000000000..201fb460669 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-04.ll @@ -0,0 +1,312 @@ +; Test subtractions between an i64 and a sign-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: sgfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: sgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: sgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: sgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: sgf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: sgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: sgf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: sgf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: sgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SGF allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: sgf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = sext i32 %b to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SGF rather than SGFR. +define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: sgf %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32 , i32 *%ptr0 + %val1 = load i32 , i32 *%ptr1 + %val2 = load i32 , i32 *%ptr2 + %val3 = load i32 , i32 *%ptr3 + %val4 = load i32 , i32 *%ptr4 + %val5 = load i32 , i32 *%ptr5 + %val6 = load i32 , i32 *%ptr6 + %val7 = load i32 , i32 *%ptr7 + %val8 = load i32 , i32 *%ptr8 + %val9 = load i32 , i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 
%frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = sext i32 %frob0 to i64 + %ext1 = sext i32 %frob1 to i64 + %ext2 = sext i32 %frob2 to i64 + %ext3 = sext i32 %frob3 to i64 + %ext4 = sext i32 %frob4 to i64 + %ext5 = sext i32 %frob5 to i64 + %ext6 = sext i32 %frob6 to i64 + %ext7 = sext i32 %frob7 to i64 + %ext8 = sext i32 %frob8 to i64 + %ext9 = sext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue 
{i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-05.ll b/llvm/test/CodeGen/SystemZ/int-ssub-05.ll new file mode 100644 index 00000000000..d33f034a93c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-05.ll @@ -0,0 +1,186 @@ +; Test subtractions between an i64 and a sign-extended i16 on z14. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z14 | FileCheck %s + +declare i64 @foo() + +; Check SGH with no displacement. +define zeroext i1 @f1(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: sgh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SGH range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: sgh %r3, 524286(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262143 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f5(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: agfi %r4, 524288 +; CHECK: sgh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SGH range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: sgh %r3, -2(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -1 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGH range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: sgh %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262144 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next halfword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, -524290 +; CHECK: sgh %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %ptr = getelementptr i16, i16 *%src, i64 -262145 + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SGH allows an index. +define zeroext i1 @f9(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: sgh %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: lghi %r2, 0 +; CHECK-DAG: locghio %r2, 1 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i16 * + %half = load i16, i16 *%ptr + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f11(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: sgh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i16 *%src, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: sgh %r3, 0(%r4) +; CHECK: stg %r3, 0(%r5) +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %half = load i16, i16 *%src + %b = sext i16 %half to i64 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-06.ll b/llvm/test/CodeGen/SystemZ/int-ssub-06.ll new file mode 100644 index 00000000000..5783c2bc838 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-06.ll @@ -0,0 +1,248 @@ +; Test 32-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check subtractions of 1. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: ahi %r3, -1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the AHI range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: ahi %r3, -32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which must use AFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: afi %r3, -32769 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the signed 32-bit range. 
+define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: afi %r3, -2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 2147483647) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which is treated as a negative value +; and must use a register. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: llilh [[REG1:%r[0-5]]], 32768 +; CHECK: sr %r3, [[REG1]] +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which is treated as a negative value, +; and can use AFI again. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 2147483649) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative AHI range. 
+define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: ahi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the AHI range. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: ahi %r3, 32767 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use AFI instead. +define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: afi %r3, 32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the signed 32-bit range. 
+define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: afi %r3, 2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -2147483647) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use a register. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: llilh [[REG1:%r[0-5]]], 32768 +; CHECK: sr %r3, [[REG1]] +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -2147483648) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which is treated as a positive value. +define zeroext i1 @f12(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: afi %r3, -2147483647 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -2147483649) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f13(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: ahi %r3, -1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f14(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f14: +; CHECK: ahi %r3, -1 +; CHECK: st %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-07.ll b/llvm/test/CodeGen/SystemZ/int-ssub-07.ll new file mode 100644 index 00000000000..dd3b00a4bb0 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-07.ll @@ -0,0 +1,214 @@ +; Test 64-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check subtractions of 1. 
+define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: aghi %r3, -1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit + +} + +; Check the high end of the SGHI range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: aghi %r3, -32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use SGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: agfi %r3, -32769 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the SGFI range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: agfi %r3, -2147483648 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: llilf [[REG1:%r[0-9]+]], 2147483649 +; CHECK: sgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 2147483649) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative SGHI range. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: aghi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGHI range. 
+define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: aghi %r3, 32767 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use SGFI instead. +define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r3, 32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SGFI range. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r3, 2147483647 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -2147483647) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value down, which must use register subtraction instead. 
+define zeroext i1 @f10(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: lgfi [[REG1:%r[0-9]+]], -2147483648 +; CHECK: sgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -2147483648) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f11(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f11: +; CHECK: aghi %r3, -1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgo foo@PLT|bnor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f12(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f12: +; CHECK: aghi %r3, -1 +; CHECK: stg %r3, 0(%r4) +; CHECK: {{jgno foo@PLT|bor %r14}} +; CHECK: {{br %r14|jg foo@PLT}} + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-08.ll b/llvm/test/CodeGen/SystemZ/int-ssub-08.ll new file mode 100644 index 00000000000..9ba91f03194 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-08.ll @@ -0,0 +1,490 @@ +; Test 32-bit subtractions of constants to memory. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: asi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: asi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an subtraction and a store. +define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], -129 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. 
+define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: asi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: ahi [[VAL]], 128 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ASI range. +define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: asi 524284(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: asi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ASI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: asi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: asi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ASI does not allow indices. 
+define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: asi 4(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 from a spilled value can use ASI. +define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: asi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val0, i32 128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val1, i32 128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val2, i32 128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, 
i1} @llvm.ssub.with.overflow.i32(i32 %val3, i32 128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val4, i32 128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val5, i32 128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val6, i32 128) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val7, i32 128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val8, i32 128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val9, i32 128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val10, i32 128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val11, i32 128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val12, i32 128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val13, i32 128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + 
%res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val14, i32 128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val15, i32 128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; 
Check that subtracting -127 from a spilled value can use ASI. +define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: asi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val0, i32 -127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val1, i32 -127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val2, i32 -127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val3, i32 -127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val4, i32 -127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val5, i32 -127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} 
@llvm.ssub.with.overflow.i32(i32 %val6, i32 -127) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val7, i32 -127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val8, i32 -127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val9, i32 -127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val10, i32 -127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val11, i32 -127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val12, i32 -127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val13, i32 -127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val14, i32 -127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %val15, i32 -127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, 
%add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: asi 0(%r2), -1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: asi 0(%r2), -1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.ssub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-ssub-09.ll b/llvm/test/CodeGen/SystemZ/int-ssub-09.ll new file mode 100644 index 00000000000..f5a4ee2ee12 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-ssub-09.ll @@ -0,0 +1,490 @@ +; Test 64-bit subtractions of constants from memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: agsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: agsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use a subtraction and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], -129 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: agsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: aghi [[VAL]], 128 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], 1342177280 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned AGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: agsi 524280(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: agsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the AGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: agsi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: agsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that AGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: agsi 4(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], 1342177280 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 from a spilled value can use AGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: agsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val0, i64 128) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val1, i64 128) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val2, i64 128) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val3, i64 128) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val4, i64 128) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val5, i64 128) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val6, i64 128) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val7, i64 128) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val8, i64 128) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val9, i64 128) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val10, i64 128) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val11, i64 128) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val12, i64 128) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val13, i64 128) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val14, i64 128) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val15, i64 128) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check that subtracting -127 from a spilled value can use AGSI. 
+define zeroext i1 @f12(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f12: +; CHECK: agsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val0, i64 -127) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val1, i64 -127) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val2, i64 -127) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val3, i64 -127) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val4, i64 -127) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val5, i64 -127) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val6, i64 -127) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val7, i64 -127) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val8, i64 -127) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val9, i64 -127) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val10, i64 -127) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val11, i64 -127) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val12, i64 -127) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val13, i64 -127) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val14, i64 -127) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %val15, i64 -127) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: agsi 0(%r2), -1 +; CHECK: jgo foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i64 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: agsi 0(%r2), -1 +; CHECK: jgno foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.ssub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-sub-05.ll b/llvm/test/CodeGen/SystemZ/int-sub-05.ll index 9775298a7a2..f255dfc30da 100644 --- a/llvm/test/CodeGen/SystemZ/int-sub-05.ll +++ b/llvm/test/CodeGen/SystemZ/int-sub-05.ll @@ -132,12 +132,14 @@ define void @f8(i128 *%ptr0) { %ptr2 = getelementptr i128, i128 *%ptr0, i128 4 %ptr3 = getelementptr i128, i128 *%ptr0, i128 6 %ptr4 = getelementptr i128, i128 *%ptr0, i128 8 + %ptr5 = getelementptr i128, i128 *%ptr0, i128 10 %val0 = load i128 , i128 *%ptr0 %val1 = load i128 , i128 *%ptr1 %val2 = load i128 , i128 *%ptr2 %val3 = load i128 , i128 *%ptr3 %val4 = load i128 , i128 *%ptr4 + %val5 = load i128 , i128 *%ptr5 %retptr = call i128 *@foo() @@ -147,7 +149,8 @@ define void @f8(i128 *%ptr0) { %sub2 = sub i128 %sub1, %val2 %sub3 = sub i128 %sub2, %val3 %sub4 = sub i128 %sub3, %val4 - store i128 %sub4, i128 *%retptr + %sub5 = sub i128 %sub4, %val5 + store i128 %sub5, i128 *%retptr ret void } diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-01.ll b/llvm/test/CodeGen/SystemZ/int-uadd-01.ll new file mode 100644 index 00000000000..3871ba0cae4 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-01.ll @@ -0,0 +1,314 @@ +; Test 32-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check ALR. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alr %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: alr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the AL range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: al %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned AL range. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: al %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use ALY instead of AL. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: aly %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned ALY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: aly %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: al %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned ALY range. +define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: aly %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the ALY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: aly %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: al %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that AL allows an index. +define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: al %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that ALY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: aly %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use AL rather than ALR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: al %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-02.ll b/llvm/test/CodeGen/SystemZ/int-uadd-02.ll new file mode 100644 index 00000000000..52b3af76113 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-02.ll @@ -0,0 +1,261 @@ +; Test 64-bit addition in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check ALGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: algr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: algr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: algr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check ALG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: alg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned ALG range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: alg %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: alg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned ALG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: alg %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALG range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: alg %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: alg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that ALG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: alg %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use ALG rather than ALGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: alg %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call 
{i64, i1} @llvm.uadd.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-03.ll b/llvm/test/CodeGen/SystemZ/int-uadd-03.ll new file mode 100644 index 00000000000..d57f8a84411 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-03.ll @@ -0,0 +1,304 @@ +; Test additions between an i64 and a zero-extended i32. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check ALGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: algfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: algfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: algfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check ALGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: algf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned ALGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: algf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: algf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned ALGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: algf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: algf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: algf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that ALGF allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: algf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that additions of spilled values can use ALGF rather than ALGFR. 
+define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: algf %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 *%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = zext i32 %frob0 to i64 + %ext1 = zext i32 %frob1 to i64 + %ext2 = zext i32 %frob2 to i64 + %ext3 = zext i32 %frob3 to i64 + %ext4 = zext i32 %frob4 to i64 + %ext5 = zext i32 %frob5 to i64 + %ext6 = zext i32 %frob6 to i64 + %ext7 = zext i32 %frob7 to i64 + %ext8 = zext i32 %frob8 to i64 + %ext9 = zext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} 
%t0, 1 + %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-04.ll b/llvm/test/CodeGen/SystemZ/int-uadd-04.ll new file mode 100644 index 00000000000..ab686636368 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-04.ll @@ -0,0 +1,95 @@ 
+; Test 32-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alfi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the ALFI range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 4294967295) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that negative values are treated as unsigned +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: alfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: alfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: alfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-05.ll b/llvm/test/CodeGen/SystemZ/int-uadd-05.ll new file mode 100644 index 00000000000..15a5488d19f --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-05.ll @@ -0,0 +1,112 @@ +; Test 64-bit addition in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: algfi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the ALGFI range. 
+define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: algfi %r3, 4294967295 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 4294967295) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: llihl [[REG1:%r[0-9]+]], 1 +; CHECK: algr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 4294967296) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Likewise for negative values. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: lghi [[REG1:%r[0-9]+]], -1 +; CHECK: algr [[REG1]], %r3 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: algfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... 
and the same with the inverted direction. +define void @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: algfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-06.ll b/llvm/test/CodeGen/SystemZ/int-uadd-06.ll new file mode 100644 index 00000000000..2c1864de3a5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-06.ll @@ -0,0 +1,80 @@ +; Test the three-operand form of 32-bit addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo(i32, i32, i32) + +; Check ALRK. +define i32 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f1: +; CHECK: alrk %r2, %r3, %r4 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risblg [[REG2:%r[0-5]]], [[REG1]], 31, 159, 35 +; CHECK: st [[REG2]], 0(%r5) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +; Check using the overflow result for a branch. +define i32 @f2(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: alrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; ... and the same with the inverted direction. 
+define i32 @f3(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: alrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; Check that we can still use ALR in obvious cases. +define i32 @f4(i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f4: +; CHECK: alr %r2, %r3 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risblg [[REG2:%r[0-5]]], [[REG1]], 31, 159, 35 +; CHECK: st [[REG2]], 0(%r4) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-07.ll b/llvm/test/CodeGen/SystemZ/int-uadd-07.ll new file mode 100644 index 00000000000..85c81c634db --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-07.ll @@ -0,0 +1,80 @@ +; Test the three-operand form of 64-bit addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo(i64, i64, i64) + +; Check ALGRK. +define i64 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f1: +; CHECK: algrk %r2, %r3, %r4 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 35 +; CHECK: stg [[REG2]], 0(%r5) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +; Check using the overflow result for a branch. 
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: algrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; ... and the same with the inverted direction. +define i64 @f3(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: algrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; Check that we can still use ALGR in obvious cases. +define i64 @f4(i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f4: +; CHECK: algr %r2, %r3 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 35 +; CHECK: stg [[REG2]], 0(%r4) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-08.ll b/llvm/test/CodeGen/SystemZ/int-uadd-08.ll new file mode 100644 index 00000000000..5a069db6c25 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-08.ll @@ -0,0 +1,142 @@ +; Test 32-bit addition in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. 
+define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the ALHSIK range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value up, which must use ALFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: alfi %r3, 32768 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative ALHSIK range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the ALHSIK range. 
+define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use ALFI instead. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: alfi %r3, 4294934527 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-09.ll b/llvm/test/CodeGen/SystemZ/int-uadd-09.ll new file mode 100644 index 00000000000..cf59fb21861 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-09.ll @@ -0,0 +1,140 @@ +; Test 64-bit addition in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() + +; Check additions of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the ALGHSIK range. 
+define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use ALGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: algfi %r3, 32768 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative ALGHSIK range. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALGHSIK range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 35 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Test the next value down, which cannot use either ALGHSIK or ALGFI. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK-NOT: alghsik +; CHECK-NOT: algfi +; CHECK: br %r14 + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-10.ll b/llvm/test/CodeGen/SystemZ/int-uadd-10.ll new file mode 100644 index 00000000000..ba328ea540d --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-10.ll @@ -0,0 +1,480 @@ +; Test 32-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: alsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: alsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. 
+define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: alfi [[VAL]], 128 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: alsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: alfi [[VAL]], 4294967167 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 -129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ALSI range. 
+define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: alsi 524284(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: alsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ALSI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: alsi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: alsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ALSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: alsi 4(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use ALSI. 
+define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: alsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val0, i32 127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val1, i32 127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val2, i32 127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val3, i32 127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val4, i32 127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val5, i32 127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val6, i32 127) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val7, i32 127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val8, i32 127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val9, i32 127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val10, i32 127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val11, i32 127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val12, i32 127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val13, i32 127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val14, i32 127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val15, i32 127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check that adding -128 to a spilled value can use ALSI. 
+define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: alsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val0, i32 -128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val1, i32 -128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val2, i32 -128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val3, i32 -128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val4, i32 -128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val5, i32 -128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val6, i32 -128) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val7, i32 -128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val8, i32 -128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val9, i32 -128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val10, i32 -128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val11, i32 -128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val12, i32 -128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val13, i32 -128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val14, i32 -128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %val15, i32 -128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: alsi 0(%r2), 1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: alsi 0(%r2), 1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-uadd-11.ll b/llvm/test/CodeGen/SystemZ/int-uadd-11.ll new file mode 100644 index 00000000000..5fb5b78852a --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-uadd-11.ll @@ -0,0 +1,349 @@ +; Test 64-bit additions of constants to memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: algsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: algsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use an addition and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: algfi [[VAL]], 128 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: algsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL1:%r[0-5]]], 0(%r3) +; CHECK: lghi [[VAL2:%r[0-9]+]], -129 +; CHECK: algr [[VAL2]], [[VAL1]] +; CHECK-DAG: stg [[VAL2]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 -129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ALGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: algsi 524280(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: algsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the ALGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: algsi -524288(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: algsi 0(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that ALGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: algsi 8(%r2), 1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: risbg %r2, [[REG]], 63, 191, 35 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 8 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that adding 127 to a spilled value can use ALGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: algsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val0, i64 127) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val1, i64 127) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val2, i64 127) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val3, i64 127) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val4, i64 127) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val5, i64 127) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val6, i64 127) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val7, i64 127) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val8, i64 127) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val9, i64 127) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val10, i64 127) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val11, i64 127) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val12, i64 127) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val13, i64 127) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val14, i64 127) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %val15, i64 127) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f12(i64 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: algsi 0(%r2), 1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: algsi 0(%r2), 1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.uadd.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.uadd.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-01.ll b/llvm/test/CodeGen/SystemZ/int-usub-01.ll new file mode 100644 index 00000000000..ba9de4adc94 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-01.ll @@ -0,0 +1,325 @@ +; Test 32-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check SLR. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: slr %r3, %r4 +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f2(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: slr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f3(i32 %dummy, i32 %a, i32 %b, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: slr %r3, %r4 +; CHECK: st %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; Check the low end of the SL range. +define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: sl %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SL range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: sl %r3, 4092(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1023 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which should use SLY instead of SL. 
+define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: sly %r3, 4096(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 1024 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the aligned SLY range. +define zeroext i1 @f7(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: sly %r3, 524284(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f8(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: agfi %r4, 524288 +; CHECK: sl %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SLY range. 
+define zeroext i1 @f9(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f9: +; CHECK: sly %r3, -4(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the SLY range. +define zeroext i1 @f10(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f10: +; CHECK: sly %r3, -524288(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next word down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f11(i32 %dummy, i32 %a, i32 *%src, i32 *%res) { +; CHECK-LABEL: f11: +; CHECK: agfi %r4, -524292 +; CHECK: sl %r3, 0(%r4) +; CHECK-DAG: st %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SL allows an index. 
+define zeroext i1 @f12(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f12: +; CHECK: sl %r4, 4092({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4092 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that SLY allows an index. +define zeroext i1 @f13(i64 %src, i64 %index, i32 %a, i32 *%res) { +; CHECK-LABEL: f13: +; CHECK: sly %r4, 4096({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: st %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 4096 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SL rather than SLR. 
+define zeroext i1 @f14(i32 *%ptr0) { +; CHECK-LABEL: f14: +; CHECK: brasl %r14, foo@PLT +; CHECK: sl %r2, 16{{[04]}}(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %ret = call i32 @foo() + + %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %ret, i32 %val0) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add0, i32 %val1) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add1, i32 %val2) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add2, i32 %val3) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add3, i32 %val4) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add4, i32 %val5) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = 
call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add5, i32 %val6) + %add6 = extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add6, i32 %val7) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add7, i32 %val8) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %add8, i32 %val9) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-02.ll b/llvm/test/CodeGen/SystemZ/int-usub-02.ll new file mode 100644 index 00000000000..de46d655a8c --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-02.ll @@ -0,0 +1,269 @@ +; Test 64-bit subtraction in which the second operand is variable. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SLGR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: slgr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: slgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i64 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: slgr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SLG with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: slg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i64, i64 *%src + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SLG range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: slg %r3, 524280(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65535 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: slg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SLG range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: slg %r3, -8(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -1 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SLG range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: slg %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65536 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i64 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524296 +; CHECK: slg %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%src, i64 -65537 + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SLG allows an index. +define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: slg %r4, 524280({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524280 + %ptr = inttoptr i64 %add2 to i64 * + %b = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SLG rather than SLGR. 
+define zeroext i1 @f11(i64 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: slg %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i64, i64 *%ptr0, i64 2 + %ptr2 = getelementptr i64, i64 *%ptr0, i64 4 + %ptr3 = getelementptr i64, i64 *%ptr0, i64 6 + %ptr4 = getelementptr i64, i64 *%ptr0, i64 8 + %ptr5 = getelementptr i64, i64 *%ptr0, i64 10 + %ptr6 = getelementptr i64, i64 *%ptr0, i64 12 + %ptr7 = getelementptr i64, i64 *%ptr0, i64 14 + %ptr8 = getelementptr i64, i64 *%ptr0, i64 16 + %ptr9 = getelementptr i64, i64 *%ptr0, i64 18 + + %val0 = load i64, i64 *%ptr0 + %val1 = load i64, i64 *%ptr1 + %val2 = load i64, i64 *%ptr2 + %val3 = load i64, i64 *%ptr3 + %val4 = load i64, i64 *%ptr4 + %val5 = load i64, i64 *%ptr5 + %val6 = load i64, i64 *%ptr6 + %val7 = load i64, i64 *%ptr7 + %val8 = load i64, i64 *%ptr8 + %val9 = load i64, i64 *%ptr9 + + %ret = call i64 @foo() + + %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %val0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add0, i64 %val1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add1, i64 %val2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add2, i64 %val3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add3, i64 %val4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add4, i64 %val5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call 
{i64, i1} @llvm.usub.with.overflow.i64(i64 %add5, i64 %val6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add6, i64 %val7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add7, i64 %val8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add8, i64 %val9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-03.ll b/llvm/test/CodeGen/SystemZ/int-usub-03.ll new file mode 100644 index 00000000000..4e5f99fcee2 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-03.ll @@ -0,0 +1,312 @@ +; Test subtraction of a zero-extended i32 from an i64. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check SLGFR. +define zeroext i1 @f1(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: slgfr %r3, %r4 +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f2(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: slgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f3(i64 %dummy, i64 %a, i32 %b, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: slgfr %r3, %r4 +; CHECK: stg %r3, 0(%r5) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; Check SLGF with no displacement. +define zeroext i1 @f4(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: slgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %b = load i32, i32 *%src + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the aligned SLGF range. 
+define zeroext i1 @f5(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: slgf %r3, 524284(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131071 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword up, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f6(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: agfi %r4, 524288 +; CHECK: slgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative aligned SLGF range. +define zeroext i1 @f7(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: slgf %r3, -4(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -1 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the SLGF range. 
+define zeroext i1 @f8(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: slgf %r3, -524288(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131072 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next doubleword down, which needs separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f9(i64 %dummy, i64 %a, i32 *%src, i64 *%res) { +; CHECK-LABEL: f9: +; CHECK: agfi %r4, -524292 +; CHECK: slgf %r3, 0(%r4) +; CHECK-DAG: stg %r3, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%src, i64 -131073 + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that SLGF allows an index. 
+define zeroext i1 @f10(i64 %src, i64 %index, i64 %a, i64 *%res) { +; CHECK-LABEL: f10: +; CHECK: slgf %r4, 524284({{%r3,%r2|%r2,%r3}}) +; CHECK-DAG: stg %r4, 0(%r5) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %src, %index + %add2 = add i64 %add1, 524284 + %ptr = inttoptr i64 %add2 to i32 * + %b = load i32, i32 *%ptr + %bext = zext i32 %b to i64 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %bext) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check that subtractions of spilled values can use SLGF rather than SLGFR. +define zeroext i1 @f11(i32 *%ptr0) { +; CHECK-LABEL: f11: +; CHECK: brasl %r14, foo@PLT +; CHECK: slgf %r2, 160(%r15) +; CHECK: br %r14 + %ptr1 = getelementptr i32, i32 *%ptr0, i64 2 + %ptr2 = getelementptr i32, i32 *%ptr0, i64 4 + %ptr3 = getelementptr i32, i32 *%ptr0, i64 6 + %ptr4 = getelementptr i32, i32 *%ptr0, i64 8 + %ptr5 = getelementptr i32, i32 *%ptr0, i64 10 + %ptr6 = getelementptr i32, i32 *%ptr0, i64 12 + %ptr7 = getelementptr i32, i32 *%ptr0, i64 14 + %ptr8 = getelementptr i32, i32 *%ptr0, i64 16 + %ptr9 = getelementptr i32, i32 *%ptr0, i64 18 + + %val0 = load i32, i32 *%ptr0 + %val1 = load i32, i32 *%ptr1 + %val2 = load i32, i32 *%ptr2 + %val3 = load i32, i32 *%ptr3 + %val4 = load i32, i32 *%ptr4 + %val5 = load i32, i32 *%ptr5 + %val6 = load i32, i32 *%ptr6 + %val7 = load i32, i32 *%ptr7 + %val8 = load i32, i32 *%ptr8 + %val9 = load i32, i32 *%ptr9 + + %frob0 = add i32 %val0, 100 + %frob1 = add i32 %val1, 100 + %frob2 = add i32 %val2, 100 + %frob3 = add i32 %val3, 100 + %frob4 = add i32 %val4, 100 + %frob5 = add i32 %val5, 100 + %frob6 = add i32 %val6, 100 + %frob7 = add i32 %val7, 100 + %frob8 = add i32 %val8, 100 + %frob9 = add i32 %val9, 100 + + store i32 %frob0, i32 *%ptr0 + store i32 %frob1, i32 *%ptr1 + store i32 %frob2, i32 
*%ptr2 + store i32 %frob3, i32 *%ptr3 + store i32 %frob4, i32 *%ptr4 + store i32 %frob5, i32 *%ptr5 + store i32 %frob6, i32 *%ptr6 + store i32 %frob7, i32 *%ptr7 + store i32 %frob8, i32 *%ptr8 + store i32 %frob9, i32 *%ptr9 + + %ret = call i64 @foo() + + %ext0 = zext i32 %frob0 to i64 + %ext1 = zext i32 %frob1 to i64 + %ext2 = zext i32 %frob2 to i64 + %ext3 = zext i32 %frob3 to i64 + %ext4 = zext i32 %frob4 to i64 + %ext5 = zext i32 %frob5 to i64 + %ext6 = zext i32 %frob6 to i64 + %ext7 = zext i32 %frob7 to i64 + %ext8 = zext i32 %frob8 to i64 + %ext9 = zext i32 %frob9 to i64 + + %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %ret, i64 %ext0) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add0, i64 %ext1) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add1, i64 %ext2) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add2, i64 %ext3) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add3, i64 %ext4) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add4, i64 %ext5) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add5, i64 %ext6) + %add6 = extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add6, i64 %ext7) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} 
%t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add7, i64 %ext8) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %add8, i64 %ext9) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + + ret i1 %res9 +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-04.ll b/llvm/test/CodeGen/SystemZ/int-usub-04.ll new file mode 100644 index 00000000000..d704f62f501 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-04.ll @@ -0,0 +1,98 @@ +; Test 32-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: slfi %r3, 1 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the SLFI range. 
+define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: slfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 4294967295) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check that negative values are treated as unsigned +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: slfi %r3, 4294967295 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: slfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: slfi %r3, 1 +; CHECK: st %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-05.ll b/llvm/test/CodeGen/SystemZ/int-usub-05.ll new file mode 100644 index 00000000000..ffa1e90c843 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-05.ll @@ -0,0 +1,116 @@ +; Test 64-bit subtraction in which the second operand is constant. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check addition of 1. +define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: slgfi %r3, 1 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the SLGFI range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: slgfi %r3, 4294967295 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 4294967295) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must be loaded into a register first. 
+define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: llihl [[REG1:%r[0-9]+]], 1 +; CHECK: slgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 4294967296) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Likewise for negative values. +define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: lghi [[REG1:%r[0-9]+]], -1 +; CHECK: slgr %r3, [[REG1]] +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: slgfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK: slgfi %r3, 1 +; CHECK: stg %r3, 0(%r4) +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-06.ll b/llvm/test/CodeGen/SystemZ/int-usub-06.ll new file mode 100644 index 00000000000..d7d47889f9e --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-06.ll @@ -0,0 +1,82 @@ +; Test the three-operand form of 32-bit subtraction. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo(i32, i32, i32) + +; Check SLRK. +define i32 @f1(i32 %dummy, i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f1: +; CHECK: slrk %r2, %r3, %r4 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: srl [[REG]], 31 +; CHECK: st [[REG]], 0(%r5) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +; Check using the overflow result for a branch. +define i32 @f2(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f2: +; CHECK: slrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; ... and the same with the inverted direction. 
+define i32 @f3(i32 %dummy, i32 %a, i32 %b) { +; CHECK-LABEL: f3: +; CHECK: slrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lhi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i32 @foo(i32 0, i32 %a, i32 %b) + ret i32 %res + +exit: + ret i32 %val +} + +; Check that we can still use SLR in obvious cases. +define i32 @f4(i32 %a, i32 %b, i32 *%flag) { +; CHECK-LABEL: f4: +; CHECK: slr %r2, %r3 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: srl [[REG]], 31 +; CHECK: st [[REG]], 0(%r4) +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 %b) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + %ext = zext i1 %obit to i32 + store i32 %ext, i32 *%flag + ret i32 %val +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-07.ll b/llvm/test/CodeGen/SystemZ/int-usub-07.ll new file mode 100644 index 00000000000..85ceb8adef5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-07.ll @@ -0,0 +1,82 @@ +; Test the three-operand form of 64-bit addition. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo(i64, i64, i64) + +; Check SLGRK. +define i64 @f1(i64 %dummy, i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f1: +; CHECK: slgrk %r2, %r3, %r4 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: afi [[REG1]], -536870912 +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 33 +; CHECK: stg [[REG2]], 0(%r5) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +; Check using the overflow result for a branch. 
+define i64 @f2(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f2: +; CHECK: slgrk %r2, %r3, %r4 +; CHECK-NEXT: bnler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %call, label %exit + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; ... and the same with the inverted direction. +define i64 @f3(i64 %dummy, i64 %a, i64 %b) { +; CHECK-LABEL: f3: +; CHECK: slgrk %r2, %r3, %r4 +; CHECK-NEXT: bler %r14 +; CHECK: lghi %r2, 0 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + br i1 %obit, label %exit, label %call + +call: + %res = tail call i64 @foo(i64 0, i64 %a, i64 %b) + ret i64 %res + +exit: + ret i64 %val +} + +; Check that we can still use SLGR in obvious cases. +define i64 @f4(i64 %a, i64 %b, i64 *%flag) { +; CHECK-LABEL: f4: +; CHECK: slgr %r2, %r3 +; CHECK: ipm [[REG1:%r[0-5]]] +; CHECK: afi [[REG1]], -536870912 +; CHECK: risbg [[REG2:%r[0-5]]], [[REG1]], 63, 191, 33 +; CHECK: stg [[REG2]], 0(%r4) +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 %b) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + %ext = zext i1 %obit to i64 + store i64 %ext, i64 *%flag + ret i64 %val +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-08.ll b/llvm/test/CodeGen/SystemZ/int-usub-08.ll new file mode 100644 index 00000000000..d282404e840 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-08.ll @@ -0,0 +1,148 @@ +; Test 32-bit subtraction in which the second operand is constant and in which +; three-operand forms are available. 
+; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f1: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the ALHSIK range. +define zeroext i1 @f2(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f2: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use SLFI instead. +define zeroext i1 @f3(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f3: +; CHECK: slfi %r3, 32769 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 32769) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the high end of the negative ALHSIK range. 
+define zeroext i1 @f4(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f4: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the low end of the ALHSIK range. +define zeroext i1 @f5(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f5: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -32767) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check the next value down, which must use SLFI instead. +define zeroext i1 @f6(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f6: +; CHECK: slfi %r3, 4294934528 +; CHECK-DAG: st %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -32768) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. 
+define void @f7(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f7: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. +define void @f8(i32 %dummy, i32 %a, i32 *%res) { +; CHECK-LABEL: f8: +; CHECK: alhsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: st [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-09.ll b/llvm/test/CodeGen/SystemZ/int-usub-09.ll new file mode 100644 index 00000000000..ce5fafabe59 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-09.ll @@ -0,0 +1,145 @@ +; Test 64-bit addition in which the second operand is constant and in which +; three-operand forms are available. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +declare i64 @foo() + +; Check subtraction of 1. 
+define zeroext i1 @f1(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f1: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the ALGHSIK range. +define zeroext i1 @f2(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f2: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -32768 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the next value up, which must use SLGFI instead. +define zeroext i1 @f3(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f3: +; CHECK: slgfi %r3, 32769 +; CHECK-DAG: stg %r3, 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 32769) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the high end of the negative ALGHSIK range. 
+define zeroext i1 @f4(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f4: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check the low end of the ALGHSIK range. +define zeroext i1 @f5(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f5: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, 32767 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK-DAG: ipm [[REG2:%r[0-5]]] +; CHECK-DAG: afi [[REG2]], -536870912 +; CHECK-DAG: risbg %r2, [[REG2]], 63, 191, 33 +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -32767) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Test the next value down, which cannot use either ALGHSIK or SLGFI. +define zeroext i1 @f6(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f6: +; CHECK-NOT: alghsik +; CHECK-NOT: slgfi +; CHECK: br %r14 + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -32768) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + ret i1 %obit +} + +; Check using the overflow result for a branch. +define void @f7(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f7: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bnler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f8(i64 %dummy, i64 %a, i64 *%res) { +; CHECK-LABEL: f8: +; CHECK: alghsik [[REG1:%r[0-5]]], %r3, -1 +; CHECK-DAG: stg [[REG1]], 0(%r4) +; CHECK: bler %r14 +; CHECK: jg foo@PLT + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%res + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-10.ll b/llvm/test/CodeGen/SystemZ/int-usub-10.ll new file mode 100644 index 00000000000..97f08777894 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-10.ll @@ -0,0 +1,490 @@ +; Test 32-bit subtractions of constants from memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i32 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i32 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: alsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i32 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: alsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use a subtraction and a store. 
+define zeroext i1 @f3(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: slfi [[VAL]], 129 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 129) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i32 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: alsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -127) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i32 %dummy, i32 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: l [[VAL:%r[0-5]]], 0(%r3) +; CHECK: slfi [[VAL]], 4294967168 +; CHECK-DAG: st [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 -128) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ASI range. 
+define zeroext i1 @f6(i32 *%base) { +; CHECK-LABEL: f6: +; CHECK: alsi 524284(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131071 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i32 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: alsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the low end of the ALSI range. +define zeroext i1 @f8(i32 *%base) { +; CHECK-LABEL: f8: +; CHECK: alsi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131072 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i32 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524292 +; CHECK: alsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i32, i32 *%base, i64 -131073 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that ALSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: alsi 4(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 4 + %ptr = inttoptr i64 %add2 to i32 * + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 from a spilled value can use ALSI. 
+define zeroext i1 @f11(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f11: +; CHECK: alsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val0, i32 128) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val1, i32 128) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val2, i32 128) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val3, i32 128) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val4, i32 128) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val5, i32 128) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val6, i32 128) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val7, i32 128) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val8, i32 128) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val9, i32 128) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val10, i32 128) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val11, i32 128) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val12, i32 128) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val13, i32 128) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val14, i32 128) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val15, i32 128) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check that subtracting -127 from a spilled value can use ALSI. 
+define zeroext i1 @f12(i32 *%ptr, i32 %sel) { +; CHECK-LABEL: f12: +; CHECK: alsi {{[0-9]+}}(%r15), 127 +; CHECK: br %r14 +entry: + %val0 = load volatile i32, i32 *%ptr + %val1 = load volatile i32, i32 *%ptr + %val2 = load volatile i32, i32 *%ptr + %val3 = load volatile i32, i32 *%ptr + %val4 = load volatile i32, i32 *%ptr + %val5 = load volatile i32, i32 *%ptr + %val6 = load volatile i32, i32 *%ptr + %val7 = load volatile i32, i32 *%ptr + %val8 = load volatile i32, i32 *%ptr + %val9 = load volatile i32, i32 *%ptr + %val10 = load volatile i32, i32 *%ptr + %val11 = load volatile i32, i32 *%ptr + %val12 = load volatile i32, i32 *%ptr + %val13 = load volatile i32, i32 *%ptr + %val14 = load volatile i32, i32 *%ptr + %val15 = load volatile i32, i32 *%ptr + + %test = icmp ne i32 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val0, i32 -127) + %add0 = extractvalue {i32, i1} %t0, 0 + %obit0 = extractvalue {i32, i1} %t0, 1 + %t1 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val1, i32 -127) + %add1 = extractvalue {i32, i1} %t1, 0 + %obit1 = extractvalue {i32, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val2, i32 -127) + %add2 = extractvalue {i32, i1} %t2, 0 + %obit2 = extractvalue {i32, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val3, i32 -127) + %add3 = extractvalue {i32, i1} %t3, 0 + %obit3 = extractvalue {i32, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val4, i32 -127) + %add4 = extractvalue {i32, i1} %t4, 0 + %obit4 = extractvalue {i32, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val5, i32 -127) + %add5 = extractvalue {i32, i1} %t5, 0 + %obit5 = extractvalue {i32, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val6, i32 -127) + %add6 = 
extractvalue {i32, i1} %t6, 0 + %obit6 = extractvalue {i32, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val7, i32 -127) + %add7 = extractvalue {i32, i1} %t7, 0 + %obit7 = extractvalue {i32, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val8, i32 -127) + %add8 = extractvalue {i32, i1} %t8, 0 + %obit8 = extractvalue {i32, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val9, i32 -127) + %add9 = extractvalue {i32, i1} %t9, 0 + %obit9 = extractvalue {i32, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val10, i32 -127) + %add10 = extractvalue {i32, i1} %t10, 0 + %obit10 = extractvalue {i32, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val11, i32 -127) + %add11 = extractvalue {i32, i1} %t11, 0 + %obit11 = extractvalue {i32, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val12, i32 -127) + %add12 = extractvalue {i32, i1} %t12, 0 + %obit12 = extractvalue {i32, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val13, i32 -127) + %add13 = extractvalue {i32, i1} %t13, 0 + %obit13 = extractvalue {i32, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val14, i32 -127) + %add14 = extractvalue {i32, i1} %t14, 0 + %obit14 = extractvalue {i32, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %val15, i32 -127) + %add15 = extractvalue {i32, i1} %t15, 0 + %obit15 = extractvalue {i32, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i32 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i32 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i32 [ %val2, %entry ], [ %add2, %add ] 
+ %new3 = phi i32 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i32 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i32 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i32 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i32 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i32 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i32 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i32 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i32 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i32 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i32 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i32 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i32 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i32 %new0, i32 *%ptr + store volatile i32 %new1, i32 *%ptr + store volatile i32 %new2, i32 *%ptr + store volatile i32 %new3, i32 *%ptr + store volatile i32 %new4, i32 *%ptr + store volatile i32 %new5, i32 *%ptr + store volatile i32 %new6, i32 *%ptr + store volatile i32 %new7, i32 *%ptr + store volatile i32 %new8, i32 *%ptr + store volatile i32 %new9, i32 *%ptr + store volatile i32 %new10, i32 *%ptr + store volatile i32 %new11, i32 *%ptr + store volatile i32 %new12, i32 *%ptr + store volatile i32 %new13, i32 *%ptr + store volatile i32 %new14, i32 *%ptr + store volatile i32 %new15, i32 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f13(i32 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: alsi 0(%r2), -1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f14(i32 *%ptr) { +; CHECK-LABEL: f14: +; CHECK: alsi 0(%r2), -1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i32, i32 *%ptr + %t = call {i32, i1} @llvm.usub.with.overflow.i32(i32 %a, i32 1) + %val = extractvalue {i32, i1} %t, 0 + %obit = extractvalue {i32, i1} %t, 1 + store i32 %val, i32 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i32 @foo() + br label %exit + +exit: + ret void +} + +declare {i32, i1} @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone + diff --git a/llvm/test/CodeGen/SystemZ/int-usub-11.ll b/llvm/test/CodeGen/SystemZ/int-usub-11.ll new file mode 100644 index 00000000000..cef5216b2c5 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/int-usub-11.ll @@ -0,0 +1,359 @@ +; Test 64-bit subtractions of constants from memory. +; +; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s + +declare i64 @foo() + +; Check subtraction of 1. +define zeroext i1 @f1(i64 *%ptr) { +; CHECK-LABEL: f1: +; CHECK: algsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the constant range. +define zeroext i1 @f2(i64 *%ptr) { +; CHECK-LABEL: f2: +; CHECK: algsi 0(%r2), -128 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next constant up, which must use a subtraction and a store. 
+define zeroext i1 @f3(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f3: +; CHECK: lg [[VAL:%r[0-5]]], 0(%r3) +; CHECK: slgfi [[VAL]], 129 +; CHECK-DAG: stg [[VAL]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 129) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the constant range. +define zeroext i1 @f4(i64 *%ptr) { +; CHECK-LABEL: f4: +; CHECK: algsi 0(%r2), 127 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -127) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next value down, with the same comment as f3. +define zeroext i1 @f5(i64 %dummy, i64 *%ptr) { +; CHECK-LABEL: f5: +; CHECK: lg [[VAL1:%r[0-5]]], 0(%r3) +; CHECK: lghi [[VAL2:%r[0-9]+]], -128 +; CHECK: slgr [[VAL1]], [[VAL2]] +; CHECK-DAG: stg [[VAL1]], 0(%r3) +; CHECK-DAG: ipm [[REG:%r[0-5]]] +; CHECK-DAG: afi [[REG]], -536870912 +; CHECK-DAG: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 -128) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the high end of the aligned ALGSI range. 
+define zeroext i1 @f6(i64 *%base) { +; CHECK-LABEL: f6: +; CHECK: algsi 524280(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65535 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word up, which must use separate address logic. +; Other sequences besides this one would be OK. +define zeroext i1 @f7(i64 *%base) { +; CHECK-LABEL: f7: +; CHECK: agfi %r2, 524288 +; CHECK: algsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the low end of the ALGSI range. +define zeroext i1 @f8(i64 *%base) { +; CHECK-LABEL: f8: +; CHECK: algsi -524288(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65536 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check the next word down, which must use separate address logic. +; Other sequences besides this one would be OK. 
+define zeroext i1 @f9(i64 *%base) { +; CHECK-LABEL: f9: +; CHECK: agfi %r2, -524296 +; CHECK: algsi 0(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %ptr = getelementptr i64, i64 *%base, i64 -65537 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that ALGSI does not allow indices. +define zeroext i1 @f10(i64 %base, i64 %index) { +; CHECK-LABEL: f10: +; CHECK: agr %r2, %r3 +; CHECK: algsi 8(%r2), -1 +; CHECK: ipm [[REG:%r[0-5]]] +; CHECK: afi [[REG]], -536870912 +; CHECK: risbg %r2, [[REG]], 63, 191, 33 +; CHECK: br %r14 + %add1 = add i64 %base, %index + %add2 = add i64 %add1, 8 + %ptr = inttoptr i64 %add2 to i64 * + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + ret i1 %obit +} + +; Check that subtracting 128 from a spilled value can use ALGSI. 
+define zeroext i1 @f11(i64 *%ptr, i64 %sel) { +; CHECK-LABEL: f11: +; CHECK: algsi {{[0-9]+}}(%r15), -128 +; CHECK: br %r14 +entry: + %val0 = load volatile i64, i64 *%ptr + %val1 = load volatile i64, i64 *%ptr + %val2 = load volatile i64, i64 *%ptr + %val3 = load volatile i64, i64 *%ptr + %val4 = load volatile i64, i64 *%ptr + %val5 = load volatile i64, i64 *%ptr + %val6 = load volatile i64, i64 *%ptr + %val7 = load volatile i64, i64 *%ptr + %val8 = load volatile i64, i64 *%ptr + %val9 = load volatile i64, i64 *%ptr + %val10 = load volatile i64, i64 *%ptr + %val11 = load volatile i64, i64 *%ptr + %val12 = load volatile i64, i64 *%ptr + %val13 = load volatile i64, i64 *%ptr + %val14 = load volatile i64, i64 *%ptr + %val15 = load volatile i64, i64 *%ptr + + %test = icmp ne i64 %sel, 0 + br i1 %test, label %add, label %store + +add: + %t0 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val0, i64 128) + %add0 = extractvalue {i64, i1} %t0, 0 + %obit0 = extractvalue {i64, i1} %t0, 1 + %t1 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val1, i64 128) + %add1 = extractvalue {i64, i1} %t1, 0 + %obit1 = extractvalue {i64, i1} %t1, 1 + %res1 = or i1 %obit0, %obit1 + %t2 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val2, i64 128) + %add2 = extractvalue {i64, i1} %t2, 0 + %obit2 = extractvalue {i64, i1} %t2, 1 + %res2 = or i1 %res1, %obit2 + %t3 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val3, i64 128) + %add3 = extractvalue {i64, i1} %t3, 0 + %obit3 = extractvalue {i64, i1} %t3, 1 + %res3 = or i1 %res2, %obit3 + %t4 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val4, i64 128) + %add4 = extractvalue {i64, i1} %t4, 0 + %obit4 = extractvalue {i64, i1} %t4, 1 + %res4 = or i1 %res3, %obit4 + %t5 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val5, i64 128) + %add5 = extractvalue {i64, i1} %t5, 0 + %obit5 = extractvalue {i64, i1} %t5, 1 + %res5 = or i1 %res4, %obit5 + %t6 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val6, i64 128) + %add6 = 
extractvalue {i64, i1} %t6, 0 + %obit6 = extractvalue {i64, i1} %t6, 1 + %res6 = or i1 %res5, %obit6 + %t7 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val7, i64 128) + %add7 = extractvalue {i64, i1} %t7, 0 + %obit7 = extractvalue {i64, i1} %t7, 1 + %res7 = or i1 %res6, %obit7 + %t8 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val8, i64 128) + %add8 = extractvalue {i64, i1} %t8, 0 + %obit8 = extractvalue {i64, i1} %t8, 1 + %res8 = or i1 %res7, %obit8 + %t9 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val9, i64 128) + %add9 = extractvalue {i64, i1} %t9, 0 + %obit9 = extractvalue {i64, i1} %t9, 1 + %res9 = or i1 %res8, %obit9 + %t10 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val10, i64 128) + %add10 = extractvalue {i64, i1} %t10, 0 + %obit10 = extractvalue {i64, i1} %t10, 1 + %res10 = or i1 %res9, %obit10 + %t11 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val11, i64 128) + %add11 = extractvalue {i64, i1} %t11, 0 + %obit11 = extractvalue {i64, i1} %t11, 1 + %res11 = or i1 %res10, %obit11 + %t12 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val12, i64 128) + %add12 = extractvalue {i64, i1} %t12, 0 + %obit12 = extractvalue {i64, i1} %t12, 1 + %res12 = or i1 %res11, %obit12 + %t13 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val13, i64 128) + %add13 = extractvalue {i64, i1} %t13, 0 + %obit13 = extractvalue {i64, i1} %t13, 1 + %res13 = or i1 %res12, %obit13 + %t14 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val14, i64 128) + %add14 = extractvalue {i64, i1} %t14, 0 + %obit14 = extractvalue {i64, i1} %t14, 1 + %res14 = or i1 %res13, %obit14 + %t15 = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %val15, i64 128) + %add15 = extractvalue {i64, i1} %t15, 0 + %obit15 = extractvalue {i64, i1} %t15, 1 + %res15 = or i1 %res14, %obit15 + + br label %store + +store: + %new0 = phi i64 [ %val0, %entry ], [ %add0, %add ] + %new1 = phi i64 [ %val1, %entry ], [ %add1, %add ] + %new2 = phi i64 [ %val2, %entry ], [ %add2, %add ] + %new3 = 
phi i64 [ %val3, %entry ], [ %add3, %add ] + %new4 = phi i64 [ %val4, %entry ], [ %add4, %add ] + %new5 = phi i64 [ %val5, %entry ], [ %add5, %add ] + %new6 = phi i64 [ %val6, %entry ], [ %add6, %add ] + %new7 = phi i64 [ %val7, %entry ], [ %add7, %add ] + %new8 = phi i64 [ %val8, %entry ], [ %add8, %add ] + %new9 = phi i64 [ %val9, %entry ], [ %add9, %add ] + %new10 = phi i64 [ %val10, %entry ], [ %add10, %add ] + %new11 = phi i64 [ %val11, %entry ], [ %add11, %add ] + %new12 = phi i64 [ %val12, %entry ], [ %add12, %add ] + %new13 = phi i64 [ %val13, %entry ], [ %add13, %add ] + %new14 = phi i64 [ %val14, %entry ], [ %add14, %add ] + %new15 = phi i64 [ %val15, %entry ], [ %add15, %add ] + %res = phi i1 [ 0, %entry ], [ %res15, %add ] + + store volatile i64 %new0, i64 *%ptr + store volatile i64 %new1, i64 *%ptr + store volatile i64 %new2, i64 *%ptr + store volatile i64 %new3, i64 *%ptr + store volatile i64 %new4, i64 *%ptr + store volatile i64 %new5, i64 *%ptr + store volatile i64 %new6, i64 *%ptr + store volatile i64 %new7, i64 *%ptr + store volatile i64 %new8, i64 *%ptr + store volatile i64 %new9, i64 *%ptr + store volatile i64 %new10, i64 *%ptr + store volatile i64 %new11, i64 *%ptr + store volatile i64 %new12, i64 *%ptr + store volatile i64 %new13, i64 *%ptr + store volatile i64 %new14, i64 *%ptr + store volatile i64 %new15, i64 *%ptr + + ret i1 %res +} + +; Check using the overflow result for a branch. +define void @f12(i64 *%ptr) { +; CHECK-LABEL: f12: +; CHECK: algsi 0(%r2), -1 +; CHECK: jgle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %call, label %exit + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +; ... and the same with the inverted direction. 
+define void @f13(i64 *%ptr) { +; CHECK-LABEL: f13: +; CHECK: algsi 0(%r2), -1 +; CHECK: jgnle foo@PLT +; CHECK: br %r14 + %a = load i64, i64 *%ptr + %t = call {i64, i1} @llvm.usub.with.overflow.i64(i64 %a, i64 1) + %val = extractvalue {i64, i1} %t, 0 + %obit = extractvalue {i64, i1} %t, 1 + store i64 %val, i64 *%ptr + br i1 %obit, label %exit, label %call + +call: + tail call i64 @foo() + br label %exit + +exit: + ret void +} + +declare {i64, i1} @llvm.usub.with.overflow.i64(i64, i64) nounwind readnone + |