| author | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-03-17 08:32:12 +0000 |
|---|---|---|
| committer | Jonas Paulsson <paulsson@linux.vnet.ibm.com> | 2018-03-17 08:32:12 +0000 |
| commit | 138960770c9b6d2ebaf6d42814ce69e25679cf08 (patch) | |
| tree | 69ddd03eb86e38075d4a4d11bf27a47d5e0147b5 /llvm/test/CodeGen/SystemZ | |
| parent | e9f7fa83d54cbb77e48b26685433a6777f613017 (diff) | |
| download | bcm5719-llvm-138960770c9b6d2ebaf6d42814ce69e25679cf08.tar.gz bcm5719-llvm-138960770c9b6d2ebaf6d42814ce69e25679cf08.zip | |
[SystemZ] computeKnownBitsForTargetNode() / ComputeNumSignBitsForTargetNode()
Implement/improve these methods to help DAG combining. This mainly
concerns intrinsics.

Some constant operands of SystemZISD nodes are now marked Opaque to keep
DAG combining from transforming them back and forth between generic and
target nodes indefinitely.
Review: Ulrich Weigand
llvm-svn: 327765
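For background, `computeKnownBitsForTargetNode()` and `ComputeNumSignBitsForTargetNode()` are TargetLowering hooks through which a backend tells the generic DAG combiner which bits of a target-specific node are known zero/one and how many sign bits it has; without them the combiner has to treat SystemZISD nodes (and the intrinsics that lower to them) as completely unknown. The standalone C++ sketch below only models the known-bits side for a single pack element so the tests further down are easier to follow; it is not the patch itself, and the names `Known64`, `Known32` and `knownBitsOfPackElt` are invented for illustration.

```cpp
// Minimal standalone model (not LLVM code) of the known-bits reasoning the
// patch enables: a "pack" element truncates a wide source element to a
// narrower result element, so bits that are known in the source stay known
// in the result.  Once every bit above bit 0 is known zero, an
// "and <4 x i32> %x, <1,1,1,1>" is provably a no-op and can be removed,
// which is what the knownbits-intrinsics tests check.
#include <cassert>
#include <cstdint>
#include <cstdio>

// Known bits of a value: Zero has a 1 for every bit known to be 0,
// One has a 1 for every bit known to be 1 (the two never overlap).
struct Known64 { uint64_t Zero = 0, One = 0; };
struct Known32 { uint32_t Zero = 0, One = 0; };

// Known bits across one truncating pack element (i64 source -> i32 result).
// For the all-constant operands used in the tests, the saturating SystemZ
// packs behave like a plain truncate, so saturation is ignored here.
Known32 knownBitsOfPackElt(Known64 Src) {
  Known32 K;
  K.Zero = static_cast<uint32_t>(Src.Zero);
  K.One = static_cast<uint32_t>(Src.One);
  return K;
}

int main() {
  // Source element is the constant 1, so every bit is known.
  Known64 Src;
  Src.One = 1;
  Src.Zero = ~Src.One;

  Known32 Packed = knownBitsOfPackElt(Src);

  // The AND with 1 is redundant once every bit above bit 0 is known zero;
  // that is the condition DAGCombiner checks before deleting it.
  bool AndWithOneRedundant = (Packed.Zero | 1u) == 0xffffffffu;
  assert(AndWithOneRedundant);
  std::printf("and with 1 is redundant: %s\n",
              AndWithOneRedundant ? "yes" : "no");
  return 0;
}
```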
Diffstat (limited to 'llvm/test/CodeGen/SystemZ')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll | 460 |
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll | 384 |
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/knownbits.ll | 51 |
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll | 236 |
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll | 97 |
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/signbits.ll | 36 |
| -rw-r--r-- | llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll | 3 |
7 files changed, 1265 insertions(+), 2 deletions(-)
diff --git a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll new file mode 100644 index 00000000000..3bcbbb45581 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-binop.ll @@ -0,0 +1,460 @@ +; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with +; vector intrinsics. +; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>) + +; PACKS_CC (operand elements are 0): i64 -> i32 +define <4 x i32> @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKS_CC (operand elements are 1): i64 -> i32 +; NOTE: The vector AND is optimized away, but vrepig+vpksgs is used instead +; of vrepif. Similarly for more test cases below. +define <4 x i32> @f1() { +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpksgs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKS_CC (operand elements are 0): i32 -> i16 +define <8 x i16> @f2() { +; CHECK-LABEL: f2: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, + <4 x i32> <i32 0, i32 0, i32 0, i32 0>) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKS_CC (operand elements are 1): i32 -> i16 +define <8 x i16> @f3() { +; CHECK-LABEL: f3: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpksfs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, + <4 x i32> <i32 1, i32 1, i32 1, i32 1>) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKS_CC (operand elements are 0): i16 -> i8 +define <16 x i8> @f4() { +; CHECK-LABEL: f4: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpkshs( + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +; PACKS_CC (operand elements are 1): i16 -> i8 +define <16 x i8> @f5() { +; CHECK-LABEL: f5: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpkshs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpkshs( + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 
1>) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>) + +; PACKLS_CC (operand elements are 0): i64 -> i32 +define <4 x i32> @f6() { +; CHECK-LABEL: f6: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKLS_CC (operand elements are 1): i64 -> i32 +define <4 x i32> @f7() { +; CHECK-LABEL: f7: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpklsgs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %and = and <4 x i32> %extr, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKLS_CC (operand elements are 0): i32 -> i16 +define <8 x i16> @f8() { +; CHECK-LABEL: f8: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, + <4 x i32> <i32 0, i32 0, i32 0, i32 0>) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKLS_CC (operand elements are 1): i32 -> i16 +define <8 x i16> @f9() { +; CHECK-LABEL: f9: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpklsfs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, + <4 x i32> <i32 1, i32 1, i32 1, i32 1>) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %and = and <8 x i16> %extr, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKLS_CC (operand elements are 0): i16 -> i8 +define <16 x i8> @f10() { +; CHECK-LABEL: f10: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpklshs( + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +; PACKLS_CC (operand elements are 1): i16 -> i8 +define <16 x i8> @f11() { +; CHECK-LABEL: f11: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpklshs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpklshs( + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %and = and <16 x i8> %extr, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>) +declare <4 x i32> 
@llvm.s390.vpksg(<2 x i64>, <2 x i64>) + +; PACKS (operand elements are 0): i64 -> i32 +define <4 x i32> @f12() { +; CHECK-LABEL: f12: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>) + %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKS (operand elements are 1): i64 -> i32 +define <4 x i32> @f13() { +; CHECK-LABEL: f13: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpksg %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>) + %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKS (operand elements are 0): i32 -> i16 +define <8 x i16> @f14() { +; CHECK-LABEL: f14: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, + <4 x i32> <i32 0, i32 0, i32 0, i32 0>) + %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKS (operand elements are 1): i32 -> i16 +define <8 x i16> @f15() { +; CHECK-LABEL: f15: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpksf %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, + <4 x i32> <i32 1, i32 1, i32 1, i32 1>) + %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKS (operand elements are 0): i16 -> i8 +define <16 x i8> @f16() { +; CHECK-LABEL: f16: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpksh( + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) + %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +; PACKS (operand elements are 1): i16 -> i8 +define <16 x i8> @f17() { +; CHECK-LABEL: f17: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpksh %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpksh( + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) + %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>) + +; PACKLS (operand elements are 0): i64 -> i32 +define <4 x i32> @f18() { +; CHECK-LABEL: f18: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 0, i64 0>) + %and = and <4 x i32> %call, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKLS (operand elements are 1): i64 -> i32 +define <4 x i32> @f19() { +; CHECK-LABEL: f19: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpklsg %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 1, i64 1>, <2 x i64> <i64 1, i64 1>) + %and = and <4 x i32> %call, <i32 1, i32 
1, i32 1, i32 1> + ret <4 x i32> %and +} + +; PACKLS (operand elements are 0): i32 -> i16 +define <8 x i16> @f20() { +; CHECK-LABEL: f20: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 0, i32 0, i32 0>, + <4 x i32> <i32 0, i32 0, i32 0, i32 0>) + %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKLS (operand elements are 1): i32 -> i16 +define <8 x i16> @f21() { +; CHECK-LABEL: f21: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepif %v0, 1 +; CHECK-NEXT: vpklsf %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 1, i32 1, i32 1, i32 1>, + <4 x i32> <i32 1, i32 1, i32 1, i32 1>) + %and = and <8 x i16> %call, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; PACKLS (operand elements are 0): i16 -> i8 +define <16 x i8> @f22() { +; CHECK-LABEL: f22: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpklsh( + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>, + <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>) + %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +; PACKLS (operand elements are 1): i16 -> i8 +define <16 x i8> @f23() { +; CHECK-LABEL: f23: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepih %v0, 1 +; CHECK-NEXT: vpklsh %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpklsh( + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>, + <8 x i16> <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>) + %and = and <16 x i8> %call, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %and +} + +declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32) + +; VPDI (operand elements are 0): +define <2 x i64> @f24() { +; CHECK-LABEL: f24: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 0>, + <2 x i64> <i64 0, i64 0>, i32 0) + %res = and <2 x i64> %perm, <i64 1, i64 1> + ret <2 x i64> %res +} + +; VPDI (operand elements are 1): +define <2 x i64> @f25() { +; CHECK-LABEL: f25: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepig %v0, 1 +; CHECK-NEXT: vpdi %v24, %v0, %v0, 0 +; CHECK-NEXT: br %r14 + %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 1, i64 1>, + <2 x i64> <i64 1, i64 1>, i32 0) + %res = and <2 x i64> %perm, <i64 1, i64 1> + ret <2 x i64> %res +} + +declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32) + +; VSLDB (operand elements are 0): +define <16 x i8> @f26() { +; CHECK-LABEL: f26: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8> + <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, <16 x i8> + <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, + i32 1) + + %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %res +} + +; VSLDB (operand elements are 1): +define <16 x i8> @f27() { +; CHECK-LABEL: f27: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vrepib %v0, 1 +; CHECK-NEXT: vsldb %v24, %v0, %v0, 1 +; 
CHECK-NEXT: br %r14 + %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8> + <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, <16 x i8> + <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, + i32 1) + + %res = and <16 x i8> %shfd, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %res +} + +; Test that intrinsic CC result is recognized. +define i32 @f28(<4 x i32> %a, <4 x i32> %b) { +; CHECK-LABEL: f28: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: lhi %r2, 0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) + %cc = extractvalue {<8 x i16>, i32} %call, 1 + %res = and i32 %cc, -4 + ret i32 %res +} + +declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>) + +; Test VPERM (operand elements are 0): +define <16 x i8> @f29() { +; CHECK-LABEL: f29: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %perm = call <16 x i8> @llvm.s390.vperm( + <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, + <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>, + <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %res +} + +; Test VPERM (operand elements are 1): +define <16 x i8> @f30() { +; CHECK-LABEL: f30: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v0, 0 +; CHECK-NEXT: vrepib %v1, 1 +; CHECK-NEXT: vperm %v24, %v1, %v1, %v0 +; CHECK-NEXT: br %r14 + %perm = call <16 x i8> @llvm.s390.vperm( + <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, + <16 x i8> <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>, + <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %res = and <16 x i8> %perm, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1> + ret <16 x i8> %res +} diff --git a/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll new file mode 100644 index 00000000000..1966340adb9 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/knownbits-intrinsics-unpack.ll @@ -0,0 +1,384 @@ +; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode() with +; vector intrinsics. 
+; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +declare <8 x i16> @llvm.s390.vuphb(<16 x i8>) +declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>) + +; VUPHB (used operand elements are 0) +define <8 x i16> @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8> + <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; VUPHB (used operand elements are 1) +; NOTE: The AND is optimized away, but instead of replicating '1' into <8 x +; i16>, the original vector constant is put in the constant pool and then +; unpacked (repeated in more test cases below). +define <8 x i16> @f1() { +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuphb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8> + <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; VUPLHB (used operand elements are 0) +define <8 x i16> @f2() { +; CHECK-LABEL: f2: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> + <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; VUPLHB (used operand elements are 1) +define <8 x i16> @f3() { +; CHECK-LABEL: f3: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> + <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +declare <4 x i32> @llvm.s390.vuphh(<8 x i16>) +declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>) + +; VUPHH (used operand elements are 0) +define <4 x i32> @f4() { +; CHECK-LABEL: f4: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16> + <i16 0, i16 0, i16 0, i16 0, + i16 1, i16 1, i16 1, i16 1>) + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; VUPHH (used operand elements are 1) +define <4 x i32> @f5() { +; CHECK-LABEL: f5: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuphh %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16> + <i16 1, i16 1, i16 1, i16 1, + i16 0, i16 0, i16 0, i16 0>) + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; VUPLHH (used operand elements are 0) +define <4 x i32> @f6() { +; CHECK-LABEL: f6: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> + <i16 0, i16 0, i16 0, i16 0, + i16 1, i16 1, i16 1, i16 1>) + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; VUPLHH (used operand elements are 1) +define <4 x i32> @f7() { +; CHECK-LABEL: f7: +; CHECK-LABEL: # %bb.0: 
+; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhh %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> + <i16 1, i16 1, i16 1, i16 1, + i16 0, i16 0, i16 0, i16 0>) + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +declare <2 x i64> @llvm.s390.vuphf(<4 x i32>) +declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>) + +; VUPHF (used operand elements are 0) +define <2 x i64> @f8() { +; CHECK-LABEL: f8: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +; VUPHF (used operand elements are 1) +define <2 x i64> @f9() { +; CHECK-LABEL: f9: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +; VUPLHF (used operand elements are 0) +define <2 x i64> @f10() { +; CHECK-LABEL: f10: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +; VUPLHF (used operand elements are 1) +define <2 x i64> @f11() { +; CHECK-LABEL: f11: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +declare <8 x i16> @llvm.s390.vuplb(<16 x i8>) +declare <8 x i16> @llvm.s390.vupllb(<16 x i8>) + +; VUPLB (used operand elements are 0) +define <8 x i16> @f12() { +; CHECK-LABEL: f12: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8> + <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; VUPLB (used operand elements are 1) +define <8 x i16> @f13() { +; CHECK-LABEL: f13: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8> + <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; VUPLLB (used operand elements are 0) +define <8 x i16> @f14() { +; CHECK-LABEL: f14: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8> + <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +; VUPLLB (used operand elements are 1) +define <8 x i16> @f15() { +; CHECK-LABEL: f15: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vupllb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vupllb(<16 x i8> + <i8 0, i8 0, i8 0, i8 0, 
i8 0, i8 0, i8 0, i8 0, + i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>) + %and = and <8 x i16> %unp, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1> + ret <8 x i16> %and +} + +declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>) +declare <4 x i32> @llvm.s390.vupllh(<8 x i16>) + +; VUPLHW (used operand elements are 0) +define <4 x i32> @f16() { +; CHECK-LABEL: f16: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> + <i16 1, i16 1, i16 1, i16 1, + i16 0, i16 0, i16 0, i16 0>) + + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; VUPLHW (used operand elements are 1) +define <4 x i32> @f17() { +; CHECK-LABEL: f17: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplhw %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> + <i16 0, i16 0, i16 0, i16 0, + i16 1, i16 1, i16 1, i16 1>) + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; VUPLLH (used operand elements are 0) +define <4 x i32> @f18() { +; CHECK-LABEL: f18: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16> + <i16 1, i16 1, i16 1, i16 1, + i16 0, i16 0, i16 0, i16 0>) + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +; VUPLLH (used operand elements are 1) +define <4 x i32> @f19() { +; CHECK-LABEL: f19: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vupllh %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vupllh(<8 x i16> + <i16 0, i16 0, i16 0, i16 0, + i16 1, i16 1, i16 1, i16 1>) + %and = and <4 x i32> %unp, <i32 1, i32 1, i32 1, i32 1> + ret <4 x i32> %and +} + +declare <2 x i64> @llvm.s390.vuplf(<4 x i32>) +declare <2 x i64> @llvm.s390.vupllf(<4 x i32>) + +; VUPLF (used operand elements are 0) +define <2 x i64> @f20() { +; CHECK-LABEL: f20: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +; VUPLF (used operand elements are 1) +define <2 x i64> @f21() { +; CHECK-LABEL: f21: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +; VUPLLF (used operand elements are 0) +define <2 x i64> @f22() { +; CHECK-LABEL: f22: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +; VUPLLF (used operand elements are 1) +define <2 x i64> @f23() { +; CHECK-LABEL: f23: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: larl %r1, .LCPI +; CHECK-NEXT: vl %v0, 0(%r1) +; CHECK-NEXT: vupllf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vupllf(<4 x i32> <i32 0, i32 0, i32 1, i32 1>) + %and = and <2 x i64> %unp, <i64 1, i64 1> + ret <2 x i64> %and +} + +; Test that signed unpacking of positive elements gives known zeros in high part. 
+define <2 x i64> @f24() { +; CHECK-LABEL: f24: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 1, i32 1, i32 0, i32 0>) + %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000 + i64 -4294967296> + ret <2 x i64> %and +} + +; Test that signed unpacking of negative elements gives known ones in high part. +define <2 x i64> @f25() { +; CHECK-LABEL: f25: +; CHECK-LABEL: # %bb.0: +; 61680 = 0xf0f0 +; CHECK-NEXT: vgbm %v24, 61680 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>) + %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000 + i64 -4294967296> + ret <2 x i64> %and +} + +; Test that logical unpacking of negative elements gives known zeros in high part. +define <2 x i64> @f26() { +; CHECK-LABEL: f26: +; CHECK-LABEL: # %bb.0: +; CHECK-NEXT: vgbm %v24, 0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> <i32 -1, i32 -1, i32 0, i32 0>) + %and = and <2 x i64> %unp, <i64 -4294967296, ; = 0xffffffff00000000 + i64 -4294967296> + ret <2 x i64> %and +} diff --git a/llvm/test/CodeGen/SystemZ/knownbits.ll b/llvm/test/CodeGen/SystemZ/knownbits.ll new file mode 100644 index 00000000000..703c0bf9479 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/knownbits.ll @@ -0,0 +1,51 @@ +; Test that DAGCombiner gets helped by computeKnownBitsForTargetNode(). +; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +; SystemZISD::REPLICATE +define i32 @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK: vlgvf +; CHECK-NOT: lhi %r2, 0 +; CHECK-NOT: chi %r0, 0 +; CHECK-NOT: lochilh %r2, 1 +; CHECK: br %r14 + %cmp0 = icmp ne <4 x i32> undef, zeroinitializer + %zxt0 = zext <4 x i1> %cmp0 to <4 x i32> + %ext0 = extractelement <4 x i32> %zxt0, i32 3 + br label %exit + +exit: +; The vector icmp+zext involves a REPLICATE of 1's. If KnownBits reflects +; this, DAGCombiner can see that the i32 icmp and zext here are not needed. + %cmp1 = icmp ne i32 %ext0, 0 + %zxt1 = zext i1 %cmp1 to i32 + ret i32 %zxt1 +} + +; SystemZISD::JOIN_DWORDS (and REPLICATE) +define void @f1() { +; The DAG XOR has JOIN_DWORDS and REPLICATE operands. With KnownBits properly set +; for both these nodes, ICMP is used instead of TM during lowering because +; adjustForRedundantAnd() succeeds. +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK-NOT: tmll +; CHECK-NOT: jne +; CHECK: cijlh + %1 = load i16, i16* null, align 2 + %2 = icmp eq i16 %1, 0 + %3 = insertelement <2 x i1> undef, i1 %2, i32 0 + %4 = insertelement <2 x i1> %3, i1 true, i32 1 + %5 = xor <2 x i1> %4, <i1 true, i1 true> + %6 = extractelement <2 x i1> %5, i32 0 + %7 = or i1 %6, undef + br i1 %7, label %9, label %8 + +; <label>:8: ; preds = %0 + unreachable + +; <label>:9: ; preds = %0 + unreachable +} diff --git a/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll new file mode 100644 index 00000000000..1fc14964a94 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-binop.ll @@ -0,0 +1,236 @@ +; Test that DAGCombiner gets helped by ComputeNumSignBitsForTargetNode() with +; vector intrinsics. 
+; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>) + +; PACKS_CC: i64 -> i32 +define <4 x i32> @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK: vpksgs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 0, i64 1>) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %trunc = trunc <4 x i32> %extr to <4 x i16> + %ret = sext <4 x i16> %trunc to <4 x i32> + ret <4 x i32> %ret +} + +; PACKS_CC: i32 -> i16 +define <8 x i16> @f1() { +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK: vpksfs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, + <4 x i32> <i32 0, i32 1, i32 1, i32 0>) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %trunc = trunc <8 x i16> %extr to <8 x i8> + %ret = sext <8 x i8> %trunc to <8 x i16> + ret <8 x i16> %ret +} + +; PACKS_CC: i16 -> i8 +define <16 x i8> @f2() { +; CHECK-LABEL: f2: +; CHECK-LABEL: # %bb.0: +; CHECK: vpkshs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpkshs( + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %trunc = trunc <16 x i8> %extr to <16 x i4> + %ret = sext <16 x i4> %trunc to <16 x i8> + ret <16 x i8> %ret +} + +declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>) +declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>) +declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>) + +; PACKLS_CC: i64 -> i32 +define <4 x i32> @f3() { +; CHECK-LABEL: f3: +; CHECK-LABEL: # %bb.0: +; CHECK: vpklsgs %v24, %v1, %v0 +; CHECK-NEXT: br %r14 + %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>) + %extr = extractvalue {<4 x i32>, i32} %call, 0 + %trunc = trunc <4 x i32> %extr to <4 x i16> + %ret = sext <4 x i16> %trunc to <4 x i32> + ret <4 x i32> %ret +} + +; PACKLS_CC: i32 -> i16 +define <8 x i16> @f4() { +; CHECK-LABEL: f4: +; CHECK-LABEL: # %bb.0: +; CHECK: vpklsfs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, + <4 x i32> <i32 0, i32 1, i32 1, i32 0>) + %extr = extractvalue {<8 x i16>, i32} %call, 0 + %trunc = trunc <8 x i16> %extr to <8 x i8> + %ret = sext <8 x i8> %trunc to <8 x i16> + ret <8 x i16> %ret +} + +; PACKLS_CC: i16 -> i8 +define <16 x i8> @f5() { +; CHECK-LABEL: f5: +; CHECK-LABEL: # %bb.0: +; CHECK: vpklshs %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call {<16 x i8>, i32} @llvm.s390.vpklshs( + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) + %extr = extractvalue {<16 x i8>, i32} %call, 0 + %trunc = trunc <16 x i8> %extr to <16 x i4> + %ret = sext <16 x i4> %trunc to <16 x i8> + ret <16 x i8> %ret +} + +declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>) + +; PACKS: i64 -> i32 +define <4 x i32> @f6() { +; CHECK-LABEL: f6: +; CHECK-LABEL: # %bb.0: +; CHECK: vpksg %v24, %v1, %v0 +; CHECK-NEXT: br %r14 + %call = call 
<4 x i32> @llvm.s390.vpksg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>) + %trunc = trunc <4 x i32> %call to <4 x i16> + %ret = sext <4 x i16> %trunc to <4 x i32> + ret <4 x i32> %ret +} + +; PACKS: i32 -> i16 +define <8 x i16> @f7() { +; CHECK-LABEL: f7: +; CHECK-LABEL: # %bb.0: +; CHECK: vpksf %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpksf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, + <4 x i32> <i32 0, i32 1, i32 1, i32 0>) + %trunc = trunc <8 x i16> %call to <8 x i8> + %ret = sext <8 x i8> %trunc to <8 x i16> + ret <8 x i16> %ret +} + +; PACKS: i16 -> i8 +define <16 x i8> @f8() { +; CHECK-LABEL: f8: +; CHECK-LABEL: # %bb.0: +; CHECK: vpksh %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpksh( + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) + %trunc = trunc <16 x i8> %call to <16 x i4> + %ret = sext <16 x i4> %trunc to <16 x i8> + ret <16 x i8> %ret +} + +declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>) +declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>) + +; PACKLS: i64 -> i32 +define <4 x i32> @f9() { +; CHECK-LABEL: f9: +; CHECK-LABEL: # %bb.0: +; CHECK: vpklsg %v24, %v1, %v0 +; CHECK-NEXT: br %r14 + %call = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> <i64 0, i64 1>, <2 x i64> <i64 1, i64 0>) + %trunc = trunc <4 x i32> %call to <4 x i16> + %ret = sext <4 x i16> %trunc to <4 x i32> + ret <4 x i32> %ret +} + +; PACKLS: i32 -> i16 +define <8 x i16> @f10() { +; CHECK-LABEL: f10: +; CHECK-LABEL: # %bb.0: +; CHECK: vpklsf %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> <i32 0, i32 1, i32 1, i32 0>, + <4 x i32> <i32 0, i32 1, i32 1, i32 0>) + %trunc = trunc <8 x i16> %call to <8 x i8> + %ret = sext <8 x i8> %trunc to <8 x i16> + ret <8 x i16> %ret +} + +; PACKLS: i16 -> i8 +define <16 x i8> @f11() { +; CHECK-LABEL: f11: +; CHECK-LABEL: # %bb.0: +; CHECK: vpklsh %v24, %v0, %v0 +; CHECK-NEXT: br %r14 + %call = call <16 x i8> @llvm.s390.vpklsh( + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>, + <8 x i16> <i16 0, i16 0, i16 1, i16 1, i16 0, i16 0, i16 1, i16 1>) + %trunc = trunc <16 x i8> %call to <16 x i4> + %ret = sext <16 x i4> %trunc to <16 x i8> + ret <16 x i8> %ret +} + +declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32) + +; VPDI: +define <2 x i64> @f12() { +; CHECK-LABEL: f12: +; CHECK-LABEL: # %bb.0: +; CHECK: vpdi %v24, %v1, %v0, 0 +; CHECK-NEXT: br %r14 + %perm = call <2 x i64> @llvm.s390.vpdi(<2 x i64> <i64 0, i64 1>, + <2 x i64> <i64 1, i64 0>, i32 0) + %trunc = trunc <2 x i64> %perm to <2 x i32> + %ret = sext <2 x i32> %trunc to <2 x i64> + ret <2 x i64> %ret +} + +declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32) + +; VSLDB: +define <16 x i8> @f13() { +; CHECK-LABEL: f13: +; CHECK-LABEL: # %bb.0: +; CHECK: vsldb %v24, %v0, %v0, 1 +; CHECK-NEXT: br %r14 + %shfd = call <16 x i8> @llvm.s390.vsldb(<16 x i8> + <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, <16 x i8> + <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, + i32 1) + %trunc = trunc <16 x i8> %shfd to <16 x i4> + %ret = sext <16 x i4> %trunc to <16 x i8> + ret <16 x i8> %ret +} + +declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>) + +; Test VPERM: +define <16 x i8> @f14() { +; CHECK-LABEL: f14: +; 
CHECK-LABEL: # %bb.0: +; CHECK: vperm %v24, %v0, %v0, %v0 +; CHECK-NEXT: br %r14 + %perm = call <16 x i8> @llvm.s390.vperm( + <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, + <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>, + <16 x i8> <i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1, + i8 0, i8 0, i8 1, i8 1, i8 0, i8 1, i8 1, i8 1>) + %trunc = trunc <16 x i8> %perm to <16 x i4> + %ret = sext <16 x i4> %trunc to <16 x i8> + ret <16 x i8> %ret +} diff --git a/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll new file mode 100644 index 00000000000..b37c1c759bc --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/signbits-intrinsics-unpack.ll @@ -0,0 +1,97 @@ +; Test that DAGCombiner gets helped by ComputeNumSignBitsForTargetNode() with +; vector intrinsics. +; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 < %s | FileCheck %s + +declare <8 x i16> @llvm.s390.vuphb(<16 x i8>) + +; VUPHB +define <8 x i16> @f0() { +; CHECK-LABEL: f0: +; CHECK-LABEL: # %bb.0: +; CHECK: vuphb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuphb(<16 x i8> + <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, + i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>) + %trunc = trunc <8 x i16> %unp to <8 x i8> + %ret = sext <8 x i8> %trunc to <8 x i16> + ret <8 x i16> %ret +} + +declare <4 x i32> @llvm.s390.vuphh(<8 x i16>) + +; VUPHH +define <4 x i32> @f1() { +; CHECK-LABEL: f1: +; CHECK-LABEL: # %bb.0: +; CHECK: vuphh %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuphh(<8 x i16> + <i16 0, i16 1, i16 0, i16 1, + i16 0, i16 1, i16 0, i16 1>) + %trunc = trunc <4 x i32> %unp to <4 x i16> + %ret = sext <4 x i16> %trunc to <4 x i32> + ret <4 x i32> %ret +} + +declare <2 x i64> @llvm.s390.vuphf(<4 x i32>) + +; VUPHF +define <2 x i64> @f2() { +; CHECK-LABEL: f2: +; CHECK-LABEL: # %bb.0: +; CHECK: vuphf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuphf(<4 x i32> <i32 0, i32 1, i32 0, i32 1>) + %trunc = trunc <2 x i64> %unp to <2 x i32> + %ret = sext <2 x i32> %trunc to <2 x i64> + ret <2 x i64> %ret +} + +declare <8 x i16> @llvm.s390.vuplb(<16 x i8>) + +; VUPLB +define <8 x i16> @f3() { +; CHECK-LABEL: f3: +; CHECK-LABEL: # %bb.0: +; CHECK: vuplb %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <8 x i16> @llvm.s390.vuplb(<16 x i8> + <i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, + i8 0, i8 1, i8 0, i8 1, i8 0, i8 1, i8 0, i8 1>) + %trunc = trunc <8 x i16> %unp to <8 x i8> + %ret = sext <8 x i8> %trunc to <8 x i16> + ret <8 x i16> %ret +} + +declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>) + +; VUPLHW +define <4 x i32> @f4() { +; CHECK-LABEL: f4: +; CHECK-LABEL: # %bb.0: +; CHECK: vuplhw %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> + <i16 1, i16 0, i16 1, i16 0, + i16 1, i16 0, i16 1, i16 0>) + %trunc = trunc <4 x i32> %unp to <4 x i16> + %ret = sext <4 x i16> %trunc to <4 x i32> + ret <4 x i32> %ret +} + +declare <2 x i64> @llvm.s390.vuplf(<4 x i32>) + +; VUPLF +define <2 x i64> @f5() { +; CHECK-LABEL: f5: +; CHECK-LABEL: # %bb.0: +; CHECK: vuplf %v24, %v0 +; CHECK-NEXT: br %r14 + %unp = call <2 x i64> @llvm.s390.vuplf(<4 x i32> <i32 1, i32 0, i32 1, i32 0>) + %trunc = trunc <2 x i64> %unp to <2 x i32> + %ret = sext <2 x i32> %trunc to <2 x i64> + ret <2 x i64> %ret +} + diff --git a/llvm/test/CodeGen/SystemZ/signbits.ll 
b/llvm/test/CodeGen/SystemZ/signbits.ll new file mode 100644 index 00000000000..4c019a62a07 --- /dev/null +++ b/llvm/test/CodeGen/SystemZ/signbits.ll @@ -0,0 +1,36 @@ +; Test that ComputeNumSignBitsForTargetNode() (SELECT_CCMASK) will help +; DAGCombiner so that it knows that %sel0 is already sign extended. +; +; RUN: llc -mtriple=s390x-linux-gnu -mcpu=z13 -debug-only=isel < %s 2>&1 | FileCheck %s + +%0 = type <{ %1*, i16, [6 x i8] }> +%1 = type { i32 (...)** } + +define signext i16 @fun(%0* %Arg0, i16 signext %Arg1) { +entry: + br i1 undef, label %lab0, label %lab1 + +lab0: + %icmp0 = icmp eq i32 undef, 0 + %sel0 = select i1 %icmp0, i16 %Arg1, i16 1 + br label %lab1 + +lab1: +; CHECK: *** MachineFunction at end of ISel *** +; CHECK-LABEL: bb.2.lab1: +; CHECK-NOT: LHR +; CHECK: BRC + %phi0 = phi i16 [ 2, %entry ], [ %sel0, %lab0 ] + %sext0 = sext i16 %phi0 to i32 + br i1 undef, label %lab2, label %lab3 + +lab2: + %and0 = and i32 %sext0, 8 + %icmp1 = icmp eq i32 %and0, 0 + %sel1 = select i1 %icmp1, i16 %phi0, i16 4 + ret i16 %sel1 + +lab3: + ret i16 8 +} + diff --git a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll index fe4ae4574a6..7da1dedc0a1 100644 --- a/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll +++ b/llvm/test/CodeGen/SystemZ/vec-trunc-to-i1.ll @@ -17,8 +17,7 @@ define void @pr32275(<4 x i8> %B15) { ; CHECK-NEXT: vlvgf [[REG2]], [[REG3]], 2 ; CHECK-NEXT: vn [[REG2]], [[REG2]], [[REG0]] ; CHECK-NEXT: vlgvf [[REG4:%r[0-9]]], [[REG2]], 3 -; CHECK-NEXT: tmll [[REG4]], 1 -; CHECK-NEXT: jne .LBB0_1 +; CHECK-NEXT: cijlh [[REG4]], 0, .LBB0_1 ; CHECK-NEXT: # %bb.2: # %CF36 ; CHECK-NEXT: br %r14 BB: |
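The signbits-* tests above rely on the corresponding sign-bit reasoning: a sign-extending unpack turns an i32 element into an i64 element with at least 33 identical leading bits, so a later `trunc` to i32 followed by `sext` back to i64 changes nothing and DAGCombiner can drop the pair. The snippet below is a standalone model of that arithmetic under the same caveat as the earlier sketch; it is not LLVM code, and `numSignBits64` / `unpackHighSigned` are made-up helpers.

```cpp
// Standalone model of the sign-bit counting that lets DAGCombiner drop a
// redundant sext(trunc(x)) pair after a sign-extending unpack.
#include <cassert>
#include <cstdint>

// Number of leading bits equal to the sign bit of a 64-bit value.
unsigned numSignBits64(int64_t V) {
  uint64_t U = static_cast<uint64_t>(V);
  uint64_t Sign = (U >> 63) & 1;
  unsigned N = 1;
  for (int Bit = 62; Bit >= 0 && ((U >> Bit) & 1) == Sign; --Bit)
    ++N;
  return N;
}

// Model of one sign-extending unpack element: i32 -> i64.
int64_t unpackHighSigned(int32_t Elt) { return static_cast<int64_t>(Elt); }

int main() {
  const int32_t Elts[] = {0, 1, -1, 123456, -123456};
  for (int32_t Elt : Elts) {
    int64_t Wide = unpackHighSigned(Elt);
    // The unpacked element always has at least 33 sign bits, so truncating
    // it back to i32 and sign-extending again reproduces the same value.
    assert(numSignBits64(Wide) >= 33);
    int64_t RoundTrip = static_cast<int64_t>(static_cast<int32_t>(Wide));
    assert(RoundTrip == Wide);
  }
  return 0;
}
```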

