summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp9
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-sext.ll91
-rw-r--r--llvm/test/CodeGen/SystemZ/vec-zext.ll91
3 files changed, 191 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 74740d1641a..39aba8979b1 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2326,6 +2326,15 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) {
return DAG.getNode(Opcode, DL, WidenVT, InOp);
return DAG.getNode(Opcode, DL, WidenVT, InOp, N->getOperand(1), Flags);
}
+ if (WidenVT.getSizeInBits() == InVT.getSizeInBits()) {
+ // If both input and result vector types are of the same width, extend
+ // operations should be done with SIGN/ZERO_EXTEND_VECTOR_INREG, which
+ // accepts fewer elements in the result than in the input.
+ if (Opcode == ISD::SIGN_EXTEND)
+ return DAG.getSignExtendVectorInReg(InOp, DL, WidenVT);
+ if (Opcode == ISD::ZERO_EXTEND)
+ return DAG.getZeroExtendVectorInReg(InOp, DL, WidenVT);
+ }
}
if (TLI.isTypeLegal(InWidenVT)) {
diff --git a/llvm/test/CodeGen/SystemZ/vec-sext.ll b/llvm/test/CodeGen/SystemZ/vec-sext.ll
new file mode 100644
index 00000000000..9831de52ee8
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-sext.ll
@@ -0,0 +1,91 @@
+; Test that vector sexts are done efficiently with unpack instructions also in
+; case of fewer elements than allowed, e.g. <2 x i32>.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+
+define <2 x i16> @fun1(<2 x i8> %val1) {
+; CHECK-LABEL: fun1:
+; CHECK: vuphb %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = sext <2 x i8> %val1 to <2 x i16>
+ ret <2 x i16> %z
+}
+
+define <2 x i32> @fun2(<2 x i8> %val1) {
+; CHECK-LABEL: fun2:
+; CHECK: vuphb %v0, %v24
+; CHECK-NEXT: vuphh %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = sext <2 x i8> %val1 to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i64> @fun3(<2 x i8> %val1) {
+; CHECK-LABEL: fun3:
+; CHECK: vuphb %v0, %v24
+; CHECK-NEXT: vuphh %v0, %v0
+; CHECK-NEXT: vuphf %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = sext <2 x i8> %val1 to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <2 x i32> @fun4(<2 x i16> %val1) {
+; CHECK-LABEL: fun4:
+; CHECK: vuphh %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = sext <2 x i16> %val1 to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i64> @fun5(<2 x i16> %val1) {
+; CHECK-LABEL: fun5:
+; CHECK: vuphh %v0, %v24
+; CHECK-NEXT: vuphf %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = sext <2 x i16> %val1 to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <2 x i64> @fun6(<2 x i32> %val1) {
+; CHECK-LABEL: fun6:
+; CHECK: vuphf %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = sext <2 x i32> %val1 to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <4 x i16> @fun7(<4 x i8> %val1) {
+; CHECK-LABEL: fun7:
+; CHECK: vuphb %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = sext <4 x i8> %val1 to <4 x i16>
+ ret <4 x i16> %z
+}
+
+define <4 x i32> @fun8(<4 x i8> %val1) {
+; CHECK-LABEL: fun8:
+; CHECK: vuphb %v0, %v24
+; CHECK-NEXT: vuphh %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = sext <4 x i8> %val1 to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <4 x i32> @fun9(<4 x i16> %val1) {
+; CHECK-LABEL: fun9:
+; CHECK: vuphh %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = sext <4 x i16> %val1 to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <8 x i16> @fun10(<8 x i8> %val1) {
+; CHECK-LABEL: fun10:
+; CHECK: vuphb %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = sext <8 x i8> %val1 to <8 x i16>
+ ret <8 x i16> %z
+}
+
diff --git a/llvm/test/CodeGen/SystemZ/vec-zext.ll b/llvm/test/CodeGen/SystemZ/vec-zext.ll
new file mode 100644
index 00000000000..831594d4020
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-zext.ll
@@ -0,0 +1,91 @@
+; Test that vector zexts are done efficiently with unpack instructions also in
+; case of fewer elements than allowed, e.g. <2 x i32>.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
+
+
+define <2 x i16> @fun1(<2 x i8> %val1) {
+; CHECK-LABEL: fun1:
+; CHECK: vuplhb %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = zext <2 x i8> %val1 to <2 x i16>
+ ret <2 x i16> %z
+}
+
+define <2 x i32> @fun2(<2 x i8> %val1) {
+; CHECK-LABEL: fun2:
+; CHECK: vuplhb %v0, %v24
+; CHECK-NEXT: vuplhh %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = zext <2 x i8> %val1 to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i64> @fun3(<2 x i8> %val1) {
+; CHECK-LABEL: fun3:
+; CHECK: vuplhb %v0, %v24
+; CHECK-NEXT: vuplhh %v0, %v0
+; CHECK-NEXT: vuplhf %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = zext <2 x i8> %val1 to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <2 x i32> @fun4(<2 x i16> %val1) {
+; CHECK-LABEL: fun4:
+; CHECK: vuplhh %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = zext <2 x i16> %val1 to <2 x i32>
+ ret <2 x i32> %z
+}
+
+define <2 x i64> @fun5(<2 x i16> %val1) {
+; CHECK-LABEL: fun5:
+; CHECK: vuplhh %v0, %v24
+; CHECK-NEXT: vuplhf %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = zext <2 x i16> %val1 to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <2 x i64> @fun6(<2 x i32> %val1) {
+; CHECK-LABEL: fun6:
+; CHECK: vuplhf %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = zext <2 x i32> %val1 to <2 x i64>
+ ret <2 x i64> %z
+}
+
+define <4 x i16> @fun7(<4 x i8> %val1) {
+; CHECK-LABEL: fun7:
+; CHECK: vuplhb %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = zext <4 x i8> %val1 to <4 x i16>
+ ret <4 x i16> %z
+}
+
+define <4 x i32> @fun8(<4 x i8> %val1) {
+; CHECK-LABEL: fun8:
+; CHECK: vuplhb %v0, %v24
+; CHECK-NEXT: vuplhh %v24, %v0
+; CHECK-NEXT: br %r14
+ %z = zext <4 x i8> %val1 to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <4 x i32> @fun9(<4 x i16> %val1) {
+; CHECK-LABEL: fun9:
+; CHECK: vuplhh %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = zext <4 x i16> %val1 to <4 x i32>
+ ret <4 x i32> %z
+}
+
+define <8 x i16> @fun10(<8 x i8> %val1) {
+; CHECK-LABEL: fun10:
+; CHECK: vuplhb %v24, %v24
+; CHECK-NEXT: br %r14
+ %z = zext <8 x i8> %val1 to <8 x i16>
+ ret <8 x i16> %z
+}
+
OpenPOWER on IntegriCloud