summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp 2
-rw-r--r-- llvm/test/CodeGen/SystemZ/vec-perm-12.ll 43
2 files changed, 44 insertions, 1 deletion
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index 4aac09132ec..0e2bb5ebd2a 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -3895,7 +3895,7 @@ static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
GS.addUndef();
} else {
GS.add(SDValue(), ResidueOps.size());
- ResidueOps.push_back(Op);
+ ResidueOps.push_back(BVN->getOperand(I));
}
}
diff --git a/llvm/test/CodeGen/SystemZ/vec-perm-12.ll b/llvm/test/CodeGen/SystemZ/vec-perm-12.ll
new file mode 100644
index 00000000000..b70b13d9068
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/vec-perm-12.ll
@@ -0,0 +1,43 @@
+; Test inserting a truncated value into a vector element
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-CODE %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
+
+define <4 x i32> @f1(<4 x i32> %x, i64 %y) {
+; CHECK-CODE-LABEL: f1:
+; CHECK-CODE: vlvgf [[ELT:%v[0-9]+]], %r2, 0
+; CHECK-CODE: larl [[REG:%r[0-5]]],
+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
+; CHECK-CODE: vperm %v24, %v24, [[ELT]], [[MASK]]
+; CHECK-CODE: br %r14
+; Expected permute-mask bytes: 12-15, 8-11, 4-7 mirror the %x lanes 3, 2, 1 used below; 16-19 presumably selects word 0 of the second vperm source ([ELT]) - confirm against the VPERM definition.
+; CHECK-VECTOR: .byte 12
+; CHECK-VECTOR-NEXT: .byte 13
+; CHECK-VECTOR-NEXT: .byte 14
+; CHECK-VECTOR-NEXT: .byte 15
+; CHECK-VECTOR-NEXT: .byte 8
+; CHECK-VECTOR-NEXT: .byte 9
+; CHECK-VECTOR-NEXT: .byte 10
+; CHECK-VECTOR-NEXT: .byte 11
+; CHECK-VECTOR-NEXT: .byte 4
+; CHECK-VECTOR-NEXT: .byte 5
+; CHECK-VECTOR-NEXT: .byte 6
+; CHECK-VECTOR-NEXT: .byte 7
+; CHECK-VECTOR-NEXT: .byte 16
+; CHECK-VECTOR-NEXT: .byte 17
+; CHECK-VECTOR-NEXT: .byte 18
+; CHECK-VECTOR-NEXT: .byte 19
+; Build the result <x[3], x[2], x[1], trunc(y)>: three lanes are taken from %x in reverse order, the last lane is the i64 argument truncated to i32.
+ %elt0 = extractelement <4 x i32> %x, i32 3
+ %elt1 = extractelement <4 x i32> %x, i32 2
+ %elt2 = extractelement <4 x i32> %x, i32 1
+ %elt3 = trunc i64 %y to i32
+ %vec0 = insertelement <4 x i32> undef, i32 %elt0, i32 0
+ %vec1 = insertelement <4 x i32> %vec0, i32 %elt1, i32 1
+ %vec2 = insertelement <4 x i32> %vec1, i32 %elt2, i32 2
+ %vec3 = insertelement <4 x i32> %vec2, i32 %elt3, i32 3
+ ret <4 x i32> %vec3
+}
+
OpenPOWER on IntegriCloud