summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Guozhi Wei <carrot@google.com> 2016-04-29 17:00:54 +0000
committer: Guozhi Wei <carrot@google.com> 2016-04-29 17:00:54 +0000
commit: fa3e04298b65abf1ef34187ab45db8b16329d4bf (patch)
tree: b4e9d5d6c3133bc73ee94ffe7a8693b1947874a4
parent: ee0416459918b6368b547104bc88c7516efd9951 (diff)
download: bcm5719-llvm-fa3e04298b65abf1ef34187ab45db8b16329d4bf.tar.gz
          bcm5719-llvm-fa3e04298b65abf1ef34187ab45db8b16329d4bf.zip
[PPC] Enable shuffling of VSX vectors
This patch fixes PR27078 by enabling shuffling of vectors if VSX is available.

llvm-svn: 268064
-rw-r--r--  llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 6
-rw-r--r--  llvm/test/CodeGen/PowerPC/pr27078.ll        | 15
2 files changed, 17 insertions, 4 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 47a9a40e2d2..d0f9e8dcc98 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -11940,10 +11940,8 @@ PPCTargetLowering::shouldExpandBuildVectorWithShuffles(
if (VT == MVT::v2i64)
return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
- if (Subtarget.hasQPX()) {
- if (VT == MVT::v4f32 || VT == MVT::v4f64 || VT == MVT::v4i1)
- return true;
- }
+ if (Subtarget.hasVSX() || Subtarget.hasQPX())
+ return true;
return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
}
diff --git a/llvm/test/CodeGen/PowerPC/pr27078.ll b/llvm/test/CodeGen/PowerPC/pr27078.ll
new file mode 100644
index 00000000000..324462cf601
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/pr27078.ll
@@ -0,0 +1,15 @@
+; RUN: llc -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx < %s | FileCheck %s
+
+define <4 x float> @bar(float* %p, float* %q) {
+ %1 = bitcast float* %p to <12 x float>*
+ %2 = bitcast float* %q to <12 x float>*
+ %3 = load <12 x float>, <12 x float>* %1, align 16
+ %4 = load <12 x float>, <12 x float>* %2, align 16
+ %5 = fsub <12 x float> %4, %3
+ %6 = shufflevector <12 x float> %5, <12 x float> undef, <4 x i32> <i32 0, i32 3, i32 6, i32 9>
+ ret <4 x float> %6
+
+; CHECK: vspltw
+; CHECK: vmrghw
+; CHECK: vsldoi
+}
OpenPOWER on IntegriCloud