summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen/PowerPC
diff options
context:
space:
mode:
authorBill Schmidt <wschmidt@linux.vnet.ibm.com>2015-07-21 21:40:17 +0000
committerBill Schmidt <wschmidt@linux.vnet.ibm.com>2015-07-21 21:40:17 +0000
commit2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311 (patch)
treeb7a050ac1060d2a5c5a44d011033bf7a7a2e2bef /llvm/test/CodeGen/PowerPC
parentc1fbb3540a22c64b0afcfb3c2e99171ae7b13414 (diff)
downloadbcm5719-llvm-2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311.tar.gz
bcm5719-llvm-2be8054b49c42a6bd7d1e94f2b9ca24d92ae7311.zip
[PPC64LE] More vector swap optimization TLC
This makes one substantive change and a few stylistic changes to the VSX swap optimization pass. The substantive change is to permit LXSDX and LXSSPX instructions to participate in swap optimization computations. The previous change to insert a swap following a SUBREG_TO_REG widening operation makes this almost trivial. I experimented with also permitting STXSDX and STXSSPX instructions. This can be done using similar techniques: we could insert a swap prior to a narrowing COPY operation, and then permit these stores to participate. I prototyped this, but discovered that the pattern of a narrowing COPY followed by an STXSDX does not occur in any of our test-suite code. So instead, I added commentary indicating that this could be done. Other TLC: - I changed SH_COPYSCALAR to SH_COPYWIDEN to more clearly indicate the direction of the copy. - I factored the insertion of swap instructions into a separate function. Finally, I added a new test case to check that the scalar-to-vector loads are working properly with swap optimization. llvm-svn: 242838
Diffstat (limited to 'llvm/test/CodeGen/PowerPC')
-rw-r--r-- llvm/test/CodeGen/PowerPC/swaps-le-6.ll | 44
1 files changed, 44 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
new file mode 100644
index 00000000000..365aeee2d8f
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -0,0 +1,44 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+
+; These tests verify that VSX swap optimization works when loading a scalar
+; into a vector register.
+
+
+@x = global <2 x double> <double 9.970000e+01, double -1.032220e+02>, align 16
+@z = global <2 x double> <double 2.332000e+01, double 3.111111e+01>, align 16
+@y = global double 1.780000e+00, align 8
+
+define void @bar0() {
+entry:
+ %0 = load <2 x double>, <2 x double>* @x, align 16
+ %1 = load double, double* @y, align 8
+ %vecins = insertelement <2 x double> %0, double %1, i32 0
+ store <2 x double> %vecins, <2 x double>* @z, align 16
+ ret void
+}
+
+; CHECK-LABEL: @bar0
+; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
+; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
+; CHECK: stxvd2x [[REG5]]
+
+define void @bar1() {
+entry:
+ %0 = load <2 x double>, <2 x double>* @x, align 16
+ %1 = load double, double* @y, align 8
+ %vecins = insertelement <2 x double> %0, double %1, i32 1
+ store <2 x double> %vecins, <2 x double>* @z, align 16
+ ret void
+}
+
+; CHECK-LABEL: @bar1
+; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
+; CHECK-DAG: lxsdx [[REG2:[0-9]+]]
+; CHECK: xxswapd [[REG3:[0-9]+]], [[REG2]]
+; CHECK: xxspltd [[REG4:[0-9]+]], [[REG3]], 1
+; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
+; CHECK: stxvd2x [[REG5]]
+
OpenPOWER on IntegriCloud