| author | Tom Stellard <tstellar@redhat.com> | 2018-11-30 04:51:41 +0000 |
|---|---|---|
| committer | Tom Stellard <tstellar@redhat.com> | 2018-11-30 04:51:41 +0000 |
| commit | 4a6ae60f26152979c80137df145e834a889a64fc (patch) | |
| tree | 6b95e448dd40f893c395665a6ee816ed00ed1ab9 /llvm/test/CodeGen/PowerPC/swaps-le-6.ll | |
| parent | d6ffc0c6ead2c0e94f7268b7270f30cc3f478f19 (diff) | |
Merging r339260:
------------------------------------------------------------------------
r339260 | syzaara | 2018-08-08 08:20:43 -0700 (Wed, 08 Aug 2018) | 13 lines
[PowerPC] Improve codegen for vector loads using scalar_to_vector
This patch aims to improve the codegen for vector loads involving the
scalar_to_vector (load X) sequence. Previously, scalar_to_vector (load X) was
lowered to an ld->mv sequence (a scalar load followed by a move into a vector
register); this patch instead allows scalar_to_vector (load X) to use:
  LXSD and LXSDX for i64 and f64
  LXSIWAX for i32 (sign extension to i64)
  LXSIWZX for i32 and f64
Committing on behalf of Amy Kwan.
Differential Revision: https://reviews.llvm.org/D48950
------------------------------------------------------------------------
llvm-svn: 347957
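
For context, the scalar_to_vector (load X) pattern described above typically arises when a loaded scalar is inserted into element 0 of a vector. Below is a minimal, hypothetical IR sketch of that shape; the global, function name, and values are invented for illustration and are not taken from the patch or its tests.

```llvm
; Hypothetical example: an i64 load feeding element 0 of a <2 x i64> vector.
; During SelectionDAG lowering this load can become a
; (scalar_to_vector (load X)) node; per the commit message, such nodes were
; previously selected as an ld plus a move into a vector register, and can
; now use a direct load such as LXSD/LXSDX for i64.
@val = global i64 42, align 8

define <2 x i64> @load_into_vector() {
entry:
  %s = load i64, i64* @val, align 8
  %v = insertelement <2 x i64> undef, i64 %s, i32 0
  ret <2 x i64> %v
}
```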
Diffstat (limited to 'llvm/test/CodeGen/PowerPC/swaps-le-6.ll')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | llvm/test/CodeGen/PowerPC/swaps-le-6.ll | 89 |

1 file changed, 57 insertions(+), 32 deletions(-)
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
index 82c240e46d2..ac0bcc74068 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -1,12 +1,15 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 \
-; RUN:   -mtriple=powerpc64le-unknown-linux-gnu -O3 < %s | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
+; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
+; RUN:   -O3 < %s | FileCheck %s
 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
-; RUN:   -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-P9 \
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   < %s | FileCheck %s --check-prefix=CHECK-P9 \
 ; RUN:   --implicit-check-not xxswapd
 ; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu -O3 \
-; RUN:   -verify-machineinstrs -mattr=-power9-vector < %s | FileCheck %s
+; RUN:   -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mattr=-power9-vector < %s | FileCheck %s
 
 ; These tests verify that VSX swap optimization works when loading a scalar
 ; into a vector register.
@@ -17,6 +20,31 @@
 @y = global double 1.780000e+00, align 8
 
 define void @bar0() {
+; CHECK-LABEL: bar0:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis r3, r2, .LC0@toc@ha
+; CHECK: addis r4, r2, .LC1@toc@ha
+; CHECK: ld r3, .LC0@toc@l(r3)
+; CHECK: addis r3, r2, .LC2@toc@ha
+; CHECK: ld r3, .LC2@toc@l(r3)
+; CHECK: xxpermdi vs0, vs0, vs1, 1
+; CHECK: stxvd2x vs0, 0, r3
+; CHECK: blr
+;
+; CHECK-P9-LABEL: bar0:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9: addis r3, r2, .LC0@toc@ha
+; CHECK-P9: addis r4, r2, .LC1@toc@ha
+; CHECK-P9: ld r3, .LC0@toc@l(r3)
+; CHECK-P9: ld r4, .LC1@toc@l(r4)
+; CHECK-P9: lfd f0, 0(r3)
+; CHECK-P9: lxvx vs1, 0, r4
+; CHECK-P9: addis r3, r2, .LC2@toc@ha
+; CHECK-P9: ld r3, .LC2@toc@l(r3)
+; CHECK-P9: xxpermdi vs0, f0, f0, 2
+; CHECK-P9: xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9: stxvx vs0, 0, r3
+; CHECK-P9: blr
 entry:
   %0 = load <2 x double>, <2 x double>* @x, align 16
   %1 = load double, double* @y, align 8
@@ -25,21 +53,32 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @bar0
-; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
-; CHECK-DAG: lfdx [[REG2:[0-9]+]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK: xxpermdi [[REG5:[0-9]+]], [[REG4]], [[REG1]], 1
-; CHECK: stxvd2x [[REG5]]
-
-; CHECK-P9-LABEL: @bar0
-; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
-; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxpermdi [[REG5:[0-9]+]], [[REG1]], [[REG4]], 1
-; CHECK-P9: stxvx [[REG5]]
-
 define void @bar1() {
+; CHECK-LABEL: bar1:
+; CHECK: # %bb.0: # %entry
+; CHECK: addis r3, r2, .LC0@toc@ha
+; CHECK: addis r4, r2, .LC1@toc@ha
+; CHECK: ld r3, .LC0@toc@l(r3)
+; CHECK: addis r3, r2, .LC2@toc@ha
+; CHECK: ld r3, .LC2@toc@l(r3)
+; CHECK: xxmrghd vs0, vs1, vs0
+; CHECK: stxvd2x vs0, 0, r3
+; CHECK: blr
+;
+; CHECK-P9-LABEL: bar1:
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9: addis r3, r2, .LC0@toc@ha
+; CHECK-P9: addis r4, r2, .LC1@toc@ha
+; CHECK-P9: ld r3, .LC0@toc@l(r3)
+; CHECK-P9: ld r4, .LC1@toc@l(r4)
+; CHECK-P9: lfd f0, 0(r3)
+; CHECK-P9: lxvx vs1, 0, r4
+; CHECK-P9: addis r3, r2, .LC2@toc@ha
+; CHECK-P9: ld r3, .LC2@toc@l(r3)
+; CHECK-P9: xxpermdi vs0, f0, f0, 2
+; CHECK-P9: xxmrgld vs0, vs0, vs1
+; CHECK-P9: stxvx vs0, 0, r3
+; CHECK-P9: blr
 entry:
   %0 = load <2 x double>, <2 x double>* @x, align 16
   %1 = load double, double* @y, align 8
@@ -48,17 +87,3 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: @bar1
-; CHECK-DAG: lxvd2x [[REG1:[0-9]+]]
-; CHECK-DAG: lfdx [[REG2:[0-9]+]]
-; CHECK: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK: xxmrghd [[REG5:[0-9]+]], [[REG1]], [[REG4]]
-; CHECK: stxvd2x [[REG5]]
-
-; CHECK-P9-LABEL: @bar1
-; CHECK-P9-DAG: lxvx [[REG1:[0-9]+]]
-; CHECK-P9-DAG: lfd [[REG2:[0-9]+]], 0(3)
-; CHECK-P9: xxspltd [[REG4:[0-9]+]], [[REG2]], 0
-; CHECK-P9: xxmrgld [[REG5:[0-9]+]], [[REG4]], [[REG1]]
-; CHECK-P9: stxvx [[REG5]]
-
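
One note on the regenerated checks in the diff above: the added llc flags -ppc-asm-full-reg-names and -ppc-vsr-nums-as-vr make the emitted assembly use symbolic register names (r3, f0, vs1) rather than bare numbers, which is why the new CHECK lines can name registers directly instead of capturing them with patterns such as [[REG1:[0-9]+]]. Below is a minimal, hypothetical test sketch using the same flags; the global, function, and checks are invented and deliberately conservative, matching only the label and the return.

```llvm
; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN:   | FileCheck %s

@g = global double 1.0, align 8

define double @load_g() {
; With symbolic register names the test could also match registers literally
; (e.g. "lfd f1"); only stable output is checked here.
; CHECK-LABEL: load_g:
; CHECK: blr
entry:
  %v = load double, double* @g, align 8
  ret double %v
}
```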

