diff options
author | Hal Finkel <hfinkel@anl.gov> | 2015-02-25 01:06:45 +0000 |
---|---|---|
committer | Hal Finkel <hfinkel@anl.gov> | 2015-02-25 01:06:45 +0000 |
commit | c93a9a2cb4593839b2cd495c53d5b1c9cf456830 (patch) | |
tree | 79da350b716ea3184f6913a2f638d859be362e7f /llvm/test/CodeGen | |
parent | 5638c1146ea2fb951932a55c9a4aa01d994f0d26 (diff) | |
download | bcm5719-llvm-c93a9a2cb4593839b2cd495c53d5b1c9cf456830.tar.gz bcm5719-llvm-c93a9a2cb4593839b2cd495c53d5b1c9cf456830.zip |
[PowerPC] Add support for the QPX vector instruction set
This adds support for the QPX vector instruction set, which is used by the
enhanced A2 cores on the IBM BG/Q supercomputers. QPX vectors are 256 bits
wide, holding 4 double-precision floating-point values. Boolean values, modeled
here as <4 x i1>, are actually also represented as floating-point values
(essentially { -1, 1 } for { false, true }). QPX shares many features with
Altivec and VSX, but is distinct from both of them. One major difference is
that, instead of adding completely-separate vector registers, QPX vector
registers are extensions of the scalar floating-point registers (lane 0 is the
corresponding scalar floating-point value). The operations supported on QPX
vectors mirror those supported on the scalar floating-point values (with some
additional ones for permutations and logical/comparison operations).
I've been maintaining this support out-of-tree, as part of the bgclang project,
for several years. This is not the entire bgclang patch set, but is most of the
subset that can be cleanly integrated into LLVM proper at this time. Adding
this to the LLVM backend is part of my efforts to rebase bgclang to the current
LLVM trunk, but is independently useful (especially for codes that use LLVM as
a JIT in library form).
The assembler/disassembler test coverage is complete. The CodeGen test coverage
is not, but I've included some tests, and more will be added as follow-up work.
llvm-svn: 230413
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-bv-sint.ll | 33 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-bv.ll | 37 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-func-clobber.ll | 21 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-load.ll | 25 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-recipest.ll | 194 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-rounding-ops.ll | 109 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-s-load.ll | 25 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-s-sel.ll | 143 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-s-store.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-sel.ll | 151 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-store.ll | 24 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/qpx-unalperm.ll | 64 | ||||
-rw-r--r-- | llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll | 1 |
13 files changed, 850 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/PowerPC/qpx-bv-sint.ll b/llvm/test/CodeGen/PowerPC/qpx-bv-sint.ll new file mode 100644 index 00000000000..0bc14ed4351 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-bv-sint.ll @@ -0,0 +1,33 @@ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @s452() nounwind { +entry: + br label %for.body4 + +for.body4: ; preds = %for.body4, %entry + %conv.4 = sitofp i32 undef to double + %conv.5 = sitofp i32 undef to double + %mul.4.v.i0.1 = insertelement <2 x double> undef, double %conv.4, i32 0 + %mul.4.v.i0.2 = insertelement <2 x double> %mul.4.v.i0.1, double %conv.5, i32 1 + %mul.4 = fmul <2 x double> %mul.4.v.i0.2, undef + %add7.4 = fadd <2 x double> undef, %mul.4 + store <2 x double> %add7.4, <2 x double>* undef, align 16 + br i1 undef, label %for.end, label %for.body4 + +for.end: ; preds = %for.body4 + unreachable +; CHECK-LABEL: @s452 +; CHECK: lfiwax [[REG1:[0-9]+]], +; CHECK: fcfid [[REG2:[0-9]+]], [[REG1]] +; FIXME: We could 'promote' this to a vector earlier and remove this splat. +; CHECK: qvesplati {{[0-9]+}}, [[REG2]], 0 +; CHECK: qvfmul +; CHECK: qvfadd +; CHECK: qvesplati {{[0-9]+}}, +; FIXME: We can use qvstfcdx here instead of two stores. 
+; CHECK: stfd +; CHECK: stfd +} + diff --git a/llvm/test/CodeGen/PowerPC/qpx-bv.ll b/llvm/test/CodeGen/PowerPC/qpx-bv.ll new file mode 100644 index 00000000000..ae181de383b --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-bv.ll @@ -0,0 +1,37 @@ +; RUN: llc < %s -mcpu=a2q | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define <4 x double> @foo(double %f1, double %f2, double %f3, double %f4) { + %v1 = insertelement <4 x double> undef, double %f1, i32 0 + %v2 = insertelement <4 x double> %v1, double %f2, i32 1 + %v3 = insertelement <4 x double> %v2, double %f3, i32 2 + %v4 = insertelement <4 x double> %v3, double %f4, i32 3 + ret <4 x double> %v4 + +; CHECK-LABEL: @foo +; CHECK: qvgpci [[REG1:[0-9]+]], 275 +; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101 +; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]] +; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]] +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]] +; CHECK: blr +} + +define <4 x float> @goo(float %f1, float %f2, float %f3, float %f4) { + %v1 = insertelement <4 x float> undef, float %f1, i32 0 + %v2 = insertelement <4 x float> %v1, float %f2, i32 1 + %v3 = insertelement <4 x float> %v2, float %f3, i32 2 + %v4 = insertelement <4 x float> %v3, float %f4, i32 3 + ret <4 x float> %v4 + +; CHECK-LABEL: @goo +; CHECK: qvgpci [[REG1:[0-9]+]], 275 +; CHECK-DAG: qvgpci [[REG2:[0-9]+]], 101 +; CHECK-DAG: qvfperm [[REG3:[0-9]+]], 3, 4, [[REG1]] +; CHECK-DAG: qvfperm [[REG4:[0-9]+]], 1, 2, [[REG1]] +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG3]], [[REG2]] +; CHECK: blr +} + diff --git a/llvm/test/CodeGen/PowerPC/qpx-func-clobber.ll b/llvm/test/CodeGen/PowerPC/qpx-func-clobber.ll new file mode 100644 index 00000000000..c1b808aa7c4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-func-clobber.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +declare <4 x 
double> @foo(<4 x double> %p) + +define <4 x double> @bar(<4 x double> %p, <4 x double> %q) { +entry: + %v = call <4 x double> @foo(<4 x double> %p) + %w = call <4 x double> @foo(<4 x double> %q) + %x = fadd <4 x double> %v, %w + ret <4 x double> %x + +; CHECK-LABEL: @bar +; CHECK: qvstfdx 2, +; CHECK: bl foo +; CHECK: qvstfdx 1, +; CHECK: qvlfdx 1, +; CHECK: bl foo +; CHECK: qvlfdx [[REG:[0-9]+]], +; CHECK: qvfadd 1, [[REG]], 1 +} + diff --git a/llvm/test/CodeGen/PowerPC/qpx-load.ll b/llvm/test/CodeGen/PowerPC/qpx-load.ll new file mode 100644 index 00000000000..2eb29081e26 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-load.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define <4 x double> @foo(<4 x double>* %p) { +entry: + %v = load <4 x double>* %p, align 8 + ret <4 x double> %v +} + +; CHECK: @foo +; CHECK-DAG: li [[REG1:[0-9]+]], 31 +; CHECK-DAG: qvlfdx [[REG4:[0-9]+]], 0, 3 +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], 3, [[REG1]] +; CHECK-DAG: qvlpcldx [[REG3:[0-9]+]], 0, 3 +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]] +; CHECK: blr + +define <4 x double> @bar(<4 x double>* %p) { +entry: + %v = load <4 x double>* %p, align 32 + ret <4 x double> %v +} + +; CHECK: @bar +; CHECK: qvlfdx + diff --git a/llvm/test/CodeGen/PowerPC/qpx-recipest.ll b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll new file mode 100644 index 00000000000..0e01358e579 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-recipest.ll @@ -0,0 +1,194 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck -check-prefix=CHECK-SAFE %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) + +define <4 x double> @foo(<4 
x double> %a, <4 x double> %b) nounwind { +entry: + %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %r = fdiv <4 x double> %a, %x + ret <4 x double> %r + +; CHECK-LABEL: @foo +; CHECK: qvfrsqrte +; CHECK: qvfmul +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; an qvfmadd instead of a qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfmul +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfmul +; CHECK: blr + +; CHECK-SAFE-LABEL: @foo +; CHECK-SAFE: fsqrt +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define <4 x double> @foof(<4 x double> %a, <4 x float> %b) nounwind { +entry: + %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %y = fpext <4 x float> %x to <4 x double> + %r = fdiv <4 x double> %a, %y + ret <4 x double> %r + +; CHECK-LABEL: @foof +; CHECK: qvfrsqrtes +; CHECK: qvfmuls +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; an qvfmadd instead of a qvfnmsubs +; CHECK: qvfmadds +; CHECK: qvfmadds +; CHECK: qvfmuls +; CHECK: qvfmul +; CHECK: blr + +; CHECK-SAFE-LABEL: @foof +; CHECK-SAFE: fsqrts +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define <4 x float> @food(<4 x float> %a, <4 x double> %b) nounwind { +entry: + %x = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %b) + %y = fptrunc <4 x double> %x to <4 x float> + %r = fdiv <4 x float> %a, %y + ret <4 x float> %r + +; CHECK-LABEL: @food +; CHECK: qvfrsqrte +; CHECK: qvfmul +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; an qvfmadd instead of a qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfmul +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: qvfrsp +; CHECK: qvfmuls +; CHECK: blr + +; CHECK-SAFE-LABEL: @food +; CHECK-SAFE: fsqrt +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x float> @goo(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %x = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %b) + %r = fdiv <4 x float> %a, %x + ret <4 x 
float> %r + +; CHECK-LABEL: @goo +; CHECK: qvfrsqrtes +; CHECK: qvfmuls +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; an qvfmadd instead of a qvfnmsubs +; CHECK: qvfmadds +; CHECK: qvfmadds +; CHECK: qvfmuls +; CHECK: qvfmuls +; CHECK: blr + +; CHECK-SAFE-LABEL: @goo +; CHECK-SAFE: fsqrts +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x double> @foo2(<4 x double> %a, <4 x double> %b) nounwind { +entry: + %r = fdiv <4 x double> %a, %b + ret <4 x double> %r + +; CHECK-LABEL: @foo2 +; CHECK: qvfre +; CHECK: qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfnmsub +; CHECK: qvfmadd +; CHECK: qvfmul +; CHECK: blr + +; CHECK-SAFE-LABEL: @foo2 +; CHECK-SAFE: fdiv +; CHECK-SAFE: blr +} + +define <4 x float> @goo2(<4 x float> %a, <4 x float> %b) nounwind { +entry: + %r = fdiv <4 x float> %a, %b + ret <4 x float> %r + +; CHECK-LABEL: @goo2 +; CHECK: qvfres +; CHECK: qvfnmsubs +; CHECK: qvfmadds +; CHECK: qvfmuls +; CHECK: blr + +; CHECK-SAFE-LABEL: @goo2 +; CHECK-SAFE: fdivs +; CHECK-SAFE: blr +} + +define <4 x double> @foo3(<4 x double> %a) nounwind { +entry: + %r = call <4 x double> @llvm.sqrt.v4f64(<4 x double> %a) + ret <4 x double> %r + +; CHECK-LABEL: @foo3 +; CHECK: qvfrsqrte +; CHECK: qvfmul +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; an qvfmadd instead of a qvfnmsub +; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfcmpeq +; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfmul +; CHECK-DAG: qvfmul +; CHECK-DAG: qvfmadd +; CHECK-DAG: qvfmul +; CHECK-DAG: qvfmul +; CHECK: qvfsel +; CHECK: blr + +; CHECK-SAFE-LABEL: @foo3 +; CHECK-SAFE: fsqrt +; CHECK-SAFE: blr +} + +define <4 x float> @goo3(<4 x float> %a) nounwind { +entry: + %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %a) + ret <4 x float> %r + +; CHECK-LABEL: @goo3 +; CHECK: qvfrsqrtes +; CHECK: qvfmuls +; FIXME: We're currently loading two constants here (1.5 and -1.5), and using +; an qvfmadds instead of a qvfnmsubs +; CHECK-DAG: qvfmadds +; CHECK-DAG: 
qvfcmpeq +; CHECK-DAG: qvfmadds +; CHECK-DAG: qvfmuls +; CHECK-DAG: qvfmuls +; CHECK: qvfsel +; CHECK: blr + +; CHECK-SAFE-LABEL: @goo3 +; CHECK-SAFE: fsqrts +; CHECK-SAFE: blr +} + diff --git a/llvm/test/CodeGen/PowerPC/qpx-rounding-ops.ll b/llvm/test/CodeGen/PowerPC/qpx-rounding-ops.ll new file mode 100644 index 00000000000..6fdd8e6a714 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-rounding-ops.ll @@ -0,0 +1,109 @@ +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2q -enable-unsafe-fp-math | FileCheck -check-prefix=CHECK-FM %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +define <4 x float> @test1(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.floor.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test1: +; CHECK: qvfrim 1, 1 + +; CHECK-FM: test1: +; CHECK-FM: qvfrim 1, 1 +} + +declare <4 x float> @llvm.floor.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test2(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.floor.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test2: +; CHECK: qvfrim 1, 1 + +; CHECK-FM: test2: +; CHECK-FM: qvfrim 1, 1 +} + +declare <4 x double> @llvm.floor.v4f64(<4 x double>) nounwind readnone + +define <4 x float> @test3(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.nearbyint.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test3: +; CHECK-NOT: qvfrin + +; CHECK-FM: test3: +; CHECK-FM-NOT: qvfrin +} + +declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test4(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.nearbyint.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test4: +; 
CHECK-NOT: qvfrin + +; CHECK-FM: test4: +; CHECK-FM-NOT: qvfrin +} + +declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>) nounwind readnone + +define <4 x float> @test5(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.ceil.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test5: +; CHECK: qvfrip 1, 1 + +; CHECK-FM: test5: +; CHECK-FM: qvfrip 1, 1 +} + +declare <4 x float> @llvm.ceil.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test6(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.ceil.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test6: +; CHECK: qvfrip 1, 1 + +; CHECK-FM: test6: +; CHECK-FM: qvfrip 1, 1 +} + +declare <4 x double> @llvm.ceil.v4f64(<4 x double>) nounwind readnone + +define <4 x float> @test9(<4 x float> %x) nounwind { + %call = tail call <4 x float> @llvm.trunc.v4f32(<4 x float> %x) nounwind readnone + ret <4 x float> %call + +; CHECK: test9: +; CHECK: qvfriz 1, 1 + +; CHECK-FM: test9: +; CHECK-FM: qvfriz 1, 1 +} + +declare <4 x float> @llvm.trunc.v4f32(<4 x float>) nounwind readnone + +define <4 x double> @test10(<4 x double> %x) nounwind { + %call = tail call <4 x double> @llvm.trunc.v4f64(<4 x double> %x) nounwind readnone + ret <4 x double> %call + +; CHECK: test10: +; CHECK: qvfriz 1, 1 + +; CHECK-FM: test10: +; CHECK-FM: qvfriz 1, 1 +} + +declare <4 x double> @llvm.trunc.v4f64(<4 x double>) nounwind readnone + diff --git a/llvm/test/CodeGen/PowerPC/qpx-s-load.ll b/llvm/test/CodeGen/PowerPC/qpx-s-load.ll new file mode 100644 index 00000000000..8dfab1385dd --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-s-load.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define <4 x float> @foo(<4 x float>* %p) { +entry: + %v = load <4 x float>* %p, align 4 + ret <4 x float> %v +} + +; CHECK: @foo +; CHECK-DAG: li [[REG1:[0-9]+]], 15 +; CHECK-DAG: qvlfsx [[REG4:[0-9]+]], 0, 3 +; CHECK-DAG: qvlfsx [[REG2:[0-9]+]], 3, 
[[REG1]] +; CHECK-DAG: qvlpclsx [[REG3:[0-9]+]], 0, 3 +; CHECK-DAG: qvfperm 1, [[REG4]], [[REG2]], [[REG3]] +; CHECK: blr + +define <4 x float> @bar(<4 x float>* %p) { +entry: + %v = load <4 x float>* %p, align 16 + ret <4 x float> %v +} + +; CHECK: @bar +; CHECK: qvlfsx + diff --git a/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll b/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll new file mode 100644 index 00000000000..008efea5da4 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-s-sel.ll @@ -0,0 +1,143 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +@Q = constant <4 x i1> <i1 0, i1 undef, i1 1, i1 1>, align 16 +@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16 + +define <4 x float> @test1(<4 x float> %a, <4 x float> %b, <4 x i1> %c) nounwind readnone { +entry: + %r = select <4 x i1> %c, <4 x float> %a, <4 x float> %b + ret <4 x float> %r + +; CHECK-LABEL: @test1 +; CHECK: qvfsel 1, 3, 1, 2 +; CHECK: blr +} + +define <4 x float> @test2(<4 x float> %a, <4 x float> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone { +entry: + %v = insertelement <4 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2 + %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3 + %r = select <4 x i1> %v4, <4 x float> %a, <4 x float> %b + ret <4 x float> %r + +; CHECK-LABEL: @test2 +; CHECK: stw +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + +define <4 x i1> @test3(<4 x i1> %a) nounwind readnone { +entry: + %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1> + ret <4 x i1> %v + +; CHECK-LABEL: @test3 +; CHECK: qvlfsx [[REG:[0-9]+]], +; qvflogical 1, 1, [[REG]], 1 +; blr +} + +define <4 x i1> @test4(<4 x i1> %a) nounwind { +entry: + %q = load <4 x i1>* @Q, align 16 + %v = and <4 x i1> %a, %q + ret <4 x i1> %v + +; CHECK-LABEL: @test4 
+; CHECK-DAG: lbz +; CHECK-DAG: qvlfdx [[REG1:[0-9]+]], +; CHECK-DAG: stw +; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]] +; CHECK: qvflogical 1, 1, [[REG4]], 1 +; CHECK: blr +} + +define void @test5(<4 x i1> %a) nounwind { +entry: + store <4 x i1> %a, <4 x i1>* @R + ret void + +; CHECK-LABEL: @test5 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: stb +; CHECK: blr +} + +define i1 @test6(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test6 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define i1 @test7(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + %s = extractelement <4 x i1> %a, i32 3 + %q = and i1 %r, %s + ret i1 %q + +; CHECK-LABEL: @test7 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK-DAG: lwz [[REG4:[0-9]+]], +; FIXME: We're storing the vector twice, and that's silly. 
+; CHECK-DAG: qvstfiwx [[REG3]], +; CHECK: lwz [[REG5:[0-9]+]], +; CHECK: and 3, +; CHECK: blr +} + +define i1 @test8(<3 x i1> %a) nounwind { +entry: + %r = extractelement <3 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test8 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define <3 x float> @test9(<3 x float> %a, <3 x float> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone { +entry: + %v = insertelement <3 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2 + %r = select <3 x i1> %v3, <3 x float> %a, <3 x float> %b + ret <3 x float> %r + +; CHECK-LABEL: @test9 +; CHECK: stw +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + diff --git a/llvm/test/CodeGen/PowerPC/qpx-s-store.ll b/llvm/test/CodeGen/PowerPC/qpx-s-store.ll new file mode 100644 index 00000000000..d2ca45814da --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-s-store.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @foo(<4 x float> %v, <4 x float>* %p) { +entry: + store <4 x float> %v, <4 x float>* %p, align 4 + ret void +} + +; CHECK: @foo +; CHECK: stfs +; CHECK: stfs +; CHECK: stfs +; CHECK: stfs +; CHECK: blr + +define void @bar(<4 x float> %v, <4 x float>* %p) { +entry: + store <4 x float> %v, <4 x float>* %p, align 16 + ret void +} + +; CHECK: @bar +; CHECK: qvstfsx + diff --git a/llvm/test/CodeGen/PowerPC/qpx-sel.ll b/llvm/test/CodeGen/PowerPC/qpx-sel.ll new file mode 100644 index 00000000000..15ae57352c3 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-sel.ll @@ -0,0 +1,151 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +@Q = constant <4 x 
i1> <i1 0, i1 undef, i1 1, i1 1>, align 16 +@R = global <4 x i1> <i1 0, i1 0, i1 0, i1 0>, align 16 + +define <4 x double> @test1(<4 x double> %a, <4 x double> %b, <4 x i1> %c) nounwind readnone { +entry: + %r = select <4 x i1> %c, <4 x double> %a, <4 x double> %b + ret <4 x double> %r + +; CHECK-LABEL: @test1 +; CHECK: qvfsel 1, 3, 1, 2 +; CHECK: blr +} + +define <4 x double> @test2(<4 x double> %a, <4 x double> %b, i1 %c1, i1 %c2, i1 %c3, i1 %c4) nounwind readnone { +entry: + %v = insertelement <4 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <4 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <4 x i1> %v2, i1 %c3, i32 2 + %v4 = insertelement <4 x i1> %v3, i1 %c4, i32 3 + %r = select <4 x i1> %v4, <4 x double> %a, <4 x double> %b + ret <4 x double> %r + +; CHECK-LABEL: @test2 + +; FIXME: This load/store sequence is unnecessary. +; CHECK-DAG: lbz +; CHECK-DAG: stw + +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + +define <4 x i1> @test3(<4 x i1> %a) nounwind readnone { +entry: + %v = and <4 x i1> %a, <i1 0, i1 undef, i1 1, i1 1> + ret <4 x i1> %v + +; CHECK-LABEL: @test3 +; CHECK: qvlfsx [[REG:[0-9]+]], +; qvflogical 1, 1, [[REG]], 1 +; blr +} + +define <4 x i1> @test4(<4 x i1> %a) nounwind { +entry: + %q = load <4 x i1>* @Q, align 16 + %v = and <4 x i1> %a, %q + ret <4 x i1> %v + +; CHECK-LABEL: @test4 +; CHECK-DAG: lbz +; CHECK-DAG: qvlfdx [[REG1:[0-9]+]], +; CHECK-DAG: stw +; CHECK-DAG: qvlfiwzx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG1]] +; CHECK: qvflogical 1, 1, [[REG4]], 1 +; CHECK: blr +} + +define void @test5(<4 x i1> %a) nounwind { +entry: + store <4 x i1> %a, <4 x i1>* @R + ret void + +; CHECK-LABEL: @test5 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], 
[[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: stb +; CHECK: blr +} + +define i1 @test6(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test6 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define i1 @test7(<4 x i1> %a) nounwind { +entry: + %r = extractelement <4 x i1> %a, i32 2 + %s = extractelement <4 x i1> %a, i32 3 + %q = and i1 %r, %s + ret i1 %q + +; CHECK-LABEL: @test7 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK-DAG: lwz [[REG4:[0-9]+]], +; FIXME: We're storing the vector twice, and that's silly. +; CHECK-DAG: qvstfiwx [[REG3]], +; CHECK-DAG: lwz [[REG5:[0-9]+]], +; CHECK: and 3, +; CHECK: blr +} + +define i1 @test8(<3 x i1> %a) nounwind { +entry: + %r = extractelement <3 x i1> %a, i32 2 + ret i1 %r + +; CHECK-LABEL: @test8 +; CHECK: qvlfdx [[REG1:[0-9]+]], +; CHECK: qvfmadd [[REG2:[0-9]+]], 1, [[REG1]], [[REG1]] +; CHECK: qvfctiwu [[REG3:[0-9]+]], [[REG2]] +; CHECK: qvstfiwx [[REG3]], +; CHECK: lwz +; CHECK: blr +} + +define <3 x double> @test9(<3 x double> %a, <3 x double> %b, i1 %c1, i1 %c2, i1 %c3) nounwind readnone { +entry: + %v = insertelement <3 x i1> undef, i1 %c1, i32 0 + %v2 = insertelement <3 x i1> %v, i1 %c2, i32 1 + %v3 = insertelement <3 x i1> %v2, i1 %c3, i32 2 + %r = select <3 x i1> %v3, <3 x double> %a, <3 x double> %b + ret <3 x double> %r + +; CHECK-LABEL: @test9 + +; FIXME: This load/store sequence is unnecessary. 
+; CHECK-DAG: lbz +; CHECK-DAG: stw + +; CHECK-DAG: qvlfiwzx [[REG1:[0-9]+]], +; CHECK-DAG: qvlfdx [[REG2:[0-9]+]], +; CHECK-DAG: qvfcfidu [[REG3:[0-9]+]], [[REG1]] +; CHECK: qvfcmpeq [[REG4:[0-9]+]], [[REG3]], [[REG2]] +; CHECK: qvfsel 1, [[REG4]], 1, 2 +; CHECK: blr +} + diff --git a/llvm/test/CodeGen/PowerPC/qpx-store.ll b/llvm/test/CodeGen/PowerPC/qpx-store.ll new file mode 100644 index 00000000000..c29cc225b05 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-store.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -march=ppc64 -mcpu=a2q | FileCheck %s + +define void @foo(<4 x double> %v, <4 x double>* %p) { +entry: + store <4 x double> %v, <4 x double>* %p, align 8 + ret void +} + +; CHECK: @foo +; CHECK: stfd +; CHECK: stfd +; CHECK: stfd +; CHECK: stfd +; CHECK: blr + +define void @bar(<4 x double> %v, <4 x double>* %p) { +entry: + store <4 x double> %v, <4 x double>* %p, align 32 + ret void +} + +; CHECK: @bar +; CHECK: qvstfdx + diff --git a/llvm/test/CodeGen/PowerPC/qpx-unalperm.ll b/llvm/test/CodeGen/PowerPC/qpx-unalperm.ll new file mode 100644 index 00000000000..e765b46a7cf --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/qpx-unalperm.ll @@ -0,0 +1,64 @@ +; RUN: llc < %s -mcpu=a2q | FileCheck %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-bgq-linux" + +define <4 x double> @foo(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 32 + ret <4 x double> %r +; CHECK: qvlfdx +; CHECK: blr +} + +define <4 x double> @bar(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 16 + %s = load <4 x double>* %b, align 32 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +; CHECK: qvlpcldx +; CHECK: qvlfdx +; CHECK: qvfperm +; CHECK: blr +} + +define <4 x double> @bar1(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 16 + %s = load <4 x 
double>* %b, align 8 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar2(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 32 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar3(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 8 + %t = fadd <4 x double> %r, %s + ret <4 x double> %t +} + +define <4 x double> @bar4(<4 x double>* %a) { +entry: + %r = load <4 x double>* %a, align 8 + %b = getelementptr <4 x double>* %a, i32 1 + %s = load <4 x double>* %b, align 8 + %c = getelementptr <4 x double>* %b, i32 1 + %t = load <4 x double>* %c, align 8 + %u = fadd <4 x double> %r, %s + %v = fadd <4 x double> %u, %t + ret <4 x double> %v +} + diff --git a/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll b/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll index 037473bdec8..0f279067159 100644 --- a/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll +++ b/llvm/test/CodeGen/PowerPC/vsx-infl-copy2.ll @@ -8,7 +8,6 @@ entry: br i1 false, label %loop2_start, label %if.end5 ; CHECK-LABEL: @_Z28test_goto_loop_unroll_factorILi22EiEvPKT0_iPKc -; CHECK: xxlor loop2_start: ; preds = %loop2_start, %entry br i1 undef, label %loop2_start, label %if.then.i31 |