Add support for VSX FMA single-precision instructions to the PPC back end

This patch corresponds to review: http://reviews.llvm.org/D9941 It adds the various FMA instructions introduced in the version 2.07 of the ISA along with the testing for them. These are operations on single precision scalar values in VSX registers. llvm-svn: 238578
author: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2015-05-29 17:13:25 +0000
committer: Nemanja Ivanovic <nemanja.i.ibm@gmail.com> 2015-05-29 17:13:25 +0000
commit: 376e17364f2b3d95fbdcd985cfdc157fb0292ee6 (patch)
tree: afd749cb8ac56eb1e206fa108f114dd209710c3c /llvm/test
parent: cdb38e5625b760aa6634084a45ec563e8ff797e4 (diff)
download: bcm5719-llvm-376e17364f2b3d95fbdcd985cfdc157fb0292ee6.tar.gz
bcm5719-llvm-376e17364f2b3d95fbdcd985cfdc157fb0292ee6.zip
4 files changed, 298 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/PowerPC/fma.ll b/llvm/test/CodeGen/PowerPC/fma.ll
index ab5251b2a55..9cfef398edf 100644
--- a/llvm/test/CodeGen/PowerPC/fma.ll
+++ b/llvm/test/CodeGen/PowerPC/fma.ll
@@ -1,9 +1,12 @@
 ; RUN: llc < %s -march=ppc32 -fp-contract=fast -mattr=-vsx | FileCheck %s
 ; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mattr=+vsx -mcpu=pwr7 | FileCheck -check-prefix=CHECK-VSX %s
+; RUN: llc < %s -mtriple=powerpc64-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -fp-contract=fast -mcpu=pwr8 | FileCheck -check-prefix=CHECK-P8 %s
 
 declare double @dummy1(double) #0
 declare double @dummy2(double, double) #0
 declare double @dummy3(double, double, double) #0
+declare float @dummy4(float, float) #0
 
 define double @test_FMADD1(double %A, double %B, double %C) {
 	%D = fmul double %A, %B		; <double> [#uses=1]
@@ -126,3 +129,83 @@ define float @test_FNMSUBS(float %A, float %B, float %C) {
 ; CHECK-VSX: fnmsubs
 ; CHECK-VSX-NEXT: blr
 }
+
+define float @test_XSMADDMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fadd float %C, %D		; <float> [#uses=1]
+	ret float %E
+; CHECK-P8-LABEL: test_XSMADDMSP:
+; CHECK-P8: xsmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMSUBMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fsub float %D, %C		; <float> [#uses=1]
+	ret float %E
+; CHECK-P8-LABEL: test_XSMSUBMSP:
+; CHECK-P8: xsmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSMADDASP(float %A, float %B, float %C, float %D) {
+	%E = fmul float %A, %B 	; <float> [#uses=2]
+	%F = fadd float %E, %C 	; <float> [#uses=1]
+	%G = fsub float %E, %D 	; <float> [#uses=1]
+	%H = call float @dummy4(float %F, float %G)      ; <float> [#uses=1]
+	ret float %H
+; CHECK-P8-LABEL: test_XSMADDASP:
+; CHECK-P8: xsmaddasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSMSUBASP(float %A, float %B, float %C, float %D) {
+	%E = fmul float %A, %B 	; <float> [#uses=2]
+	%F = fsub float %E, %C 	; <float> [#uses=1]
+	%G = fsub float %E, %D 	; <float> [#uses=1]
+	%H = call float @dummy4(float %F, float %G)      ; <float> [#uses=1]
+	ret float %H
+; CHECK-P8-LABEL: test_XSMSUBASP:
+; CHECK-P8: xsmsubasp
+; CHECK-P8-NEXT: xsmsubmsp
+}
+
+define float @test_XSNMADDMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fadd float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMADDMSP:
+; CHECK-P8: xsnmaddmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMSUBMSP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fsub float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBMSP:
+; CHECK-P8: xsnmsubmsp
+; CHECK-P8-NEXT: blr
+}
+
+define float @test_XSNMADDASP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fadd float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	%H = call float @dummy4(float %E, float %F)      ; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMADDASP:
+; CHECK-P8: xsnmaddasp
+}
+
+define float @test_XSNMSUBASP(float %A, float %B, float %C) {
+	%D = fmul float %A, %B		; <float> [#uses=1]
+	%E = fsub float %D, %C		; <float> [#uses=1]
+	%F = fsub float -0.000000e+00, %E		; <float> [#uses=1]
+	%H = call float @dummy4(float %E, float %F)      ; <float> [#uses=1]
+	ret float %F
+; CHECK-P8-LABEL: test_XSNMSUBASP:
+; CHECK-P8: xsnmsubasp
+}
diff --git a/llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll b/llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll
new file mode 100644
index 00000000000..1c3e457f92c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/vsx-fma-sp.ll
@@ -0,0 +1,167 @@
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx | FileCheck %s
+; RUN: llc < %s -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 -mattr=+vsx -fast-isel -O0 | FileCheck -check-prefix=CHECK-FISL %s
+define void @test1sp(float %a, float %b, float %c, float %e, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  ret void
+
+; CHECK-LABEL: @test1sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 4
+; CHECK-DAG: stxsspx 3, 0, 7
+; CHECK-DAG: stxsspx 1, 7, [[C1]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test1sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 7
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 1, 7, [[C1]]
+; CHECK-FISL: blr
+}
+
+define void @test2sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  %2 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+  store float %2, float* %arrayidx2, align 4
+  ret void
+
+; CHECK-LABEL: @test2sp
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 3, 2, 1
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+; CHECK-DAG: stxsspx 3, 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test2sp
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr 0, 1
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 4
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+define void @test3sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 3
+  store float %2, float* %arrayidx1, align 4
+  %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx2 = getelementptr inbounds float, float* %d, i64 2
+  store float %3, float* %arrayidx2, align 4
+  %arrayidx3 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx3, align 4
+  ret void
+
+; CHECK-LABEL: @test3sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 12
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: li [[C3:[0-9]+]], 4
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+
+; CHECK-DAG: xsmaddmsp 3, 2, 4
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 3, 8, [[C1]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test3sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: fmr 4, [[F1]]
+; CHECK-FISL-DAG: xsmaddasp 4, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL: blr
+}
+
+define void @test4sp(float %a, float %b, float %c, float %e, float %f, float* nocapture %d) #0 {
+entry:
+  %0 = tail call float @llvm.fma.f32(float %b, float %c, float %a)
+  store float %0, float* %d, align 4
+  %1 = tail call float @llvm.fma.f32(float %b, float %e, float %a)
+  %arrayidx1 = getelementptr inbounds float, float* %d, i64 1
+  store float %1, float* %arrayidx1, align 4
+  %2 = tail call float @llvm.fma.f32(float %b, float %c, float %1)
+  %arrayidx3 = getelementptr inbounds float, float* %d, i64 3
+  store float %2, float* %arrayidx3, align 4
+  %3 = tail call float @llvm.fma.f32(float %b, float %f, float %a)
+  %arrayidx4 = getelementptr inbounds float, float* %d, i64 2
+  store float %3, float* %arrayidx4, align 4
+  ret void
+
+; CHECK-LABEL: @test4sp
+; CHECK-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-DAG: li [[C1:[0-9]+]], 4
+; CHECK-DAG: li [[C2:[0-9]+]], 8
+; CHECK-DAG: xsmaddmsp 4, 2, 1
+
+; Note: We could convert this next FMA to M-type as well, but it would require
+; re-ordering the instructions.
+; CHECK-DAG: xsmaddasp 1, 2, 5
+
+; CHECK-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-DAG: stxsspx [[F1]], 0, 8
+; CHECK-DAG: stxsspx 4, 8, [[C1]]
+; CHECK-DAG: li [[C3:[0-9]+]], 12
+; CHECK-DAG: xsmaddasp 4, 2, 3
+; CHECK-DAG: stxsspx 4, 8, [[C3]]
+; CHECK-DAG: stxsspx 1, 8, [[C2]]
+; CHECK: blr
+
+; CHECK-FISL-LABEL: @test4sp
+; CHECK-FISL-DAG: fmr [[F1:[0-9]+]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 3
+; CHECK-FISL-DAG: stxsspx 0, 0, 8
+; CHECK-FISL-DAG: fmr [[F1]], 1
+; CHECK-FISL-DAG: xsmaddasp [[F1]], 2, 4
+; CHECK-FISL-DAG: li [[C3:[0-9]+]], 4
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C3]]
+; CHECK-FISL-DAG: xsmaddasp 0, 2, 3
+; CHECK-FISL-DAG: li [[C1:[0-9]+]], 12
+; CHECK-FISL-DAG: stxsspx 0, 8, [[C1]]
+; CHECK-FISL-DAG: xsmaddasp 1, 2, 5
+; CHECK-FISL-DAG: li [[C2:[0-9]+]], 8
+; CHECK-FISL-DAG: stxsspx 1, 8, [[C2]]
+; CHECK-FISL: blr
+}
+
+declare float @llvm.fma.f32(float, float, float) #0
diff --git a/llvm/test/MC/Disassembler/PowerPC/vsx.txt b/llvm/test/MC/Disassembler/PowerPC/vsx.txt
index 04b2eeb0dd2..6f4ba6f6b9a 100644
--- a/llvm/test/MC/Disassembler/PowerPC/vsx.txt
+++ b/llvm/test/MC/Disassembler/PowerPC/vsx.txt
@@ -90,6 +90,12 @@
 # CHECK: xsmaddmdp 7, 63, 27
 0xf0 0xff 0xd9 0x4c
 
+# CHECK: xsmaddasp 7, 63, 27
+0xf0 0xff 0xd8 0x0c
+
+# CHECK: xsmaddmsp 7, 63, 27
+0xf0 0xff 0xd8 0x4c
+
 # CHECK: xsmaxdp 7, 63, 27
 0xf0 0xff 0xdd 0x04
 
@@ -102,6 +108,12 @@
 # CHECK: xsmsubmdp 7, 63, 27
 0xf0 0xff 0xd9 0xcc
 
+# CHECK: xsmsubasp 7, 63, 27
+0xf0 0xff 0xd8 0x8c
+
+# CHECK: xsmsubmsp 7, 63, 27
+0xf0 0xff 0xd8 0xcc
+
 # CHECK: xsmulsp 7, 63, 27
 0xf0 0xff 0xd8 0x84
 
@@ -126,6 +138,18 @@
 # CHECK: xsnmsubmdp 7, 63, 27
 0xf0 0xff 0xdd 0xcc
 
+# CHECK: xsnmaddasp 7, 63, 27
+0xf0 0xff 0xdc 0x0c
+
+# CHECK: xsnmaddmsp 7, 63, 27
+0xf0 0xff 0xdc 0x4c
+
+# CHECK: xsnmsubasp 7, 63, 27
+0xf0 0xff 0xdc 0x8c
+
+# CHECK: xsnmsubmsp 7, 63, 27
+0xf0 0xff 0xdc 0xcc
+
 # CHECK: xsrdpi 7, 27
 0xf0 0xe0 0xd9 0x24
 
diff --git a/llvm/test/MC/PowerPC/vsx.s b/llvm/test/MC/PowerPC/vsx.s
index 773fc9eef6d..352fc517380 100644
--- a/llvm/test/MC/PowerPC/vsx.s
+++ b/llvm/test/MC/PowerPC/vsx.s
@@ -95,6 +95,12 @@
 # CHECK-BE: xsmaddmdp 7, 63, 27                # encoding: [0xf0,0xff,0xd9,0x4c]
 # CHECK-LE: xsmaddmdp 7, 63, 27                # encoding: [0x4c,0xd9,0xff,0xf0]
             xsmaddmdp 7, 63, 27
+# CHECK-BE: xsmaddasp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x0c]
+# CHECK-LE: xsmaddasp 7, 63, 27                # encoding: [0x0c,0xd8,0xff,0xf0]
+            xsmaddasp 7, 63, 27
+# CHECK-BE: xsmaddmsp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x4c]
+# CHECK-LE: xsmaddmsp 7, 63, 27                # encoding: [0x4c,0xd8,0xff,0xf0]
+            xsmaddmsp 7, 63, 27
 # CHECK-BE: xsmaxdp 7, 63, 27                  # encoding: [0xf0,0xff,0xdd,0x04]
 # CHECK-LE: xsmaxdp 7, 63, 27                  # encoding: [0x04,0xdd,0xff,0xf0]
             xsmaxdp 7, 63, 27
@@ -107,6 +113,12 @@
 # CHECK-BE: xsmsubmdp 7, 63, 27                # encoding: [0xf0,0xff,0xd9,0xcc]
 # CHECK-LE: xsmsubmdp 7, 63, 27                # encoding: [0xcc,0xd9,0xff,0xf0]
             xsmsubmdp 7, 63, 27
+# CHECK-BE: xsmsubasp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0x8c]
+# CHECK-LE: xsmsubasp 7, 63, 27                # encoding: [0x8c,0xd8,0xff,0xf0]
+            xsmsubasp 7, 63, 27
+# CHECK-BE: xsmsubmsp 7, 63, 27                # encoding: [0xf0,0xff,0xd8,0xcc]
+# CHECK-LE: xsmsubmsp 7, 63, 27                # encoding: [0xcc,0xd8,0xff,0xf0]
+            xsmsubmsp 7, 63, 27
 # CHECK-BE: xsmulsp 7, 63, 27                  # encoding: [0xf0,0xff,0xd8,0x84]
 # CHECK-LE: xsmulsp 7, 63, 27                  # encoding: [0x84,0xd8,0xff,0xf0]
             xsmulsp 7, 63, 27
@@ -131,6 +143,18 @@
 # CHECK-BE: xsnmsubmdp 7, 63, 27               # encoding: [0xf0,0xff,0xdd,0xcc]
 # CHECK-LE: xsnmsubmdp 7, 63, 27               # encoding: [0xcc,0xdd,0xff,0xf0]
             xsnmsubmdp 7, 63, 27
+# CHECK-BE: xsnmaddasp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x0c]
+# CHECK-LE: xsnmaddasp 7, 63, 27               # encoding: [0x0c,0xdc,0xff,0xf0]
+            xsnmaddasp 7, 63, 27
+# CHECK-BE: xsnmaddmsp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x4c]
+# CHECK-LE: xsnmaddmsp 7, 63, 27               # encoding: [0x4c,0xdc,0xff,0xf0]
+            xsnmaddmsp 7, 63, 27
+# CHECK-BE: xsnmsubasp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0x8c]
+# CHECK-LE: xsnmsubasp 7, 63, 27               # encoding: [0x8c,0xdc,0xff,0xf0]
+            xsnmsubasp 7, 63, 27
+# CHECK-BE: xsnmsubmsp 7, 63, 27               # encoding: [0xf0,0xff,0xdc,0xcc]
+# CHECK-LE: xsnmsubmsp 7, 63, 27               # encoding: [0xcc,0xdc,0xff,0xf0]
+            xsnmsubmsp 7, 63, 27
 # CHECK-BE: xsrdpi 7, 27                       # encoding: [0xf0,0xe0,0xd9,0x24]
 # CHECK-LE: xsrdpi 7, 27                       # encoding: [0x24,0xd9,0xe0,0xf0]
             xsrdpi 7, 27
author	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2015-05-29 17:13:25 +0000
committer	Nemanja Ivanovic <nemanja.i.ibm@gmail.com>	2015-05-29 17:13:25 +0000
commit	376e17364f2b3d95fbdcd985cfdc157fb0292ee6 (patch)
tree	afd749cb8ac56eb1e206fa108f114dd209710c3c /llvm/test
parent	cdb38e5625b760aa6634084a45ec563e8ff797e4 (diff)
download	bcm5719-llvm-376e17364f2b3d95fbdcd985cfdc157fb0292ee6.tar.gz bcm5719-llvm-376e17364f2b3d95fbdcd985cfdc157fb0292ee6.zip