author     Nemanja Ivanovic <nemanja.i.ibm@gmail.com>   2016-07-05 09:22:29 +0000
committer  Nemanja Ivanovic <nemanja.i.ibm@gmail.com>   2016-07-05 09:22:29 +0000
commit     44513e545fdafe7d537f9f152bd8512c3c31f582 (patch)
tree       19f3253d1799fac700b5df62c79c932403e93c04 /llvm/test
parent     dea33cc2f3b1f6c37cdb4178c8a213fea7db118b (diff)
[PowerPC] - Legalize vector types by widening instead of integer promotion
This patch corresponds to review: http://reviews.llvm.org/D20443

It changes the legalization strategy for illegal vector types from integer promotion to widening. This applies only to vectors whose element width is a multiple of a byte, since the hardware supports vectors with 1, 2, 4, 8 and 16 byte elements. Integer promotion for vectors is quite expensive on PPC because of the sequence of breaking apart the vector, extending the elements and reconstituting the vector, and two of these operations are expensive. This patch yields performance improvements, ranging from minor to major, on most benchmarks. Very few benchmarks regress, and those regressions can be handled in a subsequent patch with a DAG combine (similar to how this patch handles int -> fp conversions of illegal vector types).

llvm-svn: 274535
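As a minimal illustration of the strategy change (a hypothetical example, not one of the tests in this patch): an operation on an illegal <4 x i8> vector is now widened to the legal <16 x i8> type and handled with byte-element instructions, rather than promoted to <4 x i32> word elements. This is the same effect visible in the vec_cmp.ll checks below, which move from vcmpequw to vcmpequb.

; add_v4i8.ll - hypothetical input, e.g. compiled with:
;   llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu add_v4i8.ll
; With widening, the illegal <4 x i8> add is performed as a <16 x i8>
; byte add (vaddubm) on the widened value; under the old promotion
; strategy it would be extended to <4 x i32>, added as words, and
; truncated back.
define <4 x i8> @add_v4i8(<4 x i8> %a, <4 x i8> %b) {
entry:
  %sum = add <4 x i8> %a, %b
  ret <4 x i8> %sum
}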
Diffstat (limited to 'llvm/test')
-rw-r--r--   llvm/test/Analysis/CostModel/PowerPC/load_store.ll    |  2
-rw-r--r--   llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll       | 23
-rw-r--r--   llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll  |  3
-rw-r--r--   llvm/test/CodeGen/PowerPC/vec_cmp.ll                  |  6
-rw-r--r--   llvm/test/CodeGen/PowerPC/vsx.ll                      | 94
5 files changed, 82 insertions, 46 deletions
diff --git a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
index 0a568b88e72..d48be5b5f62 100644
--- a/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
+++ b/llvm/test/Analysis/CostModel/PowerPC/load_store.ll
@@ -31,7 +31,7 @@ define i32 @loads(i32 %arg) {
; FIXME: There actually are sub-vector Altivec loads, and so we could handle
; this with a small expense, but we don't currently.
- ; CHECK: cost of 48 {{.*}} load
+ ; CHECK: cost of 42 {{.*}} load
load <4 x i16>, <4 x i16>* undef, align 2
; CHECK: cost of 2 {{.*}} load
diff --git a/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
new file mode 100644
index 00000000000..d03ff06bbd9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/load-v4i8-improved.ll
@@ -0,0 +1,23 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck \
+; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck \
+; RUN: -implicit-check-not vmrg -implicit-check-not=vperm %s \
+; RUN: --check-prefix=CHECK-BE
+
+define <16 x i8> @test(i32* %s, i32* %t) {
+entry:
+ %0 = bitcast i32* %s to <4 x i8>*
+ %1 = load <4 x i8>, <4 x i8>* %0, align 4
+ %2 = shufflevector <4 x i8> %1, <4 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ ret <16 x i8> %2
+; CHECK-LABEL: test
+; CHECK: lwz [[GPR:[0-9]+]], 0(3)
+; CHECK: mtvsrd [[VSR:[0-9]+]], [[GPR]]
+; CHECK: xxswapd [[SWP:[0-9]+]], [[VSR]]
+; CHECK: xxspltw 34, [[SWP]], 3
+; CHECK-BE-LABEL: test
+; CHECK-BE: lwz [[GPR:[0-9]+]], 0(3)
+; CHECK-BE: sldi [[SHL:[0-9]+]], [[GPR]], 32
+; CHECK-BE: mtvsrd [[VSR:[0-9]+]], [[SHL]]
+; CHECK-BE: xxspltw 34, [[VSR]], 0
+}
diff --git a/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll b/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
index 052f55644fe..3610c044b94 100644
--- a/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
+++ b/llvm/test/CodeGen/PowerPC/p8altivec-shuffles-pred.ll
@@ -9,7 +9,8 @@ entry:
ret <2 x i32> %strided.vec
; CHECK-LABEL: @test1
-; CHECK: vsldoi 2, 2, 2, 12
+; CHECK: vsldoi [[TGT:[0-9]+]], 2, 2, 8
+; CHECK: vmrghw 2, 2, [[TGT]]
; CHECK: blr
}
diff --git a/llvm/test/CodeGen/PowerPC/vec_cmp.ll b/llvm/test/CodeGen/PowerPC/vec_cmp.ll
index 516b2dd58b9..7b228bffc38 100644
--- a/llvm/test/CodeGen/PowerPC/vec_cmp.ll
+++ b/llvm/test/CodeGen/PowerPC/vec_cmp.ll
@@ -24,7 +24,7 @@ define <4 x i8> @v4si8_cmp(<4 x i8> %x, <4 x i8> %y) nounwind readnone {
ret <4 x i8> %sext
}
; CHECK-LABEL: v4si8_cmp:
-; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
@@ -33,7 +33,7 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone {
ret <8 x i8> %sext
}
; CHECK-LABEL: v8si8_cmp:
-; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; Additional tests for v16i8 since it is a altivec native type
@@ -158,7 +158,7 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone {
ret <4 x i16> %sext
}
; CHECK-LABEL: v4si16_cmp:
-; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
+; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}}
; Additional tests for v8i16 since it is an altivec native type
diff --git a/llvm/test/CodeGen/PowerPC/vsx.ll b/llvm/test/CodeGen/PowerPC/vsx.ll
index b2eefb66676..9b65649978d 100644
--- a/llvm/test/CodeGen/PowerPC/vsx.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx.ll
@@ -1144,62 +1144,67 @@ define <2 x double> @test68(<2 x i32> %a) {
ret <2 x double> %w
; CHECK-LABEL: @test68
-; CHECK: xxsldwi [[V1:[0-9]+]], 34, 34, 1
+; CHECK: xxmrghw [[V1:[0-9]+]]
; CHECK: xvcvsxwdp 34, [[V1]]
; CHECK: blr
; CHECK-LE-LABEL: @test68
-; CHECK-LE: xxsldwi [[V1:[0-9]+]], 34, 34, 1
+; CHECK-LE: xxmrglw [[V1:[0-9]+]], 34, 34
; CHECK-LE: xvcvsxwdp 34, [[V1]]
; CHECK-LE: blr
}
+; This gets scalarized so the code isn't great
define <2 x double> @test69(<2 x i16> %a) {
%w = sitofp <2 x i16> %a to <2 x double>
ret <2 x double> %w
; CHECK-LABEL: @test69
-; CHECK: vspltisw [[V1:[0-9]+]], 8
-; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
-; CHECK: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
-; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
-; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
-; CHECK: xvcvsxwdp 34, [[V4]]
+; CHECK-DAG: lfiwax
+; CHECK-DAG: lfiwax
+; CHECK-DAG: xscvsxddp
+; CHECK-DAG: xscvsxddp
+; CHECK: xxmrghd
; CHECK: blr
; CHECK-LE-LABEL: @test69
-; CHECK-LE: vspltisw [[V1:[0-9]+]], 8
-; CHECK-LE: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
-; CHECK-LE: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
-; CHECK-LE: vsraw {{[0-9]+}}, [[V3]], [[V2]]
-; CHECK-LE: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
-; CHECK-LE: xvcvsxwdp 34, [[V4]]
+; CHECK-LE: mfvsrd
+; CHECK-LE: mtvsrwa
+; CHECK-LE: mtvsrwa
+; CHECK-LE: xscvsxddp
+; CHECK-LE: xscvsxddp
+; CHECK-LE: xxspltd
+; CHECK-LE: xxspltd
+; CHECK-LE: xxmrgld
; CHECK-LE: blr
}
+; This gets scalarized so the code isn't great
define <2 x double> @test70(<2 x i8> %a) {
%w = sitofp <2 x i8> %a to <2 x double>
ret <2 x double> %w
; CHECK-LABEL: @test70
-; CHECK: vspltisw [[V1:[0-9]+]], 12
-; CHECK: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
-; CHECK: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
-; CHECK: vsraw {{[0-9]+}}, [[V3]], [[V2]]
-; CHECK: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
-; CHECK: xvcvsxwdp 34, [[V4]]
+; CHECK-DAG: lfiwax
+; CHECK-DAG: lfiwax
+; CHECK-DAG: xscvsxddp
+; CHECK-DAG: xscvsxddp
+; CHECK: xxmrghd
; CHECK: blr
; CHECK-LE-LABEL: @test70
-; CHECK-LE: vspltisw [[V1:[0-9]+]], 12
-; CHECK-LE: vadduwm [[V2:[0-9]+]], [[V1]], [[V1]]
-; CHECK-LE: vslw [[V3:[0-9]+]], {{[0-9]+}}, [[V2]]
-; CHECK-LE: vsraw {{[0-9]+}}, [[V3]], [[V2]]
-; CHECK-LE: xxsldwi [[V4:[0-9]+]], {{[0-9]+}}, {{[0-9]+}}, 1
-; CHECK-LE: xvcvsxwdp 34, [[V4]]
+; CHECK-LE: mfvsrd
+; CHECK-LE: mtvsrwa
+; CHECK-LE: mtvsrwa
+; CHECK-LE: xscvsxddp
+; CHECK-LE: xscvsxddp
+; CHECK-LE: xxspltd
+; CHECK-LE: xxspltd
+; CHECK-LE: xxmrgld
; CHECK-LE: blr
}
+; This gets scalarized so the code isn't great
define <2 x i32> @test80(i32 %v) {
%b1 = insertelement <2 x i32> undef, i32 %v, i32 0
%b2 = shufflevector <2 x i32> %b1, <2 x i32> undef, <2 x i32> zeroinitializer
@@ -1207,31 +1212,38 @@ define <2 x i32> @test80(i32 %v) {
ret <2 x i32> %i
; CHECK-REG-LABEL: @test80
-; CHECK-REG-DAG: addi [[R1:[0-9]+]], 3, 3
-; CHECK-REG-DAG: addi [[R2:[0-9]+]], 1, -16
-; CHECK-REG-DAG: addi [[R3:[0-9]+]], 3, 2
-; CHECK-REG: std [[R1]], -8(1)
-; CHECK-REG: std [[R3]], -16(1)
-; CHECK-REG: lxvd2x 34, 0, [[R2]]
-; CHECK-REG-NOT: stxvd2x
+; CHECK-REG: stw 3, -16(1)
+; CHECK-REG: addi [[R1:[0-9]+]], 1, -16
+; CHECK-REG: addis [[R2:[0-9]+]]
+; CHECK-REG: addi [[R2]], [[R2]]
+; CHECK-REG-DAG: lxvw4x [[VS1:[0-9]+]], 0, [[R1]]
+; CHECK-REG-DAG: lxvw4x 35, 0, [[R2]]
+; CHECK-REG: xxspltw 34, [[VS1]], 0
+; CHECK-REG: vadduwm 2, 2, 3
+; CHECK-REG-NOT: stxvw4x
; CHECK-REG: blr
; CHECK-FISL-LABEL: @test80
-; CHECK-FISL-DAG: addi [[R1:[0-9]+]], 3, 3
-; CHECK-FISL-DAG: addi [[R2:[0-9]+]], 1, -16
-; CHECK-FISL-DAG: addi [[R3:[0-9]+]], 3, 2
-; CHECK-FISL-DAG: std [[R1]], -8(1)
-; CHECK-FISL-DAG: std [[R3]], -16(1)
-; CHECK-FISL-DAG: lxvd2x 0, 0, [[R2]]
+; CHECK-FISL: mr 4, 3
+; CHECK-FISL: stw 4, -16(1)
+; CHECK-FISL: addi [[R1:[0-9]+]], 1, -16
+; CHECK-FISL-DAG: lxvw4x [[VS1:[0-9]+]], 0, [[R1]]
+; CHECK-FISL-DAG: xxspltw {{[0-9]+}}, [[VS1]], 0
+; CHECK-FISL: addis [[R2:[0-9]+]]
+; CHECK-FISL: addi [[R2]], [[R2]]
+; CHECK-FISL-DAG: lxvw4x {{[0-9]+}}, 0, [[R2]]
+; CHECK-FISL: vadduwm
+; CHECK-FISL-NOT: stxvw4x
; CHECK-FISL: blr
; CHECK-LE-LABEL: @test80
; CHECK-LE-DAG: mtvsrd [[R1:[0-9]+]], 3
+; CHECK-LE-DAG: xxswapd [[V1:[0-9]+]], [[R1]]
; CHECK-LE-DAG: addi [[R2:[0-9]+]], {{[0-9]+}}, .LCPI
; CHECK-LE-DAG: lxvd2x [[V2:[0-9]+]], 0, [[R2]]
-; CHECK-LE-DAG: xxspltd 34, [[R1]]
+; CHECK-LE-DAG: xxspltw 34, [[V1]]
; CHECK-LE-DAG: xxswapd 35, [[V2]]
-; CHECK-LE: vaddudm 2, 2, 3
+; CHECK-LE: vadduwm 2, 2, 3
; CHECK-LE: blr
}