Convert some X86 blendv* intrinsics into IR.

Summary: Implemented an InstCombine transformation that takes a blendv* intrinsic call and translates it into an IR select, if the mask is constant. This will eventually get lowered into blends with immediates if possible, or pblendvb (with an option to further optimize if we can transform the pblendvb into a blend+immediate instruction, depending on the selector). It will also enable optimizations by the IR passes, which give up on sight of the intrinsic. Both the transformation and the lowering of its result to asm got shiny new tests. The transformation is a bit convoluted because of blendvp[sd]'s definition: Its mask is a floating point value! This forces us to convert it and get the highest bit. I suppose this happened because the mask has type __m128 in Intel's intrinsic and v4sf (for blendps) in gcc's builtin. I will send an email to llvm-dev to discuss if we want to change this or not. Reviewers: grosbach, delena, nadav Differential Revision: http://reviews.llvm.org/D3859 llvm-svn: 209643
author: Filipe Cabecinhas <me@filcab.net> 2014-05-27 03:42:20 +0000
committer: Filipe Cabecinhas <me@filcab.net> 2014-05-27 03:42:20 +0000
commit: 82ac07c28332b64b24a9b74e3f6eb4365f679efb (patch)
tree: a5fa42f18fff06e7b68d942f42751b49baa1e63a /llvm/test/CodeGen
parent: 0dbb783c7be1756482c491b3635dd07dd5fe712c (diff)
download: bcm5719-llvm-82ac07c28332b64b24a9b74e3f6eb4365f679efb.tar.gz
bcm5719-llvm-82ac07c28332b64b24a9b74e3f6eb4365f679efb.zip
3 files changed, 66 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/X86/avx-blend.ll b/llvm/test/CodeGen/X86/avx-blend.ll
index 4d4f6c1a03a..e21c7a07e8b 100644
--- a/llvm/test/CodeGen/X86/avx-blend.ll
+++ b/llvm/test/CodeGen/X86/avx-blend.ll
@@ -135,3 +135,26 @@ define <2 x double> @testb(<2 x double> %x, <2 x double> %y) {
   %min = select <2 x i1> %min_is_x, <2 x double> %x, <2 x double> %y
   ret <2 x double> %min
 }
+
+; If we can figure out a blend has a constant mask, we should emit the
+; blend instruction with an immediate mask
+define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
+; CHECK-LABEL: constant_blendvpd_avx:
+; CHECK-NOT: mov
+; CHECK: vblendpd
+; CHECK: ret
+  %1 = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x double> %xy, <4 x double> %ab
+  ret <4 x double> %1
+}
+
+define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
+; CHECK-LABEL: constant_blendvps_avx:
+; CHECK-NOT: mov
+; CHECK: vblendps
+; CHECK: ret
+  %1 = select <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 true>, <8 x float> %xyzw, <8 x float> %abcd
+  ret <8 x float> %1
+}
+
+declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
+declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)
diff --git a/llvm/test/CodeGen/X86/avx2-blend.ll b/llvm/test/CodeGen/X86/avx2-blend.ll
new file mode 100644
index 00000000000..b02442b6fad
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx2-blend.ll
@@ -0,0 +1,11 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 | FileCheck %s
+
+define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
+; CHECK-LABEL: constant_pblendvb_avx2:
+; CHECK: vmovdqa
+; CHECK: vpblendvb
+  %1 = select <32 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <32 x i8> %xyzw, <32 x i8> %abcd
+  ret <32 x i8> %1
+}
+
+declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
diff --git a/llvm/test/CodeGen/X86/sse41-blend.ll b/llvm/test/CodeGen/X86/sse41-blend.ll
index 951bb7dc854..8ad79877c8e 100644
--- a/llvm/test/CodeGen/X86/sse41-blend.ll
+++ b/llvm/test/CodeGen/X86/sse41-blend.ll
@@ -88,3 +88,35 @@ entry:
   store double %extract214vector_func.i, double addrspace(1)* undef, align 8
   ret void
 }
+
+; If we can figure out a blend has a constant mask, we should emit the
+; blend instruction with an immediate mask
+define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
+; In this case, we emit a simple movss
+; CHECK-LABEL: constant_blendvpd
+; CHECK: movsd
+; CHECK: ret
+  %1 = select <2 x i1> <i1 true, i1 false>, <2 x double> %xy, <2 x double> %ab
+  ret <2 x double> %1
+}
+
+define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
+; CHECK-LABEL: constant_blendvps
+; CHECK-NOT: mov
+; CHECK: blendps $7
+; CHECK: ret
+  %1 = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> %xyzw, <4 x float> %abcd
+  ret <4 x float> %1
+}
+
+define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
+; CHECK-LABEL: constant_pblendvb:
+; CHECK: movaps
+; CHECK: pblendvb
+; CHECK: ret
+  %1 = select <16 x i1> <i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 true, i1 false>, <16 x i8> %xyzw, <16 x i8> %abcd
+  ret <16 x i8> %1
+}
+declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
+declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
+declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
author	Filipe Cabecinhas <me@filcab.net>	2014-05-27 03:42:20 +0000
committer	Filipe Cabecinhas <me@filcab.net>	2014-05-27 03:42:20 +0000
commit	82ac07c28332b64b24a9b74e3f6eb4365f679efb (patch)
tree	a5fa42f18fff06e7b68d942f42751b49baa1e63a /llvm/test/CodeGen
parent	0dbb783c7be1756482c491b3635dd07dd5fe712c (diff)
download	bcm5719-llvm-82ac07c28332b64b24a9b74e3f6eb4365f679efb.tar.gz bcm5719-llvm-82ac07c28332b64b24a9b74e3f6eb4365f679efb.zip