summaryrefslogtreecommitdiffstats
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
authorFilipe Cabecinhas <me@filcab.net>2014-05-16 22:47:49 +0000
committerFilipe Cabecinhas <me@filcab.net>2014-05-16 22:47:49 +0000
commite15551832c70ff477d09edde1ca91e9d96970a31 (patch)
treecc38af0ba54112b165119d0481be1467facf82bd /llvm/test/CodeGen
parent17254aaead65f5a989430cf5c077a9a1443e44bf (diff)
downloadbcm5719-llvm-e15551832c70ff477d09edde1ca91e9d96970a31.tar.gz
bcm5719-llvm-e15551832c70ff477d09edde1ca91e9d96970a31.zip
Lower vselects into X86ISD::BLENDI when appropriate.
LowerVSELECT will, if possible, generate a X86ISD::BLENDI DAG node if the condition is constant and we can emit that instruction, given the subtarget. This is not enough for all cases. An additional SELECTCombine optimization will be committed. Fixed tests that were expecting variable blends but where a blend+imm can be generated. Added test where we can't emit blend+immediate. Added avx2 blend+imm tests. llvm-svn: 209043
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/X86/avx-blend.ll10
-rw-r--r--llvm/test/CodeGen/X86/avx2.ll25
-rw-r--r--llvm/test/CodeGen/X86/blend-msb.ll12
-rw-r--r--llvm/test/CodeGen/X86/sse41-blend.ll8
-rw-r--r--llvm/test/CodeGen/X86/sse41.ll8
5 files changed, 45 insertions, 18 deletions
diff --git a/llvm/test/CodeGen/X86/avx-blend.ll b/llvm/test/CodeGen/X86/avx-blend.ll
index 5fcd5ff5f4c..4757ce0c9fa 100644
--- a/llvm/test/CodeGen/X86/avx-blend.ll
+++ b/llvm/test/CodeGen/X86/avx-blend.ll
@@ -3,7 +3,7 @@
; AVX128 tests:
;CHECK-LABEL: vsel_float:
-;CHECK: vblendvps
+;CHECK: vblendps $5
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x float> %v1, <4 x float> %v2
@@ -12,7 +12,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
;CHECK-LABEL: vsel_i32:
-;CHECK: vblendvps
+;CHECK: vblendps $5
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -52,7 +52,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
;CHECK-LABEL: vsel_float8:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvps
+;CHECK: vblendps $17
;CHECK: ret
define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -61,7 +61,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
;CHECK-LABEL: vsel_i328:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvps
+;CHECK: vblendps $17
;CHECK-NEXT: ret
define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
@@ -86,7 +86,7 @@ define <8 x i64> @vsel_i648(<8 x i64> %v1, <8 x i64> %v2) {
;CHECK-LABEL: vsel_double4:
;CHECK-NOT: vinsertf128
-;CHECK: vblendvpd
+;CHECK: vblendpd $5
;CHECK-NEXT: ret
define <4 x double> @vsel_double4(<4 x double> %v1, <4 x double> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x double> %v1, <4 x double> %v2
diff --git a/llvm/test/CodeGen/X86/avx2.ll b/llvm/test/CodeGen/X86/avx2.ll
new file mode 100644
index 00000000000..290d0b6557b
--- /dev/null
+++ b/llvm/test/CodeGen/X86/avx2.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core-avx2 -mattr=+avx2 | FileCheck %s
+
+define <4 x i32> @blendvb_fallback_v4i32(<4 x i1> %mask, <4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: @blendvb_fallback_v4i32
+; CHECK: vblendvps
+; CHECK: ret
+ %ret = select <4 x i1> %mask, <4 x i32> %x, <4 x i32> %y
+ ret <4 x i32> %ret
+}
+
+define <8 x i32> @blendvb_fallback_v8i32(<8 x i1> %mask, <8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: @blendvb_fallback_v8i32
+; CHECK: vblendvps
+; CHECK: ret
+ %ret = select <8 x i1> %mask, <8 x i32> %x, <8 x i32> %y
+ ret <8 x i32> %ret
+}
+
+define <8 x float> @blendvb_fallback_v8f32(<8 x i1> %mask, <8 x float> %x, <8 x float> %y) {
+; CHECK-LABEL: @blendvb_fallback_v8f32
+; CHECK: vblendvps
+; CHECK: ret
+ %ret = select <8 x i1> %mask, <8 x float> %x, <8 x float> %y
+ ret <8 x float> %ret
+}
diff --git a/llvm/test/CodeGen/X86/blend-msb.ll b/llvm/test/CodeGen/X86/blend-msb.ll
index 6b465963292..4e17a714bf5 100644
--- a/llvm/test/CodeGen/X86/blend-msb.ll
+++ b/llvm/test/CodeGen/X86/blend-msb.ll
@@ -4,7 +4,7 @@
; Verify that we produce movss instead of blendvps when possible.
;CHECK-LABEL: vsel_float:
-;CHECK-NOT: blendvps
+;CHECK-NOT: blend
;CHECK: movss
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
@@ -13,7 +13,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
}
;CHECK-LABEL: vsel_4xi8:
-;CHECK-NOT: blendvps
+;CHECK-NOT: blend
;CHECK: movss
;CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
@@ -21,14 +21,8 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
ret <4 x i8> %vsel
}
-
-; We do not have native support for v8i16 blends and we have to use the
-; blendvb instruction or a sequence of NAND/OR/AND. Make sure that we do not
-; reduce the mask in this case.
;CHECK-LABEL: vsel_8xi16:
-;CHECK: andps
-;CHECK: andps
-;CHECK: orps
+;CHECK: pblendw $17
;CHECK: ret
define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
%vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
diff --git a/llvm/test/CodeGen/X86/sse41-blend.ll b/llvm/test/CodeGen/X86/sse41-blend.ll
index 4681fde7548..951bb7dc854 100644
--- a/llvm/test/CodeGen/X86/sse41-blend.ll
+++ b/llvm/test/CodeGen/X86/sse41-blend.ll
@@ -1,7 +1,7 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -mattr=+sse4.1 | FileCheck %s
;CHECK-LABEL: vsel_float:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x float> %v1, <4 x float> %v2
@@ -10,7 +10,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
;CHECK-LABEL: vsel_4xi8:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
@@ -18,7 +18,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
}
;CHECK-LABEL: vsel_4xi16:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
%vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
@@ -27,7 +27,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
;CHECK-LABEL: vsel_i32:
-;CHECK: blendvps
+;CHECK: blendps
;CHECK: ret
define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
%vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
diff --git a/llvm/test/CodeGen/X86/sse41.ll b/llvm/test/CodeGen/X86/sse41.ll
index db0d9c5c116..3652d8c0d02 100644
--- a/llvm/test/CodeGen/X86/sse41.ll
+++ b/llvm/test/CodeGen/X86/sse41.ll
@@ -576,3 +576,11 @@ define < 4 x float> @test_insertps_no_undef(<4 x float> %x) {
%res = select <4 x i1> %mask, <4 x float> %x, <4 x float>%vecinit5
ret <4 x float> %res
}
+
+define <8 x i16> @blendvb_fallback(<8 x i1> %mask, <8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: blendvb_fallback
+; CHECK: blendvb
+; CHECK: ret
+ %ret = select <8 x i1> %mask, <8 x i16> %x, <8 x i16> %y
+ ret <8 x i16> %ret
+}
OpenPOWER on IntegriCloud