diff options
| author | Chandler Carruth <chandlerc@gmail.com> | 2014-09-29 02:01:20 +0000 |
|---|---|---|
| committer | Chandler Carruth <chandlerc@gmail.com> | 2014-09-29 02:01:20 +0000 |
| commit | b1cc7a8542e7f59a23e7b8169b448cbfb77f3976 (patch) | |
| tree | fb50706b4762d6affbbbc4f1359f2b16cf562759 /llvm/test | |
| parent | d639c7a82992c4f785448682fcf84f9ddca3bebf (diff) | |
| download | bcm5719-llvm-b1cc7a8542e7f59a23e7b8169b448cbfb77f3976.tar.gz bcm5719-llvm-b1cc7a8542e7f59a23e7b8169b448cbfb77f3976.zip | |
[x86] Delete a bunch of really bad and totally unnecessary code in the
X86 target-specific DAG combining that tried to convert VSELECT nodes
into VECTOR_SHUFFLE nodes that it "knew" would lower into
immediate-controlled blend nodes.

Turns out, we have perfectly good lowering of all these VSELECT nodes,
and indeed that lowering already knows how to handle lowering through
BLENDI to immediate-controlled blend nodes. The code just wasn't getting
used much because this thing forced the world to go through the vector
shuffle lowering. Yuck.

This also exposes that I was too aggressive in avoiding domain crossing
in r218588 with that lowering -- when the other option is to expand into
two 128-bit vectors, it is worth domain crossing. Restore that behavior
now that we have nice tests covering it.

The test updates here fall into two camps. One is where previously we
ended up with an unsigned encoding of the blend operand and now we get
a signed encoding. In most of those places there were elaborate comments
explaining exactly what these operands really mean. Rather than that,
just switch these tests to use the nicely decoded comments that make it
obvious that the final shuffle matches.

The other updates are just removing pointless domain crossing by
blending integers with PBLENDW rather than BLENDPS.

llvm-svn: 218589
Diffstat (limited to 'llvm/test')
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-blend.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/blend-msb.ll | 12 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/sse41-blend.ll | 6 |
3 files changed, 7 insertions, 23 deletions
```diff
diff --git a/llvm/test/CodeGen/X86/avx-blend.ll b/llvm/test/CodeGen/X86/avx-blend.ll
index d2a22d70947..17a4f71e48e 100644
--- a/llvm/test/CodeGen/X86/avx-blend.ll
+++ b/llvm/test/CodeGen/X86/avx-blend.ll
@@ -21,7 +21,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 
 
 ;CHECK-LABEL: vsel_i32:
-;CHECK: vblendps $10, %xmm1, %xmm0, %xmm0
+;CHECK: vpblendw {{.*}} ## xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> %v1, <4 x i32> %v2
@@ -61,13 +61,7 @@ define <16 x i8> @vsel_i8(<16 x i8> %v1, <16 x i8> %v2) {
 
 ;CHECK-LABEL: vsel_float8:
 ;CHECK-NOT: vinsertf128
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ;CHECK: ret
 define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x float> %v1, <8 x float> %v2
@@ -76,7 +70,7 @@ define <8 x float> @vsel_float8(<8 x float> %v1, <8 x float> %v2) {
 
 ;CHECK-LABEL: vsel_i328:
 ;CHECK-NOT: vinsertf128
-;CHECK: vblendps $238, %ymm1, %ymm0, %ymm0
+;CHECK: vblendps {{.*}} ## ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
 ;CHECK-NEXT: ret
 define <8 x i32> @vsel_i328(<8 x i32> %v1, <8 x i32> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i32> %v1, <8 x i32> %v2
diff --git a/llvm/test/CodeGen/X86/blend-msb.ll b/llvm/test/CodeGen/X86/blend-msb.ll
index 34aaf2c31ac..c4a6d32ae5a 100644
--- a/llvm/test/CodeGen/X86/blend-msb.ll
+++ b/llvm/test/CodeGen/X86/blend-msb.ll
@@ -22,17 +22,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 }
 
 ;CHECK-LABEL: vsel_8xi16:
-; The select mask is
-; <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>
-; which translates into the boolean mask (big endian representation):
-; 00010001 = 17.
-; '1' means takes the first argument, '0' means takes the second argument.
-; This is the opposite of the intel syntax, thus we expect
-; the inverted mask: 11101110 = 238.
-; According to the ABI:
-; v1 is in xmm0 => first argument is xmm0.
-; v2 is in xmm1 => second argument is xmm1.
-;CHECK: pblendw $238, %xmm1, %xmm0
+;CHECK: pblendw {{.*}} ## xmm0 = xmm0[0],xmm1[1,2,3],xmm0[4],xmm1[5,6,7]
 ;CHECK: ret
 define <8 x i16> @vsel_8xi16(<8 x i16> %v1, <8 x i16> %v2) {
   %vsel = select <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false>, <8 x i16> %v1, <8 x i16> %v2
diff --git a/llvm/test/CodeGen/X86/sse41-blend.ll b/llvm/test/CodeGen/X86/sse41-blend.ll
index 3992da0b512..4e1722da8d0 100644
--- a/llvm/test/CodeGen/X86/sse41-blend.ll
+++ b/llvm/test/CodeGen/X86/sse41-blend.ll
@@ -10,7 +10,7 @@ define <4 x float> @vsel_float(<4 x float> %v1, <4 x float> %v2) {
 
 
 ;CHECK-LABEL: vsel_4xi8:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i8> %v1, <4 x i8> %v2
@@ -18,7 +18,7 @@ define <4 x i8> @vsel_4xi8(<4 x i8> %v1, <4 x i8> %v2) {
 }
 
 ;CHECK-LABEL: vsel_4xi16:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i16> %v1, <4 x i16> %v2
@@ -27,7 +27,7 @@ define <4 x i16> @vsel_4xi16(<4 x i16> %v1, <4 x i16> %v2) {
 
 
 ;CHECK-LABEL: vsel_i32:
-;CHECK: blendps
+;CHECK: blendw
 ;CHECK: ret
 define <4 x i32> @vsel_i32(<4 x i32> %v1, <4 x i32> %v2) {
   %vsel = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> %v1, <4 x i32> %v2
```

