diff options
Diffstat (limited to 'llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll')
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll | 784 |
1 files changed, 432 insertions, 352 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll index 35f6b2a127b..3a664ba6c88 100644 --- a/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll +++ b/llvm/test/CodeGen/X86/avx512-shuffles/broadcast-vector-fp.ll @@ -9,100 +9,108 @@ define <8 x float> @test_2xfloat_to_8xfloat(<8 x float> %vec) { %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask0(<8 x float> %vec) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $1, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask1(<8 x float> %vec) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $126, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask2(<8 x float> %vec) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-35, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %ymm1, %ymm0 ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask3(<8 x float> %vec) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $62, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } define <16 x float> @test_2xfloat_to_16xfloat(<16 x float> %vec) { @@ -113,100 +121,108 @@ define <16 x float> @test_2xfloat_to_16xfloat(<16 x float> %vec) { %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movw $21312, %ax # imm = 0x5340 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask0(<16 x float> %vec) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movw $21312, %ax # imm = 0x5340 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask1(<16 x float> %vec) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-8490, %ax # imm = 0xDED6 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask2(<16 x float> %vec) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movw $12522, %ax # imm = 0x30EA -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: vmovapd %zmm1, %zmm0 ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask3(<16 x float> %vec) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-28344, %ax # imm = 0x9148 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } define <4 x double> @test_2xdouble_to_4xdouble_mem(<2 x double>* %vp) { @@ -218,104 +234,112 @@ define <4 x double> @test_2xdouble_to_4xdouble_mem(<2 x double>* %vp) { %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ret <4 x double> %res } -define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %default) { +define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 0, i1 0, i1 1, i1 0>, <4 x double> %shuf, <4 x double> %default + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default ret <4 x double> %res } -define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp) { +define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $4, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 0, i1 0, i1 1, i1 0>, <4 x double> %shuf, <4 x double> zeroinitializer + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res } -define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %default) { +define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x double> %shuf, <4 x double> %default + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default ret <4 x double> %res } -define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp) { +define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $13, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 1>, <4 x double> %shuf, <4 x double> zeroinitializer + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res } -define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %default) { +define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x double> %shuf, <4 x double> %default + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default ret <4 x double> %res } -define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp) { +define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $10, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 0, i1 1, i1 0, i1 1>, <4 x double> %shuf, <4 x double> zeroinitializer + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res } -define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %default) { +define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x double> %shuf, <4 x double> %default + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default ret <4 x double> %res } -define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp) { +define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $5, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> - %res = select <4 x i1> <i1 1, i1 0, i1 1, i1 0>, <4 x double> %shuf, <4 x double> zeroinitializer + %cmp = fcmp oeq <4 x double> %mask, zeroinitializer + %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer ret <4 x double> %res } define <8 x double> @test_2xdouble_to_8xdouble_mem(<2 x double>* %vp) { @@ -327,104 +351,112 @@ define <8 x double> @test_2xdouble_to_8xdouble_mem(<2 x double>* %vp) { %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> ret <8 x double> %res } -define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $21, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp) { +define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $21, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } -define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $82, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp) { +define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $82, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } -define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp) { +define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-126, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } -define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-19, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp) { +define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-19, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } define <8 x double> @test_4xdouble_to_8xdouble_mem(<4 x double>* %vp) { @@ -436,104 +468,112 @@ define <8 x double> @test_4xdouble_to_8xdouble_mem(<4 x double>* %vp) { %res = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ret <8 x double> %res } -define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $28, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp) { +define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $28, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } -define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-115, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp) { +define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-115, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } -define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp) { +define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-76, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } -define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %default) { +define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) { ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x double> %shuf, <8 x double> %default + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default ret <8 x double> %res } -define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp) { +define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %mask) { ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-116, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x double>, <4 x double>* %vp %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x double> %shuf, <8 x double> zeroinitializer + %cmp = fcmp oeq <8 x double> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer ret <8 x double> %res } define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) { @@ -546,112 +586,120 @@ define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) { %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movb $-49, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movb $-118, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movb $-118, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movb $-11, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movb $-11, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movb $-102, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm1[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = xmm2[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp) { +define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movb $-102, %al -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) { @@ -664,112 +712,120 @@ define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) { %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movw $-27027, %ax # imm = 0x966D -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movw $29162, %ax # imm = 0x71EA -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6 -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movw $-26458, %ax # imm = 0x98A6 -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero -; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm2 = mem[0],zero +; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3 +; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = xmm2[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp) { +define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; CHECK-NEXT: movw $25225, %ax # imm = 0x6289 -; CHECK-NEXT: kmovw %eax, %k1 -; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] +; CHECK-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1 +; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm1[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1] ; CHECK-NEXT: retq %vec = load <2 x float>, <2 x float>* %vp %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1> - %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } define <8 x float> @test_4xfloat_to_8xfloat_mem(<4 x float>* %vp) { @@ -781,104 +837,112 @@ define <8 x float> @test_4xfloat_to_8xfloat_mem(<4 x float>* %vp) { %res = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ret <8 x float> %res } -define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-109, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp) { +define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movb $-109, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 0, i1 0, i1 1>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $74, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp) { +define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movb $74, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 0, i1 1, i1 0>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp) { +define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movb $49, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } -define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %default) { +define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $48, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> %default + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default ret <8 x float> %res } -define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp) { +define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movb $48, %al -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <8 x i1> <i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0>, <8 x float> %shuf, <8 x float> zeroinitializer + %cmp = fcmp oeq <8 x float> %mask, zeroinitializer + %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer ret <8 x float> %res } define <16 x float> @test_4xfloat_to_16xfloat_mem(<4 x float>* %vp) { @@ -890,104 +954,112 @@ define <16 x float> @test_4xfloat_to_16xfloat_mem(<4 x float>* %vp) { %res = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ret <16 x float> %res } -define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-25378, %ax # imm = 0x9CDE -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp) { +define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-25378, %ax # imm = 0x9CDE -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-22502, %ax # imm = 0xA81A -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp) { +define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-22502, %ax # imm = 0xA81A -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp) { +define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movw $31229, %ax # imm = 0x79FD -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp) { +define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movw $5887, %ax # imm = 0x16FF -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; CHECK-NEXT: retq %vec = load <4 x float>, <4 x float>* %vp %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> - %res = select <16 x i1> <i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 1, i1 0, i1 1, i1 0, i1 0, i1 0>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } define <16 x float> @test_8xfloat_to_16xfloat_mem(<8 x float>* %vp) { @@ -999,103 +1071,111 @@ define <16 x float> @test_8xfloat_to_16xfloat_mem(<8 x float>* %vp) { %res = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ret <16 x float> %res } -define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp) { +define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask0: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-15887, %ax # imm = 0xC1F1 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp) { +define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask1: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-8077, %ax # imm = 0xE073 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 1, i1 1, i1 0, i1 0, i1 1, i1 1, i1 1, i1 0, i1 0, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp) { +define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask2: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-5023, %ax # imm = 0xEC61 -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 1, i1 0, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 0, i1 0, i1 1, i1 1, i1 0, i1 1, i1 1, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } -define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %default) { +define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) { ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2 +; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <16 x float> %shuf, <16 x float> %default + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default ret <16 x float> %res } -define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp) { +define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %mask) { ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask3: ; CHECK: # BB#0: -; CHECK-NEXT: movw $-10326, %ax # imm = 0xD7AA -; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 +; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7] ; CHECK-NEXT: retq %vec = load <8 x float>, <8 x float>* %vp %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> - %res = select <16 x i1> <i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1, i1 1, i1 1, i1 0, i1 1, i1 0, i1 1, i1 1>, <16 x float> %shuf, <16 x float> zeroinitializer + %cmp = fcmp oeq <16 x float> %mask, zeroinitializer + %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer ret <16 x float> %res } |