 llvm/test/CodeGen/X86/avx512-logic.ll   | 389
 llvm/test/CodeGen/X86/avx512vl-logic.ll | 804
 2 files changed, 1192 insertions(+), 1 deletion(-)
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll
index e4ee454dd00..b54c8a94058 100644
--- a/llvm/test/CodeGen/X86/avx512-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512-logic.ll
@@ -494,3 +494,392 @@ entry:
   %4 = bitcast <16 x i32> %3 to <8 x i64>
   ret <8 x i64> %4
 }
+
+define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_mask_xor_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_xor_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorpd %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %xor.i.i = xor <8 x i64> %0, %1
+  %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
+  ret <8 x double> %4
+}
+
+define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_maskz_xor_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_xor_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorpd %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %xor.i.i = xor <8 x i64> %0, %1
+  %2 = bitcast <8 x i64> %xor.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+  ret <8 x double> %4
+}
+
+define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_mask_xor_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_xor_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorps %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %xor.i.i = xor <16 x i32> %0, %1
+  %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
+  ret <16 x float> %4
+}
+
+define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_maskz_xor_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_xor_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorps %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %xor.i.i = xor <16 x i32> %0, %1
+  %2 = bitcast <16 x i32> %xor.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
+  ret <16 x float> %4
+}
+
+define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_mask_or_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %zmm1, %zmm2, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_or_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorpd %zmm1, %zmm2, %zmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %or.i.i = or <8 x i64> %1, %0
+  %2 = bitcast <8 x i64> %or.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
+  ret <8 x double> %4
+}
+
+define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_maskz_or_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_or_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorpd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %or.i.i = or <8 x i64> %1, %0
+  %2 = bitcast <8 x i64> %or.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+  ret <8 x double> %4
+}
+
+define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_mask_or_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %zmm1, %zmm2, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_or_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorps %zmm1, %zmm2, %zmm1
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %or.i.i = or <16 x i32> %1, %0
+  %2 = bitcast <16 x i32> %or.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
+  ret <16 x float> %4
+}
+
+define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_maskz_or_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_or_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorps %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %or.i.i = or <16 x i32> %1, %0
+  %2 = bitcast <16 x i32> %or.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
+  ret <16 x float> %4
+}
+
+define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_mask_and_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %zmm1, %zmm2, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_and_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd %zmm1, %zmm2, %zmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %and.i.i = and <8 x i64> %1, %0
+  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
+  ret <8 x double> %4
+}
+
+define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_maskz_and_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_and_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %and.i.i = and <8 x i64> %1, %0
+  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+  ret <8 x double> %4
+}
+
+define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_mask_and_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %zmm1, %zmm2, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_and_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandps %zmm1, %zmm2, %zmm1
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %and.i.i = and <16 x i32> %1, %0
+  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
+  ret <16 x float> %4
+}
+
+define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_maskz_and_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %zmm0, %zmm1, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_and_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandps %zmm0, %zmm1, %zmm0
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %and.i.i = and <16 x i32> %1, %0
+  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
+  ret <16 x float> %4
+}
+
+define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_mask_andnot_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_andnot_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnpd %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %and.i.i = and <8 x i64> %1, %neg.i.i
+  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W
+  ret <8 x double> %4
+}
+
+define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
+; KNL-LABEL: test_mm512_maskz_andnot_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %zmm1, %zmm0, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_andnot_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnpd %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x double> %__A to <8 x i64>
+  %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>
+  %1 = bitcast <8 x double> %__B to <8 x i64>
+  %and.i.i = and <8 x i64> %1, %neg.i.i
+  %2 = bitcast <8 x i64> %and.i.i to <8 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer
+  ret <8 x double> %4
+}
+
+define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_mask_andnot_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %zmm2, %zmm1, %zmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_mask_andnot_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnps %zmm2, %zmm1, %zmm1
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vblendmps %zmm1, %zmm0, %zmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %and.i.i = and <16 x i32> %1, %neg.i.i
+  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W
+  ret <16 x float> %4
+}
+
+define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
+; KNL-LABEL: test_mm512_maskz_andnot_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %zmm1, %zmm0, %zmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm512_maskz_andnot_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnps %zmm1, %zmm0, %zmm0
+; SKX-NEXT:    kmovw %edi, %k1
+; SKX-NEXT:    vmovaps %zmm0, %zmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <16 x float> %__A to <16 x i32>
+  %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <16 x float> %__B to <16 x i32>
+  %and.i.i = and <16 x i32> %1, %neg.i.i
+  %2 = bitcast <16 x i32> %and.i.i to <16 x float>
+  %3 = bitcast i16 %__U to <16 x i1>
+  %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer
+  ret <16 x float> %4
+}
+
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index dbf28ef999b..011497eba8a 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
 
 ; 256-bit
 
@@ -216,3 +217,804 @@ entry:
   %x = xor <2 x i64> %a2, %b
   ret <2 x i64> %x
 }
+
+
+define <4 x double> @test_mm256_mask_andnot_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_mask_andnot_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_andnot_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnpd %ymm2, %ymm1, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %and.i.i = and <4 x i64> %1, %neg.i.i
+  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
+  ret <4 x double> %4
+}
+
+define <4 x double> @test_mm256_maskz_andnot_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_maskz_andnot_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_andnot_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnpd %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %and.i.i = and <4 x i64> %1, %neg.i.i
+  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
+  ret <4 x double> %4
+}
+
+define <2 x double> @test_mm_mask_andnot_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_mask_andnot_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %xmm2, %xmm1, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_andnot_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnpd %xmm2, %xmm1, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %and.i.i = and <2 x i64> %1, %neg.i.i
+  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
+  ret <2 x double> %4
+}
+
+define <2 x double> @test_mm_maskz_andnot_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_maskz_andnot_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_andnot_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnpd %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %and.i.i = and <2 x i64> %1, %neg.i.i
+  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
+  ret <2 x double> %4
+}
+
+define <8 x float> @test_mm256_mask_andnot_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_mask_andnot_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_andnot_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnps %ymm2, %ymm1, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %and.i.i = and <8 x i32> %1, %neg.i.i
+  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
+  ret <8 x float> %4
+}
+
+define <8 x float> @test_mm256_maskz_andnot_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_maskz_andnot_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_andnot_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnps %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %and.i.i = and <8 x i32> %1, %neg.i.i
+  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
+  ret <8 x float> %4
+}
+
+define <4 x float> @test_mm_mask_andnot_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_mask_andnot_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %xmm2, %xmm1, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_andnot_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnps %xmm2, %xmm1, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %and.i.i = and <4 x i32> %1, %neg.i.i
+  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
+  ret <4 x float> %4
+}
+
+define <4 x float> @test_mm_maskz_andnot_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_maskz_andnot_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandnq %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_andnot_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandnps %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %and.i.i = and <4 x i32> %1, %neg.i.i
+  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
+  ret <4 x float> %4
+}
+
+define <4 x double> @test_mm256_mask_and_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_mask_and_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %ymm1, %ymm2, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_and_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd %ymm1, %ymm2, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %and.i.i = and <4 x i64> %1, %0
+  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
+  ret <4 x double> %4
+}
+
+define <4 x double> @test_mm256_maskz_and_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_maskz_and_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_and_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd %ymm0, %ymm1, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %and.i.i = and <4 x i64> %1, %0
+  %2 = bitcast <4 x i64> %and.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
+  ret <4 x double> %4
+}
+
+define <2 x double> @test_mm_mask_and_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_mask_and_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %xmm1, %xmm2, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_and_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd %xmm1, %xmm2, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %and.i.i = and <2 x i64> %1, %0
+  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
+  ret <2 x double> %4
+}
+
+define <2 x double> @test_mm_maskz_and_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_maskz_and_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_and_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandpd %xmm0, %xmm1, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %and.i.i = and <2 x i64> %1, %0
+  %2 = bitcast <2 x i64> %and.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
+  ret <2 x double> %4
+}
+
+define <8 x float> @test_mm256_mask_and_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_mask_and_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %ymm1, %ymm2, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_and_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandps %ymm1, %ymm2, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %and.i.i = and <8 x i32> %1, %0
+  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
+  ret <8 x float> %4
+}
+
+define <8 x float> @test_mm256_maskz_and_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_maskz_and_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_and_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandps %ymm0, %ymm1, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %and.i.i = and <8 x i32> %1, %0
+  %2 = bitcast <8 x i32> %and.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
+  ret <8 x float> %4
+}
+
+define <4 x float> @test_mm_mask_and_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_mask_and_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %xmm1, %xmm2, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_and_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandps %xmm1, %xmm2, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %and.i.i = and <4 x i32> %1, %0
+  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
+  ret <4 x float> %4
+}
+
+define <4 x float> @test_mm_maskz_and_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_maskz_and_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpandq %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_and_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vandps %xmm0, %xmm1, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %and.i.i = and <4 x i32> %1, %0
+  %2 = bitcast <4 x i32> %and.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
+  ret <4 x float> %4
+}
+
+define <4 x double> @test_mm256_mask_xor_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_mask_xor_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_xor_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %xor.i.i = xor <4 x i64> %0, %1
+  %2 = bitcast <4 x i64> %xor.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
+  ret <4 x double> %4
+}
+
+define <4 x double> @test_mm256_maskz_xor_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_maskz_xor_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_xor_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorpd %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %xor.i.i = xor <4 x i64> %0, %1
+  %2 = bitcast <4 x i64> %xor.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
+  ret <4 x double> %4
+}
+
+define <2 x double> @test_mm_mask_xor_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_mask_xor_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %xmm2, %xmm1, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_xor_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorpd %xmm2, %xmm1, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %xor.i.i = xor <2 x i64> %0, %1
+  %2 = bitcast <2 x i64> %xor.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
+  ret <2 x double> %4
+}
+
+define <2 x double> @test_mm_maskz_xor_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_maskz_xor_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_xor_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorpd %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %xor.i.i = xor <2 x i64> %0, %1
+  %2 = bitcast <2 x i64> %xor.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
+  ret <2 x double> %4
+}
+
+define <8 x float> @test_mm256_mask_xor_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_mask_xor_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %ymm2, %ymm1, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_xor_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorps %ymm2, %ymm1, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %xor.i.i = xor <8 x i32> %0, %1
+  %2 = bitcast <8 x i32> %xor.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
+  ret <8 x float> %4
+}
+
+define <8 x float> @test_mm256_maskz_xor_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_maskz_xor_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %ymm1, %ymm0, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_xor_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorps %ymm1, %ymm0, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %xor.i.i = xor <8 x i32> %0, %1
+  %2 = bitcast <8 x i32> %xor.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
+  ret <8 x float> %4
+}
+
+define <4 x float> @test_mm_mask_xor_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_mask_xor_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %xmm2, %xmm1, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_xor_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorps %xmm2, %xmm1, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %xor.i.i = xor <4 x i32> %0, %1
+  %2 = bitcast <4 x i32> %xor.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
+  ret <4 x float> %4
+}
+
+define <4 x float> @test_mm_maskz_xor_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_maskz_xor_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vpxorq %xmm1, %xmm0, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_xor_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %xor.i.i = xor <4 x i32> %0, %1
+  %2 = bitcast <4 x i32> %xor.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
+  ret <4 x float> %4
+}
+
+define <4 x double> @test_mm256_mask_or_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_mask_or_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %ymm1, %ymm2, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_or_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorpd %ymm1, %ymm2, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %or.i.i = or <4 x i64> %1, %0
+  %2 = bitcast <4 x i64> %or.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W
+  ret <4 x double> %4
+}
+
+define <4 x double> @test_mm256_maskz_or_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) {
+; KNL-LABEL: test_mm256_maskz_or_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_or_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorpd %ymm0, %ymm1, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x double> %__A to <4 x i64>
+  %1 = bitcast <4 x double> %__B to <4 x i64>
+  %or.i.i = or <4 x i64> %1, %0
+  %2 = bitcast <4 x i64> %or.i.i to <4 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer
+  ret <4 x double> %4
+}
+
+define <2 x double> @test_mm_mask_or_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_mask_or_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %xmm1, %xmm2, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_or_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorpd %xmm1, %xmm2, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmpd %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %or.i.i = or <2 x i64> %1, %0
+  %2 = bitcast <2 x i64> %or.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W
+  ret <2 x double> %4
+}
+
+define <2 x double> @test_mm_maskz_or_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
+; KNL-LABEL: test_mm_maskz_or_pd:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_or_pd:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorpd %xmm0, %xmm1, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <2 x double> %__A to <2 x i64>
+  %1 = bitcast <2 x double> %__B to <2 x i64>
+  %or.i.i = or <2 x i64> %1, %0
+  %2 = bitcast <2 x i64> %or.i.i to <2 x double>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
+  %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer
+  ret <2 x double> %4
+}
+
+define <8 x float> @test_mm256_mask_or_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_mask_or_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %ymm1, %ymm2, %ymm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_mask_or_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorps %ymm1, %ymm2, %ymm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %ymm1, %ymm0, %ymm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %or.i.i = or <8 x i32> %1, %0
+  %2 = bitcast <8 x i32> %or.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W
+  ret <8 x float> %4
+}
+
+define <8 x float> @test_mm256_maskz_or_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) {
+; KNL-LABEL: test_mm256_maskz_or_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %ymm0, %ymm1, %ymm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm256_maskz_or_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorps %ymm0, %ymm1, %ymm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <8 x float> %__A to <8 x i32>
+  %1 = bitcast <8 x float> %__B to <8 x i32>
+  %or.i.i = or <8 x i32> %1, %0
+  %2 = bitcast <8 x i32> %or.i.i to <8 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer
+  ret <8 x float> %4
+}
+
+define <4 x float> @test_mm_mask_or_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_mask_or_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %xmm1, %xmm2, %xmm1
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_mask_or_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorps %xmm1, %xmm2, %xmm1
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vblendmps %xmm1, %xmm0, %xmm0 {%k1}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %or.i.i = or <4 x i32> %1, %0
+  %2 = bitcast <4 x i32> %or.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W
+  ret <4 x float> %4
+}
+
+define <4 x float> @test_mm_maskz_or_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
+; KNL-LABEL: test_mm_maskz_or_ps:
+; KNL:       ## BB#0: ## %entry
+; KNL-NEXT:    vporq %xmm0, %xmm1, %xmm0
+; KNL-NEXT:    kmovw %edi, %k1
+; KNL-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; KNL-NEXT:    retq
+;
+; SKX-LABEL: test_mm_maskz_or_ps:
+; SKX:       ## BB#0: ## %entry
+; SKX-NEXT:    vorps %xmm0, %xmm1, %xmm0
+; SKX-NEXT:    kmovb %edi, %k1
+; SKX-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
+; SKX-NEXT:    retq
+entry:
+  %0 = bitcast <4 x float> %__A to <4 x i32>
+  %1 = bitcast <4 x float> %__B to <4 x i32>
+  %or.i.i = or <4 x i32> %1, %0
+  %2 = bitcast <4 x i32> %or.i.i to <4 x float>
+  %3 = bitcast i8 %__U to <8 x i1>
+  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+  %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer
+  ret <4 x float> %4
+}
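
For context, the IR bodies above match what Clang emits for the AVX-512 masked FP logic intrinsics (the test names follow the _mm/_mm256/_mm512 intrinsic names). A minimal C sketch of the source that produces these patterns, assuming the usual <immintrin.h> intrinsics; the wrapper function names here are hypothetical, not part of this commit:

    // Illustrative only; compile with clang -O2 -mavx512dq (512-bit) or
    // -mavx512dq -mavx512vl (128/256-bit).
    #include <immintrin.h>

    // Produces the IR shape of test_mm512_mask_xor_pd: bitcasts to <8 x i64>,
    // an integer xor, a bitcast back, and a select on the i8 mask.
    __m512d mask_xor_pd(__m512d w, __mmask8 u, __m512d a, __m512d b) {
      return _mm512_mask_xor_pd(w, u, a, b);
    }

    // Produces the IR shape of test_mm256_maskz_andnot_pd: the i8 mask is
    // bitcast to <8 x i1> and its low 4 bits extracted via shufflevector.
    __m256d maskz_andnot_pd(__mmask8 u, __m256d a, __m256d b) {
      return _mm256_maskz_andnot_pd(u, a, b);
    }

The CHECK lines capture the CPU difference the tests are interested in: KNL, which lacks AVX512DQ, lowers these to the integer forms (vpxorq/vporq/vpandq/vpandnq) with kmovw plus a blend or zero-masked move, while SKX selects the FP forms (vxorpd, vorps, vandnpd, and so on) and can use kmovb for 8-bit masks. The assertions are regenerated with utils/update_llc_test_checks.py, as the NOTE line in avx512vl-logic.ll records.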

