diff options
-rw-r--r-- | llvm/test/CodeGen/X86/avx512-logic.ll | 389 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-logic.ll | 804 |
2 files changed, 1192 insertions, 1 deletions
diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll index e4ee454dd00..b54c8a94058 100644 --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -494,3 +494,392 @@ entry: %4 = bitcast <16 x i32> %3 to <8 x i64> ret <8 x i64> %4 } + +define <8 x double> @test_mm512_mask_xor_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_mask_xor_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %zmm2, %zmm1, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_xor_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorpd %zmm2, %zmm1, %zmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %1 = bitcast <8 x double> %__B to <8 x i64> + %xor.i.i = xor <8 x i64> %0, %1 + %2 = bitcast <8 x i64> %xor.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W + ret <8 x double> %4 +} + +define <8 x double> @test_mm512_maskz_xor_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_maskz_xor_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_xor_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorpd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %1 = bitcast <8 x double> %__B to <8 x i64> + %xor.i.i = xor <8 x i64> %0, %1 + %2 = bitcast <8 x i64> %xor.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer + ret <8 x double> %4 +} + +define <16 x float> @test_mm512_mask_xor_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_mask_xor_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %zmm2, %zmm1, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_xor_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorps %zmm2, %zmm1, %zmm1 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %1 = bitcast <16 x float> %__B to <16 x i32> + %xor.i.i = xor <16 x i32> %0, %1 + %2 = bitcast <16 x i32> %xor.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W + ret <16 x float> %4 +} + +define <16 x float> @test_mm512_maskz_xor_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_maskz_xor_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_xor_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %1 = bitcast <16 x float> %__B to <16 x i32> + %xor.i.i = xor <16 x i32> %0, %1 + %2 = bitcast <16 x i32> %xor.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer + ret <16 x float> %4 +} + +define <8 x double> @test_mm512_mask_or_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_mask_or_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %zmm1, %zmm2, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_or_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorpd %zmm1, %zmm2, %zmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %1 = bitcast <8 x double> %__B to <8 x i64> + %or.i.i = or <8 x i64> %1, %0 + %2 = bitcast <8 x i64> %or.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W + ret <8 x double> %4 +} + +define <8 x double> @test_mm512_maskz_or_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_maskz_or_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_or_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %1 = bitcast <8 x double> %__B to <8 x i64> + %or.i.i = or <8 x i64> %1, %0 + %2 = bitcast <8 x i64> %or.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer + ret <8 x double> %4 +} + +define <16 x float> @test_mm512_mask_or_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_mask_or_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %zmm1, %zmm2, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_or_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorps %zmm1, %zmm2, %zmm1 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %1 = bitcast <16 x float> %__B to <16 x i32> + %or.i.i = or <16 x i32> %1, %0 + %2 = bitcast <16 x i32> %or.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W + ret <16 x float> %4 +} + +define <16 x float> @test_mm512_maskz_or_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_maskz_or_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_or_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %1 = bitcast <16 x float> %__B to <16 x i32> + %or.i.i = or <16 x i32> %1, %0 + %2 = bitcast <16 x i32> %or.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer + ret <16 x float> %4 +} + +define <8 x double> @test_mm512_mask_and_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_mask_and_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_and_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd %zmm1, %zmm2, %zmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %1 = bitcast <8 x double> %__B to <8 x i64> + %and.i.i = and <8 x i64> %1, %0 + %2 = bitcast <8 x i64> %and.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W + ret <8 x double> %4 +} + +define <8 x double> @test_mm512_maskz_and_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_maskz_and_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_and_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd %zmm0, %zmm1, %zmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %1 = bitcast <8 x double> %__B to <8 x i64> + %and.i.i = and <8 x i64> %1, %0 + %2 = bitcast <8 x i64> %and.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer + ret <8 x double> %4 +} + +define <16 x float> @test_mm512_mask_and_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_mask_and_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %zmm1, %zmm2, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_and_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandps %zmm1, %zmm2, %zmm1 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %1 = bitcast <16 x float> %__B to <16 x i32> + %and.i.i = and <16 x i32> %1, %0 + %2 = bitcast <16 x i32> %and.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W + ret <16 x float> %4 +} + +define <16 x float> @test_mm512_maskz_and_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_maskz_and_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %zmm0, %zmm1, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_and_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandps %zmm0, %zmm1, %zmm0 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %1 = bitcast <16 x float> %__B to <16 x i32> + %and.i.i = and <16 x i32> %1, %0 + %2 = bitcast <16 x i32> %and.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer + ret <16 x float> %4 +} + +define <8 x double> @test_mm512_mask_andnot_pd(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_mask_andnot_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %zmm2, %zmm1, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_andnot_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnpd %zmm2, %zmm1, %zmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <8 x double> %__B to <8 x i64> + %and.i.i = and <8 x i64> %1, %neg.i.i + %2 = bitcast <8 x i64> %and.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> %__W + ret <8 x double> %4 +} + +define <8 x double> @test_mm512_maskz_andnot_pd(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) { +; KNL-LABEL: test_mm512_maskz_andnot_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_andnot_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnpd %zmm1, %zmm0, %zmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x double> %__A to <8 x i64> + %neg.i.i = xor <8 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <8 x double> %__B to <8 x i64> + %and.i.i = and <8 x i64> %1, %neg.i.i + %2 = bitcast <8 x i64> %and.i.i to <8 x double> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x double> %2, <8 x double> zeroinitializer + ret <8 x double> %4 +} + +define <16 x float> @test_mm512_mask_andnot_ps(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_mask_andnot_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %zmm2, %zmm1, %zmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_mask_andnot_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnps %zmm2, %zmm1, %zmm1 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vblendmps %zmm1, %zmm0, %zmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <16 x float> %__B to <16 x i32> + %and.i.i = and <16 x i32> %1, %neg.i.i + %2 = bitcast <16 x i32> %and.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> %__W + ret <16 x float> %4 +} + +define <16 x float> @test_mm512_maskz_andnot_ps(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) { +; KNL-LABEL: test_mm512_maskz_andnot_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm512_maskz_andnot_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: kmovw %edi, %k1 +; SKX-NEXT: vmovaps %zmm0, %zmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <16 x float> %__A to <16 x i32> + %neg.i.i = xor <16 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <16 x float> %__B to <16 x i32> + %and.i.i = and <16 x i32> %1, %neg.i.i + %2 = bitcast <16 x i32> %and.i.i to <16 x float> + %3 = bitcast i16 %__U to <16 x i1> + %4 = select <16 x i1> %3, <16 x float> %2, <16 x float> zeroinitializer + ret <16 x float> %4 +} + diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll index dbf28ef999b..011497eba8a 100644 --- a/llvm/test/CodeGen/X86/avx512vl-logic.ll +++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -mattr=+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=SKX ; 256-bit @@ -216,3 +217,804 @@ entry: %x = xor <2 x i64> %a2, %b ret <2 x i64> %x } + + +define <4 x double> @test_mm256_mask_andnot_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_mask_andnot_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %ymm2, %ymm1, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_andnot_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnpd %ymm2, %ymm1, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <4 x double> %__B to <4 x i64> + %and.i.i = and <4 x i64> %1, %neg.i.i + %2 = bitcast <4 x i64> %and.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W + ret <4 x double> %4 +} + +define <4 x double> @test_mm256_maskz_andnot_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_maskz_andnot_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %ymm1, %ymm0, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_andnot_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnpd %ymm1, %ymm0, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %neg.i.i = xor <4 x i64> %0, <i64 -1, i64 -1, i64 -1, i64 -1> + %1 = bitcast <4 x double> %__B to <4 x i64> + %and.i.i = and <4 x i64> %1, %neg.i.i + %2 = bitcast <4 x i64> %and.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer + ret <4 x double> %4 +} + +define <2 x double> @test_mm_mask_andnot_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_mask_andnot_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %xmm2, %xmm1, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_andnot_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnpd %xmm2, %xmm1, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1> + %1 = bitcast <2 x double> %__B to <2 x i64> + %and.i.i = and <2 x i64> %1, %neg.i.i + %2 = bitcast <2 x i64> %and.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W + ret <2 x double> %4 +} + +define <2 x double> @test_mm_maskz_andnot_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_maskz_andnot_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_andnot_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnpd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %neg.i.i = xor <2 x i64> %0, <i64 -1, i64 -1> + %1 = bitcast <2 x double> %__B to <2 x i64> + %and.i.i = and <2 x i64> %1, %neg.i.i + %2 = bitcast <2 x i64> %and.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer + ret <2 x double> %4 +} + +define <8 x float> @test_mm256_mask_andnot_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_mask_andnot_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %ymm2, %ymm1, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_andnot_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnps %ymm2, %ymm1, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %__B to <8 x i32> + %and.i.i = and <8 x i32> %1, %neg.i.i + %2 = bitcast <8 x i32> %and.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W + ret <8 x float> %4 +} + +define <8 x float> @test_mm256_maskz_andnot_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_maskz_andnot_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %ymm1, %ymm0, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_andnot_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnps %ymm1, %ymm0, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %neg.i.i = xor <8 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <8 x float> %__B to <8 x i32> + %and.i.i = and <8 x i32> %1, %neg.i.i + %2 = bitcast <8 x i32> %and.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer + ret <8 x float> %4 +} + +define <4 x float> @test_mm_mask_andnot_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_mask_andnot_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %xmm2, %xmm1, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_andnot_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnps %xmm2, %xmm1, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <4 x float> %__B to <4 x i32> + %and.i.i = and <4 x i32> %1, %neg.i.i + %2 = bitcast <4 x i32> %and.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W + ret <4 x float> %4 +} + +define <4 x float> @test_mm_maskz_andnot_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_maskz_andnot_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandnq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_andnot_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandnps %xmm1, %xmm0, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %neg.i.i = xor <4 x i32> %0, <i32 -1, i32 -1, i32 -1, i32 -1> + %1 = bitcast <4 x float> %__B to <4 x i32> + %and.i.i = and <4 x i32> %1, %neg.i.i + %2 = bitcast <4 x i32> %and.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer + ret <4 x float> %4 +} + +define <4 x double> @test_mm256_mask_and_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_mask_and_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %ymm1, %ymm2, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_and_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd %ymm1, %ymm2, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %1 = bitcast <4 x double> %__B to <4 x i64> + %and.i.i = and <4 x i64> %1, %0 + %2 = bitcast <4 x i64> %and.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W + ret <4 x double> %4 +} + +define <4 x double> @test_mm256_maskz_and_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_maskz_and_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %ymm0, %ymm1, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_and_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd %ymm0, %ymm1, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %1 = bitcast <4 x double> %__B to <4 x i64> + %and.i.i = and <4 x i64> %1, %0 + %2 = bitcast <4 x i64> %and.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer + ret <4 x double> %4 +} + +define <2 x double> @test_mm_mask_and_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_mask_and_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %xmm1, %xmm2, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_and_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd %xmm1, %xmm2, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %1 = bitcast <2 x double> %__B to <2 x i64> + %and.i.i = and <2 x i64> %1, %0 + %2 = bitcast <2 x i64> %and.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W + ret <2 x double> %4 +} + +define <2 x double> @test_mm_maskz_and_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_maskz_and_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_and_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandpd %xmm0, %xmm1, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %1 = bitcast <2 x double> %__B to <2 x i64> + %and.i.i = and <2 x i64> %1, %0 + %2 = bitcast <2 x i64> %and.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer + ret <2 x double> %4 +} + +define <8 x float> @test_mm256_mask_and_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_mask_and_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %ymm1, %ymm2, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_and_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandps %ymm1, %ymm2, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %1 = bitcast <8 x float> %__B to <8 x i32> + %and.i.i = and <8 x i32> %1, %0 + %2 = bitcast <8 x i32> %and.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W + ret <8 x float> %4 +} + +define <8 x float> @test_mm256_maskz_and_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_maskz_and_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %ymm0, %ymm1, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_and_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %1 = bitcast <8 x float> %__B to <8 x i32> + %and.i.i = and <8 x i32> %1, %0 + %2 = bitcast <8 x i32> %and.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer + ret <8 x float> %4 +} + +define <4 x float> @test_mm_mask_and_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_mask_and_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %xmm1, %xmm2, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_and_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandps %xmm1, %xmm2, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %1 = bitcast <4 x float> %__B to <4 x i32> + %and.i.i = and <4 x i32> %1, %0 + %2 = bitcast <4 x i32> %and.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W + ret <4 x float> %4 +} + +define <4 x float> @test_mm_maskz_and_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_maskz_and_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpandq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_and_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vandps %xmm0, %xmm1, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %1 = bitcast <4 x float> %__B to <4 x i32> + %and.i.i = and <4 x i32> %1, %0 + %2 = bitcast <4 x i32> %and.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer + ret <4 x float> %4 +} + +define <4 x double> @test_mm256_mask_xor_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_mask_xor_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %ymm2, %ymm1, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_xor_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorpd %ymm2, %ymm1, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %1 = bitcast <4 x double> %__B to <4 x i64> + %xor.i.i = xor <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %xor.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W + ret <4 x double> %4 +} + +define <4 x double> @test_mm256_maskz_xor_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_maskz_xor_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_xor_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorpd %ymm1, %ymm0, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %1 = bitcast <4 x double> %__B to <4 x i64> + %xor.i.i = xor <4 x i64> %0, %1 + %2 = bitcast <4 x i64> %xor.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer + ret <4 x double> %4 +} + +define <2 x double> @test_mm_mask_xor_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_mask_xor_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %xmm2, %xmm1, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_xor_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorpd %xmm2, %xmm1, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %1 = bitcast <2 x double> %__B to <2 x i64> + %xor.i.i = xor <2 x i64> %0, %1 + %2 = bitcast <2 x i64> %xor.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W + ret <2 x double> %4 +} + +define <2 x double> @test_mm_maskz_xor_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_maskz_xor_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_xor_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorpd %xmm1, %xmm0, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %1 = bitcast <2 x double> %__B to <2 x i64> + %xor.i.i = xor <2 x i64> %0, %1 + %2 = bitcast <2 x i64> %xor.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer + ret <2 x double> %4 +} + +define <8 x float> @test_mm256_mask_xor_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_mask_xor_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %ymm2, %ymm1, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_xor_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorps %ymm2, %ymm1, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %1 = bitcast <8 x float> %__B to <8 x i32> + %xor.i.i = xor <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %xor.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W + ret <8 x float> %4 +} + +define <8 x float> @test_mm256_maskz_xor_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_maskz_xor_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %ymm1, %ymm0, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_xor_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %1 = bitcast <8 x float> %__B to <8 x i32> + %xor.i.i = xor <8 x i32> %0, %1 + %2 = bitcast <8 x i32> %xor.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer + ret <8 x float> %4 +} + +define <4 x float> @test_mm_mask_xor_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_mask_xor_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %xmm2, %xmm1, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_xor_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorps %xmm2, %xmm1, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %1 = bitcast <4 x float> %__B to <4 x i32> + %xor.i.i = xor <4 x i32> %0, %1 + %2 = bitcast <4 x i32> %xor.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W + ret <4 x float> %4 +} + +define <4 x float> @test_mm_maskz_xor_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_maskz_xor_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vpxorq %xmm1, %xmm0, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_xor_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vxorps %xmm1, %xmm0, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %1 = bitcast <4 x float> %__B to <4 x i32> + %xor.i.i = xor <4 x i32> %0, %1 + %2 = bitcast <4 x i32> %xor.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer + ret <4 x float> %4 +} + +define <4 x double> @test_mm256_mask_or_pd(<4 x double> %__W, i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_mask_or_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %ymm1, %ymm2, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_or_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorpd %ymm1, %ymm2, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %1 = bitcast <4 x double> %__B to <4 x i64> + %or.i.i = or <4 x i64> %1, %0 + %2 = bitcast <4 x i64> %or.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> %__W + ret <4 x double> %4 +} + +define <4 x double> @test_mm256_maskz_or_pd(i8 zeroext %__U, <4 x double> %__A, <4 x double> %__B) { +; KNL-LABEL: test_mm256_maskz_or_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %ymm0, %ymm1, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_or_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorpd %ymm0, %ymm1, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x double> %__A to <4 x i64> + %1 = bitcast <4 x double> %__B to <4 x i64> + %or.i.i = or <4 x i64> %1, %0 + %2 = bitcast <4 x i64> %or.i.i to <4 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x double> %2, <4 x double> zeroinitializer + ret <4 x double> %4 +} + +define <2 x double> @test_mm_mask_or_pd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_mask_or_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %xmm1, %xmm2, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_or_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorpd %xmm1, %xmm2, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmpd %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %1 = bitcast <2 x double> %__B to <2 x i64> + %or.i.i = or <2 x i64> %1, %0 + %2 = bitcast <2 x i64> %or.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> %__W + ret <2 x double> %4 +} + +define <2 x double> @test_mm_maskz_or_pd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) { +; KNL-LABEL: test_mm_maskz_or_pd: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_or_pd: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorpd %xmm0, %xmm1, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovapd %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <2 x double> %__A to <2 x i64> + %1 = bitcast <2 x double> %__B to <2 x i64> + %or.i.i = or <2 x i64> %1, %0 + %2 = bitcast <2 x i64> %or.i.i to <2 x double> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> <i32 0, i32 1> + %4 = select <2 x i1> %extract.i, <2 x double> %2, <2 x double> zeroinitializer + ret <2 x double> %4 +} + +define <8 x float> @test_mm256_mask_or_ps(<8 x float> %__W, i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_mask_or_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %ymm1, %ymm2, %ymm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_mask_or_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorps %ymm1, %ymm2, %ymm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %ymm1, %ymm0, %ymm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %1 = bitcast <8 x float> %__B to <8 x i32> + %or.i.i = or <8 x i32> %1, %0 + %2 = bitcast <8 x i32> %or.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> %__W + ret <8 x float> %4 +} + +define <8 x float> @test_mm256_maskz_or_ps(i8 zeroext %__U, <8 x float> %__A, <8 x float> %__B) { +; KNL-LABEL: test_mm256_maskz_or_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %ymm0, %ymm1, %ymm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm256_maskz_or_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorps %ymm0, %ymm1, %ymm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %ymm0, %ymm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <8 x float> %__A to <8 x i32> + %1 = bitcast <8 x float> %__B to <8 x i32> + %or.i.i = or <8 x i32> %1, %0 + %2 = bitcast <8 x i32> %or.i.i to <8 x float> + %3 = bitcast i8 %__U to <8 x i1> + %4 = select <8 x i1> %3, <8 x float> %2, <8 x float> zeroinitializer + ret <8 x float> %4 +} + +define <4 x float> @test_mm_mask_or_ps(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_mask_or_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %xmm1, %xmm2, %xmm1 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_mask_or_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorps %xmm1, %xmm2, %xmm1 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vblendmps %xmm1, %xmm0, %xmm0 {%k1} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %1 = bitcast <4 x float> %__B to <4 x i32> + %or.i.i = or <4 x i32> %1, %0 + %2 = bitcast <4 x i32> %or.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> %__W + ret <4 x float> %4 +} + +define <4 x float> @test_mm_maskz_or_ps(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) { +; KNL-LABEL: test_mm_maskz_or_ps: +; KNL: ## BB#0: ## %entry +; KNL-NEXT: vporq %xmm0, %xmm1, %xmm0 +; KNL-NEXT: kmovw %edi, %k1 +; KNL-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; KNL-NEXT: retq +; +; SKX-LABEL: test_mm_maskz_or_ps: +; SKX: ## BB#0: ## %entry +; SKX-NEXT: vorps %xmm0, %xmm1, %xmm0 +; SKX-NEXT: kmovb %edi, %k1 +; SKX-NEXT: vmovaps %xmm0, %xmm0 {%k1} {z} +; SKX-NEXT: retq +entry: + %0 = bitcast <4 x float> %__A to <4 x i32> + %1 = bitcast <4 x float> %__B to <4 x i32> + %or.i.i = or <4 x i32> %1, %0 + %2 = bitcast <4 x i32> %or.i.i to <4 x float> + %3 = bitcast i8 %__U to <8 x i1> + %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> + %4 = select <4 x i1> %extract.i, <4 x float> %2, <4 x float> zeroinitializer + ret <4 x float> %4 +} |