-rw-r--r--  llvm/lib/Target/X86/X86InstrAVX512.td  | 267
-rw-r--r--  llvm/test/CodeGen/X86/avx512-arith.ll  |  27
-rw-r--r--  llvm/test/CodeGen/X86/vec_fabs.ll      |  12
3 files changed, 75 insertions, 231 deletions
diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index ec7c5291568..f5686252cac 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -4238,215 +4238,72 @@ defm VXOR : avx512_fp_binop_p<0x57, "vxor", X86fxor, HasDQI,
                              SSE_ALU_ITINS_P, 1>;
 
 // Patterns catch floating point selects with bitcasted integer logic ops.
-let Predicates = [HasVLX] in {
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPXORDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDNDZ128rrk VR128X:$src0, VK4WM:$mask, VR128X:$src1,
-             VR128X:$src2)>;
-
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPXORDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v4f32 (vselect VK4WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDNDZ128rrkz VK4WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPXORQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    VR128X:$src0)),
-            (VPANDNQZ128rrk VR128X:$src0, VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (and VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (or VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (xor VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPXORQZ128rrkz VK2WM:$mask, VR128X:$src1, VR128X:$src2)>;
-  def : Pat<(v2f64 (vselect VK2WM:$mask,
-                    (bitconvert (v2i64 (X86andnp VR128X:$src1, VR128X:$src2))),
-                    (bitconvert (v4i32 immAllZerosV)))),
-            (VPANDNQZ128rrkz VK2WM:$mask, VR128X:$src1,
-             VR128X:$src2)>;
-
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPXORDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDNDZ256rrk VR256X:$src0, VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPXORDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v8f32 (vselect VK8WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDNDZ256rrkz VK8WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPXORQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    VR256X:$src0)),
-            (VPANDNQZ256rrk VR256X:$src0, VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (and VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (or VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (xor VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPXORQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
-  def : Pat<(v4f64 (vselect VK4WM:$mask,
-                    (bitconvert (v4i64 (X86andnp VR256X:$src1, VR256X:$src2))),
-                    (bitconvert (v8i32 immAllZerosV)))),
-            (VPANDNQZ256rrkz VK4WM:$mask, VR256X:$src1, VR256X:$src2)>;
+multiclass avx512_fp_logical_lowering<string InstrStr, SDNode OpNode,
+                                      X86VectorVTInfo _, Predicate prd> {
+let Predicates = [prd] in {
+  // Masked register-register logical operations.
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+                   _.RC:$src0)),
+            (!cast<Instruction>(InstrStr#rrk) _.RC:$src0, _.KRCWM:$mask,
+             _.RC:$src1, _.RC:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1, _.RC:$src2))),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#rrkz) _.KRCWM:$mask, _.RC:$src1,
+             _.RC:$src2)>;
+  // Masked register-memory logical operations.
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1,
+                                         (load addr:$src2)))),
+                   _.RC:$src0)),
+            (!cast<Instruction>(InstrStr#rmk) _.RC:$src0, _.KRCWM:$mask,
+             _.RC:$src1, addr:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert (_.i64VT (OpNode _.RC:$src1, (load addr:$src2)))),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#rmkz) _.KRCWM:$mask, _.RC:$src1,
+             addr:$src2)>;
+  // Register-broadcast logical operations.
+  def : Pat<(_.i64VT (OpNode _.RC:$src1,
+                      (bitconvert (_.VT (X86VBroadcast
+                                         (_.ScalarLdFrag addr:$src2)))))),
+            (!cast<Instruction>(InstrStr#rmb) _.RC:$src1, addr:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert
+                    (_.i64VT (OpNode _.RC:$src1,
+                              (bitconvert (_.VT
+                                           (X86VBroadcast
+                                            (_.ScalarLdFrag addr:$src2))))))),
+                   _.RC:$src0)),
+            (!cast<Instruction>(InstrStr#rmbk) _.RC:$src0, _.KRCWM:$mask,
+             _.RC:$src1, addr:$src2)>;
+  def : Pat<(_.VT (vselect _.KRCWM:$mask,
+                   (bitconvert
+                    (_.i64VT (OpNode _.RC:$src1,
+                              (bitconvert (_.VT
+                                           (X86VBroadcast
+                                            (_.ScalarLdFrag addr:$src2))))))),
+                   _.ImmAllZerosV)),
+            (!cast<Instruction>(InstrStr#rmbkz) _.KRCWM:$mask,
+             _.RC:$src1, addr:$src2)>;
+}
 }
 
-let Predicates = [HasAVX512] in {
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPANDDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPXORDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                     VR512:$src0)),
-            (VPANDNDZrrk VR512:$src0, VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPXORDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v16f32 (vselect VK16WM:$mask,
-                     (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                     (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDNDZrrkz VK16WM:$mask, VR512:$src1, VR512:$src2)>;
-
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPANDQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPXORQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                    VR512:$src0)),
-            (VPANDNQZrrk VR512:$src0, VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (and VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (or VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (xor VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPXORQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
-  def : Pat<(v8f64 (vselect VK8WM:$mask,
-                    (bitconvert (v8i64 (X86andnp VR512:$src1, VR512:$src2))),
-                    (bitconvert (v16i32 immAllZerosV)))),
-            (VPANDNQZrrkz VK8WM:$mask, VR512:$src1, VR512:$src2)>;
+multiclass avx512_fp_logical_lowering_sizes<string InstrStr, SDNode OpNode> {
+defm : avx512_fp_logical_lowering<InstrStr#DZ128, OpNode, v4f32x_info, HasVLX>;
+defm : avx512_fp_logical_lowering<InstrStr#QZ128, OpNode, v2f64x_info, HasVLX>;
+defm : avx512_fp_logical_lowering<InstrStr#DZ256, OpNode, v8f32x_info, HasVLX>;
+defm : avx512_fp_logical_lowering<InstrStr#QZ256, OpNode, v4f64x_info, HasVLX>;
+defm : avx512_fp_logical_lowering<InstrStr#DZ, OpNode, v16f32_info, HasAVX512>;
+defm : avx512_fp_logical_lowering<InstrStr#QZ, OpNode, v8f64_info, HasAVX512>;
 }
 
+defm : avx512_fp_logical_lowering_sizes<"VPAND", and>;
+defm : avx512_fp_logical_lowering_sizes<"VPOR", or>;
+defm : avx512_fp_logical_lowering_sizes<"VPXOR", xor>;
+defm : avx512_fp_logical_lowering_sizes<"VPANDN", X86andnp>;
+
 multiclass avx512_fp_scalef_p<bits<8> opc, string OpcodeStr, SDNode OpNode,
                               X86VectorVTInfo _> {
   defm rr: AVX512_maskable<opc, MRMSrcReg, _, (outs _.RC:$dst),
diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll
index a739ee3ac0f..d4f6dd943c7 100644
--- a/llvm/test/CodeGen/X86/avx512-arith.ll
+++ b/llvm/test/CodeGen/X86/avx512-arith.ll
@@ -985,20 +985,17 @@ define <8 x double> @test_maskz_broadcast_vaddpd(<8 x double> %i, double* %j,
 define <16 x float> @test_fxor(<16 x float> %a) {
 ; AVX512F-LABEL: test_fxor:
 ; AVX512F:       ## BB#0:
-; AVX512F-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512F-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: test_fxor:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: test_fxor:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT:    vpxorq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpxord {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: test_fxor:
@@ -1051,20 +1048,17 @@ define <8 x float> @test_fxor_8f32(<8 x float> %a) {
 define <8 x double> @fabs_v8f64(<8 x double> %p)
 ; AVX512F-LABEL: fabs_v8f64:
 ; AVX512F:       ## BB#0:
-; AVX512F-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: fabs_v8f64:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: fabs_v8f64:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: fabs_v8f64:
@@ -1085,20 +1079,17 @@ declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p)
 define <16 x float> @fabs_v16f32(<16 x float> %p)
 ; AVX512F-LABEL: fabs_v16f32:
 ; AVX512F:       ## BB#0:
-; AVX512F-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512F-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512F-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512F-NEXT:    retq
 ;
 ; AVX512VL-LABEL: fabs_v16f32:
 ; AVX512VL:       ## BB#0:
-; AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512VL-NEXT:    retq
 ;
 ; AVX512BW-LABEL: fabs_v16f32:
 ; AVX512BW:       ## BB#0:
-; AVX512BW-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; AVX512BW-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; AVX512BW-NEXT:    retq
 ;
 ; AVX512DQ-LABEL: fabs_v16f32:
diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll b/llvm/test/CodeGen/X86/vec_fabs.ll
index 854b84cb04a..35d6da9ff36 100644
--- a/llvm/test/CodeGen/X86/vec_fabs.ll
+++ b/llvm/test/CodeGen/X86/vec_fabs.ll
@@ -145,8 +145,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v8f64:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastsd {{\.LCPI.*}}, %zmm1
-; X32_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X32_AVX512VL-NEXT:    vpandq {{\.LCPI.*}}{1to8}, %zmm0, %zmm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -163,8 +162,7 @@ define <8 x double> @fabs_v8f64(<8 x double> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v8f64:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm1
-; X64_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X64_AVX512VL-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v8f64:
@@ -186,8 +184,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
 ;
 ; X32_AVX512VL-LABEL: fabs_v16f32:
 ; X32_AVX512VL:       # BB#0:
-; X32_AVX512VL-NEXT:    vbroadcastss {{\.LCPI.*}}, %zmm1
-; X32_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X32_AVX512VL-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm0
 ; X32_AVX512VL-NEXT:    retl
 ;
 ; X32_AVX512VLDQ-LABEL: fabs_v16f32:
@@ -204,8 +201,7 @@ define <16 x float> @fabs_v16f32(<16 x float> %p) {
 ;
 ; X64_AVX512VL-LABEL: fabs_v16f32:
 ; X64_AVX512VL:       # BB#0:
-; X64_AVX512VL-NEXT:    vbroadcastss {{.*}}(%rip), %zmm1
-; X64_AVX512VL-NEXT:    vpandq %zmm1, %zmm0, %zmm0
+; X64_AVX512VL-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
 ; X64_AVX512VL-NEXT:    retq
 ;
 ; X64_AVX512VLDQ-LABEL: fabs_v16f32:
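
For reference, a minimal LLVM IR reproducer for the codegen change these tests verify. This is a sketch modeled on fabs_v16f32 from vec_fabs.ll above; the exact llc invocation is an assumption, not part of this commit:

; Assumed invocation: llc -mtriple=x86_64-unknown-unknown -mattr=+avx512vl
; fabs is lowered to an integer AND that clears each element's sign bit.
; With the broadcast patterns added in this commit, the constant mask load
; folds into the AND as an embedded broadcast:
;   vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0
; instead of the previous separate vbroadcastss + vpandq pair.
define <16 x float> @fabs_v16f32(<16 x float> %p) {
  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p)
  ret <16 x float> %t
}
declare <16 x float> @llvm.fabs.v16f32(<16 x float>)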