diff options
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 15 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 46 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 22 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll | 9 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 12 |
5 files changed, 59 insertions, 45 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 12240ff5407..ce590de7308 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17677,6 +17677,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget subVec, subVec, immVal), Mask, Passthru, Subtarget, DAG); } + case BRCST32x2_TO_VEC: { + SDValue Src = Op.getOperand(1); + SDValue PassThru = Op.getOperand(2); + SDValue Mask = Op.getOperand(3); + + assert((VT.getScalarType() == MVT::i32 || + VT.getScalarType() == MVT::f32) && "Unexpected type!"); + //bitcast Src to packed 64 + MVT ScalarVT = VT.getScalarType() == MVT::i32 ? MVT::i64 : MVT::f64; + MVT BitcastVT = MVT::getVectorVT(ScalarVT, Src.getValueSizeInBits()/64); + Src = DAG.getBitcast(BitcastVT, Src); + + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, Src), + Mask, PassThru, Subtarget, DAG); + } default: break; } diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 7eba559f49a..fc84cc555b7 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -898,12 +898,22 @@ multiclass avx512_broadcast_rm<bits<8> opc, string OpcodeStr, (ins SrcInfo.RC:$src), OpcodeStr, "$src", "$src", (DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src)))>, T8PD, EVEX; - let mayLoad = 1 in + let mayLoad = 1 in { defm m : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src", (DestInfo.VT (X86VBroadcast (SrcInfo.ScalarLdFrag addr:$src)))>, T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>; + + let isCodeGenOnly = 1 in + defm m_Int : AVX512_maskable<opc, MRMSrcMem, DestInfo, (outs DestInfo.RC:$dst), + (ins SrcInfo.ScalarMemOp:$src), OpcodeStr, "$src", "$src", + (DestInfo.VT + (X86VBroadcast + (SrcInfo.VT (scalar_to_vector + (SrcInfo.ScalarLdFrag addr:$src)))))>, + T8PD, EVEX, EVEX_CD8<SrcInfo.EltSize, CD8VT1>; + } //mayLoad = 1 } multiclass avx512_fp_broadcast_vl<bits<8> opc, string OpcodeStr, @@ -1070,45 +1080,29 @@ defm VBROADCASTF32X8 : avx512_subvec_broadcast_rm<0x1b, "vbroadcastf32x8", EVEX_V512, EVEX_CD8<32, CD8VT8>; } -multiclass avx512_broadcast_32x2<bits<8> opc, string OpcodeStr, - X86VectorVTInfo _Dst, X86VectorVTInfo _Src, - SDNode OpNode = X86SubVBroadcast> { - - defm r : AVX512_maskable<opc, MRMSrcReg, _Dst, (outs _Dst.RC:$dst), - (ins _Src.RC:$src), OpcodeStr, "$src", "$src", - (_Dst.VT (OpNode (_Src.VT _Src.RC:$src)))>, - T8PD, EVEX; - let mayLoad = 1 in - defm m : AVX512_maskable<opc, MRMSrcMem, _Dst, (outs _Dst.RC:$dst), - (ins _Src.ScalarMemOp:$src), OpcodeStr, "$src", "$src", - (_Dst.VT (OpNode - (_Src.VT (scalar_to_vector(loadi64 addr:$src)))))>, - T8PD, EVEX, EVEX_CD8<_Src.EltSize, CD8VT2>; -} - multiclass avx512_common_broadcast_32x2<bits<8> opc, string OpcodeStr, - AVX512VLVectorVTInfo _> { + AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> { let Predicates = [HasDQI] in - defm Z : avx512_broadcast_32x2<opc, OpcodeStr, _.info512, _.info128>, + defm Z : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info512, _Src.info128>, EVEX_V512; let Predicates = [HasDQI, HasVLX] in - defm Z256 : avx512_broadcast_32x2<opc, OpcodeStr, _.info256, _.info128>, + defm Z256 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info256, _Src.info128>, EVEX_V256; } multiclass avx512_common_broadcast_i32x2<bits<8> opc, string OpcodeStr, - AVX512VLVectorVTInfo _> : - avx512_common_broadcast_32x2<opc, OpcodeStr, _> { + AVX512VLVectorVTInfo _Dst, AVX512VLVectorVTInfo _Src> : + avx512_common_broadcast_32x2<opc, OpcodeStr, _Dst, _Src> { let Predicates = [HasDQI, HasVLX] in - defm Z128 : avx512_broadcast_32x2<opc, OpcodeStr, _.info128, _.info128, - X86SubV32x2Broadcast>, EVEX_V128; + defm Z128 : avx512_broadcast_rm<opc, OpcodeStr, _Dst.info128, _Src.info128>, + EVEX_V128; } defm VPBROADCASTI32X2 : avx512_common_broadcast_i32x2<0x59, "vbroadcasti32x2", - avx512vl_i32_info>; + avx512vl_i32_info, avx512vl_i64_info>; defm VPBROADCASTF32X2 : avx512_common_broadcast_32x2<0x19, "vbroadcastf32x2", - avx512vl_f32_info>; + avx512vl_f32_info, avx512vl_f64_info>; def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index c4c5c1c95ab..7c1447150a0 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -31,7 +31,7 @@ enum IntrinsicType { FMA_OP_SCALAR_MASK, FMA_OP_SCALAR_MASKZ, FMA_OP_SCALAR_MASK3, VPERM_2OP_MASK, VPERM_3OP_MASK, VPERM_3OP_MASKZ, INTR_TYPE_SCALAR_MASK, INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, - COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, + COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, BRCST_SUBVEC_TO_VEC, BRCST32x2_TO_VEC, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, LOADA, LOADU, STOREA, STOREU, STOREANT, BLEND, INSERT_SUBVEC, TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK, FIXUPIMM, FIXUPIMM_MASKZ, FIXUPIMMS, @@ -471,10 +471,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcast_ss_ps_512, INTR_TYPE_1OP_MASK, X86ISD::VBROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_256, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcastf32x2_512, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_256, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf32x4_512, BRCST_SUBVEC_TO_VEC, @@ -487,12 +487,12 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcastf64x4_512, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), - X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, INTR_TYPE_1OP_MASK, - X86ISD::SUBV_BROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_128, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_256, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_mask_broadcasti32x2_512, BRCST32x2_TO_VEC, + X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_256, BRCST_SUBVEC_TO_VEC, X86ISD::SHUF128, 0), X86_INTRINSIC_DATA(avx512_mask_broadcasti32x4_512, BRCST_SUBVEC_TO_VEC, diff --git a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll index 5a1576fe7ea..1ef679a687e 100644 --- a/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -2226,17 +2226,20 @@ define <8 x float>@test_int_x86_avx512_mask_broadcastf32x2_256(<4 x float> %x0, declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32>, <8 x i32>, i8) -define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3) { +define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x2_256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3, i64 * %y_ptr) { ; CHECK-LABEL: test_int_x86_avx512_mask_broadcasti32x2_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] -; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x59,0xc8] +; CHECK-NEXT: vbroadcasti32x2 (%rsi), %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x59,0x0e] ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x59,0xd0] ; CHECK-NEXT: vbroadcasti32x2 %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x59,0xc0] ; CHECK-NEXT: vpaddd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xca] ; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 %x3) + %y_64 = load i64, i64 * %y_ptr + %y_v2i64 = insertelement <2 x i64> undef, i64 %y_64, i32 0 + %y = bitcast <2 x i64> %y_v2i64 to <4 x i32> + %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %y, <8 x i32> %x2, i8 %x3) %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %x3) %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x2.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1) %res3 = add <8 x i32> %res, %res1 diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index f2a967a2e21..c8c7b1354f4 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -6786,17 +6786,19 @@ define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i6 declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8) -define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) { +define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32 * %y_ptr) { ; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256: ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: vpbroadcastd (%rsi), %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x58,0x16] ; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8] -; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xd0] -; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x58,0xc0] +; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0] +; CHECK-NEXT: vpaddd %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc9] ; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1] -; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1) + %y_32 = load i32, i32 * %y_ptr + %y = insertelement <4 x i32> undef, i32 %y_32, i32 0 + %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1) %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask) %res3 = add <8 x i32> %res, %res1 |

