diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 8 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 81 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 8 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 5 | 
4 files changed, 41 insertions, 61 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b44b18118e7..3548b9e1c82 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -17869,19 +17869,21 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget &Subtarget      }      case VPERM_3OP_MASKZ:      case VPERM_3OP_MASK:{ +      MVT VT = Op.getSimpleValueType();        // Src2 is the PassThru        SDValue Src1 = Op.getOperand(1); -      SDValue Src2 = Op.getOperand(2); +      // PassThru needs to be the same type as the destination in order +      // to pattern match correctly. +      SDValue Src2 = DAG.getBitcast(VT, Op.getOperand(2));        SDValue Src3 = Op.getOperand(3);        SDValue Mask = Op.getOperand(4); -      MVT VT = Op.getSimpleValueType();        SDValue PassThru = SDValue();        // set PassThru element        if (IntrData->Type == VPERM_3OP_MASKZ)          PassThru = getZeroVector(VT, Subtarget, DAG, dl);        else -        PassThru = DAG.getBitcast(VT, Src2); +        PassThru = Src2;        // Swap Src1 and Src2 in the node creation        return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 59218bcf552..469acea9949 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -299,22 +299,6 @@ multiclass AVX512_maskable_3src<bits<8> O, Format F, X86VectorVTInfo _,                            (vselect _.KRCWM:$mask, RHS, _.RC:$src1),                            vselect, "", NoItinerary, IsCommutable, IsKCommutable>; -// Similar to AVX512_maskable_3src but in this case the input VT for the tied -// operand differs from the output VT. This requires a bitconvert on -// the preserved vector going into the vselect. -multiclass AVX512_maskable_3src_cast<bits<8> O, Format F, X86VectorVTInfo OutVT, -                                     X86VectorVTInfo InVT, -                                     dag Outs, dag NonTiedIns, string OpcodeStr, -                                     string AttSrcAsm, string IntelSrcAsm, -                                     dag RHS> : -   AVX512_maskable_common<O, F, OutVT, Outs, -                          !con((ins InVT.RC:$src1), NonTiedIns), -                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), -                          !con((ins InVT.RC:$src1, InVT.KRCWM:$mask), NonTiedIns), -                          OpcodeStr, AttSrcAsm, IntelSrcAsm, RHS, -                          (vselect InVT.KRCWM:$mask, RHS, -                           (bitconvert InVT.RC:$src1))>; -  multiclass AVX512_maskable_3src_scalar<bits<8> O, Format F, X86VectorVTInfo _,                                       dag Outs, dag NonTiedIns, string OpcodeStr,                                       string AttSrcAsm, string IntelSrcAsm, @@ -1181,83 +1165,76 @@ defm VPBROADCASTMB2Q : avx512_mask_broadcast<0x2A, "vpbroadcastmb2q",  //===----------------------------------------------------------------------===//  // -- VPERMI2 - 3 source operands form -- -multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, -                         X86VectorVTInfo _, X86VectorVTInfo IdxVT> { +multiclass avx512_perm_i<bits<8> opc, string OpcodeStr, X86VectorVTInfo _> {  let Constraints = "$src1 = $dst" in { -  defm rr: AVX512_maskable_3src_cast<opc, MRMSrcReg, _, IdxVT, (outs _.RC:$dst), +  // The index operand in the pattern should really be an integer type. However, +  // if we do that and it happens to come from a bitcast, then it becomes +  // difficult to find the bitcast needed to convert the index to the +  // destination type for the passthru since it will be folded with the bitcast +  // of the index operand. +  defm rr: AVX512_maskable_3src<opc, MRMSrcReg, _, (outs _.RC:$dst),            (ins _.RC:$src2, _.RC:$src3),            OpcodeStr, "$src3, $src2", "$src2, $src3", -          (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V, +          (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2, _.RC:$src3))>, EVEX_4V,           AVX5128IBase; -  defm rm: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), +  defm rm: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),              (ins _.RC:$src2, _.MemOp:$src3),              OpcodeStr, "$src3, $src2", "$src2, $src3", -            (_.VT (X86VPermi2X IdxVT.RC:$src1, _.RC:$src2, +            (_.VT (X86VPermi2X _.RC:$src1, _.RC:$src2,                     (_.VT (bitconvert (_.LdFrag addr:$src3)))))>,              EVEX_4V, AVX5128IBase;    }  }  multiclass avx512_perm_i_mb<bits<8> opc, string OpcodeStr, -                            X86VectorVTInfo _, X86VectorVTInfo IdxVT> { +                            X86VectorVTInfo _> {    let Constraints = "$src1 = $dst" in -  defm rmb: AVX512_maskable_3src_cast<opc, MRMSrcMem, _, IdxVT, (outs _.RC:$dst), +  defm rmb: AVX512_maskable_3src<opc, MRMSrcMem, _, (outs _.RC:$dst),                (ins _.RC:$src2, _.ScalarMemOp:$src3),                OpcodeStr,   !strconcat("${src3}", _.BroadcastStr,", $src2"),                !strconcat("$src2, ${src3}", _.BroadcastStr ), -              (_.VT (X86VPermi2X IdxVT.RC:$src1, +              (_.VT (X86VPermi2X _.RC:$src1,                 _.RC:$src2,(_.VT (X86VBroadcast (_.ScalarLdFrag addr:$src3)))))>,                AVX5128IBase, EVEX_4V, EVEX_B;  }  multiclass avx512_perm_i_sizes<bits<8> opc, string OpcodeStr, -                               AVX512VLVectorVTInfo VTInfo, -                               AVX512VLVectorVTInfo ShuffleMask> { -  defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512, -                           ShuffleMask.info512>, -            avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512, -                             ShuffleMask.info512>, EVEX_V512; +                               AVX512VLVectorVTInfo VTInfo> { +  defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, +            avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;    let Predicates = [HasVLX] in { -  defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128, -                               ShuffleMask.info128>, -                 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128, -                                  ShuffleMask.info128>, EVEX_V128; -  defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256, -                               ShuffleMask.info256>, -                 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256, -                                  ShuffleMask.info256>,  EVEX_V256; +  defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, +                 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info128>, EVEX_V128; +  defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>, +                 avx512_perm_i_mb<opc, OpcodeStr, VTInfo.info256>, EVEX_V256;    }  }  multiclass avx512_perm_i_sizes_bw<bits<8> opc, string OpcodeStr,                                   AVX512VLVectorVTInfo VTInfo, -                                 AVX512VLVectorVTInfo Idx,                                   Predicate Prd> {    let Predicates = [Prd] in -  defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512, -                           Idx.info512>, EVEX_V512; +  defm NAME: avx512_perm_i<opc, OpcodeStr, VTInfo.info512>, EVEX_V512;    let Predicates = [Prd, HasVLX] in { -  defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128, -                               Idx.info128>, EVEX_V128; -  defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256, -                               Idx.info256>,  EVEX_V256; +  defm NAME#128: avx512_perm_i<opc, OpcodeStr, VTInfo.info128>, EVEX_V128; +  defm NAME#256: avx512_perm_i<opc, OpcodeStr, VTInfo.info256>,  EVEX_V256;    }  }  defm VPERMI2D  : avx512_perm_i_sizes<0x76, "vpermi2d", -                  avx512vl_i32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +                  avx512vl_i32_info>, EVEX_CD8<32, CD8VF>;  defm VPERMI2Q  : avx512_perm_i_sizes<0x76, "vpermi2q", -                  avx512vl_i64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +                  avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>;  defm VPERMI2W  : avx512_perm_i_sizes_bw<0x75, "vpermi2w", -                  avx512vl_i16_info, avx512vl_i16_info, HasBWI>, +                  avx512vl_i16_info, HasBWI>,                    VEX_W, EVEX_CD8<16, CD8VF>;  defm VPERMI2B  : avx512_perm_i_sizes_bw<0x75, "vpermi2b", -                  avx512vl_i8_info, avx512vl_i8_info, HasVBMI>, +                  avx512vl_i8_info, HasVBMI>,                    EVEX_CD8<8, CD8VF>;  defm VPERMI2PS : avx512_perm_i_sizes<0x77, "vpermi2ps", -                  avx512vl_f32_info, avx512vl_i32_info>, EVEX_CD8<32, CD8VF>; +                  avx512vl_f32_info>, EVEX_CD8<32, CD8VF>;  defm VPERMI2PD : avx512_perm_i_sizes<0x77, "vpermi2pd", -                  avx512vl_f64_info, avx512vl_i64_info>, VEX_W, EVEX_CD8<64, CD8VF>; +                  avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VF>;  // VPERMT2  multiclass avx512_perm_t<bits<8> opc, string OpcodeStr, diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index d2a190530ee..08b7e022841 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -414,10 +414,12 @@ def X86VPermt2     : SDNode<"X86ISD::VPERMV3",                                           SDTCisSameSizeAs<0,2>,                                           SDTCisSameAs<0,3>]>, []>; +// Even though the index operand should be integer, we need to make it match the +// destination type so that we can pattern match the masked version where the +// index is also the passthru operand.  def X86VPermi2X   : SDNode<"X86ISD::VPERMIV3", -                    SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisInt<1>, -                                         SDTCisVec<1>, SDTCisSameNumEltsAs<0, 1>, -                                         SDTCisSameSizeAs<0,1>, +                    SDTypeProfile<1, 3, [SDTCisVec<0>, +                                         SDTCisSameAs<0,1>,                                           SDTCisSameAs<0,2>,                                           SDTCisSameAs<0,3>]>, []>; diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 2d7dca8025e..e9d106ba642 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -2030,9 +2030,8 @@ define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %  ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:  ; CHECK:       ## BB#0:  ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] -; CHECK-NEXT:    vmovdqa64 %xmm1, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xd9] -; CHECK-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm3 ## encoding: [0x62,0xf2,0x7d,0x08,0x77,0xda] -; CHECK-NEXT:    vblendmps %xmm3, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x65,0xc3] +; CHECK-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca] +; CHECK-NEXT:    vmovdqa64 %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0xc1]  ; CHECK-NEXT:    retq ## encoding: [0xc3]    %x1cast = bitcast <2 x i64> %x1 to <4 x i32>    %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)  | 

