diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrAVX512.td | 12 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86IntrinsicsInfo.h | 4 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-intrinsics.ll | 33 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll | 24 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512vl-intrinsics.ll | 4 | 
8 files changed, 37 insertions, 43 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 383ec563b98..086dbd5dd83 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -21923,6 +21923,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {    case X86ISD::FSQRT_RND:          return "X86ISD::FSQRT_RND";    case X86ISD::FGETEXP_RND:        return "X86ISD::FGETEXP_RND";    case X86ISD::SCALEF:             return "X86ISD::SCALEF"; +  case X86ISD::SCALEFS:            return "X86ISD::SCALEFS";    case X86ISD::ADDS:               return "X86ISD::ADDS";    case X86ISD::SUBS:               return "X86ISD::SUBS";    case X86ISD::AVG:                return "X86ISD::AVG"; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 4cf35d2436b..6cebebefd01 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -213,6 +213,7 @@ namespace llvm {        VGETMANT,        // FP Scale.        SCALEF, +      SCALEFS,        // Integer add/sub with unsigned saturation.        ADDUS, diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td index 7a30aa2d27b..4dfc6a9a3bf 100644 --- a/llvm/lib/Target/X86/X86InstrAVX512.td +++ b/llvm/lib/Target/X86/X86InstrAVX512.td @@ -3827,18 +3827,18 @@ multiclass avx512_fp_scalef_scalar<bits<8> opc, string OpcodeStr, SDNode OpNode,    }//let mayLoad = 1  } -multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode> { +multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr, SDNode OpNode, SDNode OpNodeScal> {    defm PSZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v16f32_info>,               avx512_fp_round_packed<opc, OpcodeStr, OpNode, v16f32_info>,                                EVEX_V512, EVEX_CD8<32, CD8VF>;    defm PDZ : avx512_fp_scalef_p<opc, OpcodeStr, OpNode, v8f64_info>,               avx512_fp_round_packed<opc, OpcodeStr, OpNode, v8f64_info>,                                EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f32x_info>, -                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNode, SSE_ALU_ITINS_S.s>, +  defm SSZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f32x_info>, +                avx512_fp_scalar_round<opcScaler, OpcodeStr##"ss", f32x_info, OpNodeScal, SSE_ALU_ITINS_S.s>,                                EVEX_4V,EVEX_CD8<32, CD8VT1>; -  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNode, f64x_info>, -                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNode, SSE_ALU_ITINS_S.d>, +  defm SDZ128 : avx512_fp_scalef_scalar<opcScaler, OpcodeStr, OpNodeScal, f64x_info>, +                avx512_fp_scalar_round<opcScaler, OpcodeStr##"sd", f64x_info, OpNodeScal, SSE_ALU_ITINS_S.d>,                                EVEX_4V, EVEX_CD8<64, CD8VT1>, VEX_W;    // Define only if AVX512VL feature is present. @@ -3853,7 +3853,7 @@ multiclass avx512_fp_scalef_all<bits<8> opc, bits<8> opcScaler, string OpcodeStr                                     EVEX_V256, VEX_W, EVEX_CD8<64, CD8VF>;    }  } -defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef>, T8PD; +defm VSCALEF : avx512_fp_scalef_all<0x2C, 0x2D, "vscalef", X86scalef, X86scalefs>, T8PD;  //===----------------------------------------------------------------------===//  // AVX-512  VPTESTM instructions diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td index c39924a79fd..b1e35184e57 100644 --- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td +++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td @@ -460,6 +460,7 @@ def X86fmulRnd   : SDNode<"X86ISD::FMUL_RND",  SDTFPBinOpRound>;  def X86fdivRnd   : SDNode<"X86ISD::FDIV_RND",  SDTFPBinOpRound>;  def X86fmaxRnd   : SDNode<"X86ISD::FMAX_RND",       SDTFPBinOpRound>;  def X86scalef    : SDNode<"X86ISD::SCALEF",         SDTFPBinOpRound>; +def X86scalefs   : SDNode<"X86ISD::SCALEFS",        SDTFPBinOpRound>;  def X86fminRnd   : SDNode<"X86ISD::FMIN_RND",       SDTFPBinOpRound>;  def X86fsqrtRnd     : SDNode<"X86ISD::FSQRT_RND",   SDTFPUnaryOpRound>;  def X86fsqrtRnds    : SDNode<"X86ISD::FSQRT_RND",   SDTFPBinOpRound>; diff --git a/llvm/lib/Target/X86/X86IntrinsicsInfo.h b/llvm/lib/Target/X86/X86IntrinsicsInfo.h index 559209690e9..742b14095ab 100644 --- a/llvm/lib/Target/X86/X86IntrinsicsInfo.h +++ b/llvm/lib/Target/X86/X86IntrinsicsInfo.h @@ -1659,9 +1659,9 @@ static const IntrinsicData  IntrinsicsWithoutChain[] = {    X86_INTRINSIC_DATA(avx512_mask_scalef_ps_512, INTR_TYPE_2OP_MASK_RM,                       X86ISD::SCALEF, 0),    X86_INTRINSIC_DATA(avx512_mask_scalef_sd, INTR_TYPE_SCALAR_MASK_RM, -                     X86ISD::SCALEF, 0), +                     X86ISD::SCALEFS, 0),    X86_INTRINSIC_DATA(avx512_mask_scalef_ss, INTR_TYPE_SCALAR_MASK_RM, -                     X86ISD::SCALEF, 0), +                     X86ISD::SCALEFS, 0),    X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4, INTR_TYPE_3OP_IMM8_MASK,                       X86ISD::SHUF128, 0),    X86_INTRINSIC_DATA(avx512_mask_shuf_f32x4_256, INTR_TYPE_3OP_IMM8_MASK, diff --git a/llvm/test/CodeGen/X86/avx512-intrinsics.ll b/llvm/test/CodeGen/X86/avx512-intrinsics.ll index 133d3a80c0f..3fb9dc80ffb 100644 --- a/llvm/test/CodeGen/X86/avx512-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512-intrinsics.ll @@ -5616,39 +5616,6 @@ define <16 x i32>@test_int_x86_avx512_mask_cvtt_ps2udq_512(<16 x float> %x0, <16    ret <16 x i32> %res2  } - -declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) -define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { -; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss: -; CHECK:       ## BB#0: -; CHECK-NEXT:    andl $1, %edi -; CHECK-NEXT:    kmovw %edi, %k1 -; CHECK-NEXT:    vscalefss %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0 -; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 -; CHECK-NEXT:    retq -  %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) -  %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8) -  %res2 = fadd <4 x float> %res, %res1 -  ret <4 x float> %res2 -} - -declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) -define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { -; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd: -; CHECK:       ## BB#0: -; CHECK-NEXT:    andl $1, %edi -; CHECK-NEXT:    kmovw %edi, %k1 -; CHECK-NEXT:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1} -; CHECK-NEXT:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0 -; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 -; CHECK-NEXT:    retq -  %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) -  %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8) -  %res2 = fadd <2 x double> %res, %res1 -  ret <2 x double> %res2 -} -  declare <4 x float> @llvm.x86.avx512.mask.getexp.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32) nounwind readnone  define <4 x float> @test_getexp_ss(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) { diff --git a/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll new file mode 100644 index 00000000000..44b790be0c0 --- /dev/null +++ b/llvm/test/CodeGen/X86/avx512-scalarIntrinsics.ll @@ -0,0 +1,24 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl | FileCheck %s + +declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) +define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { +  ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_ss: +  ; CHECK:    vscalefss %xmm1, %xmm0, %xmm2 {%k1} +  ; CHECK:    vscalefss {rn-sae}, %xmm1, %xmm0, %xmm0 +    %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) +    %res1 = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 -1, i32 8) +    %res2 = fadd <4 x float> %res, %res1 +    ret <4 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) +define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { +  ; CHECK-LABEL: test_int_x86_avx512_mask_scalef_sd: +  ; CHECK:    vscalefsd %xmm1, %xmm0, %xmm2 {%k1} +  ; CHECK:    vscalefsd {rn-sae}, %xmm1, %xmm0, %xmm0 +    %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) +    %res1 = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 -1, i32 8) +    %res2 = fadd <2 x double> %res, %res1 +    ret <2 x double> %res2 +} diff --git a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll index 94ab9e0857c..c97262f8daf 100644 --- a/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll +++ b/llvm/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4243,7 +4243,7 @@ define <2 x double>@test_int_x86_avx512_mask_scalef_pd_128(<2 x double> %x0, <2  ; CHECK:       ## BB#0:  ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]  ; CHECK-NEXT:    vscalefpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x2c,0xd1] -; CHECK-NEXT:    vscalefsd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2d,0xc1] +; CHECK-NEXT:    vscalefpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x2c,0xc1]  ; CHECK-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc0]  ; CHECK-NEXT:    retq ## encoding: [0xc3]    %res = call <2 x double> @llvm.x86.avx512.mask.scalef.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) @@ -4275,7 +4275,7 @@ define <4 x float>@test_int_x86_avx512_mask_scalef_ps_128(<4 x float> %x0, <4 x  ; CHECK:       ## BB#0:  ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]  ; CHECK-NEXT:    vscalefps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2c,0xd1] -; CHECK-NEXT:    vscalefss %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2d,0xc1] +; CHECK-NEXT:    vscalefps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2c,0xc1]  ; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]  ; CHECK-NEXT:    retq ## encoding: [0xc3]    %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)  | 

