diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 55 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 1 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 2 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx-logic.ll | 1 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/avx512-arith.ll | 71 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/vec_fabs.ll | 2 | 
6 files changed, 111 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index fa6f5c8be88..21bca74353c 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1340,6 +1340,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,      setOperationAction(ISD::FDIV,               MVT::v16f32, Legal);      setOperationAction(ISD::FSQRT,              MVT::v16f32, Legal);      setOperationAction(ISD::FNEG,               MVT::v16f32, Custom); +    setOperationAction(ISD::FABS,               MVT::v16f32, Custom);      setOperationAction(ISD::FADD,               MVT::v8f64, Legal);      setOperationAction(ISD::FSUB,               MVT::v8f64, Legal); @@ -1347,6 +1348,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,      setOperationAction(ISD::FDIV,               MVT::v8f64, Legal);      setOperationAction(ISD::FSQRT,              MVT::v8f64, Legal);      setOperationAction(ISD::FNEG,               MVT::v8f64, Custom); +    setOperationAction(ISD::FABS,               MVT::v8f64, Custom);      setOperationAction(ISD::FMA,                MVT::v8f64, Legal);      setOperationAction(ISD::FMA,                MVT::v16f32, Legal); @@ -26339,6 +26341,31 @@ static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG,    return SDValue();  } +static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, +                              const X86Subtarget *Subtarget) { +  EVT VT = N->getValueType(0); +  if (VT.is512BitVector() && !Subtarget->hasDQI()) { +    // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extension. +    // These logic operations may be executed in the integer domain. 
+    SDLoc dl(N); +    MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); +    MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); + +    SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); +    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); +    unsigned IntOpcode = 0; +    switch (N->getOpcode()) { +      default: llvm_unreachable("Unexpected FP logic op"); +      case X86ISD::FOR: IntOpcode = ISD::OR; break; +      case X86ISD::FXOR: IntOpcode = ISD::XOR; break; +      case X86ISD::FAND: IntOpcode = ISD::AND; break; +      case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; +    } +    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); +    return  DAG.getNode(ISD::BITCAST, dl, VT, IntOp); +  } +  return SDValue(); +}  /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes.  static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,                                   const X86Subtarget *Subtarget) { @@ -26354,19 +26381,7 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG,      if (C->getValueAPF().isPosZero())        return N->getOperand(0); -  EVT VT = N->getValueType(0); -  if (VT.is512BitVector() && !Subtarget->hasDQI()) { -    SDLoc dl(N); -    MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); -    MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); - -    SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); -    SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); -    unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR; -    SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); -    return  DAG.getNode(ISD::BITCAST, dl, VT, IntOp); -  } -  return SDValue(); +  return lowerX86FPLogicOp(N, DAG, Subtarget);  }  /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes. 
@@ -26391,7 +26406,8 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) {  }  /// Do target-specific dag combines on X86ISD::FAND nodes. -static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, +                                  const X86Subtarget *Subtarget) {    // FAND(0.0, x) -> 0.0    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))      if (C->getValueAPF().isPosZero()) @@ -26402,11 +26418,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) {      if (C->getValueAPF().isPosZero())        return N->getOperand(1); -  return SDValue(); +  return lowerX86FPLogicOp(N, DAG, Subtarget);  }  /// Do target-specific dag combines on X86ISD::FANDN nodes -static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, +                                   const X86Subtarget *Subtarget) {    // FANDN(0.0, x) -> x    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))      if (C->getValueAPF().isPosZero()) @@ -26417,7 +26434,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) {      if (C->getValueAPF().isPosZero())        return N->getOperand(1); -  return SDValue(); +  return lowerX86FPLogicOp(N, DAG, Subtarget);  }  static SDValue PerformBTCombine(SDNode *N, @@ -27233,8 +27250,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,    case X86ISD::FOR:         return PerformFORCombine(N, DAG, Subtarget);    case X86ISD::FMIN:    case X86ISD::FMAX:        return PerformFMinFMaxCombine(N, DAG); -  case X86ISD::FAND:        return PerformFANDCombine(N, DAG); -  case X86ISD::FANDN:       return PerformFANDNCombine(N, DAG); +  case X86ISD::FAND:        return PerformFANDCombine(N, DAG, Subtarget); +  case X86ISD::FANDN:       return PerformFANDNCombine(N, DAG, Subtarget);    case X86ISD::BT:          return PerformBTCombine(N, DAG, 
DCI);    case X86ISD::VZEXT_MOVL:  return PerformVZEXT_MOVLCombine(N, DAG);    case ISD::ANY_EXTEND: diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 4a4ceaca88f..b412f8fb3ec 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -770,6 +770,7 @@ def HasVLX       : Predicate<"Subtarget->hasVLX()">,                       AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">;  def NoVLX        : Predicate<"!Subtarget->hasVLX()">;  def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; +def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;  def HasPOPCNT    : Predicate<"Subtarget->hasPOPCNT()">;  def HasAES       : Predicate<"Subtarget->hasAES()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index a93240bd717..a545335dd5d 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -2906,7 +2906,7 @@ let isCodeGenOnly = 1 in {  // Multiclass for vectors using the X86 logical operation aliases for FP.  
multiclass sse12_fp_packed_vector_logical_alias<      bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { -  let Predicates = [HasAVX, NoVLX] in { +  let Predicates = [HasAVX, NoVLX_Or_NoDQI] in {    defm V#NAME#PS : sse12_fp_packed<opc, !strconcat(OpcodeStr, "ps"), OpNode,                VR128, v4f32, f128mem, loadv4f32, SSEPackedSingle, itins, 0>,                PS, VEX_4V; diff --git a/llvm/test/CodeGen/X86/avx-logic.ll b/llvm/test/CodeGen/X86/avx-logic.ll index a91fe7e0c52..e9e7d5aea27 100644 --- a/llvm/test/CodeGen/X86/avx-logic.ll +++ b/llvm/test/CodeGen/X86/avx-logic.ll @@ -1,5 +1,6 @@  ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s  define <4 x double> @andpd256(<4 x double> %y, <4 x double> %x) nounwind uwtable readnone ssp {  ; CHECK-LABEL: andpd256: diff --git a/llvm/test/CodeGen/X86/avx512-arith.ll b/llvm/test/CodeGen/X86/avx512-arith.ll index d7da77a5eb5..9220e4f269c 100644 --- a/llvm/test/CodeGen/X86/avx512-arith.ll +++ b/llvm/test/CodeGen/X86/avx512-arith.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f | FileCheck --check-prefix=CHECK --check-prefix=AVX512F %s  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl | FileCheck --check-prefix=CHECK --check-prefix=AVX512VL %s  ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bw | FileCheck --check-prefix=CHECK --check-prefix=AVX512BW %s @@ -823,3 +824,73 @@ define <16 x float>  @test_fxor(<16 x float> %a) {    ret <16 x float>%res  } +define <8 x float>  @test_fxor_8f32(<8 x float> %a) { +; CHECK-LABEL: test_fxor_8f32: +; CHECK:       ## BB#0: +; CHECK-NEXT:    vxorps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT:    retq +  %res = fsub <8 x float> <float -0.000000e+00, float 
-0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a +  ret <8 x float>%res +} + +define <8 x double> @fabs_v8f64(<8 x double> %p) +; AVX512F-LABEL: fabs_v8f64: +; AVX512F:       ## BB#0: +; AVX512F-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT:    retq +; +; AVX512VL-LABEL: fabs_v8f64: +; AVX512VL:       ## BB#0: +; AVX512VL-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT:    retq +; +; AVX512BW-LABEL: fabs_v8f64: +; AVX512BW:       ## BB#0: +; AVX512BW-NEXT:    vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT:    retq +; +; AVX512DQ-LABEL: fabs_v8f64: +; AVX512DQ:       ## BB#0: +; AVX512DQ-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT:    retq +; +; SKX-LABEL: fabs_v8f64: +; SKX:       ## BB#0: +; SKX-NEXT:    vandpd {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT:    retq +{ +  %t = call <8 x double> @llvm.fabs.v8f64(<8 x double> %p) +  ret <8 x double> %t +} +declare <8 x double> @llvm.fabs.v8f64(<8 x double> %p) + +define <16 x float> @fabs_v16f32(<16 x float> %p) +; AVX512F-LABEL: fabs_v16f32: +; AVX512F:       ## BB#0: +; AVX512F-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512F-NEXT:    retq +; +; AVX512VL-LABEL: fabs_v16f32: +; AVX512VL:       ## BB#0: +; AVX512VL-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VL-NEXT:    retq +; +; AVX512BW-LABEL: fabs_v16f32: +; AVX512BW:       ## BB#0: +; AVX512BW-NEXT:    vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT:    retq +; +; AVX512DQ-LABEL: fabs_v16f32: +; AVX512DQ:       ## BB#0: +; AVX512DQ-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0 +; AVX512DQ-NEXT:    retq +; +; SKX-LABEL: fabs_v16f32: +; SKX:       ## BB#0: +; SKX-NEXT:    vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT:    retq +{ +  %t = call <16 x float> @llvm.fabs.v16f32(<16 x float> %p) +  ret <16 x float> %t +} +declare <16 x float> @llvm.fabs.v16f32(<16 x float> %p) diff --git a/llvm/test/CodeGen/X86/vec_fabs.ll 
b/llvm/test/CodeGen/X86/vec_fabs.ll index 960b5f27cf5..54f33b2bd22 100644 --- a/llvm/test/CodeGen/X86/vec_fabs.ll +++ b/llvm/test/CodeGen/X86/vec_fabs.ll @@ -1,5 +1,5 @@  ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx | FileCheck %s - +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx512f | FileCheck %s  define <2 x double> @fabs_v2f64(<2 x double> %p)  {  | 

