Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h    |   3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |   2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp       |  14
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h         |   3
-rw-r--r--  llvm/test/CodeGen/X86/vector-mul.ll           | 100
5 files changed, 41 insertions(+), 81 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index ebe8872e5de..3d6f4700255 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1904,7 +1904,8 @@ public:
   /// This may be true if the target does not directly support the
   /// multiplication operation for the specified type or the sequence of simpler
   /// ops is faster than the multiply.
-  virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+  virtual bool decomposeMulByConstant(LLVMContext &Context,
+                                      EVT VT, SDValue C) const {
     return false;
   }

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 185a9a72fa3..9d8850b59f5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3556,7 +3556,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   //           x * 15 --> (x << 4) - x
   //           x * -33 --> -((x << 5) + x)
   //           x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
-  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4b8987b0a02..46b31894df7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4869,15 +4869,25 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
   return true;
 }

-bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                               SDValue C) const {
   // TODO: We handle scalars using custom code, but generic combining could make
   // that unnecessary.
   APInt MulC;
   if (!ISD::isConstantSplatVector(C.getNode(), MulC))
     return false;

+  // Find the type this will be legalized too. Otherwise we might prematurely
+  // convert this to shl+add/sub and then still have to type legalize those ops.
+  // Another choice would be to defer the decision for illegal types until
+  // after type legalization. But constant splat vectors of i64 can't make it
+  // through type legalization on 32-bit targets so we would need to special
+  // case vXi64.
+  while (getTypeAction(Context, VT) != TypeLegal)
+    VT = getTypeToTransformTo(Context, VT);
+
   // If vector multiply is legal, assume that's faster than shl + add/sub.
-  // TODO: Multiply is a complex op with higher latency and lower througput in
+  // TODO: Multiply is a complex op with higher latency and lower throughput in
   //       most implementations, so this check could be loosened based on type
   //       and/or a CPU attribute.
   if (isOperationLegal(ISD::MUL, VT))
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 625b42d3515..8dc58a188dd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1105,7 +1105,8 @@ namespace llvm {

     bool convertSelectOfConstantsToMath(EVT VT) const override;

-    bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                SDValue C) const override;

     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                   bool IsSigned) const override;
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 1377d1ce920..805ff9f69ed 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -435,26 +435,16 @@ define <4 x i64> @mul_v4i64_17(<4 x i64> %a0) nounwind {
 define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    pslld $4, %xmm2
-; X86-NEXT:    paddd %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $4, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    pslld $4, %xmm2
-; X64-NEXT:    paddd %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $4, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_17:
@@ -484,26 +474,16 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
 define <16 x i16> @mul_v16i16_17(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    psllw $4, %xmm2
-; X86-NEXT:    paddw %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $4, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    psllw $4, %xmm2
-; X64-NEXT:    paddw %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $4, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_17:
@@ -797,32 +777,16 @@ define <4 x i64> @mul_v4i64_neg1025(<4 x i64> %a0) nounwind {
 define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_neg33:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubd %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    psubd %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_neg33:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubd %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    psubd %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_neg33:
@@ -855,32 +819,16 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
 define <16 x i16> @mul_v16i16_neg9(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_neg9:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubw %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    psubw %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_neg9:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubw %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    psubw %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_neg9:
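Note: only the FileCheck assembly lines of vector-mul.ll appear in the diff above. As a minimal sketch of the kind of IR these tests cover (the body below is inferred from the function name in the diff, not copied from the test file): v8i32 is not a legal type on the plain-SSE X86/X64 run configurations, so it is split into two v4i32 halves during type legalization. Because the X86 hook now walks VT through getTypeToTransformTo before deciding, it sees that ISD::MUL on the legal v4i32 type is available (pmulld, which requires SSE4.1, appears in the expected output), and it declines to decompose the constant multiply. That is why the CHECK lines change from pslld/paddd (and pxor/psubd for the negative constants) to two pmulld or pmullw instructions against a splat-constant register.

define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
  ; Constant-splat multiply; previously decomposed to (x << 4) + x per half,
  ; now kept as a multiply because the legalized v4i32 type has a legal MUL.
  %m = mul <8 x i32> %a0, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  ret <8 x i32> %m
}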

