 llvm/include/llvm/CodeGen/TargetLowering.h      |   3
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp   |   2
 llvm/lib/Target/X86/X86ISelLowering.cpp         |  14
 llvm/lib/Target/X86/X86ISelLowering.h           |   3
 llvm/test/CodeGen/X86/vector-mul.ll             | 100
 5 files changed, 41 insertions(+), 81 deletions(-)
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index ebe8872e5de..3d6f4700255 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1904,7 +1904,8 @@ public:
   /// This may be true if the target does not directly support the
   /// multiplication operation for the specified type or the sequence of simpler
   /// ops is faster than the multiply.
-  virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+  virtual bool decomposeMulByConstant(LLVMContext &Context,
+                                      EVT VT, SDValue C) const {
     return false;
   }
 
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 185a9a72fa3..9d8850b59f5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3556,7 +3556,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
   // x * 15 --> (x << 4) - x
   // x * -33 --> -((x << 5) + x)
   // x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
-  if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+  if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
     // TODO: We could handle more general decomposition of any constant by
     //       having the target set a limit on number of ops and making a
     //       callback to determine that sequence (similar to sqrt expansion).
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4b8987b0a02..46b31894df7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4869,15 +4869,25 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
   return true;
 }
 
-bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                               SDValue C) const {
   // TODO: We handle scalars using custom code, but generic combining could make
   // that unnecessary.
   APInt MulC;
   if (!ISD::isConstantSplatVector(C.getNode(), MulC))
     return false;
 
+  // Find the type this will be legalized to. Otherwise we might prematurely
+  // convert this to shl+add/sub and then still have to type legalize those ops.
+  // Another choice would be to defer the decision for illegal types until
+  // after type legalization. But constant splat vectors of i64 can't make it
+  // through type legalization on 32-bit targets so we would need to special
+  // case vXi64.
+  while (getTypeAction(Context, VT) != TypeLegal)
+    VT = getTypeToTransformTo(Context, VT);
+
   // If vector multiply is legal, assume that's faster than shl + add/sub.
-  // TODO: Multiply is a complex op with higher latency and lower througput in
+  // TODO: Multiply is a complex op with higher latency and lower throughput in
   // most implementations, so this check could be loosened based on type
   // and/or a CPU attribute.
   if (isOperationLegal(ISD::MUL, VT))
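The new comment block above carries the key reasoning of the patch: decide whether to decompose a constant multiply based on the type it will be legalized to, not the type it has before legalization. The following is a minimal standalone C++ sketch of that decision, illustrative only and not part of the patch. The enum, tables, and function names are invented stand-ins for getTypeAction, getTypeToTransformTo, and isOperationLegal, under the assumption of an SSE4.1-level subtarget where v8i32 is split into two legal v4i32 halves and PMULLD makes the v4i32 multiply legal.

    #include <cassert>
    #include <string>

    // Toy stand-ins (not the LLVM API) for the type-legalization queries the
    // patch consults: type action, the type a value is transformed to, and
    // whether MUL is legal on a given type.
    enum class Action { Legal, Split };

    // Assumption: an SSE4.1-like target where v8i32 is not a legal register
    // type (it gets split) and v4i32 is legal with a legal PMULLD multiply.
    Action getAction(const std::string &VT) {
      return VT == "v8i32" ? Action::Split : Action::Legal;
    }
    std::string typeToTransformTo(const std::string &VT) {
      return VT == "v8i32" ? "v4i32" : VT; // splitting halves the vector
    }
    bool isMulLegal(const std::string &VT) { return VT == "v4i32"; }

    // Mirrors the shape of the new X86 hook: walk to the type the operation
    // will actually be legalized to, then decide using that type.
    bool decomposeMulByConstant(std::string VT) {
      while (getAction(VT) != Action::Legal)
        VT = typeToTransformTo(VT);
      // If a vector multiply is legal on the legalized type, keep the multiply.
      return !isMulLegal(VT);
    }

    int main() {
      // With the legalized type consulted, v8i32 answers "keep the multiply",
      // which is why the test output below switches from shl+add to pmulld.
      assert(!decomposeMulByConstant("v8i32"));
      assert(!decomposeMulByConstant("v4i32"));
      return 0;
    }

On a target where even the legalized type has no legal vector multiply, the hook would still return true and the DAGCombiner shl+add/sub expansion quoted earlier would fire as before.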
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 625b42d3515..8dc58a188dd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1105,7 +1105,8 @@ namespace llvm {
 
     bool convertSelectOfConstantsToMath(EVT VT) const override;
 
-    bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+    bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+                                SDValue C) const override;
 
     bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
                                   bool IsSigned) const override;
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 1377d1ce920..805ff9f69ed 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -435,26 +435,16 @@ define <4 x i64> @mul_v4i64_17(<4 x i64> %a0) nounwind {
 define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    pslld $4, %xmm2
-; X86-NEXT:    paddd %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $4, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    pslld $4, %xmm2
-; X64-NEXT:    paddd %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $4, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_17:
@@ -484,26 +474,16 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
 define <16 x i16> @mul_v16i16_17(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_17:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm2
-; X86-NEXT:    psllw $4, %xmm2
-; X86-NEXT:    paddw %xmm0, %xmm2
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $4, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    movdqa %xmm2, %xmm0
-; X86-NEXT:    movdqa %xmm3, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_17:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm2
-; X64-NEXT:    psllw $4, %xmm2
-; X64-NEXT:    paddw %xmm0, %xmm2
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $4, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    movdqa %xmm2, %xmm0
-; X64-NEXT:    movdqa %xmm3, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_17:
@@ -797,32 +777,16 @@ define <4 x i64> @mul_v4i64_neg1025(<4 x i64> %a0) nounwind {
 define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
 ; X86-LABEL: mul_v8i32_neg33:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubd %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    pslld $5, %xmm3
-; X86-NEXT:    paddd %xmm1, %xmm3
-; X86-NEXT:    psubd %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X86-NEXT:    pmulld %xmm2, %xmm0
+; X86-NEXT:    pmulld %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v8i32_neg33:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubd %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    pslld $5, %xmm3
-; X64-NEXT:    paddd %xmm1, %xmm3
-; X64-NEXT:    psubd %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X64-NEXT:    pmulld %xmm2, %xmm0
+; X64-NEXT:    pmulld %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v8i32_neg33:
@@ -855,32 +819,16 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
 define <16 x i16> @mul_v16i16_neg9(<16 x i16> %a0) nounwind {
 ; X86-LABEL: mul_v16i16_neg9:
 ; X86:       # %bb.0:
-; X86-NEXT:    movdqa %xmm0, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm0, %xmm3
-; X86-NEXT:    pxor %xmm2, %xmm2
-; X86-NEXT:    pxor %xmm0, %xmm0
-; X86-NEXT:    psubw %xmm3, %xmm0
-; X86-NEXT:    movdqa %xmm1, %xmm3
-; X86-NEXT:    psllw $3, %xmm3
-; X86-NEXT:    paddw %xmm1, %xmm3
-; X86-NEXT:    psubw %xmm3, %xmm2
-; X86-NEXT:    movdqa %xmm2, %xmm1
+; X86-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X86-NEXT:    pmullw %xmm2, %xmm0
+; X86-NEXT:    pmullw %xmm2, %xmm1
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: mul_v16i16_neg9:
 ; X64:       # %bb.0:
-; X64-NEXT:    movdqa %xmm0, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm0, %xmm3
-; X64-NEXT:    pxor %xmm2, %xmm2
-; X64-NEXT:    pxor %xmm0, %xmm0
-; X64-NEXT:    psubw %xmm3, %xmm0
-; X64-NEXT:    movdqa %xmm1, %xmm3
-; X64-NEXT:    psllw $3, %xmm3
-; X64-NEXT:    paddw %xmm1, %xmm3
-; X64-NEXT:    psubw %xmm3, %xmm2
-; X64-NEXT:    movdqa %xmm2, %xmm1
+; X64-NEXT:    movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X64-NEXT:    pmullw %xmm2, %xmm0
+; X64-NEXT:    pmullw %xmm2, %xmm1
 ; X64-NEXT:    retq
 ;
 ; X64-XOP-LABEL: mul_v16i16_neg9:
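The deleted CHECK lines in these test hunks were the per-128-bit-half shl+add/sub expansions named in the DAGCombiner comment: x * 17 as (x << 4) + x, x * -33 as -((x << 5) + x), and x * -9 as -((x << 3) + x). The new output instead keeps one splat-constant pmulld/pmullw per half, with -33 and -9 printed as the unsigned values 4294967263 and 65527. A small standalone check of those scalar identities, written here purely for illustration and not part of the patch:

    #include <cassert>
    #include <cstdint>

    // Scalar versions of the shl+add/sub sequences the old CHECK lines encoded.
    // Unsigned types keep the shifts and wrap-around well-defined, matching the
    // modular arithmetic the vector instructions perform.
    uint32_t mul17(uint32_t x)    { return (x << 4) + x; }        // x * 17
    uint32_t mulNeg33(uint32_t x) { return 0u - ((x << 5) + x); } // x * -33
    uint16_t mulNeg9(uint16_t x) {                                // x * -9
      return static_cast<uint16_t>(0u - ((static_cast<uint32_t>(x) << 3) + x));
    }

    int main() {
      for (uint32_t x : {0u, 1u, 7u, 12345u, 0xFFFFFFFFu}) {
        assert(mul17(x) == x * 17u);
        assert(mulNeg33(x) == x * 4294967263u);            // -33 as unsigned i32
        uint16_t h = static_cast<uint16_t>(x);
        assert(mulNeg9(h) == static_cast<uint16_t>(h * 65527u)); // -9 as unsigned i16
      }
      return 0;
    }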

