Diffstat (limited to 'llvm')
-rw-r--r--  llvm/include/llvm/CodeGen/TargetLowering.h       3
-rw-r--r--  llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp    2
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp         14
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.h            3
-rw-r--r--  llvm/test/CodeGen/X86/vector-mul.ll            100
5 files changed, 41 insertions, 81 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index ebe8872e5de..3d6f4700255 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -1904,7 +1904,8 @@ public:
/// This may be true if the target does not directly support the
/// multiplication operation for the specified type or the sequence of simpler
/// ops is faster than the multiply.
- virtual bool decomposeMulByConstant(EVT VT, SDValue C) const {
+ virtual bool decomposeMulByConstant(LLVMContext &Context,
+ EVT VT, SDValue C) const {
return false;
}
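
The added LLVMContext parameter means any target that overrides this hook must update its declaration to match. A minimal sketch, assuming a hypothetical MyTargetLowering class that is not part of this patch, of what the signature change looks like on the target side:

#include "llvm/CodeGen/TargetLowering.h"

namespace llvm {
// Hypothetical target lowering class, shown only to illustrate the new
// override signature introduced by this patch.
class MyTargetLowering : public TargetLowering {
public:
  // Before this patch the override was:
  //   bool decomposeMulByConstant(EVT VT, SDValue C) const override;
  // After it, the hook also receives the LLVMContext needed to query
  // type legalization:
  bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
                              SDValue C) const override;
};
} // namespace llvm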
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 185a9a72fa3..9d8850b59f5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3556,7 +3556,7 @@ SDValue DAGCombiner::visitMUL(SDNode *N) {
// x * 15 --> (x << 4) - x
// x * -33 --> -((x << 5) + x)
// x * -15 --> -((x << 4) - x) ; this reduces --> x - (x << 4)
- if (N1IsConst && TLI.decomposeMulByConstant(VT, N1)) {
+ if (N1IsConst && TLI.decomposeMulByConstant(*DAG.getContext(), VT, N1)) {
// TODO: We could handle more general decomposition of any constant by
// having the target set a limit on number of ops and making a
// callback to determine that sequence (similar to sqrt expansion).
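
The shift/add identities listed in the comment above are ordinary integer arithmetic, so they can be spot-checked outside of LLVM. A small standalone sketch (plain C++, not part of the patch) that exercises them, using an explicit two's-complement shift helper so the negative cases stay well defined:

#include <cassert>
#include <cstdint>

int main() {
  // Shift on the two's-complement bit pattern, as the DAG nodes do.
  auto shl = [](int64_t v, unsigned s) {
    return static_cast<int64_t>(static_cast<uint64_t>(v) << s);
  };
  for (int64_t x = -1000; x <= 1000; ++x) {
    assert(x * 17 == shl(x, 4) + x);     // x * 17  --> (x << 4) + x
    assert(x * 15 == shl(x, 4) - x);     // x * 15  --> (x << 4) - x
    assert(x * -33 == -(shl(x, 5) + x)); // x * -33 --> -((x << 5) + x)
    assert(x * -15 == -(shl(x, 4) - x)); // x * -15 --> -((x << 4) - x)
    assert(x * -15 == x - shl(x, 4));    // ...which reduces to x - (x << 4)
  }
  return 0;
}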
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 4b8987b0a02..46b31894df7 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4869,15 +4869,25 @@ bool X86TargetLowering::convertSelectOfConstantsToMath(EVT VT) const {
return true;
}
-bool X86TargetLowering::decomposeMulByConstant(EVT VT, SDValue C) const {
+bool X86TargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const {
// TODO: We handle scalars using custom code, but generic combining could make
// that unnecessary.
APInt MulC;
if (!ISD::isConstantSplatVector(C.getNode(), MulC))
return false;
+ // Find the type this will be legalized to. Otherwise we might prematurely
+ // convert this to shl+add/sub and then still have to type legalize those ops.
+ // Another choice would be to defer the decision for illegal types until
+ // after type legalization. But constant splat vectors of i64 can't make it
+ // through type legalization on 32-bit targets so we would need to special
+ // case vXi64.
+ while (getTypeAction(Context, VT) != TypeLegal)
+ VT = getTypeToTransformTo(Context, VT);
+
// If vector multiply is legal, assume that's faster than shl + add/sub.
- // TODO: Multiply is a complex op with higher latency and lower througput in
+ // TODO: Multiply is a complex op with higher latency and lower throughput in
// most implementations, so this check could be loosened based on type
// and/or a CPU attribute.
if (isOperationLegal(ISD::MUL, VT))
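
For the AVX-less configurations exercised by the tests below, the type-legalization walk above is what changes the outcome of this legality check. A hedged trace, assuming an SSE4.1-capable subtarget without AVX; the values in the comments describe the v8i32 tests and are not computed here:

// Illustrative trace only, not part of the patch.
EVT VT = MVT::v8i32;                    // type used by the tests below; not legal without AVX
// getTypeAction(Context, VT) reports a split action, not TypeLegal, so the
// loop above runs once:
VT = getTypeToTransformTo(Context, VT); // -> MVT::v4i32, which is TypeLegal
// isOperationLegal(ISD::MUL, MVT::v4i32) is true on SSE4.1 (PMULLD), so the
// isOperationLegal check above rejects the decomposition and the multiply is
// kept, which is why the pslld+paddd / psllw+paddw sequences in the updated
// tests collapse back to pmulld / pmullw.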
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 625b42d3515..8dc58a188dd 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -1105,7 +1105,8 @@ namespace llvm {
bool convertSelectOfConstantsToMath(EVT VT) const override;
- bool decomposeMulByConstant(EVT VT, SDValue C) const override;
+ bool decomposeMulByConstant(LLVMContext &Context, EVT VT,
+ SDValue C) const override;
bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT,
bool IsSigned) const override;
diff --git a/llvm/test/CodeGen/X86/vector-mul.ll b/llvm/test/CodeGen/X86/vector-mul.ll
index 1377d1ce920..805ff9f69ed 100644
--- a/llvm/test/CodeGen/X86/vector-mul.ll
+++ b/llvm/test/CodeGen/X86/vector-mul.ll
@@ -435,26 +435,16 @@ define <4 x i64> @mul_v4i64_17(<4 x i64> %a0) nounwind {
define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
; X86-LABEL: mul_v8i32_17:
; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: pslld $4, %xmm2
-; X86-NEXT: paddd %xmm0, %xmm2
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: pslld $4, %xmm3
-; X86-NEXT: paddd %xmm1, %xmm3
-; X86-NEXT: movdqa %xmm2, %xmm0
-; X86-NEXT: movdqa %xmm3, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X86-NEXT: pmulld %xmm2, %xmm0
+; X86-NEXT: pmulld %xmm2, %xmm1
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i32_17:
; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: pslld $4, %xmm2
-; X64-NEXT: paddd %xmm0, %xmm2
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: pslld $4, %xmm3
-; X64-NEXT: paddd %xmm1, %xmm3
-; X64-NEXT: movdqa %xmm2, %xmm0
-; X64-NEXT: movdqa %xmm3, %xmm1
+; X64-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17]
+; X64-NEXT: pmulld %xmm2, %xmm0
+; X64-NEXT: pmulld %xmm2, %xmm1
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v8i32_17:
@@ -484,26 +474,16 @@ define <8 x i32> @mul_v8i32_17(<8 x i32> %a0) nounwind {
define <16 x i16> @mul_v16i16_17(<16 x i16> %a0) nounwind {
; X86-LABEL: mul_v16i16_17:
; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm2
-; X86-NEXT: psllw $4, %xmm2
-; X86-NEXT: paddw %xmm0, %xmm2
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: psllw $4, %xmm3
-; X86-NEXT: paddw %xmm1, %xmm3
-; X86-NEXT: movdqa %xmm2, %xmm0
-; X86-NEXT: movdqa %xmm3, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X86-NEXT: pmullw %xmm2, %xmm0
+; X86-NEXT: pmullw %xmm2, %xmm1
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i16_17:
; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm2
-; X64-NEXT: psllw $4, %xmm2
-; X64-NEXT: paddw %xmm0, %xmm2
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: psllw $4, %xmm3
-; X64-NEXT: paddw %xmm1, %xmm3
-; X64-NEXT: movdqa %xmm2, %xmm0
-; X64-NEXT: movdqa %xmm3, %xmm1
+; X64-NEXT: movdqa {{.*#+}} xmm2 = [17,17,17,17,17,17,17,17]
+; X64-NEXT: pmullw %xmm2, %xmm0
+; X64-NEXT: pmullw %xmm2, %xmm1
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i16_17:
@@ -797,32 +777,16 @@ define <4 x i64> @mul_v4i64_neg1025(<4 x i64> %a0) nounwind {
define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
; X86-LABEL: mul_v8i32_neg33:
; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: pslld $5, %xmm3
-; X86-NEXT: paddd %xmm0, %xmm3
-; X86-NEXT: pxor %xmm2, %xmm2
-; X86-NEXT: pxor %xmm0, %xmm0
-; X86-NEXT: psubd %xmm3, %xmm0
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: pslld $5, %xmm3
-; X86-NEXT: paddd %xmm1, %xmm3
-; X86-NEXT: psubd %xmm3, %xmm2
-; X86-NEXT: movdqa %xmm2, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X86-NEXT: pmulld %xmm2, %xmm0
+; X86-NEXT: pmulld %xmm2, %xmm1
; X86-NEXT: retl
;
; X64-LABEL: mul_v8i32_neg33:
; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: pslld $5, %xmm3
-; X64-NEXT: paddd %xmm0, %xmm3
-; X64-NEXT: pxor %xmm2, %xmm2
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: psubd %xmm3, %xmm0
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: pslld $5, %xmm3
-; X64-NEXT: paddd %xmm1, %xmm3
-; X64-NEXT: psubd %xmm3, %xmm2
-; X64-NEXT: movdqa %xmm2, %xmm1
+; X64-NEXT: movdqa {{.*#+}} xmm2 = [4294967263,4294967263,4294967263,4294967263]
+; X64-NEXT: pmulld %xmm2, %xmm0
+; X64-NEXT: pmulld %xmm2, %xmm1
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v8i32_neg33:
@@ -855,32 +819,16 @@ define <8 x i32> @mul_v8i32_neg33(<8 x i32> %a0) nounwind {
define <16 x i16> @mul_v16i16_neg9(<16 x i16> %a0) nounwind {
; X86-LABEL: mul_v16i16_neg9:
; X86: # %bb.0:
-; X86-NEXT: movdqa %xmm0, %xmm3
-; X86-NEXT: psllw $3, %xmm3
-; X86-NEXT: paddw %xmm0, %xmm3
-; X86-NEXT: pxor %xmm2, %xmm2
-; X86-NEXT: pxor %xmm0, %xmm0
-; X86-NEXT: psubw %xmm3, %xmm0
-; X86-NEXT: movdqa %xmm1, %xmm3
-; X86-NEXT: psllw $3, %xmm3
-; X86-NEXT: paddw %xmm1, %xmm3
-; X86-NEXT: psubw %xmm3, %xmm2
-; X86-NEXT: movdqa %xmm2, %xmm1
+; X86-NEXT: movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X86-NEXT: pmullw %xmm2, %xmm0
+; X86-NEXT: pmullw %xmm2, %xmm1
; X86-NEXT: retl
;
; X64-LABEL: mul_v16i16_neg9:
; X64: # %bb.0:
-; X64-NEXT: movdqa %xmm0, %xmm3
-; X64-NEXT: psllw $3, %xmm3
-; X64-NEXT: paddw %xmm0, %xmm3
-; X64-NEXT: pxor %xmm2, %xmm2
-; X64-NEXT: pxor %xmm0, %xmm0
-; X64-NEXT: psubw %xmm3, %xmm0
-; X64-NEXT: movdqa %xmm1, %xmm3
-; X64-NEXT: psllw $3, %xmm3
-; X64-NEXT: paddw %xmm1, %xmm3
-; X64-NEXT: psubw %xmm3, %xmm2
-; X64-NEXT: movdqa %xmm2, %xmm1
+; X64-NEXT: movdqa {{.*#+}} xmm2 = [65527,65527,65527,65527,65527,65527,65527,65527]
+; X64-NEXT: pmullw %xmm2, %xmm0
+; X64-NEXT: pmullw %xmm2, %xmm1
; X64-NEXT: retq
;
; X64-XOP-LABEL: mul_v16i16_neg9: