summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.cpp86
-rw-r--r--llvm/lib/Target/PowerPC/PPCISelLowering.h1
-rw-r--r--llvm/test/CodeGen/PowerPC/mul-const-i64.ll92
-rw-r--r--llvm/test/CodeGen/PowerPC/mul-const-vector.ll382
-rw-r--r--llvm/test/CodeGen/PowerPC/mul-const.ll79
-rw-r--r--llvm/test/CodeGen/PowerPC/mul-neg-power-2.ll8
-rw-r--r--llvm/test/CodeGen/PowerPC/mulli64.ll16
7 files changed, 640 insertions, 24 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index aa3fc7c315b..33f46634029 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1071,6 +1071,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
+ setTargetDAGCombine(ISD::MUL);
setTargetDAGCombine(ISD::SINT_TO_FP);
setTargetDAGCombine(ISD::BUILD_VECTOR);
if (Subtarget.hasFPCVT())
@@ -12643,6 +12644,8 @@ SDValue PPCTargetLowering::PerformDAGCombine(SDNode *N,
return combineSRA(N, DCI);
case ISD::SRL:
return combineSRL(N, DCI);
+ case ISD::MUL:
+ return combineMUL(N, DCI);
case PPCISD::SHL:
if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
return N->getOperand(0);
@@ -14565,6 +14568,89 @@ SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
return SDValue();
}
+/// Target-specific DAG combine for ISD::MUL by a constant.
+///
+/// Handles a scalar constant or constant-splat multiplier C whose magnitude
+/// is 2^N + 1 or 2^N - 1 and rewrites the multiply as a shift combined with
+/// an add or a sub:
+///   (mul x, 2^N + 1)    => (add (shl x, N), x)
+///   (mul x, -(2^N + 1)) => -(add (shl x, N), x)
+///   (mul x, 2^N - 1)    => (sub (shl x, N), x)
+///   (mul x, -(2^N - 1)) => (sub x, (shl x, N))
+///
+/// \returns the replacement value, or an empty SDValue when the multiplier
+/// does not match or the rewrite is not considered profitable.
+SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+
+  // Only a constant, or a splat of a single constant, on the RHS is handled.
+  ConstantSDNode *Multiplier = isConstOrConstSplat(N->getOperand(1));
+  if (!Multiplier)
+    return SDValue();
+
+  // When optimizing for minimum size, a legal multiply is usually smaller
+  // than the expanded sequence, so keep it.
+  const bool MinSize = DAG.getMachineFunction().getFunction().optForMinSize();
+  if (MinSize && isOperationLegal(ISD::MUL, VT))
+    return SDValue();
+
+  const APInt &MulAmt = Multiplier->getAPIntValue();
+  const bool IsNeg = MulAmt.isNegative();
+  APInt MulAmtAbs = MulAmt.abs();
+
+  // Classify |C|: 2^N + 1 is tried first, 2^N - 1 only if that fails.
+  const APInt AbsMinusOne = MulAmtAbs - 1;
+  const APInt AbsPlusOne = MulAmtAbs + 1;
+  const bool IsAddOne = AbsMinusOne.isPowerOf2();
+  if (!IsAddOne && !AbsPlusOne.isPowerOf2())
+    return SDValue();
+
+  // Relative cycle cost of the operations involved:
+  //   cpu   type    mul  add  shl
+  //   pwr8  scalar   4    1    1
+  //   pwr8  vector   7    2    2
+  //   pwr9  scalar   5    2    2
+  //   pwr9  vector   7    2    2
+  //
+  // The two-instruction forms (shl + add/sub) are always cheaper than the
+  // multiply. The three-instruction form -(add (shl x, N), x) costs 6 on
+  // pwr9, which beats the vector multiply (7) but not the scalar one (5), so
+  // there it is only used for vector types.
+  auto IsProfitable = [this](bool Neg, bool AddOne, EVT Ty) -> bool {
+    const unsigned Directive = Subtarget.getDarwinDirective();
+    if (Directive == PPC::DIR_PWR8)
+      return true;
+    if (Directive == PPC::DIR_PWR9)
+      return !(AddOne && Neg) || Ty.isVector();
+    // TODO: enhance the condition for subtarget before pwr8
+    return false;
+  };
+  if (!IsProfitable(IsNeg, IsAddOne, VT))
+    return SDValue();
+
+  SDLoc DL(N);
+  SDValue X = N->getOperand(0);
+  const unsigned ShAmt = (IsAddOne ? AbsMinusOne : AbsPlusOne).logBase2();
+  SDValue Shl =
+      DAG.getNode(ISD::SHL, DL, VT, X, DAG.getConstant(ShAmt, DL, VT));
+
+  if (!IsAddOne) {
+    // |C| == 2^N - 1: subtract x from the shifted value, with the operands
+    // swapped for a negative multiplier so no separate negation is needed.
+    SDValue Minuend = IsNeg ? X : Shl;
+    SDValue Subtrahend = IsNeg ? Shl : X;
+    return DAG.getNode(ISD::SUB, DL, VT, Minuend, Subtrahend);
+  }
+
+  // |C| == 2^N + 1: add x to the shifted value, then negate (0 - sum) for a
+  // negative multiplier.
+  SDValue Sum = DAG.getNode(ISD::ADD, DL, VT, X, Shl);
+  if (IsNeg)
+    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Sum);
+  return Sum;
+}
+
bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
// Only duplicate to increase tail-calls for the 64bit SysV ABIs.
if (!Subtarget.isSVR4ABI() || !Subtarget.isPPC64())
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.h b/llvm/lib/Target/PowerPC/PPCISelLowering.h
index c38d6655318..93920e9f9a9 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -1121,6 +1121,7 @@ namespace llvm {
SDValue combineSHL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRA(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSRL(SDNode *N, DAGCombinerInfo &DCI) const;
+ SDValue combineMUL(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineADD(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const;
SDValue combineSetCC(SDNode *N, DAGCombinerInfo &DCI) const;
diff --git a/llvm/test/CodeGen/PowerPC/mul-const-i64.ll b/llvm/test/CodeGen/PowerPC/mul-const-i64.ll
new file mode 100644
index 00000000000..d030fa99179
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mul-const-i64.ll
@@ -0,0 +1,92 @@
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=generic < %s -mtriple=ppc64-- | FileCheck %s -check-prefix=GENERIC-CHECK
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr8 < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr -mcpu=pwr9 < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK
+
+
+define i64 @foo(i64 %a) {
+entry:
+ %mul = mul nsw i64 %a, 6
+ ret i64 %mul
+}
+
+; GENERIC-CHECK-LABEL: @foo
+; GENERIC-CHECK: mulli r3, r3, 6
+; GENERIC-CHECK: blr
+
+define i64 @test1(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, 16 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test1:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+
+
+define i64 @test2(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, 17 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test2:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
+
+define i64 @test3(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, 15 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test3:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3
+
+; negative constant
+
+define i64 @test4(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, -16 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test4:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]]
+
+define i64 @test5(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, -17 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test5:
+; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17
+; PWR8-CHECK-NOT: mul
+; PWR8-CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
+; PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]]
+
+define i64 @test6(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, -15 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test6:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: sub r[[REG2:[0-9]+]], r3, r[[REG1]]
+; CHECK-NOT: neg
+
+; boundary case
+
+define i64 @test7(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, -9223372036854775808 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test7:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 63
+
+define i64 @test8(i64 %a) {
+ %tmp.1 = mul nsw i64 %a, 9223372036854775807 ; <i64> [#uses=1]
+ ret i64 %tmp.1
+}
+; CHECK-LABEL: test8:
+; CHECK-NOT: mul
+; CHECK: sldi r[[REG1:[0-9]+]], r3, 63
+; CHECK-NEXT: sub r[[REG2:[0-9]+]], r[[REG1]], r3
diff --git a/llvm/test/CodeGen/PowerPC/mul-const-vector.ll b/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
new file mode 100644
index 00000000000..08b069c073b
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mul-const-vector.ll
@@ -0,0 +1,382 @@
+; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr8 | FileCheck %s --check-prefixes=CHECK,CHECK-P8
+; RUN: llc -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names < %s -mtriple=ppc64le-- -mcpu=pwr9 | FileCheck %s --check-prefixes=CHECK,CHECK-P9
+
+define <16 x i8> @test1_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16, i8 16> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test1_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
+
+define <16 x i8> @test2_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17, i8 17> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test2_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vaddubm v[[REG3:[0-9]+]], v2, v[[REG2]]
+
+define <16 x i8> @test3_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test3_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2
+
+; negative constant
+
+define <16 x i8> @test4_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16, i8 -16> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test4_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
+; CHECK-NEXT: vsububm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]]
+
+define <16 x i8> @test5_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17, i8 -17> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test5_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG3:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vaddubm v[[REG4:[0-9]+]], v2, v[[REG3]]
+; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
+; CHECK-NEXT: vsububm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]]
+
+define <16 x i8> @test6_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15, i8 -15> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test6_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 4
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v2, v[[REG2]]
+
+; boundary case
+
+define <16 x i8> @test7_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test7_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG5:[0-9]+]], v2, v[[REG1]]
+
+define <16 x i8> @test8_v16i8(<16 x i8> %a) {
+ %tmp.1 = mul nsw <16 x i8> %a, <i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127, i8 127> ; <<16 x i8>> [#uses=1]
+ ret <16 x i8> %tmp.1
+}
+; CHECK-LABEL: test8_v16i8:
+; CHECK-P8: vspltisb v[[REG1:[0-9]+]], 7
+; CHECK-P9: xxspltib v[[REG1:[0-9]+]], 7
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslb v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsububm v[[REG3:[0-9]+]], v[[REG2]], v2
+
+define <8 x i16> @test1_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16, i16 16> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test1_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
+
+define <8 x i16> @test2_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17, i16 17> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test2_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vadduhm v[[REG3:[0-9]+]], v2, v[[REG2]]
+
+define <8 x i16> @test3_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test3_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2
+
+; negative constant
+
+define <8 x i16> @test4_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16, i16 -16> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test4_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG3:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
+; CHECK-NEXT: vsubuhm v[[REG4:[0-9]+]], v[[REG2]], v[[REG3]]
+
+define <8 x i16> @test5_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17, i16 -17> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test5_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG3:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vadduhm v[[REG4:[0-9]+]], v2, v[[REG3]]
+; CHECK-NEXT: xxlxor v[[REG2:[0-9]+]],
+; CHECK-NEXT: vsubuhm v[[REG5:[0-9]+]], v[[REG2]], v[[REG4]]
+
+define <8 x i16> @test6_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15, i16 -15> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test6_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v2, v[[REG2]]
+
+; boundary case
+
+define <8 x i16> @test7_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768, i16 -32768> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test7_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 15
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG5:[0-9]+]], v2, v[[REG1]]
+
+define <8 x i16> @test8_v8i16(<8 x i16> %a) {
+ %tmp.1 = mul nsw <8 x i16> %a, <i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767, i16 32767> ; <<8 x i16>> [#uses=1]
+ ret <8 x i16> %tmp.1
+}
+; CHECK-LABEL: test8_v8i16:
+; CHECK: vspltish v[[REG1:[0-9]+]], 15
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslh v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsubuhm v[[REG3:[0-9]+]], v[[REG2]], v2
+
+define <4 x i32> @test1_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test1_v4i32:
+; CHECK: vspltisw v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
+
+define <4 x i32> @test2_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 17, i32 17, i32 17, i32 17> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test2_v4i32:
+; CHECK: vspltisw v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]]
+
+define <4 x i32> @test3_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 15, i32 15, i32 15, i32 15> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test3_v4i32:
+; CHECK: vspltisw v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v[[REG2]], v2
+
+; negative constant
+
+define <4 x i32> @test4_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 -16, i32 -16, i32 -16, i32 -16> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test4_v4i32:
+; CHECK: vspltisw v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-P8-NEXT: xxlxor v[[REG3:[0-9]+]],
+; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG3]], v[[REG2]]
+; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG2]]
+
+define <4 x i32> @test5_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 -17, i32 -17, i32 -17, i32 -17> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test5_v4i32:
+; CHECK: vspltisw v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vadduwm v[[REG3:[0-9]+]], v2, v[[REG2]]
+; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
+; CHECK-P8-NEXT: vsubuwm v{{[0-9]+}}, v[[REG4]], v[[REG3]]
+; CHECK-P9-NEXT: vnegw v{{[0-9]+}}, v[[REG3]]
+
+define <4 x i32> @test6_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 -15, i32 -15, i32 -15, i32 -15> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test6_v4i32:
+; CHECK: vspltisw v[[REG1:[0-9]+]], 4
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG2:[0-9]+]], v2, v[[REG1]]
+; CHECK-NEXT: vsubuwm v[[REG3:[0-9]+]], v2, v[[REG2]]
+
+; boundary case
+
+define <4 x i32> @test7_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test7_v4i32:
+; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16
+; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15
+; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], v[[REG2]]
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]]
+
+define <4 x i32> @test8_v4i32(<4 x i32> %a) {
+ %tmp.1 = mul nsw <4 x i32> %a, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647> ; <<4 x i32>> [#uses=1]
+ ret <4 x i32> %tmp.1
+}
+; CHECK-LABEL: test8_v4i32:
+; CHECK-DAG: vspltisw v[[REG2:[0-9]+]], -16
+; CHECK-DAG: vspltisw v[[REG3:[0-9]+]], 15
+; CHECK-NEXT: vsubuwm v[[REG4:[0-9]+]], v[[REG3]], v[[REG2]]
+; CHECK-NOT: vmul
+; CHECK-NEXT: vslw v[[REG5:[0-9]+]], v2, v[[REG4]]
+; CHECK-NEXT: vsubuwm v[[REG6:[0-9]+]], v[[REG5]], v2
+
+define <2 x i64> @test1_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 16, i64 16> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+; CHECK-LABEL: test1_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v{{[0-9]+}}, v2, v[[REG2]]
+
+define <2 x i64> @test2_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 17, i64 17> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+
+; CHECK-LABEL: test2_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
+; CHECK-NEXT: vaddudm v{{[0-9]+}}, v2, v[[REG3]]
+
+define <2 x i64> @test3_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 15, i64 15> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+
+; CHECK-LABEL: test3_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
+; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
+
+; negative constant
+
+define <2 x i64> @test4_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 -16, i64 -16> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+
+; CHECK-LABEL: test4_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
+; CHECK-P8-NEXT: xxlxor v[[REG4:[0-9]+]],
+; CHECK-P8-NEXT: vsubudm v{{[0-9]+}}, v[[REG4]], v[[REG3]]
+; CHECK-P9-NEXT: vnegd v[[REG4:[0-9]+]], v[[REG3]]
+
+define <2 x i64> @test5_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 -17, i64 -17> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+
+; CHECK-LABEL: test5_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
+; CHECK-NEXT: vaddudm v[[REG4:[0-9]+]], v2, v[[REG3]]
+; CHECK-P8-NEXT: xxlxor v[[REG5:[0-9]+]],
+; CHECK-P8-NEXT: vsubudm v[[REG6:[0-9]+]], v[[REG5]], v[[REG4]]
+; CHECK-P9-NEXT: vnegd v{{[0-9]+}}, v[[REG4]]
+
+define <2 x i64> @test6_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 -15, i64 -15> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+
+; CHECK-LABEL: test6_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
+; CHECK-NEXT: vsubudm v{{[0-9]+}}, v2, v[[REG3]]
+
+
+; boundary case
+
+define <2 x i64> @test7_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 -9223372036854775808, i64 -9223372036854775808> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+
+; CHECK-LABEL: test7_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v[[REG4:[0-9]+]], v2, v[[REG2]]
+
+define <2 x i64> @test8_v2i64(<2 x i64> %a) {
+ %tmp.1 = mul nsw <2 x i64> %a, <i64 9223372036854775807, i64 9223372036854775807> ; <<2 x i64>> [#uses=1]
+ ret <2 x i64> %tmp.1
+}
+
+; CHECK-LABEL: test8_v2i64:
+; CHECK-P8: lxvd2x vs[[REG1:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-P8-NEXT: xxswapd v[[REG2:[0-9]+]], vs[[REG1]]
+; CHECK-P9: lxvx v[[REG2:[0-9]+]], 0, r{{[0-9]+}}
+; CHECK-NOT: vmul
+; CHECK-NEXT: vsld v[[REG3:[0-9]+]], v2, v[[REG2]]
+; CHECK-NEXT: vsubudm v{{[0-9]+}}, v[[REG3]], v2
diff --git a/llvm/test/CodeGen/PowerPC/mul-const.ll b/llvm/test/CodeGen/PowerPC/mul-const.ll
new file mode 100644
index 00000000000..9a9be994d8d
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/mul-const.ll
@@ -0,0 +1,79 @@
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64-- | FileCheck %s -check-prefixes=PWR8-CHECK,CHECK
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=ppc64le-- | FileCheck %s -check-prefixes=PWR9-CHECK,CHECK
+
+define i32 @test1(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, 16 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test1:
+; CHECK-NOT: mul
+; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
+
+define i32 @test2(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, 17 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test2:
+; CHECK-NOT: mul
+; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
+
+define i32 @test3(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, 15 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test3:
+; CHECK-NOT: mul
+; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]]
+
+; negative constant
+
+define i32 @test4(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, -16 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test4:
+; CHECK-NOT: mul
+; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: neg r[[REG2:[0-9]+]], r[[REG1]]
+
+define i32 @test5(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, -17 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test5:
+; PWR9-CHECK: mulli r[[REG1:[0-9]+]], r3, -17
+; PWR8-CHECK-NOT: mul
+; PWR8-CHECK: slwi r[[REG1:[0-9]+]], r3, 4
+; PWR8-CHECK-NEXT: add r[[REG2:[0-9]+]], r3, r[[REG1]]
+; PWR8-CHECK-NEXT: neg r{{[0-9]+}}, r[[REG2]]
+
+define i32 @test6(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, -15 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test6:
+; CHECK-NOT: mul
+; CHECK: slwi r[[REG1:[0-9]+]], r3, 4
+; CHECK-NEXT: subf r[[REG2:[0-9]+]], r[[REG1]], r3
+; CHECK-NOT: neg
+
+; boundary case
+
+define i32 @test7(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, -2147483648 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test7:
+; CHECK-NOT: mul
+; CHECK: slwi r[[REG1:[0-9]+]], r3, 31
+
+define i32 @test8(i32 %a) {
+ %tmp.1 = mul nsw i32 %a, 2147483647 ; <i32> [#uses=1]
+ ret i32 %tmp.1
+}
+; CHECK-LABEL: test8:
+; CHECK-NOT: mul
+; CHECK: slwi r[[REG1:[0-9]+]], r3, 31
+; CHECK-NEXT: subf r[[REG2:[0-9]+]], r3, r[[REG1]]
diff --git a/llvm/test/CodeGen/PowerPC/mul-neg-power-2.ll b/llvm/test/CodeGen/PowerPC/mul-neg-power-2.ll
deleted file mode 100644
index 81e8d6c3769..00000000000
--- a/llvm/test/CodeGen/PowerPC/mul-neg-power-2.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: llc -verify-machineinstrs < %s -mtriple=ppc32-- | not grep mul
-
-define i32 @test1(i32 %a) {
- %tmp.1 = mul i32 %a, -2 ; <i32> [#uses=1]
- %tmp.2 = add i32 %tmp.1, 63 ; <i32> [#uses=1]
- ret i32 %tmp.2
-}
-
diff --git a/llvm/test/CodeGen/PowerPC/mulli64.ll b/llvm/test/CodeGen/PowerPC/mulli64.ll
deleted file mode 100644
index cee8479b694..00000000000
--- a/llvm/test/CodeGen/PowerPC/mulli64.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: llc -verify-machineinstrs -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-define i64 @foo(i64 %a) #0 {
-entry:
- %mul = mul nsw i64 %a, 3
- ret i64 %mul
-}
-
-; CHECK-LABEL: @foo
-; CHECK: mulli 3, 3, 3
-; CHECK: blr
-
-attributes #0 = { nounwind readnone }
-
OpenPOWER on IntegriCloud