summaryrefslogtreecommitdiffstats
path: root/llvm
diff options
context:
space:
mode:
author Zi Xuan Wu <wuzish@cn.ibm.com> 2019-04-12 05:21:31 +0000
committer Zi Xuan Wu <wuzish@cn.ibm.com> 2019-04-12 05:21:31 +0000
commit ac79ef8f0ec23aaaf485ec2da4ef7cadd36e3aa6 (patch)
tree d9af13064f5c4a678e4acd4b85bca94118202ef6 /llvm
parent aa1cad1591bf4fd55b0d7f28e616329c4943e27d (diff)
downloadbcm5719-llvm-ac79ef8f0ec23aaaf485ec2da4ef7cadd36e3aa6.tar.gz
bcm5719-llvm-ac79ef8f0ec23aaaf485ec2da4ef7cadd36e3aa6.zip
[PowerPC] More precise exploitation of P9 maddld instruction when operands are constant
maddld takes three operands, matching the pattern (add (mul %1, %2), %3), and sometimes one or more of them are constants. If an operand is a constant that fits in a signed 16-bit immediate, using maddld requires an extra li to materialize it and ties up one more register, so it is not profitable to use maddld for the mul-add pattern in that case; mulli/addi are used instead. Differential Revision: https://reviews.llvm.org/D60181 llvm-svn: 358253
Diffstat (limited to 'llvm')
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 4
-rw-r--r-- llvm/lib/Target/PowerPC/PPCInstrInfo.td | 11
-rw-r--r-- llvm/test/CodeGen/PowerPC/maddld.ll | 226
3 files changed, 188 insertions, 53 deletions
diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
index 42e4ce7172a..256e62f7d21 100644
--- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -777,7 +777,7 @@ def MADDHDU : VAForm_1a<49,
"maddhdu $RT, $RA, $RB, $RC", IIC_IntMulHD, []>, isPPC64;
def MADDLD : VAForm_1a<51, (outs gprc :$RT), (ins gprc:$RA, gprc:$RB, gprc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
- [(set i32:$RT, (add (mul i32:$RA, i32:$RB), i32:$RC))]>,
+ [(set i32:$RT, (add_without_simm16 (mul_without_simm16 i32:$RA, i32:$RB), i32:$RC))]>,
isPPC64;
def SETB : XForm_44<31, 128, (outs gprc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
@@ -785,7 +785,7 @@ let Interpretation64Bit = 1, isCodeGenOnly = 1 in {
def MADDLD8 : VAForm_1a<51,
(outs g8rc :$RT), (ins g8rc:$RA, g8rc:$RB, g8rc:$RC),
"maddld $RT, $RA, $RB, $RC", IIC_IntMulHD,
- [(set i64:$RT, (add (mul i64:$RA, i64:$RB), i64:$RC))]>,
+ [(set i64:$RT, (add_without_simm16 (mul_without_simm16 i64:$RA, i64:$RB), i64:$RC))]>,
isPPC64;
def SETB8 : XForm_44<31, 128, (outs g8rc:$RT), (ins crrc:$BFA),
"setb $RT, $BFA", IIC_IntGeneral>, isPPC64;
diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
index e9bc47efc5a..5b541f2bb03 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td
@@ -457,6 +457,17 @@ def nonQuadwOffsetStore : PatFrag<(ops node:$val, node:$ptr),
return !isOffsetMultipleOf(N, 16);
}]>;
+// PatFrag for a binary operation neither of whose operands is a signed 16-bit immediate
+class BinOpWithoutSImm16Operand<SDNode opcode> :
+ PatFrag<(ops node:$left, node:$right), (opcode node:$left, node:$right), [{
+ int16_t Imm;
+ return !isIntS16Immediate(N->getOperand(0), Imm)
+ && !isIntS16Immediate(N->getOperand(1), Imm);
+}]>;
+
+def add_without_simm16 : BinOpWithoutSImm16Operand<add>;
+def mul_without_simm16 : BinOpWithoutSImm16Operand<mul>;
+
//===----------------------------------------------------------------------===//
// PowerPC Flag Definitions.
diff --git a/llvm/test/CodeGen/PowerPC/maddld.ll b/llvm/test/CodeGen/PowerPC/maddld.ll
index 1ffd52c802c..3b60a8f88b0 100644
--- a/llvm/test/CodeGen/PowerPC/maddld.ll
+++ b/llvm/test/CodeGen/PowerPC/maddld.ll
@@ -1,18 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P8
define signext i64 @maddld64(i64 signext %a, i64 signext %b) {
-; CHECK-P9-LABEL: maddld64:
-; CHECK-P9: # %bb.0: # %entry
+; CHECK-LABEL: maddld64:
+; CHECK: # %bb.0: # %entry
; CHECK-P9-NEXT: maddld 3, 4, 3, 3
-; CHECK-P9-NEXT: blr
-;
-; CHECK-P8-LABEL: maddld64:
-; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mulld 4, 4, 3
; CHECK-P8-NEXT: add 3, 4, 3
-; CHECK-P8-NEXT: blr
+; CHECK-NEXT: blr
+
entry:
%mul = mul i64 %b, %a
%add = add i64 %mul, %a
@@ -20,18 +17,15 @@ entry:
}
define signext i32 @maddld32(i32 signext %a, i32 signext %b) {
-; CHECK-P9-LABEL: maddld32:
-; CHECK-P9: # %bb.0: # %entry
+; CHECK-LABEL: maddld32:
+; CHECK: # %bb.0: # %entry
; CHECK-P9-NEXT: maddld 3, 4, 3, 3
; CHECK-P9-NEXT: extsw 3, 3
-; CHECK-P9-NEXT: blr
-;
-; CHECK-P8-LABEL: maddld32:
-; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mullw 4, 4, 3
; CHECK-P8-NEXT: add 3, 4, 3
; CHECK-P8-NEXT: extsw 3, 3
-; CHECK-P8-NEXT: blr
+; CHECK-NEXT: blr
+
entry:
%mul = mul i32 %b, %a
%add = add i32 %mul, %a
@@ -39,18 +33,15 @@ entry:
}
define signext i16 @maddld16(i16 signext %a, i16 signext %b, i16 signext %c) {
-; CHECK-P9-LABEL: maddld16:
-; CHECK-P9: # %bb.0: # %entry
+; CHECK-LABEL: maddld16:
+; CHECK: # %bb.0: # %entry
; CHECK-P9-NEXT: maddld 3, 4, 3, 5
; CHECK-P9-NEXT: extsh 3, 3
-; CHECK-P9-NEXT: blr
-;
-; CHECK-P8-LABEL: maddld16:
-; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mullw 3, 4, 3
-; CHECK-P8-NEXT: add 3, 3, 5
+; CHECK-P8-NEXT: add 3, 3, 5
; CHECK-P8-NEXT: extsh 3, 3
-; CHECK-P8-NEXT: blr
+; CHECK-NEXT: blr
+
entry:
%mul = mul i16 %b, %a
%add = add i16 %mul, %c
@@ -58,18 +49,14 @@ entry:
}
define zeroext i32 @maddld32zeroext(i32 zeroext %a, i32 zeroext %b) {
-; CHECK-P9-LABEL: maddld32zeroext:
-; CHECK-P9: # %bb.0: # %entry
+; CHECK-LABEL: maddld32zeroext:
+; CHECK: # %bb.0: # %entry
; CHECK-P9-NEXT: maddld 3, 4, 3, 3
-; CHECK-P9-NEXT: clrldi 3, 3, 32
-; CHECK-P9-NEXT: blr
-;
-; CHECK-P8-LABEL: maddld32zeroext:
-; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mullw 4, 4, 3
; CHECK-P8-NEXT: add 3, 4, 3
-; CHECK-P8-NEXT: clrldi 3, 3, 32
-; CHECK-P8-NEXT: blr
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+
entry:
%mul = mul i32 %b, %a
%add = add i32 %mul, %a
@@ -77,18 +64,14 @@ entry:
}
define signext i32 @maddld32nsw(i32 signext %a, i32 signext %b) {
-; CHECK-P9-LABEL: maddld32nsw:
-; CHECK-P9: # %bb.0: # %entry
+; CHECK-LABEL: maddld32nsw:
+; CHECK: # %bb.0: # %entry
; CHECK-P9-NEXT: maddld 3, 4, 3, 3
-; CHECK-P9-NEXT: extsw 3, 3
-; CHECK-P9-NEXT: blr
-;
-; CHECK-P8-LABEL: maddld32nsw:
-; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mullw 4, 4, 3
; CHECK-P8-NEXT: add 3, 4, 3
-; CHECK-P8-NEXT: extsw 3, 3
-; CHECK-P8-NEXT: blr
+; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: blr
+
entry:
%mul = mul nsw i32 %b, %a
%add = add nsw i32 %mul, %a
@@ -96,20 +79,161 @@ entry:
}
define zeroext i32 @maddld32nuw(i32 zeroext %a, i32 zeroext %b) {
-; CHECK-P9-LABEL: maddld32nuw:
-; CHECK-P9: # %bb.0: # %entry
+; CHECK-LABEL: maddld32nuw:
+; CHECK: # %bb.0: # %entry
; CHECK-P9-NEXT: maddld 3, 4, 3, 3
-; CHECK-P9-NEXT: clrldi 3, 3, 32
-; CHECK-P9-NEXT: blr
-;
-; CHECK-P8-LABEL: maddld32nuw:
-; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mullw 4, 4, 3
; CHECK-P8-NEXT: add 3, 4, 3
-; CHECK-P8-NEXT: clrldi 3, 3, 32
-; CHECK-P8-NEXT: blr
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+
entry:
%mul = mul nuw i32 %b, %a
%add = add nuw i32 %mul, %a
ret i32 %add
}
+
+define signext i64 @maddld64_imm(i64 signext %a, i64 signext %b) {
+; CHECK-LABEL: maddld64_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: mulli 4, 4, 13
+; CHECK-NEXT: add 3, 4, 3
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul i64 %b, 13
+ %add = add i64 %mul, %a
+ ret i64 %add
+}
+
+define signext i32 @maddld32_imm(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: maddld32_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: mullw 3, 4, 3
+; CHECK-NEXT: addi 3, 3, 13
+; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul i32 %b, %a
+ %add = add i32 %mul, 13
+ ret i32 %add
+}
+
+define signext i16 @maddld16_imm(i16 signext %a, i16 signext %b, i16 signext %c) {
+; CHECK-LABEL: maddld16_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: mulli 3, 4, 13
+; CHECK-NEXT: add 3, 3, 5
+; CHECK-NEXT: extsh 3, 3
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul i16 %b, 13
+ %add = add i16 %mul, %c
+ ret i16 %add
+}
+
+define zeroext i32 @maddld32zeroext_imm(i32 zeroext %a, i32 zeroext %b) {
+; CHECK-LABEL: maddld32zeroext_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: mullw 3, 4, 3
+; CHECK-NEXT: addi 3, 3, 13
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul i32 %b, %a
+ %add = add i32 %mul, 13
+ ret i32 %add
+}
+
+define signext i32 @maddld32nsw_imm(i32 signext %a, i32 signext %b) {
+; CHECK-LABEL: maddld32nsw_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: mulli 4, 4, 13
+; CHECK-NEXT: add 3, 4, 3
+; CHECK-NEXT: extsw 3, 3
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul nsw i32 %b, 13
+ %add = add nsw i32 %mul, %a
+ ret i32 %add
+}
+
+define zeroext i32 @maddld32nuw_imm(i32 zeroext %a, i32 zeroext %b) {
+; CHECK-LABEL: maddld32nuw_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: mullw 3, 4, 3
+; CHECK-NEXT: addi 3, 3, 13
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul nuw i32 %b, %a
+ %add = add nuw i32 %mul, 13
+ ret i32 %add
+}
+
+define zeroext i32 @maddld32nuw_imm_imm(i32 zeroext %b) {
+; CHECK-LABEL: maddld32nuw_imm_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: mulli 3, 3, 18
+; CHECK-NEXT: addi 3, 3, 13
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul nuw i32 %b, 18
+ %add = add nuw i32 %mul, 13
+ ret i32 %add
+}
+
+define zeroext i32 @maddld32nuw_bigimm_imm(i32 zeroext %b) {
+; CHECK-LABEL: maddld32nuw_bigimm_imm:
+; CHECK: # %bb.0: # %entry
+; CHECK-NOT: maddld
+; CHECK-NEXT: lis 4, 26127
+; CHECK-NEXT: ori 4, 4, 63251
+; CHECK-NEXT: mullw 3, 3, 4
+; CHECK-NEXT: addi 3, 3, 13
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul nuw i32 %b, 1712322323
+ %add = add nuw i32 %mul, 13
+ ret i32 %add
+}
+
+define zeroext i32 @maddld32nuw_bigimm_bigimm(i32 zeroext %b) {
+; CHECK-LABEL: maddld32nuw_bigimm_bigimm:
+; CHECK: # %bb.0: # %entry
+; CHECK-P9-NEXT: lis 4, -865
+; CHECK-P9-NEXT: lis 5, 26127
+; CHECK-P9-NEXT: ori 4, 4, 42779
+; CHECK-P9-NEXT: ori 5, 5, 63251
+; CHECK-P9-NEXT: maddld 3, 3, 5, 4
+
+; CHECK-P8-NEXT: lis 4, 26127
+; CHECK-P8-NEXT: ori 4, 4, 63251
+; CHECK-P8-NEXT: mullw 3, 3, 4
+; CHECK-P8-NEXT: addi 3, 3, -22757
+; CHECK-P8-NEXT: addis 3, 3, -864
+
+; CHECK-NEXT: clrldi 3, 3, 32
+; CHECK-NEXT: blr
+
+entry:
+ %mul = mul nuw i32 %b, 1712322323
+ %add = add nuw i32 %mul, 17123223323
+ ret i32 %add
+}
OpenPOWER on IntegriCloud