summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Craig Topper <craig.topper@intel.com> 2017-09-09 17:11:59 +0000
committer: Craig Topper <craig.topper@intel.com> 2017-09-09 17:11:59 +0000
commit: 3be1db82b6cc285994e395fe91588eb8efb81aae (patch)
tree: 947c292f6f9b60d3dd9498c3ba5579863b933627
parent: 7c2556a8950545a582e4437913cf7be1590a88e8 (diff)
downloadbcm5719-llvm-3be1db82b6cc285994e395fe91588eb8efb81aae.tar.gz
bcm5719-llvm-3be1db82b6cc285994e395fe91588eb8efb81aae.zip
[X86] Don't disable slow INC/DEC if optimizing for size
Summary: Just because INC/DEC is a little slow on some processors doesn't mean we shouldn't prefer it when optimizing for size. This appears to match gcc behavior.
Reviewers: chandlerc, zvi, RKSimon, spatel
Reviewed By: RKSimon
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D37177
llvm-svn: 312866
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp 8
-rw-r--r-- llvm/lib/Target/X86/X86InstrArithmetic.td 4
-rw-r--r-- llvm/lib/Target/X86/X86InstrCompiler.td 14
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.td 3
-rw-r--r-- llvm/test/CodeGen/X86/slow-incdec.ll 32
5 files changed, 28 insertions, 33 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 151eba4e715..ed8b3890360 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -16587,14 +16587,18 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, const SDLoc &dl,
if (ConstantSDNode *C =
dyn_cast<ConstantSDNode>(ArithOp.getOperand(1))) {
// An add of one will be selected as an INC.
- if (C->isOne() && !Subtarget.slowIncDec()) {
+ if (C->isOne() &&
+ (!Subtarget.slowIncDec() ||
+ DAG.getMachineFunction().getFunction()->optForSize())) {
Opcode = X86ISD::INC;
NumOperands = 1;
break;
}
// An add of negative one (subtract of one) will be selected as a DEC.
- if (C->isAllOnesValue() && !Subtarget.slowIncDec()) {
+ if (C->isAllOnesValue() &&
+ (!Subtarget.slowIncDec() ||
+ DAG.getMachineFunction().getFunction()->optForSize())) {
Opcode = X86ISD::DEC;
NumOperands = 1;
break;
diff --git a/llvm/lib/Target/X86/X86InstrArithmetic.td b/llvm/lib/Target/X86/X86InstrArithmetic.td
index 2bf6351d045..55e2bb322ca 100644
--- a/llvm/lib/Target/X86/X86InstrArithmetic.td
+++ b/llvm/lib/Target/X86/X86InstrArithmetic.td
@@ -481,7 +481,7 @@ def INC32r_alt : I<0x40, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
} // CodeSize = 1, hasSideEffects = 0
} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
@@ -528,7 +528,7 @@ def DEC32r_alt : I<0x48, AddRegFrm, (outs GR32:$dst), (ins GR32:$src1),
} // Constraints = "$src1 = $dst", SchedRW
-let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in {
+let CodeSize = 2, SchedRW = [WriteALULd, WriteRMW], Predicates = [UseIncDec] in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst),
(implicit EFLAGS)], IIC_UNARY_MEM>;
diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td
index d130c3e00df..a4c5fe2a6a1 100644
--- a/llvm/lib/Target/X86/X86InstrCompiler.td
+++ b/llvm/lib/Target/X86/X86InstrCompiler.td
@@ -273,7 +273,7 @@ def : Pat<(i16 0), (EXTRACT_SUBREG (MOV32r0), sub_16bit)>;
def : Pat<(i64 0), (SUBREG_TO_REG (i64 0), (MOV32r0), sub_32bit)>;
}
-let Predicates = [OptForSize, NotSlowIncDec, Not64BitMode],
+let Predicates = [OptForSize, Not64BitMode],
AddedComplexity = 10 in {
// Pseudo instructions for materializing 1 and -1 using XOR+INC/DEC,
// which only require 3 bytes compared to MOV32ri which requires 5.
@@ -698,7 +698,7 @@ defm LOCK_XOR : LOCK_ArithBinOp<0x30, 0x80, 0x83, MRM6m, X86lock_xor, "xor">;
multiclass LOCK_ArithUnOp<bits<8> Opc8, bits<8> Opc, Format Form,
int Increment, string mnemonic> {
let Defs = [EFLAGS], mayLoad = 1, mayStore = 1, isCodeGenOnly = 1,
- SchedRW = [WriteALULd, WriteRMW], Predicates = [NotSlowIncDec] in {
+ SchedRW = [WriteALULd, WriteRMW] in {
def NAME#8m : I<Opc8, Form, (outs), (ins i8mem :$dst),
!strconcat(mnemonic, "{b}\t$dst"),
[(set EFLAGS, (X86lock_add addr:$dst, (i8 Increment)))],
@@ -718,8 +718,10 @@ def NAME#64m : RI<Opc, Form, (outs), (ins i64mem:$dst),
}
}
+let Predicates = [UseIncDec] in {
defm LOCK_INC : LOCK_ArithUnOp<0xFE, 0xFF, MRM0m, 1, "inc">;
defm LOCK_DEC : LOCK_ArithUnOp<0xFE, 0xFF, MRM1m, -1, "dec">;
+}
// Atomic compare and swap.
multiclass LCMPXCHG_UnOp<bits<8> Opc, Format Form, string mnemonic,
@@ -942,17 +944,17 @@ multiclass RELEASE_UNOP<dag dag8, dag dag16, dag dag32, dag dag64> {
[(atomic_store_64 addr:$dst, dag64)]>;
}
-let Defs = [EFLAGS] in {
+let Defs = [EFLAGS], Predicates = [UseIncDec] in {
defm RELEASE_INC : RELEASE_UNOP<
(add (atomic_load_8 addr:$dst), (i8 1)),
(add (atomic_load_16 addr:$dst), (i16 1)),
(add (atomic_load_32 addr:$dst), (i32 1)),
- (add (atomic_load_64 addr:$dst), (i64 1))>, Requires<[NotSlowIncDec]>;
+ (add (atomic_load_64 addr:$dst), (i64 1))>;
defm RELEASE_DEC : RELEASE_UNOP<
(add (atomic_load_8 addr:$dst), (i8 -1)),
(add (atomic_load_16 addr:$dst), (i16 -1)),
(add (atomic_load_32 addr:$dst), (i32 -1)),
- (add (atomic_load_64 addr:$dst), (i64 -1))>, Requires<[NotSlowIncDec]>;
+ (add (atomic_load_64 addr:$dst), (i64 -1))>;
}
/*
TODO: These don't work because the type inference of TableGen fails.
@@ -1917,7 +1919,7 @@ def : Pat<(mul (loadi64 addr:$src1), i64immSExt32:$src2),
// Increment/Decrement reg.
// Do not make INC/DEC if it is slow
-let Predicates = [NotSlowIncDec] in {
+let Predicates = [UseIncDec] in {
def : Pat<(add GR8:$src, 1), (INC8r GR8:$src)>;
def : Pat<(add GR16:$src, 1), (INC16r GR16:$src)>;
def : Pat<(add GR32:$src, 1), (INC32r GR32:$src)>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 94567a48f35..0850b4a5a49 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -909,12 +909,13 @@ let RecomputePerFunction = 1 in {
def OptForSize : Predicate<"MF->getFunction()->optForSize()">;
def OptForMinSize : Predicate<"MF->getFunction()->optForMinSize()">;
def OptForSpeed : Predicate<"!MF->getFunction()->optForSize()">;
+ def UseIncDec : Predicate<"!Subtarget->slowIncDec() || "
+ "MF->getFunction()->optForSize()">;
}
def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
def CallImmAddr : Predicate<"Subtarget->isLegalToCallImmediateAddr()">;
def FavorMemIndirectCall : Predicate<"!Subtarget->slowTwoMemOps()">;
-def NotSlowIncDec : Predicate<"!Subtarget->slowIncDec()">;
def HasFastMem32 : Predicate<"!Subtarget->isUnalignedMem32Slow()">;
def HasFastLZCNT : Predicate<"Subtarget->hasFastLZCNT()">;
def HasFastSHLDRotate : Predicate<"Subtarget->hasFastSHLDRotate()">;
diff --git a/llvm/test/CodeGen/X86/slow-incdec.ll b/llvm/test/CodeGen/X86/slow-incdec.ll
index 715a6fe150e..5c406c77aa8 100644
--- a/llvm/test/CodeGen/X86/slow-incdec.ll
+++ b/llvm/test/CodeGen/X86/slow-incdec.ll
@@ -35,33 +35,21 @@ define i32 @dec(i32 %x) {
}
define i32 @inc_size(i32 %x) optsize {
-; INCDEC-LABEL: inc_size:
-; INCDEC: # BB#0:
-; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; INCDEC-NEXT: incl %eax
-; INCDEC-NEXT: retl
-;
-; ADD-LABEL: inc_size:
-; ADD: # BB#0:
-; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; ADD-NEXT: addl $1, %eax
-; ADD-NEXT: retl
+; CHECK-LABEL: inc_size:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: incl %eax
+; CHECK-NEXT: retl
%r = add i32 %x, 1
ret i32 %r
}
define i32 @dec_size(i32 %x) optsize {
-; INCDEC-LABEL: dec_size:
-; INCDEC: # BB#0:
-; INCDEC-NEXT: movl {{[0-9]+}}(%esp), %eax
-; INCDEC-NEXT: decl %eax
-; INCDEC-NEXT: retl
-;
-; ADD-LABEL: dec_size:
-; ADD: # BB#0:
-; ADD-NEXT: movl {{[0-9]+}}(%esp), %eax
-; ADD-NEXT: addl $-1, %eax
-; ADD-NEXT: retl
+; CHECK-LABEL: dec_size:
+; CHECK: # BB#0:
+; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: decl %eax
+; CHECK-NEXT: retl
%r = add i32 %x, -1
ret i32 %r
}
OpenPOWER on IntegriCloud