summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBenjamin Kramer <benny.kra@googlemail.com>2011-01-30 16:38:43 +0000
committerBenjamin Kramer <benny.kra@googlemail.com>2011-01-30 16:38:43 +0000
commit946e1522b6df5fb0eecd78913a98d310477f6a64 (patch)
tree84c6f180b05ceb4c2802dcdc95db7a6f68bb4021
parent115f0fa39769f371e4cb218a0919d3109eaf9cca (diff)
downloadbcm5719-llvm-946e1522b6df5fb0eecd78913a98d310477f6a64.tar.gz
bcm5719-llvm-946e1522b6df5fb0eecd78913a98d310477f6a64.zip
Teach DAGCombine to fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2)) when c1 equals the number of bits that are truncated off.
This happens all the time when a smul is promoted to a larger type.

On x86-64 we now compile "int test(int x) { return x/10; }" into:

    movslq %edi, %rax
    imulq $1717986919, %rax, %rax
    movq %rax, %rcx
    shrq $63, %rcx
    sarq $34, %rax    <- used to be "shrq $32, %rax; sarl $2, %eax"
    addl %ecx, %eax

This fires 96 times in gcc.c on x86-64.

llvm-svn: 124559
-rw-r--r--llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp23
-rw-r--r--llvm/lib/Target/README.txt21
-rw-r--r--llvm/test/CodeGen/X86/divide-by-constant.ll9
3 files changed, 32 insertions, 21 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a5b2d9594d5..94487d40414 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3154,6 +3154,29 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
}
}
+ // fold (sra (trunc (sr x, c1)), c2) -> (trunc (sra x, c1+c2))
+ // if c1 is equal to the number of bits the trunc removes
+ if (N0.getOpcode() == ISD::TRUNCATE &&
+ (N0.getOperand(0).getOpcode() == ISD::SRL ||
+ N0.getOperand(0).getOpcode() == ISD::SRA) &&
+ N0.getOperand(0).hasOneUse() &&
+ N0.getOperand(0).getOperand(1).hasOneUse() &&
+ N1C && isa<ConstantSDNode>(N0.getOperand(0).getOperand(1))) {
+ EVT LargeVT = N0.getOperand(0).getValueType();
+ ConstantSDNode *LargeShiftAmt =
+ cast<ConstantSDNode>(N0.getOperand(0).getOperand(1));
+
+ if (LargeVT.getScalarType().getSizeInBits() - OpSizeInBits ==
+ LargeShiftAmt->getZExtValue()) {
+ SDValue Amt =
+ DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(),
+ getShiftAmountTy());
+ SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT,
+ N0.getOperand(0).getOperand(0), Amt);
+ return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA);
+ }
+ }
+
// Simplify, based on bits shifted out of the LHS.
if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
return SDValue(N, 0);
diff --git a/llvm/lib/Target/README.txt b/llvm/lib/Target/README.txt
index 8b5c5ce1371..c0a2b760de7 100644
--- a/llvm/lib/Target/README.txt
+++ b/llvm/lib/Target/README.txt
@@ -2274,24 +2274,3 @@ llc time when it gets inlined, because we can use smaller transfers. This also
avoids partial register stalls in some important cases.
//===---------------------------------------------------------------------===//
-
-We miss an optzn when lowering divide by some constants. For example:
- int test(int x) { return x/10; }
-
-We produce:
-
-_test: ## @test
-## BB#0: ## %entry
- movslq %edi, %rax
- imulq $1717986919, %rax, %rax ## imm = 0x66666667
- movq %rax, %rcx
- shrq $63, %rcx
-** shrq $32, %rax
-** sarl $2, %eax
- addl %ecx, %eax
- ret
-
-The two starred instructions could be replaced with a "sarl $34, %rax". This
-occurs in 186.crafty very frequently.
-
-//===---------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/divide-by-constant.ll b/llvm/test/CodeGen/X86/divide-by-constant.ll
index 545662fd0ff..7ceb972f61b 100644
--- a/llvm/test/CodeGen/X86/divide-by-constant.ll
+++ b/llvm/test/CodeGen/X86/divide-by-constant.ll
@@ -51,3 +51,12 @@ define i32 @test5(i32 %A) nounwind {
; CHECK: mull 4(%esp)
}
+define signext i16 @test6(i16 signext %x) nounwind {
+entry:
+ %div = sdiv i16 %x, 10
+ ret i16 %div
+; CHECK: test6:
+; CHECK: imull $26215, %eax, %eax
+; CHECK: shrl $31, %ecx
+; CHECK: sarl $18, %eax
+}
OpenPOWER on IntegriCloud