summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Chad Rosier <mcrosier@apple.com> 2012-03-22 00:21:17 +0000
committer: Chad Rosier <mcrosier@apple.com> 2012-03-22 00:21:17 +0000
commit: 6a63a741131fbcf3e41d492bfed08fd56c8b4096 (patch)
tree: 87ae48b5a387328eadfa9e17642f10b298d46d0c
parent: 7cb09b61b250dec8fe81f6105cf59207224eff0a (diff)
download: bcm5719-llvm-6a63a741131fbcf3e41d492bfed08fd56c8b4096.tar.gz
          bcm5719-llvm-6a63a741131fbcf3e41d492bfed08fd56c8b4096.zip
[fast-isel] Fold "urem x, pow2" -> "and x, pow2-1".

This should fix the 271% execution-time regression for nsieve-bits on the ARMv7 -O0 -g nightly tester. This may also improve compile-time on architectures that would otherwise generate a libcall for urem (e.g., ARM) or fall back to the DAG selector.

rdar://10810716
llvm-svn: 153230
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 7
-rw-r--r-- llvm/test/CodeGen/ARM/fast-isel.ll | 9
2 files changed, 16 insertions, 0 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 9f4a44a4927..4db10b75868 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -395,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
ISDOpcode = ISD::SRA;
}
+ // Transform "urem x, pow2" -> "and x, pow2-1".
+ if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+ isPowerOf2_64(Imm)) {
+ --Imm;
+ ISDOpcode = ISD::AND;
+ }
+
unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
Op0IsKill, Imm, VT.getSimpleVT());
if (ResultReg == 0) return false;
diff --git a/llvm/test/CodeGen/ARM/fast-isel.ll b/llvm/test/CodeGen/ARM/fast-isel.ll
index 905543a54cf..417e2d9e410 100644
--- a/llvm/test/CodeGen/ARM/fast-isel.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel.ll
@@ -217,3 +217,12 @@ entry:
; THUMB: vcmpe.f32 s0, #0
ret i1 %4
}
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+ %rem = urem i32 %a, 32
+ ret i32 %rem
+}
OpenPOWER on IntegriCloud