| author | Chad Rosier <mcrosier@apple.com> | 2012-03-22 00:21:17 +0000 | 
|---|---|---|
| committer | Chad Rosier <mcrosier@apple.com> | 2012-03-22 00:21:17 +0000 | 
| commit | 6a63a741131fbcf3e41d492bfed08fd56c8b4096 (patch) | |
| tree | 87ae48b5a387328eadfa9e17642f10b298d46d0c | |
| parent | 7cb09b61b250dec8fe81f6105cf59207224eff0a (diff) | |
[fast-isel] Fold "urem x, pow2" -> "and x, pow2-1".  This should fix the 271%
execution-time regression for nsieve-bits on the ARMv7 -O0 -g nightly tester.
This may also improve compile-time on architectures that would otherwise 
generate a libcall for urem (e.g., ARM) or fall back to the DAG selector.
rdar://10810716
llvm-svn: 153230
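
The fold rests on a standard bit trick: when the modulus m is a power of two, the unsigned remainder x % m is exactly the low bits selected by m - 1. A minimal standalone sketch of that identity (not part of the patch; the `isPowerOf2` helper here is an illustrative stand-in for `llvm::isPowerOf2_64`, which the patch actually calls):

```cpp
#include <cassert>
#include <cstdint>

// Illustrative stand-in for llvm::isPowerOf2_64: true iff exactly one
// bit of v is set.
static bool isPowerOf2(uint64_t v) { return v != 0 && (v & (v - 1)) == 0; }

int main() {
  // For every power-of-two modulus m, x % m equals x & (m - 1) for
  // unsigned x -- the same rewrite the patch applies to the immediate
  // before handing it to FastEmit_ri_.
  for (uint64_t m : {1ULL, 2ULL, 32ULL, 1024ULL, 1ULL << 62}) {
    assert(isPowerOf2(m));
    for (uint64_t x : {0ULL, 1ULL, 31ULL, 32ULL, 33ULL, ~0ULL}) {
      assert(x % m == (x & (m - 1)));
    }
  }
  return 0;
}
```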
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 7 |
| -rw-r--r-- | llvm/test/CodeGen/ARM/fast-isel.ll | 9 |
2 files changed, 16 insertions, 0 deletions
```diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index 9f4a44a4927..4db10b75868 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -395,6 +395,13 @@ bool FastISel::SelectBinaryOp(const User *I, unsigned ISDOpcode) {
       ISDOpcode = ISD::SRA;
     }
 
+    // Transform "urem x, pow2" -> "and x, pow2-1".
+    if (ISDOpcode == ISD::UREM && isa<BinaryOperator>(I) &&
+        isPowerOf2_64(Imm)) {
+      --Imm;
+      ISDOpcode = ISD::AND;
+    }
+
     unsigned ResultReg = FastEmit_ri_(VT.getSimpleVT(), ISDOpcode, Op0,
                                       Op0IsKill, Imm, VT.getSimpleVT());
     if (ResultReg == 0) return false;
diff --git a/llvm/test/CodeGen/ARM/fast-isel.ll b/llvm/test/CodeGen/ARM/fast-isel.ll
index 905543a54cf..417e2d9e410 100644
--- a/llvm/test/CodeGen/ARM/fast-isel.ll
+++ b/llvm/test/CodeGen/ARM/fast-isel.ll
@@ -217,3 +217,12 @@ entry:
 ; THUMB: vcmpe.f32 s0, #0
   ret i1 %4
 }
+
+; ARM: @urem_fold
+; THUMB: @urem_fold
+; ARM: and r0, r0, #31
+; THUMB: and r0, r0, #31
+define i32 @urem_fold(i32 %a) nounwind {
+  %rem = urem i32 %a, 32
+  ret i32 %rem
+}
```
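Note that the new guard checks `ISDOpcode == ISD::UREM`, so the rewrite never fires for signed remainder: with truncating signed division semantics, the mask identity breaks on negative inputs. A small illustrative check of that boundary (assumes standard C++ truncation-toward-zero remainder; not code from the patch):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  int32_t x = -1;
  assert(x % 32 == -1);    // srem keeps the dividend's sign
  assert((x & 31) == 31);  // the mask would answer 31 instead
  // urem agrees with the mask, which is why the fold is urem-only.
  assert(uint32_t(x) % 32u == (uint32_t(x) & 31u));
  return 0;
}
```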

