summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Juergen Ributzka <juergen@apple.com> 2014-09-17 21:55:55 +0000
committer Juergen Ributzka <juergen@apple.com> 2014-09-17 21:55:55 +0000
commit f6430314b4597687498327a0c5ff37b827c606aa (patch)
tree 94baafdf3d7398fad8c456e152cc7b1818403f99
parent 3e95fa431e847c1cec92ce8ac77c0b7be79199ed (diff)
download bcm5719-llvm-f6430314b4597687498327a0c5ff37b827c606aa.tar.gz
bcm5719-llvm-f6430314b4597687498327a0c5ff37b827c606aa.zip
[FastISel][AArch64] Custom lower sdiv by power-of-2.
Emit an optimized instruction sequence for sdiv by power-of-2 depending on the exact flag. This fixes rdar://problem/18224511. llvm-svn: 217986
-rw-r--r--llvm/lib/Target/AArch64/AArch64FastISel.cpp72
-rw-r--r--llvm/test/CodeGen/AArch64/fast-isel-sdiv.ll56
2 files changed, 128 insertions, 0 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
index 33d17ef704f..347e0364d1d 100644
--- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp
@@ -133,6 +133,7 @@ private:
bool selectShift(const Instruction *I);
bool selectBitCast(const Instruction *I);
bool selectFRem(const Instruction *I);
+ bool selectSDiv(const Instruction *I);
// Utility helper routines.
bool isTypeLegal(Type *Ty, MVT &VT);
@@ -3980,6 +3981,75 @@ bool AArch64FastISel::selectFRem(const Instruction *I) {
return true;
}
+/// Select an 'sdiv' instruction, lowering an i32/i64 division by a constant
+/// power-of-2 (or negated power-of-2) to an arithmetic-shift based sequence.
+///
+/// A non-constant divisor, a type other than i32/i64, a zero divisor, or a
+/// divisor that is not +/- a power of two is forwarded to
+/// selectBinaryOp(I, ISD::SDIV).
+///
+/// \returns true once a result register has been recorded for \p I; false if
+/// the type is not legal or if any emit helper yields no register.
+bool AArch64FastISel::selectSDiv(const Instruction *I) {
+  MVT VT;
+  if (!isTypeLegal(I->getType(), VT))
+    return false;
+
+  if (!isa<ConstantInt>(I->getOperand(1)))
+    return selectBinaryOp(I, ISD::SDIV);
+
+  const APInt &C = cast<ConstantInt>(I->getOperand(1))->getValue();
+  if ((VT != MVT::i32 && VT != MVT::i64) || !C ||
+      !(C.isPowerOf2() || (-C).isPowerOf2()))
+    return selectBinaryOp(I, ISD::SDIV);
+
+  // |C| is a power of two, so its trailing-zero count equals log2(|C|).
+  unsigned Lg2 = C.countTrailingZeros();
+  unsigned Src0Reg = getRegForValue(I->getOperand(0));
+  if (!Src0Reg)
+    return false;
+  bool Src0IsKill = hasTrivialKill(I->getOperand(0));
+
+  // Register (and its kill state) that is finally shifted right by Lg2.
+  unsigned ShiftSrcReg = Src0Reg;
+  bool ShiftSrcIsKill = Src0IsKill;
+
+  // An exact division leaves no remainder, so the dividend can be shifted as
+  // is. Otherwise a negative dividend has to be biased by Pow2 - 1 first so
+  // that the arithmetic shift rounds towards zero instead of towards -inf.
+  if (!cast<BinaryOperator>(I)->isExact()) {
+    // Build the mask in 64-bit unsigned arithmetic: Lg2 is 31 for an i32
+    // divisor of INT_MIN and can reach 63 for i64, which a plain 'int' shift
+    // cannot represent.
+    uint64_t Pow2MinusOne = (uint64_t(1) << Lg2) - 1;
+    unsigned AddReg = emitAddSub_ri(/*UseAdd=*/true, VT, Src0Reg,
+                                    /*IsKill=*/false, Pow2MinusOne);
+    if (!AddReg)
+      return false;
+
+    // (Src0 < 0) ? Src0 + Pow2 - 1 : Src0;
+    if (!emitICmp_ri(VT, Src0Reg, /*IsKill=*/false, 0))
+      return false;
+
+    unsigned SelectOpc;
+    const TargetRegisterClass *RC;
+    if (VT == MVT::i64) {
+      SelectOpc = AArch64::CSELXr;
+      RC = &AArch64::GPR64RegClass;
+    } else {
+      SelectOpc = AArch64::CSELWr;
+      RC = &AArch64::GPR32RegClass;
+    }
+    unsigned SelectReg =
+        fastEmitInst_rri(SelectOpc, RC, AddReg, /*IsKill=*/true, Src0Reg,
+                         Src0IsKill, AArch64CC::LT);
+    if (!SelectReg)
+      return false;
+
+    ShiftSrcReg = SelectReg;
+    ShiftSrcIsKill = true;
+  }
+
+  // Divide by Pow2 --> ashr. If we're dividing by a negative value we must also
+  // negate the result; this applies to the exact case as well, so both paths
+  // share this tail. The negate is a subtract from the zero register with the
+  // shift folded into the second operand.
+  unsigned ZeroReg = (VT == MVT::i64) ? AArch64::XZR : AArch64::WZR;
+  unsigned ResultReg;
+  if (C.isNegative())
+    ResultReg = emitAddSub_rs(/*UseAdd=*/false, VT, ZeroReg, /*IsKill=*/true,
+                              ShiftSrcReg, ShiftSrcIsKill, AArch64_AM::ASR,
+                              Lg2);
+  else
+    ResultReg = emitASR_ri(VT, VT, ShiftSrcReg, ShiftSrcIsKill, Lg2);
+
+  if (!ResultReg)
+    return false;
+
+  updateValueMap(I, ResultReg);
+  return true;
+}
+
bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
switch (I->getOpcode()) {
default:
@@ -3989,6 +4059,8 @@ bool AArch64FastISel::fastSelectInstruction(const Instruction *I) {
return selectAddSub(I);
case Instruction::Mul:
return selectMul(I);
+ case Instruction::SDiv:
+ return selectSDiv(I);
case Instruction::SRem:
if (!selectBinaryOp(I, ISD::SREM))
return selectRem(I, ISD::SREM);
diff --git a/llvm/test/CodeGen/AArch64/fast-isel-sdiv.ll b/llvm/test/CodeGen/AArch64/fast-isel-sdiv.ll
new file mode 100644
index 00000000000..30807767fa7
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/fast-isel-sdiv.ll
@@ -0,0 +1,56 @@
+; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-apple-darwin -fast-isel -fast-isel-abort -verify-machineinstrs < %s | FileCheck %s
+
+; Exact i32 division by 8: the expected output is a single arithmetic shift
+; right by 3 of the incoming argument, with no rounding fix-up.
+define i32 @sdiv_i32_exact(i32 %a) {
+; CHECK-LABEL: sdiv_i32_exact
+; CHECK: asr {{w[0-9]+}}, w0, #3
+ %1 = sdiv exact i32 %a, 8
+ ret i32 %1
+}
+
+; Non-exact i32 division by +8: the expected output adds 7 to the argument,
+; selects the biased value only when the argument is negative (cmp + csel lt),
+; and then shifts right arithmetically by 3.
+define i32 @sdiv_i32_pos(i32 %a) {
+; CHECK-LABEL: sdiv_i32_pos
+; CHECK: add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT: asr {{w[0-9]+}}, [[REG2]], #3
+ %1 = sdiv i32 %a, 8
+ ret i32 %1
+}
+
+; Non-exact i32 division by -8: same add/cmp/csel bias sequence as for +8, but
+; the final shift by 3 is folded into a negate of the selected value.
+define i32 @sdiv_i32_neg(i32 %a) {
+; CHECK-LABEL: sdiv_i32_neg
+; CHECK: add [[REG1:w[0-9]+]], w0, #7
+; CHECK-NEXT: cmp w0, #0
+; CHECK-NEXT: csel [[REG2:w[0-9]+]], [[REG1]], w0, lt
+; CHECK-NEXT: neg {{w[0-9]+}}, [[REG2]], asr #3
+ %1 = sdiv i32 %a, -8
+ ret i32 %1
+}
+
+; Exact i64 division by 16: the expected output is a single arithmetic shift
+; right by 4 of the incoming argument, with no rounding fix-up.
+define i64 @sdiv_i64_exact(i64 %a) {
+; CHECK-LABEL: sdiv_i64_exact
+; CHECK: asr {{x[0-9]+}}, x0, #4
+ %1 = sdiv exact i64 %a, 16
+ ret i64 %1
+}
+
+; Non-exact i64 division by +16: the expected output adds 15 to the argument,
+; selects the biased value only when the argument is negative (cmp + csel lt),
+; and then shifts right arithmetically by 4.
+define i64 @sdiv_i64_pos(i64 %a) {
+; CHECK-LABEL: sdiv_i64_pos
+; CHECK: add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT: asr {{x[0-9]+}}, [[REG2]], #4
+ %1 = sdiv i64 %a, 16
+ ret i64 %1
+}
+
+; Non-exact i64 division by -16: same add/cmp/csel bias sequence as for +16,
+; but the final shift by 4 is folded into a negate of the selected value.
+define i64 @sdiv_i64_neg(i64 %a) {
+; CHECK-LABEL: sdiv_i64_neg
+; CHECK: add [[REG1:x[0-9]+]], x0, #15
+; CHECK-NEXT: cmp x0, #0
+; CHECK-NEXT: csel [[REG2:x[0-9]+]], [[REG1]], x0, lt
+; CHECK-NEXT: neg {{x[0-9]+}}, [[REG2]], asr #4
+ %1 = sdiv i64 %a, -16
+ ret i64 %1
+}
OpenPOWER on IntegriCloud