summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Sam Parker <sam.parker@arm.com> 2018-08-15 08:23:03 +0000
committer: Sam Parker <sam.parker@arm.com> 2018-08-15 08:23:03 +0000
commit: 6548cd3905ff88f0ad861ae9373c81992ba4b9da (patch)
tree: 0ff7cb1008c32fcedce5773c10ef2f94720d64c5
parent: 7def86bbdbbced744629030e980d29249f6bb96b (diff)
download: bcm5719-llvm-6548cd3905ff88f0ad861ae9373c81992ba4b9da.tar.gz
download: bcm5719-llvm-6548cd3905ff88f0ad861ae9373c81992ba4b9da.zip
[ARM] Allow signed icmps in ARMCodeGenPrepare
Treat signed icmps as 'sinks', allowing them to be in the use-def tree, enabling more promotions to be performed. As a sink, any promoted incoming values need to be truncated before being used by the signed icmp.

Differential Revision: https://reviews.llvm.org/D50067

llvm-svn: 339755
-rw-r--r-- llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp 66
-rw-r--r-- llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll 100
2 files changed, 144 insertions, 22 deletions
diff --git a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
index 69ecc337820..3151621ec10 100644
--- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
+++ b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp
@@ -181,6 +181,8 @@ static bool isSink(Value *V) {
return UsesNarrowValue(Return->getReturnValue());
if (auto *Trunc = dyn_cast<TruncInst>(V))
return UsesNarrowValue(Trunc->getOperand(0));
+ if (auto *ICmp = dyn_cast<ICmpInst>(V))
+ return ICmp->isSigned();
return isa<CallInst>(V);
}
@@ -294,6 +296,11 @@ void IRPromoter::Mutate(Type *OrigTy,
LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from "
<< ARMCodeGenPrepare::TypeSize << " to 32-bits\n");
+ // Cache original types.
+ DenseMap<Value*, Type*> TruncTysMap;
+ for (auto *V : Visited)
+ TruncTysMap[V] = V->getType();
+
auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) {
SmallVector<Instruction*, 4> Users;
Instruction *InstTo = dyn_cast<Instruction>(To);
@@ -337,6 +344,7 @@ void IRPromoter::Mutate(Type *OrigTy,
ReplaceAllUsersOfWith(I, Call);
InstsToRemove.push_back(I);
NewInsts.insert(Call);
+ TruncTysMap[Call] = OrigTy;
};
auto InsertZExt = [&](Value *V, Instruction *InsertPt) {
@@ -351,6 +359,7 @@ void IRPromoter::Mutate(Type *OrigTy,
ZExt->moveAfter(InsertPt);
ReplaceAllUsersOfWith(V, ZExt);
NewInsts.insert(ZExt);
+ TruncTysMap[ZExt] = TruncTysMap[V];
};
// First, insert extending instructions between the leaves and their users.
@@ -409,6 +418,22 @@ void IRPromoter::Mutate(Type *OrigTy,
InsertDSPIntrinsic(cast<Instruction>(V));
}
+ auto InsertTrunc = [&](Value *V, Type *TruncTy) -> Instruction* {
+ if (TruncTy == ExtTy || !isa<Instruction>(V) ||
+ !isa<IntegerType>(V->getType()))
+ return nullptr;
+
+ if (!Promoted.count(V) && !NewInsts.count(V))
+ return nullptr;
+
+ LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for "
+ << *V << "\n");
+ Builder.SetInsertPoint(cast<Instruction>(V));
+ auto *Trunc = cast<Instruction>(Builder.CreateTrunc(V, TruncTy));
+ NewInsts.insert(Trunc);
+ return Trunc;
+ };
+
LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n");
// Fix up any stores or returns that use the results of the promoted
// chain.
@@ -423,28 +448,25 @@ void IRPromoter::Mutate(Type *OrigTy,
TruncTy = F->getFunctionType()->getReturnType();
}
- for (unsigned i = 0; i < I->getNumOperands(); ++i) {
- Value *V = I->getOperand(i);
- if (!isa<IntegerType>(V->getType()))
- continue;
-
- if (Promoted.count(V) || NewInsts.count(V)) {
- if (auto *Op = dyn_cast<Instruction>(V)) {
-
- if (auto *Call = dyn_cast<CallInst>(I))
- TruncTy = Call->getFunctionType()->getParamType(i);
+ // These will only have one operand to fix.
+ if (isa<StoreInst>(I) || isa<ReturnInst>(I) || isa<TruncInst>(I)) {
+ if (Instruction *Trunc = InsertTrunc(I->getOperand(0), TruncTy)) {
+ Trunc->moveBefore(I);
+ I->setOperand(0, Trunc);
+ }
+ continue;
+ }
- if (TruncTy == ExtTy)
- continue;
+ // Now handle calls and signed icmps.
+ for (unsigned i = 0; i < I->getNumOperands(); ++i) {
+ if (auto *Call = dyn_cast<CallInst>(I))
+ TruncTy = Call->getFunctionType()->getParamType(i);
+ else
+ TruncTy = TruncTysMap[I->getOperand(i)];
- LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy
- << " Trunc for " << *Op << "\n");
- Builder.SetInsertPoint(Op);
- auto *Trunc = cast<Instruction>(Builder.CreateTrunc(Op, TruncTy));
- Trunc->moveBefore(I);
- I->setOperand(i, Trunc);
- NewInsts.insert(Trunc);
- }
+ if (Instruction *Trunc = InsertTrunc(I->getOperand(i), TruncTy)) {
+ Trunc->moveBefore(I);
+ I->setOperand(i, Trunc);
}
}
}
@@ -458,8 +480,8 @@ void IRPromoter::Mutate(Type *OrigTy,
bool ARMCodeGenPrepare::isSupportedValue(Value *V) {
LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n");
- if (auto *ICmp = dyn_cast<ICmpInst>(V))
- return ICmp->isEquality() || !ICmp->isSigned();
+ if (isa<ICmpInst>(V))
+ return true;
// Memory instructions
if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V))
diff --git a/llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll b/llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll
new file mode 100644
index 00000000000..d603511ece3
--- /dev/null
+++ b/llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll
@@ -0,0 +1,100 @@
+; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP
+; RUN: llc -mtriple=thumbv7em %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP
+; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM
+
+; CHECK-COMMON-LABEL: eq_sgt
+; CHECK-NODSP: add
+; CHECK-NODSP: uxtb
+; CHECK-NODSP: sxtb
+; CHECK-NODSP: cmp
+; CHECK-NODSP: sub
+; CHECK-NODSP: sxtb
+; CHECK-NODSP: cmp
+
+; CHECK-DSP: add
+; CHECK-DSP: uxtb
+; CHECK-DSP: cmp
+; CHECK-DSP: sxtb
+; CHECK-DSP: sub
+; CHECK-DSP: sxtb
+; CHECK-DSP: cmp
+
+; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]],
+; CHECK-DSP-IMM: cmp [[ADD]],
+; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]]
+; CHECK-DSP-IMM: usub8 [[SUB:r[0-9]+]],
+; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]]
+; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]]
+define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) {
+entry:
+ %load0 = load i8, i8* %x, align 1
+ %load1 = load i8, i8* %y, align 1
+ %add = add i8 %load0, %z
+ %sub = sub i8 %load1, 1
+ %cmp = icmp eq i8 %add, 200
+ %cmp1 = icmp sgt i8 %sub, %add
+ %res0 = select i1 %cmp, i8 35, i8 47
+ %res1 = select i1 %cmp1, i8 %res0, i8 %sub
+ ret i8 %res1
+}
+
+; CHECK-COMMON-LABEL: ugt_slt
+; CHECK-NODSP: sub
+; CHECK-NODSP: sxth
+; CHECK-NODSP: uxth
+; CHECK-NODSP: add
+; CHECK-NODSP: sxth
+; CHECK-NODSP: cmp
+; CHECK-NODSP: cmp
+
+; CHECK-DSP: sxth [[ARG:r[0-9]+]], r2
+; CHECK-DSP: subs [[SUB:r[0-9]+]],
+; CHECK-DSP: uadd16 [[ADD:r[0-9]+]],
+; CHECK-DSP: sxth.w [[SEXT:r[0-9]+]], [[ADD]]
+; CHECK-DSP: cmp [[SEXT]], [[ARG]]
+; CHECK-DSP-NOT: uxt
+; CHECK-DSP: cmp [[SUB]], r2
+define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) {
+entry:
+ %load0 = load i16, i16* %x, align 1
+ %add = add i16 %load0, %z
+ %sub = sub i16 %y, 1
+ %cmp = icmp slt i16 %add, %z
+ %cmp1 = icmp ugt i16 %sub, %z
+ %res0 = select i1 %cmp, i16 35, i16 -1
+ %res1 = select i1 %cmp1, i16 %res0, i16 0
+ ret i16 %res1
+}
+
+; CHECK-COMMON-LABEL: urem_trunc_icmps
+; CHECK-COMMON-NOT: uxt
+; CHECK-COMMON: sxtb [[SEXT:r[0-9]+]],
+; CHECK-COMMON: cmp [[SEXT]], #7
+define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) {
+entry:
+ %ptr = load i16*, i16** %in, align 4
+ %ld = load i16, i16* %ptr, align 2
+ %cmp.i = icmp eq i16 %ld, 0
+ br i1 %cmp.i, label %exit, label %cond.false.i
+
+cond.false.i:
+ %rem = urem i16 5, %ld
+ %extract.t = trunc i16 %rem to i8
+ br label %body
+
+body:
+ %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ]
+ %cmp = icmp sgt i8 %cond.in.i.off0, 7
+ %conv5 = zext i1 %cmp to i32
+ store i32 %conv5, i32* %g, align 4
+ %.pr = load i32, i32* %k, align 4
+ %tobool13150 = icmp eq i32 %.pr, 0
+ br i1 %tobool13150, label %for.inc, label %exit
+
+for.inc:
+ %add = add nuw i8 %cond.in.i.off0, 1
+ br label %body
+
+exit:
+ ret void
+}
OpenPOWER on IntegriCloud