diff options
author | Sam Parker <sam.parker@arm.com> | 2018-08-16 10:05:39 +0000 |
---|---|---|
committer | Sam Parker <sam.parker@arm.com> | 2018-08-16 10:05:39 +0000 |
commit | 13567dbbd8b1ebc4e5a570acaae4d22915080115 (patch) | |
tree | 050f977f794212d9783ccbe4ab981b4d8cf9bc58 | |
parent | fe50950dac3acc25ef86ba11f1fa4b6e922ebbd4 (diff) | |
download | bcm5719-llvm-13567dbbd8b1ebc4e5a570acaae4d22915080115.tar.gz bcm5719-llvm-13567dbbd8b1ebc4e5a570acaae4d22915080115.zip |
[ARM] Allow signed icmps in ARMCodeGenPrepare
Originally committed in r339755, which was reverted in r339806 due to
an ASan issue. The issue was caused by my assumption that the operands of
a CallInst map directly onto the FunctionType params. CallInsts are now
handled by iterating over their ArgOperands instead of their Operands.
Original Message:
Treat signed icmps as 'sinks', allowing them to be in the use-def
tree and enabling more promotions to be performed. Because a signed icmp
is a sink, any promoted incoming values need to be truncated before the
signed icmp uses them.
Differential Revision: https://reviews.llvm.org/D50067
llvm-svn: 339858
-rw-r--r-- | llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp | 75 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/arm-cgp-icmps.ll | 1 | ||||
-rw-r--r-- | llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll | 100 |
3 files changed, 145 insertions, 31 deletions
diff --git a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp index 99c89eb3239..164b255cd44 100644 --- a/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp +++ b/llvm/lib/Target/ARM/ARMCodeGenPrepare.cpp @@ -181,6 +181,8 @@ static bool isSink(Value *V) { return UsesNarrowValue(Return->getReturnValue()); if (auto *Trunc = dyn_cast<TruncInst>(V)) return UsesNarrowValue(Trunc->getOperand(0)); + if (auto *ICmp = dyn_cast<ICmpInst>(V)) + return ICmp->isSigned(); return isa<CallInst>(V); } @@ -294,6 +296,11 @@ void IRPromoter::Mutate(Type *OrigTy, LLVM_DEBUG(dbgs() << "ARM CGP: Promoting use-def chains to from " << ARMCodeGenPrepare::TypeSize << " to 32-bits\n"); + // Cache original types. + DenseMap<Value*, Type*> TruncTysMap; + for (auto *V : Visited) + TruncTysMap[V] = V->getType(); + auto ReplaceAllUsersOfWith = [&](Value *From, Value *To) { SmallVector<Instruction*, 4> Users; Instruction *InstTo = dyn_cast<Instruction>(To); @@ -337,6 +344,7 @@ void IRPromoter::Mutate(Type *OrigTy, ReplaceAllUsersOfWith(I, Call); InstsToRemove.push_back(I); NewInsts.insert(Call); + TruncTysMap[Call] = OrigTy; }; auto InsertZExt = [&](Value *V, Instruction *InsertPt) { @@ -351,6 +359,7 @@ void IRPromoter::Mutate(Type *OrigTy, ZExt->moveAfter(InsertPt); ReplaceAllUsersOfWith(V, ZExt); NewInsts.insert(ZExt); + TruncTysMap[ZExt] = TruncTysMap[V]; }; // First, insert extending instructions between the leaves and their users. 
@@ -409,42 +418,48 @@ void IRPromoter::Mutate(Type *OrigTy, InsertDSPIntrinsic(cast<Instruction>(V)); } + auto InsertTrunc = [&](Value *V) -> Instruction* { + if (!isa<Instruction>(V) || !isa<IntegerType>(V->getType())) + return nullptr; + + if ((!Promoted.count(V) && !NewInsts.count(V)) || !TruncTysMap.count(V)) + return nullptr; + + Type *TruncTy = TruncTysMap[V]; + if (TruncTy == ExtTy) + return nullptr; + + LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy << " Trunc for " + << *V << "\n"); + Builder.SetInsertPoint(cast<Instruction>(V)); + auto *Trunc = cast<Instruction>(Builder.CreateTrunc(V, TruncTy)); + NewInsts.insert(Trunc); + return Trunc; + }; + LLVM_DEBUG(dbgs() << "ARM CGP: Fixing up the roots:\n"); // Fix up any stores or returns that use the results of the promoted // chain. for (auto I : Roots) { LLVM_DEBUG(dbgs() << " - " << *I << "\n"); - Type *TruncTy = OrigTy; - if (auto *Store = dyn_cast<StoreInst>(I)) { - auto *PtrTy = cast<PointerType>(Store->getPointerOperandType()); - TruncTy = PtrTy->getElementType(); - } else if (isa<ReturnInst>(I)) { - Function *F = I->getParent()->getParent(); - TruncTy = F->getFunctionType()->getReturnType(); + + // Handle calls separately as we need to iterate over arg operands. + if (auto *Call = dyn_cast<CallInst>(I)) { + for (unsigned i = 0; i < Call->getNumArgOperands(); ++i) { + Value *Arg = Call->getArgOperand(i); + if (Instruction *Trunc = InsertTrunc(Arg)) { + Trunc->moveBefore(Call); + Call->setArgOperand(i, Trunc); + } + } + continue; } + // Now handle the others. 
for (unsigned i = 0; i < I->getNumOperands(); ++i) { - Value *V = I->getOperand(i); - if (!isa<IntegerType>(V->getType())) - continue; - - if (Promoted.count(V) || NewInsts.count(V)) { - if (auto *Op = dyn_cast<Instruction>(V)) { - - if (auto *Call = dyn_cast<CallInst>(I)) - TruncTy = Call->getFunctionType()->getParamType(i); - - if (TruncTy == ExtTy) - continue; - - LLVM_DEBUG(dbgs() << "ARM CGP: Creating " << *TruncTy - << " Trunc for " << *Op << "\n"); - Builder.SetInsertPoint(Op); - auto *Trunc = cast<Instruction>(Builder.CreateTrunc(Op, TruncTy)); - Trunc->moveBefore(I); - I->setOperand(i, Trunc); - NewInsts.insert(Trunc); - } + if (Instruction *Trunc = InsertTrunc(I->getOperand(i))) { + Trunc->moveBefore(I); + I->setOperand(i, Trunc); } } } @@ -458,8 +473,8 @@ void IRPromoter::Mutate(Type *OrigTy, bool ARMCodeGenPrepare::isSupportedValue(Value *V) { LLVM_DEBUG(dbgs() << "ARM CGP: Is " << *V << " supported?\n"); - if (auto *ICmp = dyn_cast<ICmpInst>(V)) - return ICmp->isEquality() || !ICmp->isSigned(); + if (isa<ICmpInst>(V)) + return true; // Memory instructions if (isa<StoreInst>(V) || isa<GetElementPtrInst>(V)) diff --git a/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll b/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll index d3a23bdee61..a24cdab559f 100644 --- a/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll +++ b/llvm/test/CodeGen/ARM/arm-cgp-icmps.ll @@ -269,7 +269,6 @@ entry: ; CHECK-COMMON-LABEL: icmp_i7 ; CHECK-COMMON: ldrb -; CHECK-COMMON: and ; CHECK-COMMON: cmp define i32 @icmp_i7(i7* %arg0, i7 zeroext %arg1, i32 %a, i32 %b) { entry: diff --git a/llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll b/llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll new file mode 100644 index 00000000000..d603511ece3 --- /dev/null +++ b/llvm/test/CodeGen/ARM/arm-cgp-signed-icmps.ll @@ -0,0 +1,100 @@ +; RUN: llc -mtriple=thumbv8.main -mcpu=cortex-m33 -arm-disable-cgp=false -mattr=-use-misched %s -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-NODSP +; RUN: llc -mtriple=thumbv7em %s 
-arm-disable-cgp=false -arm-enable-scalar-dsp=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP +; RUN: llc -mtriple=thumbv8 %s -arm-disable-cgp=false -arm-enable-scalar-dsp=true -arm-enable-scalar-dsp-imms=true -o - | FileCheck %s --check-prefix=CHECK-COMMON --check-prefix=CHECK-DSP-IMM + +; CHECK-COMMON-LABEL: eq_sgt +; CHECK-NODSP: add +; CHECK-NODSP: uxtb +; CHECK-NODSP: sxtb +; CHECK-NODSP: cmp +; CHECK-NODSP: sub +; CHECK-NODSP: sxtb +; CHECK-NODSP: cmp + +; CHECK-DSP: add +; CHECK-DSP: uxtb +; CHECK-DSP: cmp +; CHECK-DSP: sxtb +; CHECK-DSP: sub +; CHECK-DSP: sxtb +; CHECK-DSP: cmp + +; CHECK-DSP-IMM: uadd8 [[ADD:r[0-9]+]], +; CHECK-DSP-IMM: cmp [[ADD]], +; CHECK-DSP-IMM: sxtb [[SEXT0:r[0-9]+]], [[ADD]] +; CHECK-DSP-IMM: usub8 [[SUB:r[0-9]+]], +; CHECK-DSP-IMM: sxtb [[SEXT1:r[0-9]+]], [[SUB]] +; CHECK-DSP-IMM: cmp [[SEXT1]], [[SEXT0]] +define i8 @eq_sgt(i8* %x, i8 *%y, i8 zeroext %z) { +entry: + %load0 = load i8, i8* %x, align 1 + %load1 = load i8, i8* %y, align 1 + %add = add i8 %load0, %z + %sub = sub i8 %load1, 1 + %cmp = icmp eq i8 %add, 200 + %cmp1 = icmp sgt i8 %sub, %add + %res0 = select i1 %cmp, i8 35, i8 47 + %res1 = select i1 %cmp1, i8 %res0, i8 %sub + ret i8 %res1 +} + +; CHECK-COMMON-LABEL: ugt_slt +; CHECK-NODSP: sub +; CHECK-NODSP: sxth +; CHECK-NODSP: uxth +; CHECK-NODSP: add +; CHECK-NODSP: sxth +; CHECK-NODSP: cmp +; CHECK-NODSP: cmp + +; CHECK-DSP: sxth [[ARG:r[0-9]+]], r2 +; CHECK-DSP: subs [[SUB:r[0-9]+]], +; CHECK-DSP: uadd16 [[ADD:r[0-9]+]], +; CHECK-DSP: sxth.w [[SEXT:r[0-9]+]], [[ADD]] +; CHECK-DSP: cmp [[SEXT]], [[ARG]] +; CHECK-DSP-NOT: uxt +; CHECK-DSP: cmp [[SUB]], r2 +define i16 @ugt_slt(i16 *%x, i16 zeroext %y, i16 zeroext %z) { +entry: + %load0 = load i16, i16* %x, align 1 + %add = add i16 %load0, %z + %sub = sub i16 %y, 1 + %cmp = icmp slt i16 %add, %z + %cmp1 = icmp ugt i16 %sub, %z + %res0 = select i1 %cmp, i16 35, i16 -1 + %res1 = select i1 %cmp1, i16 %res0, i16 0 + ret i16 %res1 +} + +; 
CHECK-COMMON-LABEL: urem_trunc_icmps +; CHECK-COMMON-NOT: uxt +; CHECK-COMMON: sxtb [[SEXT:r[0-9]+]], +; CHECK-COMMON: cmp [[SEXT]], #7 +define void @urem_trunc_icmps(i16** %in, i32* %g, i32* %k) { +entry: + %ptr = load i16*, i16** %in, align 4 + %ld = load i16, i16* %ptr, align 2 + %cmp.i = icmp eq i16 %ld, 0 + br i1 %cmp.i, label %exit, label %cond.false.i + +cond.false.i: + %rem = urem i16 5, %ld + %extract.t = trunc i16 %rem to i8 + br label %body + +body: + %cond.in.i.off0 = phi i8 [ %extract.t, %cond.false.i ], [ %add, %for.inc ] + %cmp = icmp sgt i8 %cond.in.i.off0, 7 + %conv5 = zext i1 %cmp to i32 + store i32 %conv5, i32* %g, align 4 + %.pr = load i32, i32* %k, align 4 + %tobool13150 = icmp eq i32 %.pr, 0 + br i1 %tobool13150, label %for.inc, label %exit + +for.inc: + %add = add nuw i8 %cond.in.i.off0, 1 + br label %body + +exit: + ret void +} |