diff options
| author | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-12-19 14:43:36 +0000 |
|---|---|---|
| committer | Simon Pilgrim <llvm-dev@redking.me.uk> | 2018-12-19 14:43:36 +0000 |
| commit | 7bfbf3caa499b4903a3fcc52b99f7c4f9b9ae62d (patch) | |
| tree | f827b33012cbd2c7a44f4dccdf1cb36d2d47852a /llvm/lib | |
| parent | 2ae3a9165664a079e7386166f1d541fa4032bb85 (diff) | |
| download | bcm5719-llvm-7bfbf3caa499b4903a3fcc52b99f7c4f9b9ae62d.tar.gz bcm5719-llvm-7bfbf3caa499b4903a3fcc52b99f7c4f9b9ae62d.zip | |
[X86][SSE] Auto upgrade PADDUS/PSUBUS intrinsics to UADD_SAT/USUB_SAT generic intrinsics (llvm)
Now that we use the generic ISD opcodes, we can use the generic intrinsics directly as well. This fixes the poor fast-isel codegen by not expanding to an easily broken IR code sequence.
I'm intending to deal with the signed saturation equivalents as well.
Clang counterpart: https://reviews.llvm.org/D55879
Differential Revision: https://reviews.llvm.org/D55855
llvm-svn: 349630
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/IR/AutoUpgrade.cpp | 21 |
1 files changed, 4 insertions, 17 deletions
```diff
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index 39e29a2a093..cea90bf9b06 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -926,26 +926,13 @@ static Value *UpgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
 
 static Value *UpgradeX86AddSubSatIntrinsics(IRBuilder<> &Builder, CallInst &CI,
                                             bool IsAddition) {
+  Type *Ty = CI.getType();
   Value *Op0 = CI.getOperand(0);
   Value *Op1 = CI.getOperand(1);
 
-  // Collect vector elements and type data.
-  Type *ResultType = CI.getType();
-
-  Value *Res;
-  if (IsAddition) {
-    // ADDUS: a > (a+b) ? ~0 : (a+b)
-    // If Op0 > Add, overflow occured.
-    Value *Add = Builder.CreateAdd(Op0, Op1);
-    Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Add);
-    Value *Max = llvm::Constant::getAllOnesValue(ResultType);
-    Res = Builder.CreateSelect(ICmp, Max, Add);
-  } else {
-    // SUBUS: max(a, b) - b
-    Value *ICmp = Builder.CreateICmp(ICmpInst::ICMP_UGT, Op0, Op1);
-    Value *Select = Builder.CreateSelect(ICmp, Op0, Op1);
-    Res = Builder.CreateSub(Select, Op1);
-  }
+  Intrinsic::ID IID = IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat;
+  Function *Intrin = Intrinsic::getDeclaration(CI.getModule(), IID, Ty);
+  Value *Res = Builder.CreateCall(Intrin, {Op0, Op1});
 
   if (CI.getNumArgOperands() == 4) { // For masked intrinsics.
     Value *VecSrc = CI.getOperand(2);
```

