author | Craig Topper <craig.topper@gmail.com> | 2016-12-26 06:33:19 +0000
---|---|---
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-26 06:33:19 +0000
commit | 7b788ada2d7213e4684d0771d8ab51a4a0ccb2e5 | (patch)
tree | 72434a00f5760fd44660a6eb23285250bed3aa58 | /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
parent | c47e1aab1c46fdca71ed148e2db83f48ea9efeec | (diff)
[AVX-512][InstCombine] Teach InstCombine to turn scalar add/sub/mul/div with rounding intrinsics into normal IR operations if the rounding mode is CUR_DIRECTION.
Summary:
I only do this for unmasked cases for now because isel is failing to fold the mask. I'll try to fix that soon.
I'll do the same thing for packed add/sub/mul/div in a future patch.
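For context, this is the kind of unmasked source-level code affected here (a hedged illustration, not taken from the patch; the function name `add_lowest_lane` is made up, and the assumption is that `_mm_add_round_ss` with an implicit all-ones mask lowers to `llvm.x86.avx512.mask.add.ss.round`):

```cpp
// Illustrative only; requires AVX-512F (e.g. compile with -mavx512f).
#include <immintrin.h>

__m128 add_lowest_lane(__m128 a, __m128 b) {
  // Scalar add of lane 0 using the current rounding mode (CUR_DIRECTION = 4);
  // lanes 1-3 are passed through from `a`. With this rounding operand the
  // intrinsic call can be folded to a plain IR fadd on lane 0.
  return _mm_add_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
}
```

With any other rounding-mode constant the call is left alone, since plain IR fadd/fsub/fmul/fdiv cannot express a non-default rounding mode.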
Reviewers: delena, RKSimon, zvi, craig.topper
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D27879
llvm-svn: 290535
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 52 |
1 files changed, 49 insertions, 3 deletions
```diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 31866630fa8..9b8f696da74 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1789,17 +1789,63 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
-  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
   case Intrinsic::x86_avx512_mask_add_ss_round:
   case Intrinsic::x86_avx512_mask_div_ss_round:
   case Intrinsic::x86_avx512_mask_mul_ss_round:
   case Intrinsic::x86_avx512_mask_sub_ss_round:
-  case Intrinsic::x86_avx512_mask_max_ss_round:
-  case Intrinsic::x86_avx512_mask_min_ss_round:
   case Intrinsic::x86_avx512_mask_add_sd_round:
   case Intrinsic::x86_avx512_mask_div_sd_round:
   case Intrinsic::x86_avx512_mask_mul_sd_round:
   case Intrinsic::x86_avx512_mask_sub_sd_round:
+    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
+    // IR operations.
+    if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
+      if (R->getValue() == 4) {
+        // Only do this if the mask bit is 1 so that we don't need a select.
+        // TODO: Improve this to handle masking cases. Isel doesn't fold
+        // the mask correctly right now.
+        if (auto *M = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
+          if (M->getValue()[0]) {
+            // Extract the element as scalars.
+            Value *Arg0 = II->getArgOperand(0);
+            Value *Arg1 = II->getArgOperand(1);
+            Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
+            Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);
+
+            Value *V;
+            switch (II->getIntrinsicID()) {
+            default: llvm_unreachable("Case stmts out of sync!");
+            case Intrinsic::x86_avx512_mask_add_ss_round:
+            case Intrinsic::x86_avx512_mask_add_sd_round:
+              V = Builder->CreateFAdd(LHS, RHS);
+              break;
+            case Intrinsic::x86_avx512_mask_sub_ss_round:
+            case Intrinsic::x86_avx512_mask_sub_sd_round:
+              V = Builder->CreateFSub(LHS, RHS);
+              break;
+            case Intrinsic::x86_avx512_mask_mul_ss_round:
+            case Intrinsic::x86_avx512_mask_mul_sd_round:
+              V = Builder->CreateFMul(LHS, RHS);
+              break;
+            case Intrinsic::x86_avx512_mask_div_ss_round:
+            case Intrinsic::x86_avx512_mask_div_sd_round:
+              V = Builder->CreateFDiv(LHS, RHS);
+              break;
+            }
+
+            // Insert the result back into the original argument 0.
+            V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
+
+            return replaceInstUsesWith(*II, V);
+          }
+        }
+      }
+    }
+    LLVM_FALLTHROUGH;
+
+  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
+  case Intrinsic::x86_avx512_mask_max_ss_round:
+  case Intrinsic::x86_avx512_mask_min_ss_round:
   case Intrinsic::x86_avx512_mask_max_sd_round:
   case Intrinsic::x86_avx512_mask_min_sd_round:
   case Intrinsic::x86_avx512_mask_vfmadd_ss:
```
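For reference, the replacement the new code builds amounts to the following standalone IRBuilder sketch (a minimal illustration, not part of the patch; the helper name `buildScalarAddRound` is hypothetical):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical helper: given the two vector operands of an unmasked
// add_ss/add_sd round intrinsic whose rounding operand is CUR_DIRECTION,
// build the plain IR that replaces the call.
static Value *buildScalarAddRound(IRBuilder<> &Builder, Value *Arg0,
                                  Value *Arg1) {
  // Operate on lane 0 as scalars.
  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
  Value *Sum = Builder.CreateFAdd(LHS, RHS);
  // Reinsert into operand 0 so the upper lanes pass through unchanged,
  // matching the semantics of the scalar *_ss/*_sd intrinsics.
  return Builder.CreateInsertElement(Arg0, Sum, (uint64_t)0);
}
```

The sub, mul, and div cases have the same shape, with CreateFSub, CreateFMul, or CreateFDiv in place of CreateFAdd.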