summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@gmail.com>2016-12-26 06:33:19 +0000
committerCraig Topper <craig.topper@gmail.com>2016-12-26 06:33:19 +0000
commit7b788ada2d7213e4684d0771d8ab51a4a0ccb2e5 (patch)
tree72434a00f5760fd44660a6eb23285250bed3aa58 /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
parentc47e1aab1c46fdca71ed148e2db83f48ea9efeec (diff)
downloadbcm5719-llvm-7b788ada2d7213e4684d0771d8ab51a4a0ccb2e5.tar.gz
bcm5719-llvm-7b788ada2d7213e4684d0771d8ab51a4a0ccb2e5.zip
[AVX-512][InstCombine] Teach InstCombine to turn scalar add/sub/mul/div with rounding intrinsics into normal IR operations if the rounding mode is CUR_DIRECTION.
Summary: I only do this for unmasked cases for now because isel is failing to fold the mask. I'll try to fix that soon. I'll do the same thing for packed add/sub/mul/div in a future patch.

Reviewers: delena, RKSimon, zvi, craig.topper

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D27879

llvm-svn: 290535
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r--llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp52
1 file changed, 49 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 31866630fa8..9b8f696da74 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1789,17 +1789,63 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
break;
}
- // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
case Intrinsic::x86_avx512_mask_add_ss_round:
case Intrinsic::x86_avx512_mask_div_ss_round:
case Intrinsic::x86_avx512_mask_mul_ss_round:
case Intrinsic::x86_avx512_mask_sub_ss_round:
- case Intrinsic::x86_avx512_mask_max_ss_round:
- case Intrinsic::x86_avx512_mask_min_ss_round:
case Intrinsic::x86_avx512_mask_add_sd_round:
case Intrinsic::x86_avx512_mask_div_sd_round:
case Intrinsic::x86_avx512_mask_mul_sd_round:
case Intrinsic::x86_avx512_mask_sub_sd_round:
+ // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
+ // IR operations.
+ if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
+ if (R->getValue() == 4) {
+ // Only do this if the mask bit is 1 so that we don't need a select.
+ // TODO: Improve this to handle masking cases. Isel doesn't fold
+ // the mask correctly right now.
+ if (auto *M = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
+ if (M->getValue()[0]) {
+ // Extract the element as scalars.
+ Value *Arg0 = II->getArgOperand(0);
+ Value *Arg1 = II->getArgOperand(1);
+ Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
+ Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);
+
+ Value *V;
+ switch (II->getIntrinsicID()) {
+ default: llvm_unreachable("Case stmts out of sync!");
+ case Intrinsic::x86_avx512_mask_add_ss_round:
+ case Intrinsic::x86_avx512_mask_add_sd_round:
+ V = Builder->CreateFAdd(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_sub_ss_round:
+ case Intrinsic::x86_avx512_mask_sub_sd_round:
+ V = Builder->CreateFSub(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_mul_ss_round:
+ case Intrinsic::x86_avx512_mask_mul_sd_round:
+ V = Builder->CreateFMul(LHS, RHS);
+ break;
+ case Intrinsic::x86_avx512_mask_div_ss_round:
+ case Intrinsic::x86_avx512_mask_div_sd_round:
+ V = Builder->CreateFDiv(LHS, RHS);
+ break;
+ }
+
+ // Insert the result back into the original argument 0.
+ V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
+
+ return replaceInstUsesWith(*II, V);
+ }
+ }
+ }
+ }
+ LLVM_FALLTHROUGH;
+
+ // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
+ case Intrinsic::x86_avx512_mask_max_ss_round:
+ case Intrinsic::x86_avx512_mask_min_ss_round:
case Intrinsic::x86_avx512_mask_max_sd_round:
case Intrinsic::x86_avx512_mask_min_sd_round:
case Intrinsic::x86_avx512_mask_vfmadd_ss:
OpenPOWER on IntegriCloud