author | Craig Topper <craig.topper@gmail.com> | 2016-12-26 06:33:19 +0000
---|---|---
committer | Craig Topper <craig.topper@gmail.com> | 2016-12-26 06:33:19 +0000
commit | 7b788ada2d7213e4684d0771d8ab51a4a0ccb2e5 | (patch)
tree | 72434a00f5760fd44660a6eb23285250bed3aa58 | /llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
parent | c47e1aab1c46fdca71ed148e2db83f48ea9efeec | (diff)
[AVX-512][InstCombine] Teach InstCombine to turn scalar add/sub/mul/div with rounding intrinsics into normal IR operations if the rounding mode is CUR_DIRECTION.
Summary:
I only do this for unmasked cases for now because isel is failing to fold the mask. I'll try to fix that soon.
I'll do the same thing for packed add/sub/mul/div in a future patch.
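For context, this is the kind of unmasked source-level code affected here (a hedged illustration, not taken from the patch; the function name `add_lowest_lane` is made up, and the assumption is that `_mm_add_round_ss` with an implicit all-ones mask lowers to `llvm.x86.avx512.mask.add.ss.round`):

```cpp
// Illustrative only; requires AVX-512F (e.g. compile with -mavx512f).
#include <immintrin.h>

__m128 add_lowest_lane(__m128 a, __m128 b) {
  // Scalar add of lane 0 using the current rounding mode (CUR_DIRECTION = 4);
  // lanes 1-3 are passed through from `a`. With this rounding operand the
  // intrinsic call can be folded to a plain IR fadd on lane 0.
  return _mm_add_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
}
```

With any other rounding-mode constant the call is left alone, since plain IR fadd/fsub/fmul/fdiv cannot express a non-default rounding mode.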
Reviewers: delena, RKSimon, zvi, craig.topper
Subscribers: llvm-commits
Differential Revision: https://reviews.llvm.org/D27879
llvm-svn: 290535
Diffstat (limited to 'llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp')
-rw-r--r-- | llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 52 |
1 files changed, 49 insertions, 3 deletions
```diff
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index 31866630fa8..9b8f696da74 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1789,17 +1789,63 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
     break;
   }
 
-  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
   case Intrinsic::x86_avx512_mask_add_ss_round:
   case Intrinsic::x86_avx512_mask_div_ss_round:
   case Intrinsic::x86_avx512_mask_mul_ss_round:
   case Intrinsic::x86_avx512_mask_sub_ss_round:
-  case Intrinsic::x86_avx512_mask_max_ss_round:
-  case Intrinsic::x86_avx512_mask_min_ss_round:
   case Intrinsic::x86_avx512_mask_add_sd_round:
   case Intrinsic::x86_avx512_mask_div_sd_round:
   case Intrinsic::x86_avx512_mask_mul_sd_round:
   case Intrinsic::x86_avx512_mask_sub_sd_round:
+    // If the rounding mode is CUR_DIRECTION(4) we can turn these into regular
+    // IR operations.
+    if (auto *R = dyn_cast<ConstantInt>(II->getArgOperand(4))) {
+      if (R->getValue() == 4) {
+        // Only do this if the mask bit is 1 so that we don't need a select.
+        // TODO: Improve this to handle masking cases. Isel doesn't fold
+        // the mask correctly right now.
+        if (auto *M = dyn_cast<ConstantInt>(II->getArgOperand(3))) {
+          if (M->getValue()[0]) {
+            // Extract the element as scalars.
+            Value *Arg0 = II->getArgOperand(0);
+            Value *Arg1 = II->getArgOperand(1);
+            Value *LHS = Builder->CreateExtractElement(Arg0, (uint64_t)0);
+            Value *RHS = Builder->CreateExtractElement(Arg1, (uint64_t)0);
+
+            Value *V;
+            switch (II->getIntrinsicID()) {
+            default: llvm_unreachable("Case stmts out of sync!");
+            case Intrinsic::x86_avx512_mask_add_ss_round:
+            case Intrinsic::x86_avx512_mask_add_sd_round:
+              V = Builder->CreateFAdd(LHS, RHS);
+              break;
+            case Intrinsic::x86_avx512_mask_sub_ss_round:
+            case Intrinsic::x86_avx512_mask_sub_sd_round:
+              V = Builder->CreateFSub(LHS, RHS);
+              break;
+            case Intrinsic::x86_avx512_mask_mul_ss_round:
+            case Intrinsic::x86_avx512_mask_mul_sd_round:
+              V = Builder->CreateFMul(LHS, RHS);
+              break;
+            case Intrinsic::x86_avx512_mask_div_ss_round:
+            case Intrinsic::x86_avx512_mask_div_sd_round:
+              V = Builder->CreateFDiv(LHS, RHS);
+              break;
+            }
+
+            // Insert the result back into the original argument 0.
+            V = Builder->CreateInsertElement(Arg0, V, (uint64_t)0);
+
+            return replaceInstUsesWith(*II, V);
+          }
+        }
+      }
+    }
+    LLVM_FALLTHROUGH;
+
+  // X86 scalar intrinsics simplified with SimplifyDemandedVectorElts.
+  case Intrinsic::x86_avx512_mask_max_ss_round:
+  case Intrinsic::x86_avx512_mask_min_ss_round:
   case Intrinsic::x86_avx512_mask_max_sd_round:
   case Intrinsic::x86_avx512_mask_min_sd_round:
   case Intrinsic::x86_avx512_mask_vfmadd_ss:
```
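For reference, the replacement the new code builds amounts to the following standalone IRBuilder sketch (a minimal illustration, not part of the patch; the helper name `buildScalarAddRound` is hypothetical):

```cpp
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Hypothetical helper: given the two vector operands of an unmasked
// add_ss/add_sd round intrinsic whose rounding operand is CUR_DIRECTION,
// build the plain IR that replaces the call.
static Value *buildScalarAddRound(IRBuilder<> &Builder, Value *Arg0,
                                  Value *Arg1) {
  // Operate on lane 0 as scalars.
  Value *LHS = Builder.CreateExtractElement(Arg0, (uint64_t)0);
  Value *RHS = Builder.CreateExtractElement(Arg1, (uint64_t)0);
  Value *Sum = Builder.CreateFAdd(LHS, RHS);
  // Reinsert into operand 0 so the upper lanes pass through unchanged,
  // matching the semantics of the scalar *_ss/*_sd intrinsics.
  return Builder.CreateInsertElement(Arg0, Sum, (uint64_t)0);
}
```

The sub, mul, and div cases have the same shape, with CreateFSub, CreateFMul, or CreateFDiv in place of CreateFAdd.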