Don't leave unused divs/rems sitting around in BypassSlowDivision.

Summary:
This "pass" eagerly creates div and rem instructions even when only one is needed -- it relies on a later pass (machine DCE?) to clean them up.

This is problematic not just from a cleanliness perspective (this pass is running during CodeGenPrepare, so it should leave the IR in a better state), but it also creates a problem for instruction selection. If we always have a div+rem, isel will always select a divrem instruction (if possible), even when a single div or rem would do.

Specifically, in NVPTX, we want to compute rem from the output of div, if available. But if a div is not available, we want to leave the rem alone. This transformation is overeager if div is always available.

Because this code runs as part of CodeGenPrepare, it's nontrivial to write a test for this change. But this will effectively be tested by a later patch which adds the aforementioned change to NVPTX isel.

Reviewers: tra

Subscribers: llvm-commits

Differential Revision: https://reviews.llvm.org/D26088

llvm-svn: 285460
author: Justin Lebar <jlebar@google.com> 2016-10-28 21:43:54 +0000
committer: Justin Lebar <jlebar@google.com> 2016-10-28 21:43:54 +0000
commit: 0ede5fb1bbdd2b19f1209d8e164633d56bbc2d6a (patch)
tree: 8e56519c9932f1b3751f256d4ce931f9617d6806
parent: 468bf732096c813a76fbad0ab204b0cf7269f82a (diff)
download: bcm5719-llvm-0ede5fb1bbdd2b19f1209d8e164633d56bbc2d6a.tar.gz
bcm5719-llvm-0ede5fb1bbdd2b19f1209d8e164633d56bbc2d6a.zip
2 files changed, 37 insertions, 0 deletions
diff --git a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
index 41a854362c9..0e2a4653353 100644
--- a/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
+++ b/llvm/lib/Transforms/Utils/BypassSlowDivision.cpp
@@ -20,6 +20,7 @@
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
+#include "llvm/Transforms/Utils/Local.h"
 
 using namespace llvm;
 
@@ -246,5 +247,12 @@ bool llvm::bypassSlowDivision(
     MadeChange |= reuseOrInsertFastDiv(I, BT, UseDivOp, UseSignedOp, DivCache);
   }
 
+  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
+  // create divrem machine instructions.  Now erase any unused divs / rems so we
+  // don't leave extra instructions sitting around.
+  for (auto &KV : DivCache)
+    for (Instruction *Phi : {KV.second.Quotient, KV.second.Remainder})
+      RecursivelyDeleteTriviallyDeadInstructions(Phi);
+
   return MadeChange;
 }
diff --git a/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll
new file mode 100644
index 00000000000..4846d52f4d2
--- /dev/null
+++ b/llvm/test/Transforms/CodeGenPrepare/NVPTX/bypass-slow-div.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target triple = "nvptx64-nvidia-cuda"
+
+; We only use the div instruction -- the rem should be DCE'ed.
+; CHECK-LABEL: @div_only
+define void @div_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK: udiv i32
+  ; CHECK-NOT: urem
+  ; CHECK: sdiv i64
+  ; CHECK-NOT: rem
+  %d = sdiv i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}
+
+; We only use the rem instruction -- the div should be DCE'ed.
+; CHECK-LABEL: @rem_only
+define void @rem_only(i64 %a, i64 %b, i64* %retptr) {
+  ; CHECK-NOT: div
+  ; CHECK: urem i32
+  ; CHECK-NOT: div
+  ; CHECK: rem i64
+  ; CHECK-NOT: div
+  %d = srem i64 %a, %b
+  store i64 %d, i64* %retptr
+  ret void
+}
author	Justin Lebar <jlebar@google.com>	2016-10-28 21:43:54 +0000
committer	Justin Lebar <jlebar@google.com>	2016-10-28 21:43:54 +0000
commit	0ede5fb1bbdd2b19f1209d8e164633d56bbc2d6a (patch)
tree	8e56519c9932f1b3751f256d4ce931f9617d6806
parent	468bf732096c813a76fbad0ab204b0cf7269f82a (diff)
download	bcm5719-llvm-0ede5fb1bbdd2b19f1209d8e164633d56bbc2d6a.tar.gz bcm5719-llvm-0ede5fb1bbdd2b19f1209d8e164633d56bbc2d6a.zip