5 files changed, 71 insertions, 33 deletions
diff --git a/llvm/lib/Target/PowerPC/PPC.h b/llvm/lib/Target/PowerPC/PPC.h
index 38ae62b2675..23077637388 100644
--- a/llvm/lib/Target/PowerPC/PPC.h
+++ b/llvm/lib/Target/PowerPC/PPC.h
@@ -44,7 +44,7 @@ namespace llvm {
   FunctionPass *createPPCQPXLoadSplatPass();
   FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
   FunctionPass *createPPCTLSDynamicCallPass();
-  FunctionPass *createPPCBoolRetToIntPass();
+  FunctionPass *createPPCBoolRetToIntPass(PPCTargetMachine *TM);
   FunctionPass *createPPCExpandISELPass();
   void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                     AsmPrinter &AP, bool isDarwin);
diff --git a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
index 93c201d0386..aa78178e91e 100644
--- a/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
+++ b/llvm/lib/Target/PowerPC/PPCBoolRetToInt.cpp
@@ -7,15 +7,15 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// This file implements converting i1 values to i32 if they could be more
+// This file implements converting i1 values to i32/i64 if they could be more
 // profitably allocated as GPRs rather than CRs. This pass will become totally
 // unnecessary if Register Bank Allocation and Global Instruction Selection ever
 // go upstream.
 //
-// Presently, the pass converts i1 Constants, and Arguments to i32 if the
+// Presently, the pass converts i1 Constants, and Arguments to i32/i64 if the
 // transitive closure of their uses includes only PHINodes, CallInsts, and
 // ReturnInsts. The rational is that arguments are generally passed and returned
-// in GPRs rather than CRs, so casting them to i32 at the LLVM IR level will
+// in GPRs rather than CRs, so casting them to i32/i64 at the LLVM IR level will
 // actually save casts at the Machine Instruction level.
 //
 // It might be useful to expand this pass to add bit-wise operations to the list
@@ -33,6 +33,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "PPC.h"
+#include "PPCTargetMachine.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
@@ -87,17 +88,19 @@ class PPCBoolRetToInt : public FunctionPass {
     return Defs;
   }
 
-  // Translate a i1 value to an equivalent i32 value:
-  static Value *translate(Value *V) {
-    Type *Int32Ty = Type::getInt32Ty(V->getContext());
+  // Translate a i1 value to an equivalent i32/i64 value:
+  Value *translate(Value *V) {
+    Type *IntTy = ST->isPPC64() ? Type::getInt64Ty(V->getContext())
+                                : Type::getInt32Ty(V->getContext());
+
     if (auto *C = dyn_cast<Constant>(V))
-      return ConstantExpr::getZExt(C, Int32Ty);
+      return ConstantExpr::getZExt(C, IntTy);
     if (auto *P = dyn_cast<PHINode>(V)) {
       // Temporarily set the operands to 0. We'll fix this later in
       // runOnUse.
-      Value *Zero = Constant::getNullValue(Int32Ty);
+      Value *Zero = Constant::getNullValue(IntTy);
       PHINode *Q =
-        PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P);
+        PHINode::Create(IntTy, P->getNumIncomingValues(), P->getName(), P);
       for (unsigned i = 0; i < P->getNumOperands(); ++i)
         Q->addIncoming(Zero, P->getIncomingBlock(i));
       return Q;
@@ -109,7 +112,7 @@ class PPCBoolRetToInt : public FunctionPass {
 
     auto InstPt =
       A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode();
-    return new ZExtInst(V, Int32Ty, "", InstPt);
+    return new ZExtInst(V, IntTy, "", InstPt);
   }
 
   typedef SmallPtrSet<const PHINode *, 8> PHINodeSet;
@@ -177,7 +180,11 @@ class PPCBoolRetToInt : public FunctionPass {
  public:
   static char ID;
 
-  PPCBoolRetToInt() : FunctionPass(ID) {
+  PPCBoolRetToInt() : FunctionPass(ID), TM(nullptr) {
+    initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
+  }
+
+  PPCBoolRetToInt(TargetMachine *&TM) : FunctionPass(ID), TM(TM) {
     initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry());
   }
 
@@ -185,6 +192,10 @@ class PPCBoolRetToInt : public FunctionPass {
     if (skipFunction(F))
       return false;
 
+    if (!TM)
+      return false;
+    ST = ((PPCTargetMachine*)TM)->getSubtargetImpl(F);
+
     PHINodeSet PromotablePHINodes = getPromotablePHINodes(F);
     B2IMap Bool2IntMap;
     bool Changed = false;
@@ -205,7 +216,7 @@ class PPCBoolRetToInt : public FunctionPass {
     return Changed;
   }
 
-  static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
+  bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes,
                        B2IMap &BoolToIntMap) {
     auto Defs = findAllDefs(U);
 
@@ -262,13 +273,20 @@ class PPCBoolRetToInt : public FunctionPass {
     AU.addPreserved<DominatorTreeWrapperPass>();
     FunctionPass::getAnalysisUsage(AU);
   }
+
+private:
+  const PPCSubtarget *ST;
+  TargetMachine *TM;
 };
 
 } // end anonymous namespace
 
 char PPCBoolRetToInt::ID = 0;
-INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int",
-                "Convert i1 constants to i32 if they are returned",
-                false, false)
-
-FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); }
+INITIALIZE_TM_PASS(PPCBoolRetToInt, "bool-ret-to-int",
+                   "Convert i1 constants to i32/i64 if they are returned",
+                   false, false)
+
+FunctionPass *llvm::createPPCBoolRetToIntPass(PPCTargetMachine *TM) {
+  TargetMachine *pTM = TM;
+  return new PPCBoolRetToInt(pTM);
+}
diff --git a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
index 5d68f32ccc5..298f8ec0bbf 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -323,7 +323,7 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
 
 void PPCPassConfig::addIRPasses() {
   if (TM->getOptLevel() != CodeGenOpt::None)
-    addPass(createPPCBoolRetToIntPass());
+    addPass(createPPCBoolRetToIntPass(&getPPCTargetMachine()));
   addPass(createAtomicExpandPass(&getPPCTargetMachine()));
 
   // For the BG/Q (or if explicitly requested), add explicit data prefetch
diff --git a/llvm/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll b/llvm/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
new file mode 100644
index 00000000000..93b1c40217c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/BoolRetToIntTest-2.ll
@@ -0,0 +1,20 @@
+; RUN: llc -mtriple=powerpc64le-linux-gnu -mcpu=pwr8 < %s | FileCheck %s
+
+; https://bugs.llvm.org/show_bug.cgi?id=32442
+; Don't generate zero extension for the return value.
+; CHECK-NOT: clrldi
+
+define zeroext i1 @foo(i32 signext %i, i32* %p) {
+entry:
+  %cmp = icmp eq i32 %i, 0
+  br i1 %cmp, label %return, label %if.end
+
+if.end:
+  store i32 %i, i32* %p, align 4
+  br label %return
+
+return:
+  %retval = phi i1 [ true, %if.end ], [ false, %entry ]
+  ret i1 %retval
+}
+
diff --git a/llvm/test/CodeGen/PowerPC/BoolRetToIntTest.ll b/llvm/test/CodeGen/PowerPC/BoolRetToIntTest.ll
index 4a0966b2859..fd515281e39 100644
--- a/llvm/test/CodeGen/PowerPC/BoolRetToIntTest.ll
+++ b/llvm/test/CodeGen/PowerPC/BoolRetToIntTest.ll
@@ -31,14 +31,14 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %cleanup.dest.slot.0
 }
@@ -78,14 +78,14 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: call void %cont(i1 [[REG]]
   tail call void %cont(i1 %cleanup.dest.slot.0)
   ret void
@@ -112,17 +112,17 @@ for.body:                                         ; preds = %for.body.preheader,
   br i1 %call, label %cleanup.loopexit, label %for.cond
 
 cleanup.loopexit:                                 ; preds = %for.body, %for.cond
-; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ]
+; CHECK: [[PHI:%.+]] = phi i64 [ 1, %for.body ], [ 0, %for.cond ]
   %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ]
   br label %cleanup
 
 cleanup:                                          ; preds = %cleanup.loopexit, %entry
-; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
+; CHECK: = phi i64 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ]
   %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ]
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: call void %cont(i1 [[REG]]
   tail call void %cont(i1 %cleanup.dest.slot.0)
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %cleanup.dest.slot.0
 }
@@ -136,7 +136,7 @@ foo:
   br label %cleanup
 
 cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   %result = phi i1 [ false, %foo ], [ %operand, %entry ]
   ret i1 %result
@@ -186,7 +186,7 @@ foo:
 
 ; CHECK-LABEL: cleanup
 cleanup:
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   %result = phi i1 [ %bar, %foo], [ %operand, %entry ]
   ret i1 %result
@@ -198,8 +198,8 @@ declare zeroext i1 @return_i1()
 define zeroext i1 @call_test() {
 ; CHECK: [[REG:%.+]] = call i1
   %result = call i1 @return_i1()
-; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i32
-; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1
+; CHECK: [[REG:%.+]] = zext i1 {{%.+}} to i64
+; CHECK: [[REG:%.+]] = trunc i64 {{%.+}} to i1
 ; CHECK: ret i1 [[REG]]
   ret i1 %result
 }