3 files changed, 52 insertions, 18 deletions
diff --git a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
index ada17fc6308..d5f11eb9005 100644
--- a/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
@@ -155,6 +155,30 @@ Reference SystemZElimCompare::getRegReferences(MachineInstr *MI, unsigned Reg) {
   return Ref;
 }
 
+// Return true if this is a load and test which can be optimized the
+// same way as compare instruction.
+static bool isLoadAndTestAsCmp(MachineInstr *MI) {
+  // If we during isel used a load-and-test as a compare with 0, the
+  // def operand is dead.
+  return ((MI->getOpcode() == SystemZ::LTEBR ||
+	   MI->getOpcode() == SystemZ::LTDBR ||
+	   MI->getOpcode() == SystemZ::LTXBR) &&
+	  MI->getOperand(0).isDead());
+}
+
+// Return the source register of Compare, which is the unknown value
+// being tested.
+static unsigned getCompareSourceReg(MachineInstr *Compare) {
+  unsigned reg = 0;
+  if (Compare->isCompare())
+    reg = Compare->getOperand(0).getReg();
+  else if (isLoadAndTestAsCmp(Compare))
+    reg = Compare->getOperand(1).getReg();
+  assert (reg);
+
+  return reg;
+}
+
 // Compare compares the result of MI against zero.  If MI is an addition
 // of -1 and if CCUsers is a single branch on nonzero, eliminate the addition
 // and convert the branch to a BRCT(G).  Return true on success.
@@ -185,7 +209,7 @@ SystemZElimCompare::convertToBRCT(MachineInstr *MI, MachineInstr *Compare,
   // We already know that there are no references to the register between
   // MI and Compare.  Make sure that there are also no references between
   // Compare and Branch.
-  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg = getCompareSourceReg(Compare);
   MachineBasicBlock::iterator MBBI = Compare, MBBE = Branch;
   for (++MBBI; MBBI != MBBE; ++MBBI)
     if (getRegReferences(MBBI, SrcReg))
@@ -305,6 +329,10 @@ static bool isCompareZero(MachineInstr *Compare) {
     return true;
 
   default:
+
+    if (isLoadAndTestAsCmp(Compare))
+      return true;
+
     return (Compare->getNumExplicitOperands() == 2 &&
             Compare->getOperand(1).isImm() &&
             Compare->getOperand(1).getImm() == 0);
@@ -322,7 +350,7 @@ optimizeCompareZero(MachineInstr *Compare,
     return false;
 
   // Search back for CC results that are based on the first operand.
-  unsigned SrcReg = Compare->getOperand(0).getReg();
+  unsigned SrcReg = getCompareSourceReg(Compare);
   MachineBasicBlock &MBB = *Compare->getParent();
   MachineBasicBlock::iterator MBBI = Compare, MBBE = MBB.begin();
   Reference CCRefs;
@@ -431,7 +459,7 @@ bool SystemZElimCompare::processBlock(MachineBasicBlock &MBB) {
   while (MBBI != MBB.begin()) {
     MachineInstr *MI = --MBBI;
     if (CompleteCCUsers &&
-        MI->isCompare() &&
+        (MI->isCompare() || isLoadAndTestAsCmp(MI)) &&
         (optimizeCompareZero(MI, CCUsers) ||
          fuseCompareAndBranch(MI, CCUsers))) {
       ++MBBI;
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
index efa3f82cec4..22beaad2ab7 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -130,6 +130,13 @@ void SystemZPassConfig::addPreSched2() {
 }
 
 void SystemZPassConfig::addPreEmitPass() {
+
+  // Do instruction shortening before compare elimination because some
+  // vector instructions will be shortened into opcodes that compare
+  // elimination recognizes.
+  if (getOptLevel() != CodeGenOpt::None)
+    addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
+
   // We eliminate comparisons here rather than earlier because some
   // transformations can change the set of available CC values and we
   // generally want those transformations to have priority.  This is
@@ -155,8 +162,6 @@ void SystemZPassConfig::addPreEmitPass() {
   // preventing that would be a win or not.
   if (getOptLevel() != CodeGenOpt::None)
     addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false);
-  if (getOptLevel() != CodeGenOpt::None)
-    addPass(createSystemZShortenInstPass(getSystemZTargetMachine()), false);
   addPass(createSystemZLongBranchPass(getSystemZTargetMachine()));
 }
 
diff --git a/llvm/test/CodeGen/SystemZ/fp-cmp-05.ll b/llvm/test/CodeGen/SystemZ/fp-cmp-05.ll
index 1d71a0fcec5..7e937d1adae 100644
--- a/llvm/test/CodeGen/SystemZ/fp-cmp-05.ll
+++ b/llvm/test/CodeGen/SystemZ/fp-cmp-05.ll
@@ -4,14 +4,15 @@
 ; handled by SystemZElimcompare, so for Z13 this is currently
 ; unimplemented.
 ;
-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -check-prefix=CHECK-Z10
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
 
 ; Load complement (sign-bit flipped).
 ; Test f32
 define float @f1(float %a, float %b, float %f) {
 ; CHECK-LABEL: f1:
-; CHECK-Z10: lcebr
-; CHECK-Z10-NEXT: je
+; CHECK: lcebr
+; CHECK-NEXT: je
   %neg = fsub float -0.0, %f
   %cond = fcmp oeq float %neg, 0.0
   %res = select i1 %cond, float %a, float %b
@@ -21,8 +22,8 @@ define float @f1(float %a, float %b, float %f) {
 ; Test f64
 define double @f2(double %a, double %b, double %f) {
 ; CHECK-LABEL: f2:
-; CHECK-Z10: lcdbr
-; CHECK-Z10-NEXT: je
+; CHECK: lcdbr
+; CHECK-NEXT: je
   %neg = fsub double -0.0, %f
   %cond = fcmp oeq double %neg, 0.0
   %res = select i1 %cond, double %a, double %b
@@ -34,8 +35,8 @@ define double @f2(double %a, double %b, double %f) {
 declare float @llvm.fabs.f32(float %f)
 define float @f3(float %a, float %b, float %f) {
 ; CHECK-LABEL: f3:
-; CHECK-Z10: lnebr
-; CHECK-Z10-NEXT: je
+; CHECK: lnebr
+; CHECK-NEXT: je
   %abs = call float @llvm.fabs.f32(float %f)
   %neg = fsub float -0.0, %abs
   %cond = fcmp oeq float %neg, 0.0
@@ -47,8 +48,8 @@ define float @f3(float %a, float %b, float %f) {
 declare double @llvm.fabs.f64(double %f)
 define double @f4(double %a, double %b, double %f) {
 ; CHECK-LABEL: f4:
-; CHECK-Z10: lndbr
-; CHECK-Z10-NEXT: je
+; CHECK: lndbr
+; CHECK-NEXT: je
   %abs = call double @llvm.fabs.f64(double %f)
   %neg = fsub double -0.0, %abs
   %cond = fcmp oeq double %neg, 0.0
@@ -60,8 +61,8 @@ define double @f4(double %a, double %b, double %f) {
 ; Test f32
 define float @f5(float %a, float %b, float %f) {
 ; CHECK-LABEL: f5:
-; CHECK-Z10: lpebr
-; CHECK-Z10-NEXT: je
+; CHECK: lpebr
+; CHECK-NEXT: je
   %abs = call float @llvm.fabs.f32(float %f)
   %cond = fcmp oeq float %abs, 0.0
   %res = select i1 %cond, float %a, float %b
@@ -71,8 +72,8 @@ define float @f5(float %a, float %b, float %f) {
 ; Test f64
 define double @f6(double %a, double %b, double %f) {
 ; CHECK-LABEL: f6:
-; CHECK-Z10: lpdbr
-; CHECK-Z10-NEXT: je
+; CHECK: lpdbr
+; CHECK-NEXT: je
   %abs = call double @llvm.fabs.f64(double %f)
   %cond = fcmp oeq double %abs, 0.0
   %res = select i1 %cond, double %a, double %b