[X86] Emit more efficient >= comparisons against 0

We don't do a great job with >= comparisons against zero when the result is used as an i8.

Given something like:
  void f(long long LL, bool *B) {
    *B = LL >= 0;
  }

We used to generate:
  shrq $63, %rdi
  xorb $1, %dil
  movb %dil, (%rsi)

Now we generate:
  testq %rdi, %rdi
  setns (%rsi)

Differential Revision: http://reviews.llvm.org/D12136

llvm-svn: 245498
author: David Majnemer <david.majnemer@gmail.com> 2015-08-19 20:51:40 +0000
committer: David Majnemer <david.majnemer@gmail.com> 2015-08-19 20:51:40 +0000
commit: f25fe647166375440a2945ca3393c1c54c3efe5c (patch)
tree: d32a450a896aa7da924c30509019c5095b3fda47
parent: 7747ce226051791be0a03ec106d9502e3564c1af (diff)
download: bcm5719-llvm-f25fe647166375440a2945ca3393c1c54c3efe5c.tar.gz
bcm5719-llvm-f25fe647166375440a2945ca3393c1c54c3efe5c.zip
2 files changed, 93 insertions, 1 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 0e218e33a0d..a27c9f56ae4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24074,13 +24074,61 @@ static SDValue performIntegerAbsCombine(SDNode *N, SelectionDAG &DAG) {
   return SDValue();
 }
 
-// PerformXorCombine - Attempts to turn XOR nodes into BLSMSK nodes
+// Try to turn tests against the signbit in the form of:
+//   XOR(TRUNCATE(SRL(X, size(X)-1)), 1)
+// into:
+//   SETGT(X, -1)
+static SDValue foldXorTruncShiftIntoCmp(SDNode *N, SelectionDAG &DAG) {
+  // This is only worth doing if the output type is i8.
+  if (N->getValueType(0) != MVT::i8)
+    return SDValue();
+
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  // We should be performing an xor against a truncated shift.
+  if (N0.getOpcode() != ISD::TRUNCATE || !N0.hasOneUse())
+    return SDValue();
+
+  // Make sure we are performing an xor against one.
+  if (!isa<ConstantSDNode>(N1) || !cast<ConstantSDNode>(N1)->isOne())
+    return SDValue();
+
+  // SetCC on x86 zero extends so only act on this if it's a logical shift.
+  SDValue Shift = N0.getOperand(0);
+  if (Shift.getOpcode() != ISD::SRL || !Shift.hasOneUse())
+    return SDValue();
+
+  // Make sure we are truncating from one of i16, i32 or i64.
+  EVT ShiftTy = Shift.getValueType();
+  if (ShiftTy != MVT::i16 && ShiftTy != MVT::i32 && ShiftTy != MVT::i64)
+    return SDValue();
+
+  // Make sure the shift amount extracts the sign bit.
+  if (!isa<ConstantSDNode>(Shift.getOperand(1)) ||
+      Shift.getConstantOperandVal(1) != ShiftTy.getSizeInBits() - 1)
+    return SDValue();
+
+  // Create a greater-than comparison against -1.
+  // N.B. Using SETGE against 0 works but we want a canonical looking
+  // comparison; using SETGT matches up with what TranslateX86CC expects.
+  SDLoc DL(N);
+  SDValue ShiftOp = Shift.getOperand(0);
+  EVT ShiftOpTy = ShiftOp.getValueType();
+  SDValue Cond = DAG.getSetCC(DL, MVT::i8, ShiftOp,
+                              DAG.getConstant(-1, DL, ShiftOpTy), ISD::SETGT);
+  return Cond;
+}
+
 static SDValue PerformXorCombine(SDNode *N, SelectionDAG &DAG,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const X86Subtarget *Subtarget) {
   if (DCI.isBeforeLegalizeOps())
     return SDValue();
 
+  if (SDValue RV = foldXorTruncShiftIntoCmp(N, DAG))
+    return RV;
+
   if (Subtarget->hasCMov())
     if (SDValue RV = performIntegerAbsCombine(N, DAG))
       return RV;
diff --git a/llvm/test/CodeGen/X86/cmp.ll b/llvm/test/CodeGen/X86/cmp.ll
index 584179aacbc..eb9a2901142 100644
--- a/llvm/test/CodeGen/X86/cmp.ll
+++ b/llvm/test/CodeGen/X86/cmp.ll
@@ -211,3 +211,47 @@ define zeroext i1 @test15(i32 %bf.load, i32 %n) {
 ; CHECK:  shrl	$16, %edi
 ; CHECK:  cmpl	%esi, %edi
 }
+
+define i8 @test16(i16 signext %L) {
+  %lshr  = lshr i16 %L, 15
+  %trunc = trunc i16 %lshr to i8
+  %not   = xor i8 %trunc, 1
+  ret i8 %not
+
+; CHECK-LABEL: test16:
+; CHECK:  testw   %di, %di
+; CHECK:  setns   %al
+}
+
+define i8 @test17(i32 %L) {
+  %lshr  = lshr i32 %L, 31
+  %trunc = trunc i32 %lshr to i8
+  %not   = xor i8 %trunc, 1
+  ret i8 %not
+
+; CHECK-LABEL: test17:
+; CHECK:  testl   %edi, %edi
+; CHECK:  setns   %al
+}
+
+define i8 @test18(i64 %L) {
+  %lshr  = lshr i64 %L, 63
+  %trunc = trunc i64 %lshr to i8
+  %not   = xor i8 %trunc, 1
+  ret i8 %not
+
+; CHECK-LABEL: test18:
+; CHECK:  testq   %rdi, %rdi
+; CHECK:  setns   %al
+}
+
+define zeroext i1 @test19(i32 %L) {
+  %lshr  = lshr i32 %L, 31
+  %trunc = trunc i32 %lshr to i1
+  %not   = xor i1 %trunc, 1
+  ret i1 %not
+
+; CHECK-LABEL: test19:
+; CHECK:  testl   %edi, %edi
+; CHECK:  setns   %al
+}
author	David Majnemer <david.majnemer@gmail.com>	2015-08-19 20:51:40 +0000
committer	David Majnemer <david.majnemer@gmail.com>	2015-08-19 20:51:40 +0000
commit	f25fe647166375440a2945ca3393c1c54c3efe5c (patch)
tree	d32a450a896aa7da924c30509019c5095b3fda47
parent	7747ce226051791be0a03ec106d9502e3564c1af (diff)
download	bcm5719-llvm-f25fe647166375440a2945ca3393c1c54c3efe5c.tar.gz bcm5719-llvm-f25fe647166375440a2945ca3393c1c54c3efe5c.zip