author     Benjamin Kramer <benny.kra@googlemail.com>	2011-07-26 22:42:13 +0000
committer  Benjamin Kramer <benny.kra@googlemail.com>	2011-07-26 22:42:13 +0000
commit     124ac2b997a953ca620d2e339c2b6f6f0fc88abc
tree       2e9ffd42d961fe1f1fbf64bd7829af9cab984a14
parent     f883941903fe8e820d67e466e5b0c3e0b80f8a9b
Add a neat little two's complement hack for x86.
On x86 we can't encode an immediate LHS of a sub directly. If the RHS comes from an XOR with a constant, we can fold the negation into the xor and add one to the immediate of the sub. Then we can turn the sub into an add, which can be commuted and encoded efficiently. This code is generated for __builtin_clz and friends.

llvm-svn: 136167
-rw-r--r--  llvm/lib/Target/X86/README.txt           | 33
-rw-r--r--  llvm/lib/Target/X86/X86ISelLowering.cpp  | 33
-rw-r--r--  llvm/test/CodeGen/X86/sub.ll             | 11
3 files changed, 49 insertions, 28 deletions
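The rewrite relies on the two's complement identity C1 - (X^C2) == (C1+1) + (X^~C2). A minimal standalone sketch (not part of the patch; the loop stride and variable names are mine) that checks this identity under 32-bit wrap-around arithmetic, using the constants from the clz example in the README hunk below:

#include <cassert>
#include <cstdint>

int main() {
  const uint32_t C1 = 32, C2 = 31;                   // constants from the clz example
  for (uint64_t i = 0; i <= UINT32_MAX; i += 977) {  // sparse sweep over X
    const uint32_t X = static_cast<uint32_t>(i);
    const uint32_t SubForm = C1 - (X ^ C2);          // sub with an immediate LHS
    const uint32_t AddForm = (C1 + 1) + (X ^ ~C2);   // xor with inverted mask, then add
    assert(SubForm == AddForm);                      // holds for every X mod 2^32
  }
  return 0;
}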
diff --git a/llvm/lib/Target/X86/README.txt b/llvm/lib/Target/X86/README.txt
index d6ceaa7ed4b..8d9eabad0be 100644
--- a/llvm/lib/Target/X86/README.txt
+++ b/llvm/lib/Target/X86/README.txt
@@ -2076,12 +2076,11 @@ generates (x86_64):
jb LBB0_2
## BB#1:
decl %edi
- movl $63, %eax
- bsrl %edi, %ecx
- cmovel %eax, %ecx
- xorl $31, %ecx
- movl $32, %eax
- subl %ecx, %eax
+ movl $63, %ecx
+ bsrl %edi, %eax
+ cmovel %ecx, %eax
+ xorl $-32, %eax
+ addl $33, %eax
LBB0_2:
ret
@@ -2091,26 +2090,10 @@ The cmov and the early test are redundant:
jb LBB0_2
## BB#1:
decl %edi
- bsrl %edi, %ecx
- xorl $31, %ecx
- movl $32, %eax
- subl %ecx, %eax
+ bsrl %edi, %eax
+ xorl $-32, %eax
+ addl $33, %eax
LBB0_2:
ret
-If we want to get really fancy we could use some two's complement magic:
- xorl %eax, %eax
- cmpl $2, %edi
- jb LBB0_2
-## BB#1:
- decl %edi
- bsrl %edi, %ecx
- xorl $-32, %ecx
- leal 33(%ecx), %eax
-LBB0_2:
- ret
-
-This is only useful on targets that can't encode the first operand of a sub
-directly. The rule is C1 - (X^C2) -> (C1+1) + (X^~C2).
-
//===---------------------------------------------------------------------===//
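The commit message notes this pattern comes out of __builtin_clz and friends. A hedged illustration (an assumed source pattern with a hypothetical helper name, not taken from the README entry itself) of the kind of code that produces the bsr/xor/sub sequence shown above:

#include <cstdio>

// Bits needed to represent values below x; for x >= 2 this is
// 32 - clz(x - 1), which lowers to a bsr/xor/sub sequence on x86.
static unsigned bits_needed(unsigned x) {
  return x < 2 ? 0 : 32 - __builtin_clz(x - 1);
}

int main() {
  for (unsigned x : {1u, 2u, 5u, 256u})
    std::printf("bits_needed(%u) = %u\n", x, bits_needed(x));
  return 0;
}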
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 26c48a4d56a..f51a455b703 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -12550,7 +12550,7 @@ static SDValue PerformADCCombine(SDNode *N, SelectionDAG &DAG,
// (add Y, (setne X, 0)) -> sbb -1, Y
// (sub (sete X, 0), Y) -> sbb 0, Y
// (sub (setne X, 0), Y) -> adc -1, Y
-static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
+static SDValue OptimizeConditionalInDecrement(SDNode *N, SelectionDAG &DAG) {
DebugLoc DL = N->getDebugLoc();
// Look through ZExts.
@@ -12586,6 +12586,33 @@ static SDValue OptimizeConditonalInDecrement(SDNode *N, SelectionDAG &DAG) {
DAG.getConstant(0, OtherVal.getValueType()), NewCmp);
}
+static SDValue PerformSubCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+
+ // X86 can't encode an immediate LHS of a sub. See if we can push the
+ // negation into a preceding instruction.
+ if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op0)) {
+ uint64_t Op0C = C->getSExtValue();
+
+ // If the RHS of the sub is a XOR with one use and a constant, invert the
+ // immediate. Then add one to the LHS of the sub so we can turn
+ // X-Y -> X+~Y+1, saving one register.
+ if (Op1->hasOneUse() && Op1.getOpcode() == ISD::XOR &&
+ isa<ConstantSDNode>(Op1.getOperand(1))) {
+ uint64_t XorC = cast<ConstantSDNode>(Op1.getOperand(1))->getSExtValue();
+ EVT VT = Op0.getValueType();
+ SDValue NewXor = DAG.getNode(ISD::XOR, Op1.getDebugLoc(), VT,
+ Op1.getOperand(0),
+ DAG.getConstant(~XorC, VT));
+ return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, NewXor,
+ DAG.getConstant(Op0C+1, VT));
+ }
+ }
+
+ return OptimizeConditionalInDecrement(N, DAG);
+}
+
SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
SelectionDAG &DAG = DCI.DAG;
@@ -12595,8 +12622,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return PerformEXTRACT_VECTOR_ELTCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
- case ISD::ADD:
- case ISD::SUB: return OptimizeConditonalInDecrement(N, DAG);
+ case ISD::ADD: return OptimizeConditionalInDecrement(N, DAG);
+ case ISD::SUB: return PerformSubCombine(N, DAG);
case X86ISD::ADC: return PerformADCCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
diff --git a/llvm/test/CodeGen/X86/sub.ll b/llvm/test/CodeGen/X86/sub.ll
new file mode 100644
index 00000000000..2a4d2d6e3fc
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sub.ll
@@ -0,0 +1,11 @@
+; RUN: llc -march=x86 < %s | FileCheck %s
+
+define i32 @test1(i32 %x) {
+ %xor = xor i32 %x, 31
+ %sub = sub i32 32, %xor
+ ret i32 %sub
+; CHECK: test1:
+; CHECK: xorl $-32
+; CHECK-NEXT: addl $33
+; CHECK-NEXT: ret
+}