diff options
| author | Evan Cheng <evan.cheng@apple.com> | 2010-01-11 22:03:29 +0000 | 
|---|---|---|
| committer | Evan Cheng <evan.cheng@apple.com> | 2010-01-11 22:03:29 +0000 | 
| commit | 99789a7a76de9675f0abe5c87c35887b40274ab6 (patch) | |
| tree | 9b96435cdca2cd42ac520ba65cccc46a6d0644aa | |
| parent | 7419ce72aee24860364c2e60944ebe8b6fecccb0 (diff) | |
| download | bcm5719-llvm-99789a7a76de9675f0abe5c87c35887b40274ab6.tar.gz bcm5719-llvm-99789a7a76de9675f0abe5c87c35887b40274ab6.zip  | |
Extend r93152 to work on OR r, r. If the set bits of the two source operands are known not to overlap, select an ADD instead.
llvm-svn: 93191
| -rw-r--r-- | llvm/lib/Target/X86/X86Instr64bit.td | 7 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 36 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/3addr-or.ll | 16 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/fast-isel.ll | 2 | 
4 files changed, 51 insertions, 10 deletions
diff --git a/llvm/lib/Target/X86/X86Instr64bit.td b/llvm/lib/Target/X86/X86Instr64bit.td index b2aead6a6d4..7077cf9bb0a 100644 --- a/llvm/lib/Target/X86/X86Instr64bit.td +++ b/llvm/lib/Target/X86/X86Instr64bit.td @@ -1093,7 +1093,7 @@ let isCommutable = 1 in  def OR64rr   : RI<0x09, MRMDestReg, (outs GR64:$dst),                     (ins GR64:$src1, GR64:$src2),                    "or{q}\t{$src2, $dst|$dst, $src2}", -                  [(set GR64:$dst, (or GR64:$src1, GR64:$src2)), +                  [(set GR64:$dst, (or_not_add GR64:$src1, GR64:$src2)),                     (implicit EFLAGS)]>;  def OR64rr_REV : RI<0x0B, MRMSrcReg, (outs GR64:$dst),                       (ins GR64:$src1, GR64:$src2), @@ -2125,13 +2125,16 @@ def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),                         GR64:$src2, (i8 imm:$amt2)), addr:$dst),            (SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>; -// (or x, c) -> (add x, c) if masked bits are known zero. +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.  def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt8:$src2),                      (implicit EFLAGS)),            (ADD64ri8 GR64:$src1, i64immSExt8:$src2)>;  def : Pat<(parallel (or_is_add GR64:$src1, i64immSExt32:$src2),                      (implicit EFLAGS)),            (ADD64ri32 GR64:$src1, i64immSExt32:$src2)>; +def : Pat<(parallel (or_is_add GR64:$src1, GR64:$src2), +                    (implicit EFLAGS)), +          (ADD64rr GR64:$src1, GR64:$src2)>;  // X86 specific add which produces a flag.  
def : Pat<(addc GR64:$src1, GR64:$src2), diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 28c5154a56b..9b69018fa8b 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -497,12 +497,28 @@ def trunc_su : PatFrag<(ops node:$src), (trunc node:$src), [{  def or_is_add : PatFrag<(ops node:$lhs, node:$rhs), (or node:$lhs, node:$rhs),[{    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))      return CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); -  return false; +  else { +    unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); +    APInt Mask = APInt::getAllOnesValue(BitWidth); +    APInt KnownZero0, KnownOne0; +    CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); +    APInt KnownZero1, KnownOne1; +    CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); +    return (~KnownZero0 & ~KnownZero1) == 0; +  }  }]>;  def or_not_add : PatFrag<(ops node:$lhs, node:$rhs),(or node:$lhs, node:$rhs),[{ -  ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1)); -  if (!CN) return true; -  return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); +  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N->getOperand(1))) +    return !CurDAG->MaskedValueIsZero(N->getOperand(0), CN->getAPIntValue()); +  else { +    unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); +    APInt Mask = APInt::getAllOnesValue(BitWidth); +    APInt KnownZero0, KnownOne0; +    CurDAG->ComputeMaskedBits(N->getOperand(0), Mask, KnownZero0, KnownOne0, 0); +    APInt KnownZero1, KnownOne1; +    CurDAG->ComputeMaskedBits(N->getOperand(1), Mask, KnownZero1, KnownOne1, 0); +    return (~KnownZero0 & ~KnownZero1) != 0; +  }  }]>;  // 'shld' and 'shrd' instruction patterns. 
Note that even though these have @@ -1853,12 +1869,12 @@ def OR8rr    : I<0x08, MRMDestReg, (outs GR8 :$dst),  def OR16rr   : I<0x09, MRMDestReg, (outs GR16:$dst),                    (ins GR16:$src1, GR16:$src2),                   "or{w}\t{$src2, $dst|$dst, $src2}", -                 [(set GR16:$dst, (or GR16:$src1, GR16:$src2)), +                 [(set GR16:$dst, (or_not_add GR16:$src1, GR16:$src2)),                    (implicit EFLAGS)]>, OpSize;  def OR32rr   : I<0x09, MRMDestReg, (outs GR32:$dst),                    (ins GR32:$src1, GR32:$src2),                   "or{l}\t{$src2, $dst|$dst, $src2}", -                 [(set GR32:$dst, (or GR32:$src1, GR32:$src2)), +                 [(set GR32:$dst, (or_not_add GR32:$src1, GR32:$src2)),                    (implicit EFLAGS)]>;  } @@ -4659,7 +4675,7 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),  def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),            (SETB_C32r)>; -// (or x, c) -> (add x, c) if masked bits are known zero. +// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.  
def : Pat<(parallel (or_is_add GR16:$src1, imm:$src2),                      (implicit EFLAGS)),            (ADD16ri GR16:$src1, imm:$src2)>; @@ -4672,6 +4688,12 @@ def : Pat<(parallel (or_is_add GR16:$src1, i16immSExt8:$src2),  def : Pat<(parallel (or_is_add GR32:$src1, i32immSExt8:$src2),                      (implicit EFLAGS)),            (ADD32ri8 GR32:$src1, i32immSExt8:$src2)>; +def : Pat<(parallel (or_is_add GR16:$src1, GR16:$src2), +                    (implicit EFLAGS)), +          (ADD16rr GR16:$src1, GR16:$src2)>; +def : Pat<(parallel (or_is_add GR32:$src1, GR32:$src2), +                    (implicit EFLAGS)), +          (ADD32rr GR32:$src1, GR32:$src2)>;  //===----------------------------------------------------------------------===//  // EFLAGS-defining Patterns diff --git a/llvm/test/CodeGen/X86/3addr-or.ll b/llvm/test/CodeGen/X86/3addr-or.ll index 395ba46aab3..30a1f36850d 100644 --- a/llvm/test/CodeGen/X86/3addr-or.ll +++ b/llvm/test/CodeGen/X86/3addr-or.ll @@ -9,3 +9,19 @@ entry:    %1 = or i32 %0, 3                               ; <i32> [#uses=1]    ret i32 %1  } + +define i64 @test2(i8 %A, i8 %B) nounwind { +; CHECK: test2: +; CHECK: shrq $4 +; CHECK-NOT: movq +; CHECK-NOT: orq +; CHECK: leaq +; CHECK: ret +  %C = zext i8 %A to i64                          ; <i64> [#uses=1] +  %D = shl i64 %C, 4                              ; <i64> [#uses=1] +  %E = and i64 %D, 48                             ; <i64> [#uses=1] +  %F = zext i8 %B to i64                          ; <i64> [#uses=1] +  %G = lshr i64 %F, 4                             ; <i64> [#uses=1] +  %H = or i64 %G, %E                              ; <i64> [#uses=1] +  ret i64 %H +} diff --git a/llvm/test/CodeGen/X86/fast-isel.ll b/llvm/test/CodeGen/X86/fast-isel.ll index 3dcd736a140..84b3fd7caf3 100644 --- a/llvm/test/CodeGen/X86/fast-isel.ll +++ b/llvm/test/CodeGen/X86/fast-isel.ll @@ -14,7 +14,7 @@ fast:    %t1 = mul i32 %t0, %s    %t2 = sub i32 %t1, %s    %t3 = and i32 %t2, %s -  %t4 = or i32 %t3, 
%s +  %t4 = xor i32 %t3, 3    %t5 = xor i32 %t4, %s    %t6 = add i32 %t5, 2    %t7 = getelementptr i32* %y, i32 1  | 

