diff options
| author | Joel Jones <joel_k_jones@apple.com> | 2012-03-29 05:45:48 +0000 | 
|---|---|---|
| committer | Joel Jones <joel_k_jones@apple.com> | 2012-03-29 05:45:48 +0000 | 
| commit | 68d59e8a90afdf02b7bad25558286db51def8548 (patch) | |
| tree | cb2a5d47968332923d4802bcbe998580ba025eb5 | |
| parent | 2b743c3f8a0ca7c567fa1026a1873ffd3c346f8d (diff) | |
| download | bcm5719-llvm-68d59e8a90afdf02b7bad25558286db51def8548.tar.gz bcm5719-llvm-68d59e8a90afdf02b7bad25558286db51def8548.zip | |
For X86, change each load/dec-or-inc/store sequence into a single dec or inc through memory.
This is a code change to add support for changing instruction sequences of the form:
  load
  inc/dec of 8/16/32/64 bits
  store
into the appropriate X86 inc/dec-through-memory instruction:
  inc[qlwb] / dec[qlwb]
The checks that were in the ISD::STORE case of X86DAGToDAGISel::Select(SDNode *Node) have been extracted into isLoadIncOrDecStore and reworked to use the
better-named wrappers for getOperand(unsigned) (e.g. getOffset()) and to use LoadNode in place of Chain.getNode().  The comments have also been expanded.
llvm-svn: 153635
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 128 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/dec-eflags-lower.ll | 67 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll | 179 | 
3 files changed, 273 insertions, 101 deletions
| diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index d41a240a085..03b7a1285df 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -1847,6 +1847,86 @@ static bool HasNoSignedComparisonUses(SDNode *N) {    return true;  } +/// isLoadIncOrDecStore - Check whether or not the chain ending in StoreNode +/// is suitable for doing the {load; increment or decrement; store} to modify +/// transformation. +static bool isLoadIncOrDecStore(StoreSDNode *StoreNode, unsigned Opc,  +                                SDValue &StoredVal) { + +  // is the value stored the result of a DEC or INC? +  if (!(Opc == X86ISD::DEC || Opc == X86ISD::INC)) return false; + +  // is the chain predecessor to the store a load? +  SDValue Chain = StoreNode->getChain(); +  if (Chain->getOpcode() != ISD::LOAD) return false; +   +  // is the size of the value one that we can handle? (i.e. 64, 32, 16, or 8) +  LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); +  EVT LdVT = LoadNode->getMemoryVT();     +  if (LdVT != MVT::i64 && LdVT != MVT::i32 && LdVT != MVT::i16 &&  +      LdVT != MVT::i8) +    return false; + +  // quick check of whether the store is simple +  SDValue Undef = StoreNode->getOffset(); +  if (Undef->getOpcode() != ISD::UNDEF) return false; + +  // is the stored value result 0 of the load? +  if (StoredVal.getResNo() != 0) return false; + +  // are there other uses of the loaded value than the inc or dec? +  if (!StoredVal.getNode()->hasNUsesOfValue(1, 0)) return false; + +  // is there exactly one use of the load? +  if (!LoadNode->hasNUsesOfValue(1, 0)) return false; +   +  // are the load and store connected by the chain? +  if (StoredVal->getOperand(0).getNode() != LoadNode) return false; + +  //OPC_CheckPredicate, 1, // Predicate_nontemporalstore +  if (StoreNode->isNonTemporal()) +    return false; + +  // is the address of the store the same as the load? 
+  SDValue Address = StoreNode->getBasePtr(); +  if (LoadNode->getBasePtr() != Address || +      LoadNode->getOffset() != Undef) +    return false; + +  // is the load non-extending and non-indexed? +  if (!ISD::isNormalLoad(LoadNode)) +    return false; + +  // is the store non-extending and non-indexed? +  if (!ISD::isNormalStore(StoreNode)) +    return false; + +  // check load chain has only one use (from the store) +  if (!Chain.hasOneUse()) +    return false; + +  return true; +} + +/// getFusedLdStOpcode - Get the appropriate X86 opcode for an in memory  +/// increment or decrement. Opc should be X86ISD::DEC or X86ISD:INC. +static unsigned getFusedLdStOpcode(EVT &LdVT, unsigned Opc) { +  if (Opc == X86ISD::DEC) { +    if (LdVT == MVT::i64) return X86::DEC64m; +    if (LdVT == MVT::i32) return X86::DEC32m; +    if (LdVT == MVT::i16) return X86::DEC16m; +    if (LdVT == MVT::i8)  return X86::DEC8m; +    assert(0 && "unrecognized size for LdVT"); +  } +  else { +    if (LdVT == MVT::i64) return X86::INC64m; +    if (LdVT == MVT::i32) return X86::INC32m; +    if (LdVT == MVT::i16) return X86::INC16m; +    if (LdVT == MVT::i8)  return X86::INC8m; +    assert(0 && "unrecognized size for LdVT"); +  } +} +  SDNode *X86DAGToDAGISel::Select(SDNode *Node) {    EVT NVT = Node->getValueType(0);    unsigned Opc, MOpc; @@ -2354,9 +2434,13 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {      break;    }    case ISD::STORE: { +    // Change a chain of {load; incr or dec; store} of the same value into +    // a simple increment or decrement through memory of that value, if the +    // uses of the modified value and its address are suitable.      // The DEC64m tablegen pattern is currently not able to match the case where -    // the EFLAGS on the original DEC are used. -    // we'll need to improve tablegen to allow flags to be transferred from a +    // the EFLAGS on the original DEC are used. (This also applies to  +    // {INC,DEC}X{64,32,16,8}.) 
+    // We'll need to improve tablegen to allow flags to be transferred from a      // node in the pattern to the result node.  probably with a new keyword      // for example, we have this      // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst", @@ -2366,42 +2450,16 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {      // def DEC64m : RI<0xFF, MRM1m, (outs), (ins i64mem:$dst), "dec{q}\t$dst",      //  [(store (add (loadi64 addr:$dst), -1), addr:$dst),      //   (transferrable EFLAGS)]>; +      StoreSDNode *StoreNode = cast<StoreSDNode>(Node); -    SDValue Chain = StoreNode->getOperand(0);      SDValue StoredVal = StoreNode->getOperand(1); -    SDValue Address = StoreNode->getOperand(2); -    SDValue Undef = StoreNode->getOperand(3); - -    if (StoreNode->getMemOperand()->getSize() != 8 || -        Undef->getOpcode() != ISD::UNDEF || -        Chain->getOpcode() != ISD::LOAD || -        StoredVal->getOpcode() != X86ISD::DEC || -        StoredVal.getResNo() != 0 || -        !StoredVal.getNode()->hasNUsesOfValue(1, 0) || -        !Chain.getNode()->hasNUsesOfValue(1, 0) || -        StoredVal->getOperand(0).getNode() != Chain.getNode()) -      break; +    unsigned Opc = StoredVal->getOpcode(); -    //OPC_CheckPredicate, 1, // Predicate_nontemporalstore -    if (StoreNode->isNonTemporal()) -      break; - -    LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode()); -    if (LoadNode->getOperand(1) != Address || -        LoadNode->getOperand(2) != Undef) -      break; - -    if (!ISD::isNormalLoad(LoadNode)) -      break; - -    if (!ISD::isNormalStore(StoreNode)) -      break; - -    // check load chain has only one use (from the store) -    if (!Chain.hasOneUse()) -      break; +    if (!isLoadIncOrDecStore(StoreNode, Opc, StoredVal)) break;      // Merge the input chains if they are not intra-pattern references. 
+    SDValue Chain = StoreNode->getOperand(0); +    LoadSDNode *LoadNode = cast<LoadSDNode>(Chain.getNode());      SDValue InputChain = LoadNode->getOperand(0);      SDValue Base, Scale, Index, Disp, Segment; @@ -2413,7 +2471,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {      MemOp[0] = StoreNode->getMemOperand();      MemOp[1] = LoadNode->getMemOperand();      const SDValue Ops[] = { Base, Scale, Index, Disp, Segment, InputChain }; -    MachineSDNode *Result = CurDAG->getMachineNode(X86::DEC64m, +    EVT LdVT = LoadNode->getMemoryVT();     +    unsigned newOpc = getFusedLdStOpcode(LdVT, Opc); +    MachineSDNode *Result = CurDAG->getMachineNode(newOpc,                                                     Node->getDebugLoc(),                                                     MVT::i32, MVT::Other, Ops,                                                     array_lengthof(Ops)); diff --git a/llvm/test/CodeGen/X86/dec-eflags-lower.ll b/llvm/test/CodeGen/X86/dec-eflags-lower.ll deleted file mode 100644 index 190819f4cf8..00000000000 --- a/llvm/test/CodeGen/X86/dec-eflags-lower.ll +++ /dev/null @@ -1,67 +0,0 @@ -; RUN: llc < %s -march=x86-64 | FileCheck %s - -%struct.obj = type { i64 } - -; CHECK: _Z7releaseP3obj -define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp { -entry: -; CHECK: decq	(%{{rdi|rcx}}) -; CHECK-NEXT: je -  %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0 -  %0 = load i64* %refcnt, align 8, !tbaa !0 -  %dec = add i64 %0, -1 -  store i64 %dec, i64* %refcnt, align 8, !tbaa !0 -  %tobool = icmp eq i64 %dec, 0 -  br i1 %tobool, label %if.end, label %return - -if.end:                                           ; preds = %entry -  %1 = bitcast %struct.obj* %o to i8* -  tail call void @free(i8* %1) -  br label %return - -return:                                           ; preds = %entry, %if.end -  ret void -} - -@c = common global i64 0, align 8 -@a = common global i32 0, align 4 -@.str = private unnamed_addr 
constant [5 x i8] c"%ld\0A\00", align 1 -@b = common global i32 0, align 4 - -; CHECK: test -define i32 @test() nounwind uwtable ssp { -entry: -; CHECK: decq -; CHECK-NOT: decq -%0 = load i64* @c, align 8, !tbaa !0 -%dec.i = add nsw i64 %0, -1 -store i64 %dec.i, i64* @c, align 8, !tbaa !0 -%tobool.i = icmp ne i64 %dec.i, 0 -%lor.ext.i = zext i1 %tobool.i to i32 -store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3 -%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind -ret i32 0 -} - -; CHECK: test2 -define i32 @test2() nounwind uwtable ssp { -entry: -; CHECK-NOT: decq ({{.*}}) -%0 = load i64* @c, align 8, !tbaa !0 -%dec.i = add nsw i64 %0, -1 -store i64 %dec.i, i64* @c, align 8, !tbaa !0 -%tobool.i = icmp ne i64 %0, 0 -%lor.ext.i = zext i1 %tobool.i to i32 -store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3 -%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind -ret i32 0 -} - -declare i32 @printf(i8* nocapture, ...) 
nounwind - -declare void @free(i8* nocapture) nounwind - -!0 = metadata !{metadata !"long", metadata !1} -!1 = metadata !{metadata !"omnipotent char", metadata !2} -!2 = metadata !{metadata !"Simple C/C++ TBAA", null} -!3 = metadata !{metadata !"int", metadata !1} diff --git a/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll b/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll new file mode 100644 index 00000000000..faca3d7bacd --- /dev/null +++ b/llvm/test/CodeGen/X86/rd-mod-wr-eflags.ll @@ -0,0 +1,179 @@ +; RUN: llc < %s -march=x86-64 | FileCheck %s + +%struct.obj = type { i64 } + +; CHECK: _Z7releaseP3obj +define void @_Z7releaseP3obj(%struct.obj* nocapture %o) nounwind uwtable ssp { +entry: +; CHECK: decq	(%{{rdi|rcx}}) +; CHECK-NEXT: je +  %refcnt = getelementptr inbounds %struct.obj* %o, i64 0, i32 0 +  %0 = load i64* %refcnt, align 8, !tbaa !0 +  %dec = add i64 %0, -1 +  store i64 %dec, i64* %refcnt, align 8, !tbaa !0 +  %tobool = icmp eq i64 %dec, 0 +  br i1 %tobool, label %if.end, label %return + +if.end:                                           ; preds = %entry +  %1 = bitcast %struct.obj* %o to i8* +  tail call void @free(i8* %1) +  br label %return + +return:                                           ; preds = %entry, %if.end +  ret void +} + +@c = common global i64 0, align 8 +@a = common global i32 0, align 4 +@.str = private unnamed_addr constant [5 x i8] c"%ld\0A\00", align 1 +@b = common global i32 0, align 4 + +; CHECK: test +define i32 @test() nounwind uwtable ssp { +entry: +; CHECK: decq +; CHECK-NOT: decq +%0 = load i64* @c, align 8, !tbaa !0 +%dec.i = add nsw i64 %0, -1 +store i64 %dec.i, i64* @c, align 8, !tbaa !0 +%tobool.i = icmp ne i64 %dec.i, 0 +%lor.ext.i = zext i1 %tobool.i to i32 +store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3 +%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind +ret i32 0 +} + +; CHECK: test2 +define i32 @test2() nounwind uwtable ssp { +entry: +; CHECK-NOT: 
decq ({{.*}}) +%0 = load i64* @c, align 8, !tbaa !0 +%dec.i = add nsw i64 %0, -1 +store i64 %dec.i, i64* @c, align 8, !tbaa !0 +%tobool.i = icmp ne i64 %0, 0 +%lor.ext.i = zext i1 %tobool.i to i32 +store i32 %lor.ext.i, i32* @a, align 4, !tbaa !3 +%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([5 x i8]* @.str, i64 0, i64 0), i64 %dec.i) nounwind +ret i32 0 +} + +declare i32 @printf(i8* nocapture, ...) nounwind + +declare void @free(i8* nocapture) nounwind + +!0 = metadata !{metadata !"long", metadata !1} +!1 = metadata !{metadata !"omnipotent char", metadata !2} +!2 = metadata !{metadata !"Simple C/C++ TBAA", null} +!3 = metadata !{metadata !"int", metadata !1} + +%struct.obj2 = type { i64, i32, i16, i8 } + +declare void @other(%struct.obj2* ) nounwind; + +; CHECK: example_dec +define void @example_dec(%struct.obj2* %o) nounwind uwtable ssp { +; 64 bit dec +entry: +  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0 +; CHECK-NOT: load  +  %0 = load i64* %s64, align 8 +; CHECK: decq ({{.*}}) +  %dec = add i64 %0, -1 +  store i64 %dec, i64* %s64, align 8 +  %tobool = icmp eq i64 %dec, 0 +  br i1 %tobool, label %if.end, label %return + +; 32 bit dec +if.end: +  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1 +; CHECK-NOT: load  +  %1 = load i32* %s32, align 4 +; CHECK: decl {{[0-9][0-9]*}}({{.*}}) +  %dec1 = add i32 %1, -1 +  store i32 %dec1, i32* %s32, align 4 +  %tobool2 = icmp eq i32 %dec1, 0 +  br i1 %tobool2, label %if.end1, label %return + +; 16 bit dec +if.end1: +  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2 +; CHECK-NOT: load  +  %2 = load i16* %s16, align 2 +; CHECK: decw {{[0-9][0-9]*}}({{.*}}) +  %dec2 = add i16 %2, -1 +  store i16 %dec2, i16* %s16, align 2 +  %tobool3 = icmp eq i16 %dec2, 0 +  br i1 %tobool3, label %if.end2, label %return + +; 8 bit dec +if.end2: +  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3 +; CHECK-NOT: load  +  %3 = load i8* %s8 +; CHECK: decb 
{{[0-9][0-9]*}}({{.*}}) +  %dec3 = add i8 %3, -1 +  store i8 %dec3, i8* %s8 +  %tobool4 = icmp eq i8 %dec3, 0 +  br i1 %tobool4, label %if.end4, label %return + +if.end4: +  tail call void @other(%struct.obj2* %o) nounwind +  br label %return + +return:                                           ; preds = %if.end4, %if.end, %entry                                                                                                                                                                                +  ret void +} + +; CHECK: example_inc +define void @example_inc(%struct.obj2* %o) nounwind uwtable ssp { +; 64 bit inc +entry: +  %s64 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 0 +; CHECK-NOT: load  +  %0 = load i64* %s64, align 8 +; CHECK: incq ({{.*}}) +  %inc = add i64 %0, 1 +  store i64 %inc, i64* %s64, align 8 +  %tobool = icmp eq i64 %inc, 0 +  br i1 %tobool, label %if.end, label %return + +; 32 bit inc +if.end: +  %s32 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 1 +; CHECK-NOT: load  +  %1 = load i32* %s32, align 4 +; CHECK: incl {{[0-9][0-9]*}}({{.*}}) +  %inc1 = add i32 %1, 1 +  store i32 %inc1, i32* %s32, align 4 +  %tobool2 = icmp eq i32 %inc1, 0 +  br i1 %tobool2, label %if.end1, label %return + +; 16 bit inc +if.end1: +  %s16 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 2 +; CHECK-NOT: load  +  %2 = load i16* %s16, align 2 +; CHECK: incw {{[0-9][0-9]*}}({{.*}}) +  %inc2 = add i16 %2, 1 +  store i16 %inc2, i16* %s16, align 2 +  %tobool3 = icmp eq i16 %inc2, 0 +  br i1 %tobool3, label %if.end2, label %return + +; 8 bit inc +if.end2: +  %s8 = getelementptr inbounds %struct.obj2* %o, i64 0, i32 3 +; CHECK-NOT: load  +  %3 = load i8* %s8 +; CHECK: incb {{[0-9][0-9]*}}({{.*}}) +  %inc3 = add i8 %3, 1 +  store i8 %inc3, i8* %s8 +  %tobool4 = icmp eq i8 %inc3, 0 +  br i1 %tobool4, label %if.end4, label %return + +if.end4: +  tail call void @other(%struct.obj2* %o) nounwind +  br label %return + +return: +  ret void +} | 

