From 1a32ede6fd4c87b95a4bb7ff11b7e6dfba6b7af2 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 24 Dec 2009 00:37:38 +0000 Subject: move an optimization for memcmp out of simplifylibcalls and into SDISel. This optimization was causing simplifylibcalls to introduce type-unsafe nastiness. This is the first step, I'll be expanding the memcmp optimizations shortly, covering things that we really really wouldn't want simplifylibcalls to do. llvm-svn: 92098 --- .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 103 +++++++++++++++++++++ .../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 4 + llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp | 13 --- 3 files changed, 107 insertions(+), 13 deletions(-) (limited to 'llvm/lib') diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index e3c219cd0b2..e194003da93 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Constants.h" #include "llvm/CallingConv.h" #include "llvm/DerivedTypes.h" @@ -5075,6 +5076,105 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee, } } +/// IsOnlyUsedInZeroEqualityComparison - Return true if it only matters that the +/// value is equal or not-equal to zero. +static bool IsOnlyUsedInZeroEqualityComparison(Value *V) { + for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); + UI != E; ++UI) { + if (ICmpInst *IC = dyn_cast(*UI)) + if (IC->isEquality()) + if (Constant *C = dyn_cast(IC->getOperand(1))) + if (C->isNullValue()) + continue; + // Unknown instruction. + return false; + } + return true; +} + +static SDValue getMemCmpLoad(Value *PtrVal, unsigned Size, + SelectionDAGBuilder &Builder) { + MVT LoadVT; + const Type *LoadTy; + if (Size == 2) { + LoadVT = MVT::i16; + LoadTy = Type::getInt16Ty(PtrVal->getContext()); + } else { + LoadVT = MVT::i32; + LoadTy = Type::getInt32Ty(PtrVal->getContext()); + } + + // Check to see if this load can be trivially constant folded, e.g. if the + // input is from a string literal. + if (Constant *LoadInput = dyn_cast(PtrVal)) { + // Cast pointer to the type we really want to load. + LoadInput = ConstantExpr::getBitCast(LoadInput, + PointerType::getUnqual(LoadTy)); + + if (Constant *LoadCst = ConstantFoldLoadFromConstPtr(LoadInput, Builder.TD)) + return Builder.getValue(LoadCst); + } + + // Otherwise, we have to emit the load. If the pointer is to unfoldable but + // still constant memory, the input chain can be the entry node. + SDValue Root; + bool ConstantMemory = false; + + // Do not serialize (non-volatile) loads of constant memory with anything. + if (Builder.AA->pointsToConstantMemory(PtrVal)) { + Root = Builder.DAG.getEntryNode(); + ConstantMemory = true; + } else { + // Do not serialize non-volatile loads against each other. + Root = Builder.DAG.getRoot(); + } + + SDValue Ptr = Builder.getValue(PtrVal); + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, + Ptr, PtrVal /*SrcValue*/, 0/*SVOffset*/, + false /*volatile*/, 1 /* align=1 */); + + if (!ConstantMemory) + Builder.PendingLoads.push_back(LoadVal.getValue(1)); + return LoadVal; +} + + +/// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) { + // Verify that the prototype makes sense. int memcmp(void*,void*,size_t) + if (I.getNumOperands() != 4) + return false; + + Value *LHS = I.getOperand(1), *RHS = I.getOperand(2); + if (!isa(LHS->getType()) || !isa(RHS->getType()) || + !isa(I.getOperand(3)->getType()) || + !isa(I.getType())) + return false; + + ConstantInt *Size = dyn_cast(I.getOperand(3)); + + // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 + // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 + if (Size && (Size->getValue() == 2 || Size->getValue() == 4) && + IsOnlyUsedInZeroEqualityComparison(&I)) { + SDValue LHSVal = getMemCmpLoad(LHS, Size->getZExtValue(), *this); + SDValue RHSVal = getMemCmpLoad(RHS, Size->getZExtValue(), *this); + + SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, + ISD::SETNE); + EVT CallVT = TLI.getValueType(I.getType(), true); + setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); + return true; + } + + + return false; +} + + void SelectionDAGBuilder::visitCall(CallInst &I) { const char *RenameFn = 0; if (Function *F = I.getCalledFunction()) { @@ -5148,6 +5248,9 @@ void SelectionDAGBuilder::visitCall(CallInst &I) { Tmp.getValueType(), Tmp)); return; } + } else if (Name == "memcmp") { + if (visitMemCmpCall(I)) + return; } } } else if (isa(I.getOperand(0))) { diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index c9c5f4f87c5..88a2017b474 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -91,11 +91,13 @@ class SelectionDAGBuilder { DenseMap NodeMap; +public: /// PendingLoads - Loads are not emitted to the program immediately. We bunch /// them up and then emit token factor nodes when possible. This allows us to /// get simple disambiguation between loads without worrying about alias /// analysis. SmallVector PendingLoads; +private: /// PendingExports - CopyToReg nodes that copy values to virtual registers /// for export to other blocks need to be emitted before any terminator @@ -461,6 +463,8 @@ private: void visitStore(StoreInst &I); void visitPHI(PHINode &I) { } // PHI nodes are handled specially. void visitCall(CallInst &I); + bool visitMemCmpCall(CallInst &I); + void visitInlineAsm(CallSite CS); const char *visitIntrinsicCall(CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(CallInst &I, unsigned Intrinsic); diff --git a/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp index 30c3f3f398c..3c28ad27e57 100644 --- a/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp @@ -1011,19 +1011,6 @@ struct MemCmpOpt : public LibCallOptimization { return B.CreateSExt(B.CreateSub(LHSV, RHSV, "chardiff"), CI->getType()); } - // memcmp(S1,S2,2) != 0 -> (*(short*)LHS ^ *(short*)RHS) != 0 - // memcmp(S1,S2,4) != 0 -> (*(int*)LHS ^ *(int*)RHS) != 0 - if ((Len == 2 || Len == 4) && IsOnlyUsedInZeroEqualityComparison(CI)) { - const Type *PTy = PointerType::getUnqual(Len == 2 ? - Type::getInt16Ty(*Context) : Type::getInt32Ty(*Context)); - LHS = B.CreateBitCast(LHS, PTy, "tmp"); - RHS = B.CreateBitCast(RHS, PTy, "tmp"); - LoadInst *LHSV = B.CreateLoad(LHS, "lhsv"); - LoadInst *RHSV = B.CreateLoad(RHS, "rhsv"); - LHSV->setAlignment(1); RHSV->setAlignment(1); // Unaligned loads. - return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType()); - } - // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant) std::string LHSStr, RHSStr; if (GetConstantStringInfo(LHS, LHSStr) && -- cgit v1.2.3