diff options
| author | Nadav Rotem <nrotem@apple.com> | 2013-06-19 05:49:52 +0000 | 
|---|---|---|
| committer | Nadav Rotem <nrotem@apple.com> | 2013-06-19 05:49:52 +0000 | 
| commit | e98da7f5483317fbf6bd5ecca6b2ba5e14fedd17 (patch) | |
| tree | 446211854ccc77e0a3428bfd06d3291cfb1f3ec7 | |
| parent | 4f3224f3edb5f41b2d61722e6fb1953e33e335a9 (diff) | |
| download | bcm5719-llvm-e98da7f5483317fbf6bd5ecca6b2ba5e14fedd17.tar.gz bcm5719-llvm-e98da7f5483317fbf6bd5ecca6b2ba5e14fedd17.zip  | |
SLPVectorizer: vectorize compares and selects.
llvm-svn: 184282
| -rw-r--r-- | llvm/lib/Transforms/Vectorize/VecUtils.cpp | 78 |
| -rw-r--r-- | llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll | 32 |
2 files changed, 107 insertions, 3 deletions
diff --git a/llvm/lib/Transforms/Vectorize/VecUtils.cpp b/llvm/lib/Transforms/Vectorize/VecUtils.cpp index 658162aa9f6..2f6b7df21b6 100644 --- a/llvm/lib/Transforms/Vectorize/VecUtils.cpp +++ b/llvm/lib/Transforms/Vectorize/VecUtils.cpp @@ -384,6 +384,9 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {      case Instruction::Trunc:      case Instruction::FPTrunc:      case Instruction::BitCast: +    case Instruction::Select: +    case Instruction::ICmp: +    case Instruction::FCmp:      case Instruction::Add:      case Instruction::FAdd:      case Instruction::Sub: @@ -541,6 +544,18 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {      Cost += (VecCost - ScalarCost);      return Cost;    } +  case Instruction::FCmp: +  case Instruction::ICmp: { +    // Check that all of the compares have the same predicate. +    CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate(); +    for (unsigned i = 1, e = VL.size(); i < e; ++i) { +      CmpInst *Cmp = cast<CmpInst>(VL[i]); +      if (Cmp->getPredicate() != P0) +        return getScalarizationCost(VecTy); +    } +    // Fall through. +  } +  case Instruction::Select:    case Instruction::Add:    case Instruction::FAdd:    case Instruction::Sub: @@ -572,10 +587,19 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {      }      // Calculate the cost of this instruction. 
-    int ScalarCost = VecTy->getNumElements() * +    int ScalarCost = 0; +    int VecCost = 0; +    if (Opcode == Instruction::FCmp || Opcode == Instruction::ICmp || +        Opcode == Instruction::Select) { +      VectorType *MaskTy = VectorType::get(Builder.getInt1Ty(), VL.size()); +      ScalarCost = VecTy->getNumElements() * +        TTI->getCmpSelInstrCost(Opcode, ScalarTy, Builder.getInt1Ty()); +      VecCost = TTI->getCmpSelInstrCost(Opcode, VecTy, MaskTy); +    } else { +      ScalarCost = VecTy->getNumElements() *        TTI->getArithmeticInstrCost(Opcode, ScalarTy); - -    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); +      VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy); +    }      Cost += (VecCost - ScalarCost);      return Cost;    } @@ -772,6 +796,54 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {      return V;    } +  case Instruction::FCmp: +  case Instruction::ICmp: { +    // Check that all of the compares have the same predicate. 
+    CmpInst::Predicate P0 = dyn_cast<CmpInst>(VL0)->getPredicate(); +    for (unsigned i = 1, e = VF; i < e; ++i) { +      CmpInst *Cmp = cast<CmpInst>(VL[i]); +      if (Cmp->getPredicate() != P0) +        return Scalarize(VL, VecTy); +    } + +    ValueList LHSV, RHSV; +    for (int i = 0; i < VF; ++i) { +      LHSV.push_back(cast<Instruction>(VL[i])->getOperand(0)); +      RHSV.push_back(cast<Instruction>(VL[i])->getOperand(1)); +    } + +    Value *L = vectorizeTree_rec(LHSV, VF); +    Value *R = vectorizeTree_rec(RHSV, VF); +    Value *V; +    if (VL0->getOpcode() == Instruction::FCmp) +      V = Builder.CreateFCmp(P0, L, R); +    else +      V = Builder.CreateICmp(P0, L, R); + +    for (int i = 0; i < VF; ++i) +      VectorizedValues[VL[i]] = V; + +    return V; + +  } +  case Instruction::Select: { +    ValueList TrueVec, FalseVec, CondVec; +    for (int i = 0; i < VF; ++i) { +      CondVec.push_back(cast<Instruction>(VL[i])->getOperand(0)); +      TrueVec.push_back(cast<Instruction>(VL[i])->getOperand(1)); +      FalseVec.push_back(cast<Instruction>(VL[i])->getOperand(2)); +    } + +    Value *True = vectorizeTree_rec(TrueVec, VF); +    Value *False = vectorizeTree_rec(FalseVec, VF); +    Value *Cond = vectorizeTree_rec(CondVec, VF); +    Value *V = Builder.CreateSelect(Cond, True, False); + +    for (int i = 0; i < VF; ++i) +      VectorizedValues[VL[i]] = V; + +    return V; +  }    case Instruction::Add:    case Instruction::FAdd:    case Instruction::Sub: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll new file mode 100644 index 00000000000..3bf4f19517a --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/cmp_sel.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +; int foo(double * restrict A, double * restrict B, double G) { +;   A[0] = (B[10] ? G : 1); +;   A[1] = (B[11] ? G : 1); +; } + +;CHECK: @foo +;CHECK: load <2 x double> +;CHECK: fcmp une <2 x double> +;CHECK: select <2 x i1> +;CHECK: store <2 x double> +;CHECK: ret i32 undef +define i32 @foo(double* noalias nocapture %A, double* noalias nocapture %B, double %G) { +entry: +  %arrayidx = getelementptr inbounds double* %B, i64 10 +  %0 = load double* %arrayidx, align 8 +  %tobool = fcmp une double %0, 0.000000e+00 +  %cond = select i1 %tobool, double %G, double 1.000000e+00 +  store double %cond, double* %A, align 8 +  %arrayidx2 = getelementptr inbounds double* %B, i64 11 +  %1 = load double* %arrayidx2, align 8 +  %tobool3 = fcmp une double %1, 0.000000e+00 +  %cond7 = select i1 %tobool3, double %G, double 1.000000e+00 +  %arrayidx8 = getelementptr inbounds double* %A, i64 1 +  store double %cond7, double* %arrayidx8, align 8 +  ret i32 undef +} +  | 

