diff options
| author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-03-14 19:17:02 +0000 |
|---|---|---|
| committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-03-14 19:17:02 +0000 |
| commit | 8070b382ecf85996ca61aa41e25728038f76daa5 (patch) | |
| tree | 698e3f65a9e8a7cdb12901ddae9234f557b5ca89 /llvm/lib/Target | |
| parent | 44ebe00158326fbf68bffe86601637bbb6681041 (diff) | |
| download | bcm5719-llvm-8070b382ecf85996ca61aa41e25728038f76daa5.tar.gz bcm5719-llvm-8070b382ecf85996ca61aa41e25728038f76daa5.zip | |
ARM cost model: Increase cost of some vector selects we do terrible on
By terrible I mean we store/load from the stack.
This matters on PAQp8 in _Z5trainPsS_ii (which is inlined into Mixer::update)
where we decide to vectorize a loop with a VF of 8 resulting in a 25%
degradation on a cortex-a8.
LV: Found an estimated cost of 2 for VF 8 For instruction: icmp slt i32
LV: Found an estimated cost of 2 for VF 8 For instruction: select i1, i32, i32
The bug that tracks the CodeGen part is PR14868.
radar://13403975
llvm-svn: 177105
Diffstat (limited to 'llvm/lib/Target')
| -rw-r--r-- | llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 24 |
1 files changed, 24 insertions, 0 deletions
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index eef282e7092..062d4d3f575 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -334,6 +334,30 @@ unsigned ARMTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, int ISD = TLI->InstructionOpcodeToISD(Opcode); // On NEON a a vector select gets lowered to vbsl. if (ST->hasNEON() && ValTy->isVectorTy() && ISD == ISD::SELECT) { + // Lowering of some vector selects is currently far from perfect. + static const TypeConversionCostTblEntry<MVT> NEONVectorSelectTbl[] = { + { ISD::SELECT, MVT::v4i1, MVT::v4i8, 2*4 + 2*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i8, 2*8 + 1 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i8, 2*16 + 1 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i16, 2*4 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i16, 2*8 + 1 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i16, 2*16 + 1 + 3*1 + 4*1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i32, 4*8 + 1*3 + 1*4 + 1*2 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i32, 4*16 + 1*6 + 1*8 + 1*4 }, + { ISD::SELECT, MVT::v4i1, MVT::v4i64, 4*4 + 1*2 + 1 }, + { ISD::SELECT, MVT::v8i1, MVT::v8i64, 50 }, + { ISD::SELECT, MVT::v16i1, MVT::v16i64, 100 } + }; + + EVT SelCondTy = TLI->getValueType(CondTy); + EVT SelValTy = TLI->getValueType(ValTy); + int Idx = ConvertCostTableLookup<MVT>(NEONVectorSelectTbl, + array_lengthof(NEONVectorSelectTbl), + ISD, SelCondTy.getSimpleVT(), + SelValTy.getSimpleVT()); + if (Idx != -1) + return NEONVectorSelectTbl[Idx].Cost; + std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); return LT.first; } |

