diff options
| author | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-06-25 19:14:09 +0000 |
|---|---|---|
| committer | Arnold Schwaighofer <aschwaighofer@apple.com> | 2013-06-25 19:14:09 +0000 |
| commit | a04b9ef1e8bf3a36480371254f3779e7ef8f1167 (patch) | |
| tree | 862af0abb83d2de7a41c9d92189b80bf42f66a4b /llvm/lib | |
| parent | 09617a5d03d367c620f0e8ff3b968ed68a1ad8f1 (diff) | |
| download | bcm5719-llvm-a04b9ef1e8bf3a36480371254f3779e7ef8f1167.tar.gz bcm5719-llvm-a04b9ef1e8bf3a36480371254f3779e7ef8f1167.zip | |
X86 cost model: Vectorizing integer division is a bad idea
radar://14057959
llvm-svn: 184872
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 25 |
1 files changed, 25 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index df6f37b791f..3bcdfc1be50 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -196,6 +196,16 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, { ISD::SRA, MVT::v32i8, 32*10 }, // Scalarized. { ISD::SRA, MVT::v16i16, 16*10 }, // Scalarized. { ISD::SRA, MVT::v4i64, 4*10 }, // Scalarized. + + // Vectorizing division is a bad idea. See the SSE2 table for more comments. + { ISD::SDIV, MVT::v32i8, 32*20 }, + { ISD::SDIV, MVT::v16i16, 16*20 }, + { ISD::SDIV, MVT::v8i32, 8*20 }, + { ISD::SDIV, MVT::v4i64, 4*20 }, + { ISD::UDIV, MVT::v32i8, 32*20 }, + { ISD::UDIV, MVT::v16i16, 16*20 }, + { ISD::UDIV, MVT::v8i32, 8*20 }, + { ISD::UDIV, MVT::v4i64, 4*20 }, }; // Look for AVX2 lowering tricks. @@ -258,6 +268,21 @@ unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, { ISD::SRA, MVT::v8i16, 8*10 }, // Scalarized. { ISD::SRA, MVT::v4i32, 4*10 }, // Scalarized. { ISD::SRA, MVT::v2i64, 2*10 }, // Scalarized. + + // It is not a good idea to vectorize division. We have to scalarize it and + // in the process we will often end up having to spill regular + // registers. The overhead of division is going to dominate most kernels + // anyway so try hard to prevent vectorization of division - it is + // generally a bad idea. Assume somewhat arbitrarily that we have to be able + // to hide "20 cycles" for each lane. + { ISD::SDIV, MVT::v16i8, 16*20 }, + { ISD::SDIV, MVT::v8i16, 8*20 }, + { ISD::SDIV, MVT::v4i32, 4*20 }, + { ISD::SDIV, MVT::v2i64, 2*20 }, + { ISD::UDIV, MVT::v16i8, 16*20 }, + { ISD::UDIV, MVT::v8i16, 8*20 }, + { ISD::UDIV, MVT::v4i32, 4*20 }, + { ISD::UDIV, MVT::v2i64, 2*20 }, }; if (ST->hasSSE2()) { |

