diff options
| author | Sanjay Patel <spatel@rotateright.com> | 2014-09-22 18:54:01 +0000 |
|---|---|---|
| committer | Sanjay Patel <spatel@rotateright.com> | 2014-09-22 18:54:01 +0000 |
| commit | 7939d7229d374b31c6237718d69c3c0bfa0846a8 (patch) | |
| tree | d6f8518ad25a9dda028cb7c85519d291a8147729 /llvm/lib | |
| parent | 869c0019b11546fc1ed191834cd5d2d4ae9ffc2a (diff) | |
| download | bcm5719-llvm-7939d7229d374b31c6237718d69c3c0bfa0846a8.tar.gz bcm5719-llvm-7939d7229d374b31c6237718d69c3c0bfa0846a8.zip | |
Use broadcasts to optimize overall size when loading constant splat vectors (x86-64 with AVX or AVX2).
We already generate broadcast instructions on CPUs with AVX2 to load some constant splat vectors.
This patch should preserve all existing behavior at regular optimization levels,
but it also uses broadcasts (splats) whenever possible when optimizing for *size* on any CPU with AVX or AVX2.
The tradeoff is up to 5 extra instruction bytes for the broadcast instruction to save
at least 8 bytes (up to 31 bytes) of constant pool data.
Differential Revision: http://reviews.llvm.org/D5347
llvm-svn: 218263
Diffstat (limited to 'llvm/lib')
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 33 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 7 |
2 files changed, 33 insertions, 7 deletions
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5b610b4e29f..f67eb96ade7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -5996,7 +5996,10 @@ static SDValue EltsFromConsecutiveLoads(EVT VT, SmallVectorImpl<SDValue> &Elts, /// or SDValue() otherwise. static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, SelectionDAG &DAG) { - if (!Subtarget->hasFp256()) + // VBROADCAST requires AVX. + // TODO: Splats could be generated for non-AVX CPUs using SSE + // instructions, but there's less potential gain for only 128-bit vectors. + if (!Subtarget->hasAVX()) return SDValue(); MVT VT = Op.getSimpleValueType(); @@ -6073,17 +6076,34 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, } } + unsigned ScalarSize = Ld.getValueType().getSizeInBits(); bool IsGE256 = (VT.getSizeInBits() >= 256); - // Handle the broadcasting a single constant scalar from the constant pool - // into a vector. On Sandybridge it is still better to load a constant vector + // When optimizing for size, generate up to 5 extra bytes for a broadcast + // instruction to save 8 or more bytes of constant pool data. + // TODO: If multiple splats are generated to load the same constant, + // it may be detrimental to overall size. There needs to be a way to detect + // that condition to know if this is truly a size win. + const Function *F = DAG.getMachineFunction().getFunction(); + bool OptForSize = F->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); + + // Handle broadcasting a single constant scalar from the constant pool + // into a vector. + // On Sandybridge (no AVX2), it is still better to load a constant vector // from the constant pool and not to broadcast it from a scalar. - if (ConstSplatVal && Subtarget->hasInt256()) { + // But override that restriction when optimizing for size. 
+ // TODO: Check if splatting is recommended for other AVX-capable CPUs. + if (ConstSplatVal && (Subtarget->hasAVX2() || OptForSize)) { EVT CVT = Ld.getValueType(); assert(!CVT.isVector() && "Must not broadcast a vector type"); - unsigned ScalarSize = CVT.getSizeInBits(); - if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64)) { + // Splat f32, i32, v4f64, v4i64 in all cases with AVX2. + // For size optimization, also splat v2f64 and v2i64, and for size opt + // with AVX2, also splat i8 and i16. + // With pattern matching, the VBROADCAST node may become a VMOVDDUP. + if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || + (OptForSize && (ScalarSize == 64 || Subtarget->hasAVX2()))) { const Constant *C = nullptr; if (ConstantSDNode *CI = dyn_cast<ConstantSDNode>(Ld)) C = CI->getConstantIntValue(); @@ -6104,7 +6124,6 @@ static SDValue LowerVectorBroadcast(SDValue Op, const X86Subtarget* Subtarget, } bool IsLoad = ISD::isNormalLoad(Ld.getNode()); - unsigned ScalarSize = Ld.getValueType().getSizeInBits(); // Handle AVX2 in-register broadcasts. if (!IsLoad && Subtarget->hasInt256() && diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 9030119edef..b2ffcb8f346 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5290,6 +5290,13 @@ let Predicates = [HasAVX] in { (VMOVDDUPYrr VR256:$src)>; } +let Predicates = [UseAVX, OptForSize] in { + def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))), + (VMOVDDUPrm addr:$src)>; + def : Pat<(v2i64 (X86VBroadcast (loadi64 addr:$src))), + (VMOVDDUPrm addr:$src)>; +} + let Predicates = [UseSSE3] in { def : Pat<(X86Movddup (memopv2f64 addr:$src)), (MOVDDUPrm addr:$src)>; |

