diff options
| author | Nadav Rotem <nadav.rotem@intel.com> | 2012-01-11 20:19:17 +0000 |
|---|---|---|
| committer | Nadav Rotem <nadav.rotem@intel.com> | 2012-01-11 20:19:17 +0000 |
| commit | b5ce6ee835786cd06c8b7d43c2d13c9dd5c29a90 (patch) | |
| tree | 09fa0e0277f12e0a644b0f583ae9ac98a8f0105b | |
| parent | a230dea394d9405dcf00cde41f8514f945c21ff9 (diff) | |
| download | bcm5719-llvm-b5ce6ee835786cd06c8b7d43c2d13c9dd5c29a90.tar.gz bcm5719-llvm-b5ce6ee835786cd06c8b7d43c2d13c9dd5c29a90.zip | |
On AVX, we can load a v8i32 at a time. The bug happens when two loads of uneven width are used.
When we load the v12i32 type, the GenWidenVectorLoads method generates two loads, v8i32 and v4i32,
and attempts to use CONCAT_VECTORS to join them, even though CONCAT_VECTORS requires operands of the same type. In this fix I concatenate undef values
onto the smaller value to widen it to the size of the larger one. The test "widen_load-2.ll" also exposes this bug on AVX.
llvm-svn: 147964
| -rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 28 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/2012-01-11-split-cv.ll | 12 |
2 files changed, 35 insertions, 5 deletions
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 6aecca994a3..41b4221ef90 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2329,19 +2329,37 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector<SDValue, 16> &LdChain, BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, DAG.getIntPtrConstant(Increment)); + SDValue L; if (LdWidth < NewVTWidth) { // Our current type we are using is too large, find a better size NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); NewVTWidth = NewVT.getSizeInBits(); - } - - SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, isInvariant, MinAlign(Align, Increment)); - LdChain.push_back(LdOp.getValue(1)); - LdOps.push_back(LdOp); + LdChain.push_back(L.getValue(1)); + if (L->getValueType(0).isVector()) { + SmallVector<SDValue, 16> Loads; + Loads.push_back(L); + unsigned size = L->getValueSizeInBits(0); + while (size < LdOp->getValueSizeInBits(0)) { + Loads.push_back(DAG.getUNDEF(L->getValueType(0))); + size += L->getValueSizeInBits(0); + } + L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), + &Loads[0], Loads.size()); + } + } else { + L = DAG.getLoad(NewVT, dl, Chain, BasePtr, + LD->getPointerInfo().getWithOffset(Offset), isVolatile, + isNonTemporal, isInvariant, MinAlign(Align, Increment)); + LdChain.push_back(L.getValue(1)); + } + + LdOps.push_back(L); + LdWidth -= NewVTWidth; } diff --git a/llvm/test/CodeGen/X86/2012-01-11-split-cv.ll b/llvm/test/CodeGen/X86/2012-01-11-split-cv.ll new file mode 100644 index 00000000000..6b900729190 --- /dev/null +++ b/llvm/test/CodeGen/X86/2012-01-11-split-cv.ll @@ -0,0 +1,12 @@ +; RUN: llc < %s -march=x86 -mcpu=corei7-avx -mattr=+avx -mtriple=i686-pc-win32 | FileCheck %s + 
+;CHECK: add18i16 +define void @add18i16(<18 x i16>* nocapture sret %ret, <18 x i16>* %bp) nounwind { +;CHECK: vmovups + %b = load <18 x i16>* %bp, align 16 + %x = add <18 x i16> zeroinitializer, %b + store <18 x i16> %x, <18 x i16>* %ret, align 16 +;CHECK: ret + ret void +} + |

