Diffstat (limited to 'llvm/lib/Target/AArch64/AArch64ISelLowering.cpp')
-rw-r--r--  llvm/lib/Target/AArch64/AArch64ISelLowering.cpp | 44
1 file changed, 37 insertions, 7 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index af3ab1b0fb3..4dff402ba70 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7281,7 +7281,7 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
///
/// E.g. Lower an interleaved store (Factor = 3):
///        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
-///                            <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
+///                 <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
///        store <12 x i32> %i.vec, <12 x i32>* %ptr
///
/// Into:
@@ -7292,6 +7292,17 @@ static Constant *getSequentialMask(IRBuilder<> &Builder, unsigned Start,
///
/// Note that the new shufflevectors will be removed and we'll only generate one
/// st3 instruction in CodeGen.
+///
+/// Example for a more general valid mask (Factor = 3). Lower:
+///        %i.vec = shuffle <32 x i32> %v0, <32 x i32> %v1,
+///                 <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
+///        store <12 x i32> %i.vec, <12 x i32>* %ptr
+///
+/// Into:
+///        %sub.v0 = shuffle <32 x i32> %v0, <32 x i32> %v1, <4, 5, 6, 7>
+///        %sub.v1 = shuffle <32 x i32> %v0, <32 x i32> %v1, <32, 33, 34, 35>
+///        %sub.v2 = shuffle <32 x i32> %v0, <32 x i32> %v1, <16, 17, 18, 19>
+///        call void llvm.aarch64.neon.st3(%sub.v0, %sub.v1, %sub.v2, %ptr)
bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
                                                  ShuffleVectorInst *SVI,
                                                  unsigned Factor) const {
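
[Editor's note] The decomposition in the new doc comment is pure index arithmetic: each of the Factor lanes of an stN takes LaneLen consecutive elements of the concatenated inputs, starting at that lane's first mask entry. A standalone sketch (plain C++, not part of the patch; sequentialMask is a hypothetical stand-in mirroring getSequentialMask) reproducing the second example:

#include <cstdio>
#include <vector>

// Stand-in for getSequentialMask(Builder, Start, NumElts): the constant
// mask <Start, Start+1, ..., Start+NumElts-1>.
static std::vector<int> sequentialMask(int Start, int NumElts) {
  std::vector<int> M;
  for (int i = 0; i < NumElts; ++i)
    M.push_back(Start + i);
  return M;
}

int main() {
  // Second doc-comment example: Factor = 3, LaneLen = 4.
  const int Factor = 3, LaneLen = 4;
  const int Mask[] = {4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19};

  // Lane i of the stN reads mask entries i, i+Factor, i+2*Factor, ...;
  // for a valid re-interleave mask these are consecutive, so the lane
  // is fully determined by its first entry Mask[i].
  for (int i = 0; i < Factor; ++i) {
    std::vector<int> Sub = sequentialMask(Mask[i], LaneLen);
    std::printf("sub.v%d: <", i);
    for (int j = 0; j < LaneLen; ++j)
      std::printf("%d%s", Sub[j], j + 1 < LaneLen ? ", " : ">\n");
  }
  return 0;
}

Running it prints <4, 5, 6, 7>, <32, 33, 34, 35>, <16, 17, 18, 19>, matching the sub.v0..sub.v2 masks in the comment above.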
@@ -7302,9 +7313,9 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
  assert(VecTy->getVectorNumElements() % Factor == 0 &&
         "Invalid interleaved store");

-  unsigned NumSubElts = VecTy->getVectorNumElements() / Factor;
+  unsigned LaneLen = VecTy->getVectorNumElements() / Factor;
  Type *EltTy = VecTy->getVectorElementType();
-  VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts);
+  VectorType *SubVecTy = VectorType::get(EltTy, LaneLen);

  const DataLayout &DL = SI->getModule()->getDataLayout();
  unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy);
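
[Editor's note] The rename from NumSubElts to LaneLen is cosmetic, but the quantity matters: each stN lane must itself be a legal NEON vector. A worked instance with assumed values (a <12 x i32> store, Factor = 3; the variable names mirror the patch, the numbers are illustrative):

#include <cassert>
#include <cstdio>

int main() {
  // Assumed example: a <12 x i32> interleaved store with Factor = 3.
  unsigned NumElts = 12, Factor = 3, EltBits = 32;
  assert(NumElts % Factor == 0 && "Invalid interleaved store");

  unsigned LaneLen = NumElts / Factor;      // 4 elements per stN lane
  unsigned SubVecSize = LaneLen * EltBits;  // 128 bits; 64- and 128-bit
                                            // sub-vectors are the legal
                                            // NEON cases
  std::printf("LaneLen = %u, SubVecSize = %u bits\n", LaneLen, SubVecSize);
  return 0;
}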
@@ -7329,7 +7340,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
    Op0 = Builder.CreatePtrToInt(Op0, IntVecTy);
    Op1 = Builder.CreatePtrToInt(Op1, IntVecTy);

-    SubVecTy = VectorType::get(IntTy, NumSubElts);
+    SubVecTy = VectorType::get(IntTy, LaneLen);
  }

  Type *PtrTy = SubVecTy->getPointerTo(SI->getPointerAddressSpace());
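
[Editor's note] The stN intrinsics do not take vectors of pointers, so the function (in code just above this hunk, not shown here) swaps the pointer element type for a pointer-sized integer and ptrtoint-casts the operands. A minimal sketch of the type side of that step, assuming roughly this vintage of the LLVM C++ API and an illustrative AArch64 data layout string:

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include <cstdio>
using namespace llvm;

int main() {
  LLVMContext Ctx;
  // Assumed AArch64 layout string: pointers are 64 bits wide.
  DataLayout DL("e-m:e-i64:64-i128:128-n32:64-S128");
  unsigned LaneLen = 4;
  Type *EltTy = Type::getInt8PtrTy(Ctx);      // element type i8*
  if (EltTy->isPointerTy()) {
    // Same move as the patch: use a pointer-sized integer element
    // type when forming the sub-vector type.
    Type *IntTy = DL.getIntPtrType(EltTy);    // i64 on AArch64
    VectorType *SubVecTy = VectorType::get(IntTy, LaneLen);
    std::printf("sub-vector: <%u x i%u>\n", SubVecTy->getNumElements(),
                IntTy->getIntegerBitWidth()); // <4 x i64>
  }
  return 0;
}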
@@ -7343,9 +7354,28 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI,
  SmallVector<Value *, 5> Ops;

  // Split the shufflevector operands into sub vectors for the new stN call.
-  for (unsigned i = 0; i < Factor; i++)
-    Ops.push_back(Builder.CreateShuffleVector(
-        Op0, Op1, getSequentialMask(Builder, NumSubElts * i, NumSubElts)));
+  auto Mask = SVI->getShuffleMask();
+  for (unsigned i = 0; i < Factor; i++) {
+    if (Mask[i] >= 0) {
+      Ops.push_back(Builder.CreateShuffleVector(
+          Op0, Op1, getSequentialMask(Builder, Mask[i], LaneLen)));
+    } else {
+      unsigned StartMask = 0;
+      for (unsigned j = 1; j < LaneLen; j++) {
+        if (Mask[j*Factor + i] >= 0) {
+          StartMask = Mask[j*Factor + i] - j;
+          break;
+        }
+      }
+      // Note: If all elements in a chunk are undef, StartMask stays 0 and
+      // the chunk defaults to being filled from element 0.
+      // Note: Filling undef gaps with arbitrary elements is fine, since
+      // those elements were being written anyway (as undef).
+      // Note: StartMask cannot be negative; isReInterleaveMask checks that.
+      Ops.push_back(Builder.CreateShuffleVector(
+          Op0, Op1, getSequentialMask(Builder, StartMask, LaneLen)));
+    }
+  }

  Ops.push_back(Builder.CreateBitCast(SI->getPointerOperand(), PtrTy));
  Builder.CreateCall(StNFunc, Ops);
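
[Editor's note] The heart of the patch is the StartMask search above: when lane i's first mask element is undef, a later defined element Mask[j*Factor + i] pins the lane, and subtracting j projects it back to the lane's start. A standalone rework (plain C++, assumed example mask, not LLVM code) with one undef element to show the back-projection:

#include <cstdio>

int main() {
  const unsigned Factor = 3, LaneLen = 4;
  // Same mask as the doc comment, but with lane 1's first element
  // undef: <4, -1, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>.
  const int Mask[] = {4, -1, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19};

  for (unsigned i = 0; i < Factor; ++i) {
    int StartMask = 0;
    if (Mask[i] >= 0) {
      StartMask = Mask[i];
    } else {
      // Scan later rows for the first defined element of lane i and
      // project it back j positions to the start of the lane.
      for (unsigned j = 1; j < LaneLen; ++j) {
        if (Mask[j * Factor + i] >= 0) {
          StartMask = Mask[j * Factor + i] - j;
          break;
        }
      }
      // If every element of the lane is undef, StartMask stays 0 and
      // the lane is filled from element 0, which is harmless: those
      // elements were being stored as undef anyway.
    }
    std::printf("lane %u starts at %d\n", i, StartMask);
  }
  // Lane 1: Mask[1] is undef, Mask[1*3 + 1] = 33, so StartMask = 33 - 1
  // = 32, recovering the <32, 33, 34, 35> sub-mask from the doc comment.
  return 0;
}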