Diffstat (limited to 'llvm/lib/Target/X86/X86InterleavedAccess.cpp')
-rw-r--r--  llvm/lib/Target/X86/X86InterleavedAccess.cpp | 47
1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/llvm/lib/Target/X86/X86InterleavedAccess.cpp b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
index 8c65eda54ca..ca5be914e58 100644
--- a/llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ b/llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -1,25 +1,44 @@
-//===--------- X86InterleavedAccess.cpp ----------------------------------===//
+//===- X86InterleavedAccess.cpp -------------------------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
-//===--------------------------------------------------------------------===//
-///
+//===----------------------------------------------------------------------===//
+//
/// \file
/// This file contains the X86 implementation of the interleaved accesses
/// optimization generating X86-specific instructions/intrinsics for
/// interleaved access groups.
-///
-//===--------------------------------------------------------------------===//
+//
+//===----------------------------------------------------------------------===//
-#include "X86TargetMachine.h"
+#include "X86ISelLowering.h"
+#include "X86Subtarget.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/VectorUtils.h"
+#include "llvm/CodeGen/MachineValueType.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/DerivedTypes.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Support/Casting.h"
+#include <algorithm>
+#include <cassert>
+#include <cmath>
+#include <cstdint>
using namespace llvm;
namespace {
+
/// \brief This class holds necessary information to represent an interleaved
/// access group and supports utilities to lower the group into
/// X86-specific instructions/intrinsics.
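For orientation: this class is driven from the X86TargetLowering::lowerInterleavedLoad/lowerInterleavedStore hooks at the bottom of this file. A minimal sketch of the call pattern, mirroring the guard visible at the end of this diff (the constructor argument names here are assumptions for illustration, not quoted from the file):

    // Describe the candidate access group, then lower it only when the
    // target supports this (element type, factor) combination.
    X86InterleavedAccessGroup Grp(Inst, Shuffles, Indices, Factor,
                                  Subtarget, Builder);
    if (Grp.isSupported())
      Grp.lowerIntoOptimizedSequence();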
@@ -104,6 +123,7 @@ public:
/// instructions/intrinsics.
bool lowerIntoOptimizedSequence();
};
+
} // end anonymous namespace
bool X86InterleavedAccessGroup::isSupported() const {
@@ -146,7 +166,6 @@ bool X86InterleavedAccessGroup::isSupported() const {
void X86InterleavedAccessGroup::decompose(
Instruction *VecInst, unsigned NumSubVectors, VectorType *SubVecTy,
SmallVectorImpl<Instruction *> &DecomposedVectors) {
-
assert((isa<LoadInst>(VecInst) || isa<ShuffleVectorInst>(VecInst)) &&
"Expected Load or Shuffle");
@@ -211,7 +230,6 @@ static uint32_t Concat[] = {
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 };
-
// genShuffleBland - Creates a shuffle according to two vectors. This function
// only works on instructions whose lanes fit within 256-bit registers. From
// the mask 'Mask' it creates a new mask 'Out', offset by the given amount. The
@@ -289,8 +307,6 @@ static void reorderSubVector(MVT VT, SmallVectorImpl<Value *> &TransposedMatrix,
for (unsigned i = 0; i < Stride; i++)
TransposedMatrix[i] =
Builder.CreateShuffleVector(Temp[2 * i], Temp[2 * i + 1], Concat);
-
- return;
}
void X86InterleavedAccessGroup::interleave8bitStride4VF8(
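The Concat array defined above is just the identity mask over two 32-element sources: indices 0-31 select all of the first operand and 32-63 all of the second, so each CreateShuffleVector(Temp[2*i], Temp[2*i+1], Concat) call produces one double-width concatenation. A generic way to build such a mask (makeConcatMask is a hypothetical helper, shown only to make the mask's meaning concrete):

    #include "llvm/ADT/SmallVector.h"
    #include <cstdint>

    // Identity concatenation mask over two NumElts-wide vectors: indices
    // 0..NumElts-1 select Op0, NumElts..2*NumElts-1 select Op1.
    llvm::SmallVector<uint32_t, 64> makeConcatMask(unsigned NumElts) {
      llvm::SmallVector<uint32_t, 64> Mask;
      for (unsigned i = 0; i < 2 * NumElts; ++i)
        Mask.push_back(i);
      return Mask;
    }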
@@ -336,7 +352,6 @@ void X86InterleavedAccessGroup::interleave8bitStride4VF8(
void X86InterleavedAccessGroup::interleave8bitStride4(
ArrayRef<Instruction *> Matrix, SmallVectorImpl<Value *> &TransposedMatrix,
unsigned NumOfElm) {
-
// Example: Assuming we start from the following vectors:
// Matrix[0]= c0 c1 c2 c3 c4 ... c31
// Matrix[1]= m0 m1 m2 m3 m4 ... m31
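Expressed as a single generic shuffle over the four concatenated input rows, the transform computes out[j] = Matrix[j % 4][j / 4]; the routine then decomposes that into lane-friendly unpack/permute steps. A sketch of the generic mask (stride4InterleaveMask is illustrative, not a helper in this file):

    #include <vector>

    // Interleave four NumOfElm-wide rows: out[j] = row (j % 4), column (j / 4).
    std::vector<int> stride4InterleaveMask(unsigned NumOfElm) {
      std::vector<int> Mask;
      for (unsigned j = 0; j < 4 * NumOfElm; ++j)
        Mask.push_back((j % 4) * NumOfElm + j / 4);
      return Mask;
    }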
@@ -452,7 +467,6 @@ static void setGroupSize(MVT VT, SmallVectorImpl<uint32_t> &SizeInfo) {
static void DecodePALIGNRMask(MVT VT, unsigned Imm,
SmallVectorImpl<uint32_t> &ShuffleMask,
bool AlignDirection = true, bool Unary = false) {
-
unsigned NumElts = VT.getVectorNumElements();
unsigned NumLanes = std::max((int)VT.getSizeInBits() / 128, 1);
unsigned NumLaneElts = NumElts / NumLanes;
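PALIGNR concatenates its two sources per 128-bit lane and extracts a byte-shifted window, so the decoded mask is i + Imm within each lane, wrapping into the second shuffle operand (indices >= NumElts in LLVM's two-source convention) once it steps past the lane. A self-contained sketch specialized to byte elements (decodePalignrByteMask is hypothetical; the real routine also handles the AlignDirection and Unary variations):

    #include <algorithm>
    #include <vector>

    std::vector<int> decodePalignrByteMask(unsigned NumElts, unsigned Imm) {
      std::vector<int> Mask;
      unsigned NumLanes = std::max(NumElts / 16, 1u); // 16 bytes per lane
      unsigned NumLaneElts = NumElts / NumLanes;
      for (unsigned Lane = 0; Lane != NumElts; Lane += NumLaneElts)
        for (unsigned i = 0; i != NumLaneElts; ++i) {
          unsigned Base = i + Imm;
          // Past the end of this lane: take the element from the second source.
          if (Base >= NumLaneElts)
            Base += NumElts - NumLaneElts;
          Mask.push_back(Base + Lane);
        }
      return Mask;
    }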
@@ -517,14 +531,11 @@ static void concatSubVector(Value **Vec, ArrayRef<Instruction *> InVec,
for (int i = 0; i < 3; i++)
Vec[i] = Builder.CreateShuffleVector(Vec[i], Vec[i + 3], Concat);
-
- return;
}
void X86InterleavedAccessGroup::deinterleave8bitStride3(
ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
unsigned VecElems) {
-
// Example: Assuming we start from the following vectors:
// Matrix[0]= a0 b0 c0 a1 b1 c1 a2 b2
// Matrix[1]= c2 a3 b3 c3 a4 b4 c4 a5
@@ -584,8 +595,6 @@ void X86InterleavedAccessGroup::deinterleave8bitStride3(
Vec[0], UndefValue::get(Vec[1]->getType()), VPAlign2);
TransposedMatrix[1] = VecElems == 8 ? Vec[2] : TempVec;
TransposedMatrix[2] = VecElems == 8 ? TempVec : Vec[2];
-
- return;
}
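Stated as one generic shuffle, deinterleaving stride 3 extracts every third element of the concatenated input rows: field F takes indices 3*j + F. The routine above reaches the same result through lane-local vpalignr/pshufb steps; the generic form (stride3DeinterleaveMask is illustrative only):

    #include <vector>

    // Deinterleave a0 b0 c0 a1 b1 c1 ... into an a-row, a b-row and a c-row.
    std::vector<int> stride3DeinterleaveMask(unsigned VecElems, unsigned Field) {
      std::vector<int> Mask;
      for (unsigned j = 0; j < VecElems; ++j)
        Mask.push_back(3 * j + Field);
      return Mask;
    }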
// group2Shuffle reorders the shuffle stride back into continuous order.
@@ -613,7 +622,6 @@ static void group2Shuffle(MVT VT, SmallVectorImpl<uint32_t> &Mask,
void X86InterleavedAccessGroup::interleave8bitStride3(
ArrayRef<Instruction *> InVec, SmallVectorImpl<Value *> &TransposedMatrix,
unsigned VecElems) {
-
// Example: Assuming we start from the following vectors:
// Matrix[0]= a0 a1 a2 a3 a4 a5 a6 a7
// Matrix[1]= b0 b1 b2 b3 b4 b5 b6 b7
@@ -670,8 +678,6 @@ void X86InterleavedAccessGroup::interleave8bitStride3(
unsigned NumOfElm = VT.getVectorNumElements();
group2Shuffle(VT, GroupSize, VPShuf);
reorderSubVector(VT, TransposedMatrix, Vec, VPShuf, NumOfElm,3, Builder);
-
- return;
}
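This is the inverse of deinterleave8bitStride3: as a single generic shuffle over the three concatenated rows, output element j comes from row j % 3, column j / 3. A sketch of that mask (stride3InterleaveMask is illustrative; the code above builds the result from group shuffles instead):

    #include <vector>

    // Interleave an a-row, b-row and c-row into a0 b0 c0 a1 b1 c1 ...
    std::vector<int> stride3InterleaveMask(unsigned VecElems) {
      std::vector<int> Mask;
      for (unsigned j = 0; j < 3 * VecElems; ++j)
        Mask.push_back((j % 3) * VecElems + j / 3);
      return Mask;
    }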
void X86InterleavedAccessGroup::transpose_4x4(
@@ -834,4 +840,3 @@ bool X86TargetLowering::lowerInterleavedStore(StoreInst *SI,
return Grp.isSupported() && Grp.lowerIntoOptimizedSequence();
}
-