1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
|
//===- HexagonTargetTransformInfo.cpp - Hexagon specific TTI pass ---------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
/// \file
/// This file implements a TargetTransformInfo analysis pass specific to the
/// Hexagon target machine. It uses the target's detailed information to provide
/// more precise answers to certain TTI queries, while letting the target
/// independent and default TTI implementations handle the rest.
///
//===----------------------------------------------------------------------===//
#include "HexagonTargetTransformInfo.h"
#include "HexagonSubtarget.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/User.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
using namespace llvm;
#define DEBUG_TYPE "hexagontti"
// Hidden command-line switch (off by default). In this file it gates whether
// any HVX vector registers are reported to the vectorizer and which maximum
// interleave factor is returned.
static cl::opt<bool>
    HexagonAutoHVX("hexagon-autohvx", cl::Hidden, cl::init(false),
                   cl::desc("Enable loop vectorizer for HVX"));
// Hidden command-line switch (on by default) whose value is returned verbatim
// by HexagonTTIImpl::shouldBuildLookupTables().
static cl::opt<bool> EmitLookupTables(
    "hexagon-emit-lookup-tables", cl::Hidden, cl::init(true),
    cl::desc("Control lookup table emission on Hexagon target"));
// Reports the level of population-count support. The answer does not depend
// on IntTyWidthInBit: every input narrower than 64 bits is promoted to
// 64 bits, so fast hardware support is reported for all widths.
TargetTransformInfo::PopcntSupportKind
HexagonTTIImpl::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTI::PSK_FastHardware;
}
// Fills in the Hexagon unrolling preferences for loop L.
//
// The Hexagon target can unroll loops with run-time trip counts, so runtime
// and partial unrolling are always enabled. In addition, a peel count of 2 is
// requested for innermost loops with small runtime trip counts.
void HexagonTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                             TTI::UnrollingPreferences &UP) {
  UP.Partial = true;
  UP.Runtime = true;

  // Peeling is only considered for an existing, innermost, peelable loop.
  if (!L || !L->empty() || !canPeel(L))
    return;

  // A non-zero small constant trip count rules the loop out.
  if (SE.getSmallConstantTripCount(L) != 0)
    return;

  // Require an upper bound on the trip count in the range [1, 5].
  const unsigned MaxTripCount = SE.getSmallConstantMaxTripCount(L);
  if (MaxTripCount == 0 || MaxTripCount > 5)
    return;

  UP.PeelCount = 2;
}
// Tells the TTI client that post-increment addressing should be favored on
// this target. The answer is unconditional.
bool HexagonTTIImpl::shouldFavorPostInc() const {
  constexpr bool FavorPostInc = true;
  return FavorPostInc;
}
// Returns the number of registers available in the requested class.
//
// Scalar queries (Vector == false) always yield 32. Vector queries yield 32
// only when -hexagon-autohvx is set and the subtarget uses HVX operations;
// otherwise they yield 0.
unsigned HexagonTTIImpl::getNumberOfRegisters(bool Vector) const {
  if (!Vector)
    return 32;

  const bool AutoVectorizeForHVX = HexagonAutoHVX && getST()->useHVXOps();
  return AutoVectorizeForHVX ? 32 : 0;
}
// Returns the maximum interleave factor transforms should use on this target.
// The vectorization factor VF is not consulted.
//
// When -hexagon-autohvx is set and the subtarget uses HVX operations the
// limit is 64. Otherwise the result is 1, i.e. "no interleaving". A value of
// 0 is not a meaningful interleave factor: the target-independent default is
// 1, and a client that clamps its chosen interleave count to this maximum
// would otherwise end up with a count of 0.
unsigned HexagonTTIImpl::getMaxInterleaveFactor(unsigned VF) {
  const bool AutoVectorizeForHVX = HexagonAutoHVX && getST()->useHVXOps();
  return AutoVectorizeForHVX ? 64 : 1;
}
// Returns the register width in bits: 32 for scalar registers, and whatever
// getMinVectorRegisterBitWidth() reports for vector registers.
unsigned HexagonTTIImpl::getRegisterBitWidth(bool Vector) const {
  if (Vector)
    return getMinVectorRegisterBitWidth();
  return 32;
}
// Returns the vector register width in bits, computed as eight times the
// subtarget's vector length, or 0 when the subtarget does not use HVX
// operations.
unsigned HexagonTTIImpl::getMinVectorRegisterBitWidth() const {
  const auto *Subtarget = getST();
  if (!Subtarget->useHVXOps())
    return 0;
  return Subtarget->getVectorLength() * 8;
}
// Returns the minimum vectorization factor for elements of ElemWidth bits:
// eight times the subtarget's vector length, divided by ElemWidth.
// NOTE(review): there is no guard against ElemWidth == 0 or against a
// subtarget without HVX - presumably callers guarantee both; confirm.
unsigned HexagonTTIImpl::getMinimumVF(unsigned ElemWidth) const {
  const unsigned VectorBits = 8 * getST()->getVectorLength();
  return VectorBits / ElemWidth;
}
unsigned HexagonTTIImpl::getMemoryOpCost(unsigned Opcode, Type *Src,
unsigned Alignment, unsigned AddressSpace, const Instruction *I) {
if (Opcode == Instruction::Load && Src->isVectorTy()) {
VectorType *VecTy = cast<VectorType>(Src);
unsigned VecWidth = VecTy->getBitWidth();
if (VecWidth > 64) {
// Assume that vectors longer than 64 bits are meant for HVX.
if (getNumberOfRegisters(true) > 0) {
if (VecWidth % getRegisterBitWidth(true) == 0)
return 1;
}
unsigned AlignWidth = 8 * std::max(1u, Alignment);
unsigned NumLoads = alignTo(VecWidth, AlignWidth) / AlignWidth;
return 3*NumLoads;
}
}
return BaseT::getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
}
// Returns the L1 prefetch distance reported by the subtarget.
unsigned HexagonTTIImpl::getPrefetchDistance() const {
  const auto *Subtarget = getST();
  return Subtarget->getL1PrefetchDistance();
}
// Returns the L1 cache line size reported by the subtarget.
unsigned HexagonTTIImpl::getCacheLineSize() const {
  const auto *Subtarget = getST();
  return Subtarget->getL1CacheLineSize();
}
int HexagonTTIImpl::getUserCost(const User *U,
ArrayRef<const Value *> Operands) {
auto isCastFoldedIntoLoad = [this](const CastInst *CI) -> bool {
if (!CI->isIntegerCast())
return false;
// Only extensions from an integer type shorter than 32-bit to i32
// can be folded into the load.
const DataLayout &DL = getDataLayout();
unsigned SBW = DL.getTypeSizeInBits(CI->getSrcTy());
unsigned DBW = DL.getTypeSizeInBits(CI->getDestTy());
if (DBW != 32 || SBW >= DBW)
return false;
const LoadInst *LI = dyn_cast<const LoadInst>(CI->getOperand(0));
// Technically, this code could allow multiple uses of the load, and
// check if all the uses are the same extension operation, but this
// should be sufficient for most cases.
return LI && LI->hasOneUse();
};
if (const CastInst *CI = dyn_cast<const CastInst>(U))
if (isCastFoldedIntoLoad(CI))
return TargetTransformInfo::TCC_Free;
return BaseT::getUserCost(U, Operands);
}
// Returns the current value of the -hexagon-emit-lookup-tables option.
bool HexagonTTIImpl::shouldBuildLookupTables() const {
  const bool LookupTablesEnabled = EmitLookupTables;
  return LookupTablesEnabled;
}
|