summaryrefslogtreecommitdiffstats
path: root/llvm/lib/Target
diff options
context:
space:
mode:
authorZi Xuan Wu <wuzish@cn.ibm.com>2019-10-08 03:28:33 +0000
committerZi Xuan Wu <wuzish@cn.ibm.com>2019-10-08 03:28:33 +0000
commit9f41deccc0e648a006c9f38e11919f181b6c7e0a (patch)
tree0da247efc8203566946a70869472d3f9438b9251 /llvm/lib/Target
parent9806a1d5f90a21a16c4bfc6d4bb10e0d5b870573 (diff)
downloadbcm5719-llvm-9f41deccc0e648a006c9f38e11919f181b6c7e0a.tar.gz
bcm5719-llvm-9f41deccc0e648a006c9f38e11919f181b6c7e0a.zip
[LoopVectorize][PowerPC] Estimate int and float register pressure separately in loop-vectorize
In loop-vectorize, interleave count and vector factor depend on target register number. Currently, it does not estimate different register pressure for different register class separately(especially for scalar type, float type should not be on the same position with int type), so it's not accurate. Specifically, it causes too many times interleaving/unrolling, result in too many register spills in loop body and hurting performance. So we need classify the register classes in IR level, and importantly these are abstract register classes, and are not the target register class of backend provided in td file. It's used to establish the mapping between the types of IR values and the number of simultaneous live ranges to which we'd like to limit for some set of those types. For example, POWER target, register num is special when VSX is enabled. When VSX is enabled, the number of int scalar register is 32(GPR), float is 64(VSR), but for int and float vector register both are 64(VSR). So there should be 2 kinds of register class when vsx is enabled, and 3 kinds of register class when VSX is NOT enabled. It runs on POWER target, it makes big(+~30%) performance improvement in one specific bmk(503.bwaves_r) of spec2017 and no other obvious degressions. Differential revision: https://reviews.llvm.org/D67148 llvm-svn: 374017
Diffstat (limited to 'llvm/lib/Target')
-rw-r--r--llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/ARM/ARMTargetTransformInfo.h3
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp35
-rw-r--r--llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h8
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp3
-rw-r--r--llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp5
-rw-r--r--llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.cpp3
-rw-r--r--llvm/lib/Target/X86/X86TargetTransformInfo.h2
-rw-r--r--llvm/lib/Target/XCore/XCoreTargetTransformInfo.h3
11 files changed, 54 insertions, 15 deletions
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
index 95cda63b017..d5ef0e5bea3 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h
@@ -85,7 +85,8 @@ public:
bool enableInterleavedAccessVectorization() { return true; }
- unsigned getNumberOfRegisters(bool Vector) {
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector) {
if (ST->hasNEON())
return 32;
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 47e98dac9f6..b878ea3a171 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -122,7 +122,8 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector) {
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector) {
if (ST->hasNEON())
return 16;
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
index 40e53668701..764335128d4 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.cpp
@@ -594,10 +594,37 @@ bool PPCTTIImpl::enableInterleavedAccessVectorization() {
return true;
}
-unsigned PPCTTIImpl::getNumberOfRegisters(bool Vector) {
- if (Vector && !ST->hasAltivec() && !ST->hasQPX())
- return 0;
- return ST->hasVSX() ? 64 : 32;
+unsigned PPCTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ assert(ClassID == GPRRC || ClassID == FPRRC ||
+ ClassID == VRRC || ClassID == VSXRC);
+ if (ST->hasVSX()) {
+ assert(ClassID == GPRRC || ClassID == VSXRC);
+ return ClassID == GPRRC ? 32 : 64;
+ }
+ assert(ClassID == GPRRC || ClassID == FPRRC || ClassID == VRRC);
+ return 32;
+}
+
+unsigned PPCTTIImpl::getRegisterClassForType(bool Vector, Type *Ty) const {
+ if (Vector)
+ return ST->hasVSX() ? VSXRC : VRRC;
+ else if (Ty && Ty->getScalarType()->isFloatTy())
+ return ST->hasVSX() ? VSXRC : FPRRC;
+ else
+ return GPRRC;
+}
+
+const char* PPCTTIImpl::getRegisterClassName(unsigned ClassID) const {
+
+ switch (ClassID) {
+ default:
+ llvm_unreachable("unknown register class");
+ return "PPC::unknown register class";
+ case GPRRC: return "PPC::GPRRC";
+ case FPRRC: return "PPC::FPRRC";
+ case VRRC: return "PPC::VRRC";
+ case VSXRC: return "PPC::VSXRC";
+ }
}
unsigned PPCTTIImpl::getRegisterBitWidth(bool Vector) const {
diff --git a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
index 5d76ee418b6..294c970f21f 100644
--- a/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
+++ b/llvm/lib/Target/PowerPC/PPCTargetTransformInfo.h
@@ -72,7 +72,13 @@ public:
TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
bool IsZeroCmp) const;
bool enableInterleavedAccessVectorization();
- unsigned getNumberOfRegisters(bool Vector);
+
+ enum PPCRegisterClass {
+ GPRRC, FPRRC, VRRC, VSXRC
+ };
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+ const char* getRegisterClassName(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize();
unsigned getPrefetchDistance();
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
index 8d45e67d73c..11c99aa1117 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
@@ -304,7 +304,8 @@ bool SystemZTTIImpl::isLSRCostLess(TargetTransformInfo::LSRCost &C1,
C2.ScaleCost, C2.SetupCost);
}
-unsigned SystemZTTIImpl::getNumberOfRegisters(bool Vector) {
+unsigned SystemZTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (!Vector)
// Discount the stack pointer. Also leave out %r0, since it can't
// be used in an address.
diff --git a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
index 16ce2ef1d7a..e59badeb944 100644
--- a/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZTargetTransformInfo.h
@@ -56,7 +56,7 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getCacheLineSize() { return 256; }
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
index 46ef765ce0f..1c53e90daea 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp
@@ -25,10 +25,11 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const {
return TargetTransformInfo::PSK_FastHardware;
}
-unsigned WebAssemblyTTIImpl::getNumberOfRegisters(bool Vector) {
- unsigned Result = BaseT::getNumberOfRegisters(Vector);
+unsigned WebAssemblyTTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ unsigned Result = BaseT::getNumberOfRegisters(ClassID);
// For SIMD, use at least 16 registers, as a rough guess.
+ bool Vector = (ClassID == 1);
if (Vector)
Result = std::max(Result, 16u);
diff --git a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
index 1b11b4b631e..f0ecc73e91d 100644
--- a/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
+++ b/llvm/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h
@@ -53,7 +53,7 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getArithmeticInstrCost(
unsigned Opcode, Type *Ty,
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
index b634da1d51f..2f419b78f83 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -116,7 +116,8 @@ llvm::Optional<unsigned> X86TTIImpl::getCacheAssociativity(
llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
}
-unsigned X86TTIImpl::getNumberOfRegisters(bool Vector) {
+unsigned X86TTIImpl::getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector && !ST->hasSSE1())
return 0;
diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.h b/llvm/lib/Target/X86/X86TargetTransformInfo.h
index 9b948dbbb4c..3ff1896f505 100644
--- a/llvm/lib/Target/X86/X86TargetTransformInfo.h
+++ b/llvm/lib/Target/X86/X86TargetTransformInfo.h
@@ -116,7 +116,7 @@ public:
/// \name Vector TTI Implementations
/// @{
- unsigned getNumberOfRegisters(bool Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
unsigned getRegisterBitWidth(bool Vector) const;
unsigned getLoadStoreVecRegBitWidth(unsigned AS) const;
unsigned getMaxInterleaveFactor(unsigned VF);
diff --git a/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h b/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
index 3fecaaa5972..58df1f290ec 100644
--- a/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
+++ b/llvm/lib/Target/XCore/XCoreTargetTransformInfo.h
@@ -40,7 +40,8 @@ public:
: BaseT(TM, F.getParent()->getDataLayout()), ST(TM->getSubtargetImpl()),
TLI(ST->getTargetLowering()) {}
- unsigned getNumberOfRegisters(bool Vector) {
+ unsigned getNumberOfRegisters(unsigned ClassID) const {
+ bool Vector = (ClassID == 1);
if (Vector) {
return 0;
}
OpenPOWER on IntegriCloud