summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorEvan Cheng <evan.cheng@apple.com>2009-01-02 05:35:45 +0000
committerEvan Cheng <evan.cheng@apple.com>2009-01-02 05:35:45 +0000
commit4c91aa3418f7e0ea2976d78ff552a6ba943ed165 (patch)
tree969c7b2f5737f6c314dddab58bf1a32981326692
parent13f3a33f448d502aa52c2c91d357f5addf0f2375 (diff)
downloadbcm5719-llvm-4c91aa3418f7e0ea2976d78ff552a6ba943ed165.tar.gz
bcm5719-llvm-4c91aa3418f7e0ea2976d78ff552a6ba943ed165.zip
Do not select (isel) bt instructions that fold a load for Pentium M, Core, Core 2, and AMD processors. On these CPUs a bt with a memory operand is significantly slower than a load followed by a bt of a register.
llvm-svn: 61557
-rw-r--r--llvm/lib/Target/X86/X86.td34
-rw-r--r--llvm/lib/Target/X86/X86InstrInfo.td5
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.cpp36
-rw-r--r--llvm/lib/Target/X86/X86Subtarget.h5
-rw-r--r--llvm/test/CodeGen/X86/bt.ll2
5 files changed, 53 insertions, 29 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index 6d08b36be44..8867298abb8 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -48,6 +48,8 @@ def Feature3DNowA : SubtargetFeature<"3dnowa", "X863DNowLevel", "ThreeDNowA",
def Feature64Bit : SubtargetFeature<"64bit", "HasX86_64", "true",
"Support 64-bit instructions",
[FeatureSSE2]>;
+def FeatureSlowBTMem : SubtargetFeature<"slow-bt-mem", "IsBTMemSlow", "true",
+ "Bit testing of memory is slow">;
//===----------------------------------------------------------------------===//
// X86 processors supported.
@@ -66,27 +68,27 @@ def : Proc<"i686", []>;
def : Proc<"pentiumpro", []>;
def : Proc<"pentium2", [FeatureMMX]>;
def : Proc<"pentium3", [FeatureSSE1]>;
-def : Proc<"pentium-m", [FeatureSSE2]>;
+def : Proc<"pentium-m", [FeatureSSE2, FeatureSlowBTMem]>;
def : Proc<"pentium4", [FeatureSSE2]>;
-def : Proc<"x86-64", [Feature64Bit]>;
-def : Proc<"yonah", [FeatureSSE3]>;
-def : Proc<"prescott", [FeatureSSE3]>;
-def : Proc<"nocona", [FeatureSSE3, Feature64Bit]>;
-def : Proc<"core2", [FeatureSSSE3, Feature64Bit]>;
-def : Proc<"penryn", [FeatureSSE41, Feature64Bit]>;
+def : Proc<"x86-64", [Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"yonah", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"prescott", [FeatureSSE3, FeatureSlowBTMem]>;
+def : Proc<"nocona", [FeatureSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"core2", [FeatureSSSE3, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"penryn", [FeatureSSE41, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"k6", [FeatureMMX]>;
def : Proc<"k6-2", [FeatureMMX, Feature3DNow]>;
def : Proc<"k6-3", [FeatureMMX, Feature3DNow]>;
-def : Proc<"athlon", [FeatureMMX, Feature3DNowA]>;
-def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA]>;
-def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA]>;
-def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA]>;
-def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA]>;
-def : Proc<"k8", [Feature3DNowA, Feature64Bit]>;
-def : Proc<"opteron", [Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon64", [Feature3DNowA, Feature64Bit]>;
-def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit]>;
+def : Proc<"athlon", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-tbird", [FeatureMMX, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-4", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-xp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"athlon-mp", [FeatureSSE1, Feature3DNowA, FeatureSlowBTMem]>;
+def : Proc<"k8", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"opteron", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"athlon64", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
+def : Proc<"athlon-fx", [Feature3DNowA, Feature64Bit, FeatureSlowBTMem]>;
def : Proc<"winchip-c6", [FeatureMMX]>;
def : Proc<"winchip2", [FeatureMMX, Feature3DNow]>;
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index be36cba5e4d..b00ca647542 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -222,6 +222,7 @@ def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
def OptForSpeed : Predicate<"!OptForSize">;
+def FastBTMem : Predicate<"!Subtarget->isBTMemSlow()">;
//===----------------------------------------------------------------------===//
// X86 Instruction Format Definitions.
@@ -2666,11 +2667,11 @@ def BT32rr : I<0xA3, MRMSrcReg, (outs), (ins GR32:$src1, GR32:$src2),
def BT16mr : I<0xA3, MRMDestMem, (outs), (ins i16mem:$src1, GR16:$src2),
"bt{w}\t{$src2, $src1|$src1, $src2}",
[(X86bt (loadi16 addr:$src1), GR16:$src2),
- (implicit EFLAGS)]>, OpSize, TB;
+ (implicit EFLAGS)]>, OpSize, TB, Requires<[FastBTMem]>;
def BT32mr : I<0xA3, MRMDestMem, (outs), (ins i32mem:$src1, GR32:$src2),
"bt{l}\t{$src2, $src1|$src1, $src2}",
[(X86bt (loadi32 addr:$src1), GR32:$src2),
- (implicit EFLAGS)]>, TB;
+ (implicit EFLAGS)]>, TB, Requires<[FastBTMem]>;
} // Defs = [EFLAGS]
// Sign/Zero extenders
diff --git a/llvm/lib/Target/X86/X86Subtarget.cpp b/llvm/lib/Target/X86/X86Subtarget.cpp
index 106ce46525c..33a7b453464 100644
--- a/llvm/lib/Target/X86/X86Subtarget.cpp
+++ b/llvm/lib/Target/X86/X86Subtarget.cpp
@@ -149,6 +149,18 @@ bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
return true;
}
+static void DetectFamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) { // Decode a CPUID EAX value into Family and Model (both are outputs).
+ Family = (EAX >> 8) & 0xf; // Base family ID: EAX bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Base model ID: EAX bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Base family 0xF only: add the extended family ID to form the full family.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Base family 6 or 0xF: the extended model ID supplies the upper four model bits.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+
void X86Subtarget::AutoDetectSubtargetFeatures() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
union {
@@ -169,8 +181,15 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
if ((ECX >> 19) & 0x1) X86SSELevel = SSE41;
if ((ECX >> 20) & 0x1) X86SSELevel = SSE42;
- if (memcmp(text.c, "GenuineIntel", 12) == 0 ||
- memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ bool IsIntel = memcmp(text.c, "GenuineIntel", 12) == 0;
+ bool IsAMD = !IsIntel && memcmp(text.c, "AuthenticAMD", 12) == 0;
+ if (IsIntel || IsAMD) {
+ // Determine if bit test memory instructions are slow.
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
+ IsBTMemSlow = IsAMD || (Family == 6 && Model >= 13);
+
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
HasX86_64 = (EDX >> 29) & 0x1;
}
@@ -180,15 +199,9 @@ static const char *GetCurrentX86CPU() {
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
if (X86::GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
return "generic";
- unsigned Family = (EAX >> 8) & 0xf; // Bits 8 - 11
- unsigned Model = (EAX >> 4) & 0xf; // Bits 4 - 7
- if (Family == 6 || Family == 0xf) {
- if (Family == 0xf)
- // Examine extended family ID if family ID is F.
- Family += (EAX >> 20) & 0xff; // Bits 20 - 27
- // Examine extended model ID if family ID is 6 or F.
- Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
- }
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectFamilyModel(EAX, Family, Model);
X86::GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
bool Em64T = (EDX >> 29) & 0x1;
@@ -285,6 +298,7 @@ X86Subtarget::X86Subtarget(const Module &M, const std::string &FS, bool is64Bit)
, X86SSELevel(NoMMXSSE)
, X863DNowLevel(NoThreeDNow)
, HasX86_64(false)
+ , IsBTMemSlow(false)
, DarwinVers(0)
, IsLinux(false)
, stackAlignment(8)
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index f405ac798bb..646a953370f 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -64,6 +64,9 @@ protected:
/// HasX86_64 - True if the processor supports X86-64 instructions.
///
bool HasX86_64;
+
+ /// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
+ bool IsBTMemSlow;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
/// version of the platform, e.g. 8 = 10.4 (Tiger), 9 = 10.5 (Leopard), etc.
@@ -127,6 +130,8 @@ public:
bool has3DNow() const { return X863DNowLevel >= ThreeDNow; }
bool has3DNowA() const { return X863DNowLevel >= ThreeDNowA; }
+ bool isBTMemSlow() const { return IsBTMemSlow; }
+
unsigned getAsmFlavor() const {
return AsmFlavor != Unset ? unsigned(AsmFlavor) : 0;
}
diff --git a/llvm/test/CodeGen/X86/bt.ll b/llvm/test/CodeGen/X86/bt.ll
index bc77a58ce2b..b63a3f8ecf7 100644
--- a/llvm/test/CodeGen/X86/bt.ll
+++ b/llvm/test/CodeGen/X86/bt.ll
@@ -1,4 +1,6 @@
; RUN: llvm-as < %s | llc | grep btl
+; RUN: llvm-as < %s | llc -mcpu=pentium4 | grep btl | grep esp
+; RUN: llvm-as < %s | llc -mcpu=penryn | grep btl | not grep esp
; PR3253
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin8"
OpenPOWER on IntegriCloud