summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r-- llvm/lib/Target/X86/X86.td | 2
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.cpp | 2
-rw-r--r-- llvm/lib/Target/X86/X86Instr3DNow.td | 20
-rw-r--r-- llvm/lib/Target/X86/X86InstrInfo.td | 3
-rw-r--r-- llvm/lib/Target/X86/X86InstrSSE.td | 2
-rw-r--r-- llvm/lib/Target/X86/X86Subtarget.h | 11
-rw-r--r-- llvm/test/CodeGen/X86/prefetch.ll | 102
-rw-r--r-- llvm/test/MC/Disassembler/X86/x86-32.txt | 3
-rw-r--r-- llvm/test/MC/X86/3DNow.s | 2
9 files changed, 126 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index f756341a63b..9a4af17ab84 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -137,7 +137,7 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ",
def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true",
"Enable AVX-512 PreFetch Instructions",
[FeatureAVX512]>;
-def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1",
+def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1",
"true",
"Prefetch with Intent to Write and T1 Hint">;
def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true",
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 17c856fcd43..16dc33cca8e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -461,7 +461,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::SRL_PARTS, VT, Custom);
}
- if (Subtarget.hasSSE1())
+ if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow())
setOperationAction(ISD::PREFETCH , MVT::Other, Legal);
setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);
diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td
index 2acd8d17beb..0d30b7d47f3 100644
--- a/llvm/lib/Target/X86/X86Instr3DNow.td
+++ b/llvm/lib/Target/X86/X86Instr3DNow.td
@@ -116,14 +116,30 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", I3DNOW_MISC_FUNC_ITINS, 1>;
def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms",
[(int_x86_mmx_femms)], IIC_MMX_EMMS>;
+// If PREFETCHWT1 is supported, we want to use it for everything but T0.
+def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{
+ return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1();
+}]>;
+
+// Use PREFETCHWT1 for NTA, T2, T1.
+def PrefetchWT1Level : ImmLeaf<i32, [{
+ return Imm < 3;
+}]>;
+
let SchedRW = [WriteLoad] in {
+let Predicates = [Has3DNow, NoSSEPrefetch] in
def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr),
"prefetch\t$addr",
- [(prefetch addr:$addr, (i32 0), imm, (i32 1))],
+ [(prefetch addr:$addr, imm, imm, (i32 1))],
IIC_SSE_PREFETCH>;
+
def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr",
- [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))],
+ [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))],
IIC_SSE_PREFETCH>, TB, Requires<[HasPrefetchW]>;
+
+def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr",
+ [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))],
+ IIC_SSE_PREFETCH>, TB, Requires<[HasPREFETCHWT1]>;
}
// "3DNowA" instructions
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 42e89cb4831..fdf3e73e4fc 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -874,7 +874,10 @@ def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
+def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
+def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">;
def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">;
+def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">;
def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">;
def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">;
def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index a86a0bfc168..b48fa184197 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3487,7 +3487,7 @@ let Predicates = [UseSSE2] in {
//===----------------------------------------------------------------------===//
// Prefetch intrinsic.
-let Predicates = [HasSSE1], SchedRW = [WriteLoad] in {
+let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in {
def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src),
"prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))],
IIC_SSE_PREFETCH>, TB;
diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h
index 842320b548c..c9435890fc1 100644
--- a/llvm/lib/Target/X86/X86Subtarget.h
+++ b/llvm/lib/Target/X86/X86Subtarget.h
@@ -201,7 +201,7 @@ protected:
bool HasCLZERO;
/// Processor has Prefetch with intent to Write instruction
- bool HasPFPREFETCHWT1;
+ bool HasPREFETCHWT1;
/// True if SHLD instructions are slow.
bool IsSHLDSlow;
@@ -517,7 +517,14 @@ public:
bool hasRTM() const { return HasRTM; }
bool hasADX() const { return HasADX; }
bool hasSHA() const { return HasSHA; }
- bool hasPRFCHW() const { return HasPRFCHW; }
+ bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; }
+ bool hasPREFETCHWT1() const { return HasPREFETCHWT1; }
+ bool hasSSEPrefetch() const {
+ // We implicitly enable these when we have a write prefetch supporting a
+ // cache level OR if we have prfchw, but don't already have a read prefetch
+ // from 3dnow.
+ return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1();
+ }
bool hasRDSEED() const { return HasRDSEED; }
bool hasLAHFSAHF() const { return HasLAHFSAHF; }
bool hasMWAITX() const { return HasMWAITX; }
diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll
index 17a9ac994a7..839948174a4 100644
--- a/llvm/test/CodeGen/X86/prefetch.ll
+++ b/llvm/test/CodeGen/X86/prefetch.ll
@@ -1,27 +1,101 @@
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s
-; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW
-; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=SLM
-; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHW
-; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=NOPRFCHW
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
+; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
+; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
+; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
+; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=PRFCHW3DNOW
+
+; Rules:
+; 3dnow by itself gets you just the single prefetch instruction with no hints
+; sse provides prefetcht0/1/2/nta
+; supporting prefetchw, but not 3dnow implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint.
+; supporting prefetchwt1 implies prefetcht0/1/2/nta and prefetchw regardless of other settings. this allows levels for non-write and gives us an instruction for write+T0
+; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled
; rdar://10538297
define void @t(i8* %ptr) nounwind {
+; SSE-LABEL: t:
+; SSE: # %bb.0: # %entry
+; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT: prefetcht2 (%eax)
+; SSE-NEXT: prefetcht1 (%eax)
+; SSE-NEXT: prefetcht0 (%eax)
+; SSE-NEXT: prefetchnta (%eax)
+; SSE-NEXT: prefetcht2 (%eax)
+; SSE-NEXT: prefetcht1 (%eax)
+; SSE-NEXT: prefetcht0 (%eax)
+; SSE-NEXT: prefetchnta (%eax)
+; SSE-NEXT: retl
+;
+; PRFCHWSSE-LABEL: t:
+; PRFCHWSSE: # %bb.0: # %entry
+; PRFCHWSSE-NEXT: movl {{[0-9]+}}(%esp), %eax
+; PRFCHWSSE-NEXT: prefetcht2 (%eax)
+; PRFCHWSSE-NEXT: prefetcht1 (%eax)
+; PRFCHWSSE-NEXT: prefetcht0 (%eax)
+; PRFCHWSSE-NEXT: prefetchnta (%eax)
+; PRFCHWSSE-NEXT: prefetchw (%eax)
+; PRFCHWSSE-NEXT: prefetchw (%eax)
+; PRFCHWSSE-NEXT: prefetchw (%eax)
+; PRFCHWSSE-NEXT: prefetchw (%eax)
+; PRFCHWSSE-NEXT: retl
+;
+; PREFETCHWT1-LABEL: t:
+; PREFETCHWT1: # %bb.0: # %entry
+; PREFETCHWT1-NEXT: movl {{[0-9]+}}(%esp), %eax
+; PREFETCHWT1-NEXT: prefetcht2 (%eax)
+; PREFETCHWT1-NEXT: prefetcht1 (%eax)
+; PREFETCHWT1-NEXT: prefetcht0 (%eax)
+; PREFETCHWT1-NEXT: prefetchnta (%eax)
+; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
+; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
+; PREFETCHWT1-NEXT: prefetchw (%eax)
+; PREFETCHWT1-NEXT: prefetchwt1 (%eax)
+; PREFETCHWT1-NEXT: retl
+;
+; 3DNOW-LABEL: t:
+; 3DNOW: # %bb.0: # %entry
+; 3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: prefetch (%eax)
+; 3DNOW-NEXT: retl
+;
+; PRFCHW3DNOW-LABEL: t:
+; PRFCHW3DNOW: # %bb.0: # %entry
+; PRFCHW3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; PRFCHW3DNOW-NEXT: prefetch (%eax)
+; PRFCHW3DNOW-NEXT: prefetch (%eax)
+; PRFCHW3DNOW-NEXT: prefetch (%eax)
+; PRFCHW3DNOW-NEXT: prefetch (%eax)
+; PRFCHW3DNOW-NEXT: prefetchw (%eax)
+; PRFCHW3DNOW-NEXT: prefetchw (%eax)
+; PRFCHW3DNOW-NEXT: prefetchw (%eax)
+; PRFCHW3DNOW-NEXT: prefetchw (%eax)
+; PRFCHW3DNOW-NEXT: retl
entry:
-; CHECK: prefetcht2
-; CHECK: prefetcht1
-; CHECK: prefetcht0
-; CHECK: prefetchnta
-; PRFCHW: prefetchw
-; NOPRFCHW-NOT: prefetchw
-; SLM: prefetchw
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 1, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 2, i32 1 )
tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
+ tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 0, i32 1 )
ret void
}
-declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
diff --git a/llvm/test/MC/Disassembler/X86/x86-32.txt b/llvm/test/MC/Disassembler/X86/x86-32.txt
index 4211721ec48..cc05dfb6f89 100644
--- a/llvm/test/MC/Disassembler/X86/x86-32.txt
+++ b/llvm/test/MC/Disassembler/X86/x86-32.txt
@@ -667,6 +667,9 @@
# CHECK: prefetchw (%eax)
0x0f 0x0d 0x08
+# CHECK: prefetchwt1 (%eax)
+0x0f 0x0d 0x10
+
# CHECK: adcxl %eax, %eax
0x66 0x0f 0x38 0xf6 0xc0
diff --git a/llvm/test/MC/X86/3DNow.s b/llvm/test/MC/X86/3DNow.s
index 871857b155d..e66e39b547a 100644
--- a/llvm/test/MC/X86/3DNow.s
+++ b/llvm/test/MC/X86/3DNow.s
@@ -72,8 +72,10 @@ femms
// CHECK: prefetch (%rax) # encoding: [0x0f,0x0d,0x00]
// CHECK: prefetchw (%rax) # encoding: [0x0f,0x0d,0x08]
+// CHECK: prefetchwt1 (%rax) # encoding: [0x0f,0x0d,0x10]
prefetch (%rax)
prefetchw (%rax)
+prefetchwt1 (%rax)
// CHECK: pf2iw %mm2, %mm1 # encoding: [0x0f,0x0f,0xca,0x1c]
OpenPOWER on IntegriCloud