diff options
| -rw-r--r-- | llvm/lib/Target/X86/X86.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.cpp | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Instr3DNow.td | 20 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrInfo.td | 3 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86InstrSSE.td | 2 | ||||
| -rw-r--r-- | llvm/lib/Target/X86/X86Subtarget.h | 11 | ||||
| -rw-r--r-- | llvm/test/CodeGen/X86/prefetch.ll | 102 | ||||
| -rw-r--r-- | llvm/test/MC/Disassembler/X86/x86-32.txt | 3 | ||||
| -rw-r--r-- | llvm/test/MC/X86/3DNow.s | 2 |
9 files changed, 126 insertions, 21 deletions
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td index f756341a63b..9a4af17ab84 100644 --- a/llvm/lib/Target/X86/X86.td +++ b/llvm/lib/Target/X86/X86.td @@ -137,7 +137,7 @@ def FeatureVPOPCNTDQ : SubtargetFeature<"avx512vpopcntdq", "HasVPOPCNTDQ", def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; -def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPFPREFETCHWT1", +def FeaturePREFETCHWT1 : SubtargetFeature<"prefetchwt1", "HasPREFETCHWT1", "true", "Prefetch with Intent to Write and T1 Hint">; def FeatureDQI : SubtargetFeature<"avx512dq", "HasDQI", "true", diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 17c856fcd43..16dc33cca8e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -461,7 +461,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::SRL_PARTS, VT, Custom); } - if (Subtarget.hasSSE1()) + if (Subtarget.hasSSEPrefetch() || Subtarget.has3DNow()) setOperationAction(ISD::PREFETCH , MVT::Other, Legal); setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom); diff --git a/llvm/lib/Target/X86/X86Instr3DNow.td b/llvm/lib/Target/X86/X86Instr3DNow.td index 2acd8d17beb..0d30b7d47f3 100644 --- a/llvm/lib/Target/X86/X86Instr3DNow.td +++ b/llvm/lib/Target/X86/X86Instr3DNow.td @@ -116,14 +116,30 @@ defm PMULHRW : I3DNow_binop_rm_int<0xB7, "pmulhrw", I3DNOW_MISC_FUNC_ITINS, 1>; def FEMMS : I3DNow<0x0E, RawFrm, (outs), (ins), "femms", [(int_x86_mmx_femms)], IIC_MMX_EMMS>; +// PREFETCHWT1 is supported we want to use it for everything but T0. +def PrefetchWLevel : PatFrag<(ops), (i32 imm), [{ + return N->getSExtValue() == 3 || !Subtarget->hasPREFETCHWT1(); +}]>; + +// Use PREFETCHWT1 for NTA, T2, T1. +def PrefetchWT1Level : ImmLeaf<i32, [{ + return Imm < 3; +}]>; + let SchedRW = [WriteLoad] in { +let Predicates = [Has3DNow, NoSSEPrefetch] in def PREFETCH : I3DNow<0x0D, MRM0m, (outs), (ins i8mem:$addr), "prefetch\t$addr", - [(prefetch addr:$addr, (i32 0), imm, (i32 1))], + [(prefetch addr:$addr, imm, imm, (i32 1))], IIC_SSE_PREFETCH>; + def PREFETCHW : I<0x0D, MRM1m, (outs), (ins i8mem:$addr), "prefetchw\t$addr", - [(prefetch addr:$addr, (i32 1), (i32 3), (i32 1))], + [(prefetch addr:$addr, (i32 1), (i32 PrefetchWLevel), (i32 1))], IIC_SSE_PREFETCH>, TB, Requires<[HasPrefetchW]>; + +def PREFETCHWT1 : I<0x0D, MRM2m, (outs), (ins i8mem:$addr), "prefetchwt1\t$addr", + [(prefetch addr:$addr, (i32 1), (i32 PrefetchWT1Level), (i32 1))], + IIC_SSE_PREFETCH>, TB, Requires<[HasPREFETCHWT1]>; } // "3DNowA" instructions diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td index 42e89cb4831..fdf3e73e4fc 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.td +++ b/llvm/lib/Target/X86/X86InstrInfo.td @@ -874,7 +874,10 @@ def HasADX : Predicate<"Subtarget->hasADX()">; def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; +def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">; +def NoSSEPrefetch : Predicate<"!Subtarget->hasSSEPrefetch()">; def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; +def HasPREFETCHWT1 : Predicate<"Subtarget->hasPREFETCHWT1()">; def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; def HasMWAITX : Predicate<"Subtarget->hasMWAITX()">; def HasCLZERO : Predicate<"Subtarget->hasCLZERO()">; diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index a86a0bfc168..b48fa184197 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -3487,7 +3487,7 @@ let Predicates = [UseSSE2] in { //===----------------------------------------------------------------------===// // Prefetch intrinsic. -let Predicates = [HasSSE1], SchedRW = [WriteLoad] in { +let Predicates = [HasSSEPrefetch], SchedRW = [WriteLoad] in { def PREFETCHT0 : I<0x18, MRM1m, (outs), (ins i8mem:$src), "prefetcht0\t$src", [(prefetch addr:$src, imm, (i32 3), (i32 1))], IIC_SSE_PREFETCH>, TB; diff --git a/llvm/lib/Target/X86/X86Subtarget.h b/llvm/lib/Target/X86/X86Subtarget.h index 842320b548c..c9435890fc1 100644 --- a/llvm/lib/Target/X86/X86Subtarget.h +++ b/llvm/lib/Target/X86/X86Subtarget.h @@ -201,7 +201,7 @@ protected: bool HasCLZERO; /// Processor has Prefetch with intent to Write instruction - bool HasPFPREFETCHWT1; + bool HasPREFETCHWT1; /// True if SHLD instructions are slow. bool IsSHLDSlow; @@ -517,7 +517,14 @@ public: bool hasRTM() const { return HasRTM; } bool hasADX() const { return HasADX; } bool hasSHA() const { return HasSHA; } - bool hasPRFCHW() const { return HasPRFCHW; } + bool hasPRFCHW() const { return HasPRFCHW || HasPREFETCHWT1; } + bool hasPREFETCHWT1() const { return HasPREFETCHWT1; } + bool hasSSEPrefetch() const { + // We implicitly enable these when we have a write prefix supporting cache + // level OR if we have prfchw, but don't already have a read prefetch from + // 3dnow. + return hasSSE1() || (hasPRFCHW() && !has3DNow()) || hasPREFETCHWT1(); + } bool hasRDSEED() const { return HasRDSEED; } bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool hasMWAITX() const { return HasMWAITX; } diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll index 17a9ac994a7..839948174a4 100644 --- a/llvm/test/CodeGen/X86/prefetch.ll +++ b/llvm/test/CodeGen/X86/prefetch.ll @@ -1,27 +1,101 @@ -; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s -; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s -; RUN: llc < %s -mtriple=i686-- -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW -; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=SLM -; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHW -; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=NOPRFCHW +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=SSE +; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prfchw | FileCheck %s -check-prefix=PRFCHWSSE +; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHWSSE +; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=PRFCHWSSE +; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHWSSE +; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=SSE +; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1 +; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1 +; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1 +; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW +; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=PRFCHW3DNOW + +; Rules: +; 3dnow by itself get you just the single prefetch instruction with no hints +; sse provides prefetch0/1/2/nta +; supporting prefetchw, but not 3dnow implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint. +; supporting prefetchwt1 implies prefetcht0/1/2/nta and prefetchw regardless of other settings. this allows levels for non-write and gives us an instruction for write+T0 +; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled ; rdar://10538297 define void @t(i8* %ptr) nounwind { +; SSE-LABEL: t: +; SSE: # %bb.0: # %entry +; SSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; SSE-NEXT: prefetcht2 (%eax) +; SSE-NEXT: prefetcht1 (%eax) +; SSE-NEXT: prefetcht0 (%eax) +; SSE-NEXT: prefetchnta (%eax) +; SSE-NEXT: prefetcht2 (%eax) +; SSE-NEXT: prefetcht1 (%eax) +; SSE-NEXT: prefetcht0 (%eax) +; SSE-NEXT: prefetchnta (%eax) +; SSE-NEXT: retl +; +; PRFCHWSSE-LABEL: t: +; PRFCHWSSE: # %bb.0: # %entry +; PRFCHWSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; PRFCHWSSE-NEXT: prefetcht2 (%eax) +; PRFCHWSSE-NEXT: prefetcht1 (%eax) +; PRFCHWSSE-NEXT: prefetcht0 (%eax) +; PRFCHWSSE-NEXT: prefetchnta (%eax) +; PRFCHWSSE-NEXT: prefetchw (%eax) +; PRFCHWSSE-NEXT: prefetchw (%eax) +; PRFCHWSSE-NEXT: prefetchw (%eax) +; PRFCHWSSE-NEXT: prefetchw (%eax) +; PRFCHWSSE-NEXT: retl +; +; PREFETCHWT1-LABEL: t: +; PREFETCHWT1: # %bb.0: # %entry +; PREFETCHWT1-NEXT: movl {{[0-9]+}}(%esp), %eax +; PREFETCHWT1-NEXT: prefetcht2 (%eax) +; PREFETCHWT1-NEXT: prefetcht1 (%eax) +; PREFETCHWT1-NEXT: prefetcht0 (%eax) +; PREFETCHWT1-NEXT: prefetchnta (%eax) +; PREFETCHWT1-NEXT: prefetchwt1 (%eax) +; PREFETCHWT1-NEXT: prefetchwt1 (%eax) +; PREFETCHWT1-NEXT: prefetchw (%eax) +; PREFETCHWT1-NEXT: prefetchwt1 (%eax) +; PREFETCHWT1-NEXT: retl +; +; 3DNOW-LABEL: t: +; 3DNOW: # %bb.0: # %entry +; 3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: prefetch (%eax) +; 3DNOW-NEXT: retl +; +; PRFCHW3DNOW-LABEL: t: +; PRFCHW3DNOW: # %bb.0: # %entry +; PRFCHW3DNOW-NEXT: movl {{[0-9]+}}(%esp), %eax +; PRFCHW3DNOW-NEXT: prefetch (%eax) +; PRFCHW3DNOW-NEXT: prefetch (%eax) +; PRFCHW3DNOW-NEXT: prefetch (%eax) +; PRFCHW3DNOW-NEXT: prefetch (%eax) +; PRFCHW3DNOW-NEXT: prefetchw (%eax) +; PRFCHW3DNOW-NEXT: prefetchw (%eax) +; PRFCHW3DNOW-NEXT: prefetchw (%eax) +; PRFCHW3DNOW-NEXT: prefetchw (%eax) +; PRFCHW3DNOW-NEXT: retl entry: -; CHECK: prefetcht2 -; CHECK: prefetcht1 -; CHECK: prefetcht0 -; CHECK: prefetchnta -; PRFCHW: prefetchw -; NOPRFCHW-NOT: prefetchw -; SLM: prefetchw tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 1, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 2, i32 1 ) tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 ) + tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 0, i32 1 ) ret void } -declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind +declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind diff --git a/llvm/test/MC/Disassembler/X86/x86-32.txt b/llvm/test/MC/Disassembler/X86/x86-32.txt index 4211721ec48..cc05dfb6f89 100644 --- a/llvm/test/MC/Disassembler/X86/x86-32.txt +++ b/llvm/test/MC/Disassembler/X86/x86-32.txt @@ -667,6 +667,9 @@ # CHECK: prefetchw (%eax) 0x0f 0x0d 0x08 +# CHECK: prefetchwt1 (%eax) +0x0f 0x0d 0x10 + # CHECK: adcxl %eax, %eax 0x66 0x0f 0x38 0xf6 0xc0 diff --git a/llvm/test/MC/X86/3DNow.s b/llvm/test/MC/X86/3DNow.s index 871857b155d..e66e39b547a 100644 --- a/llvm/test/MC/X86/3DNow.s +++ b/llvm/test/MC/X86/3DNow.s @@ -72,8 +72,10 @@ femms // CHECK: prefetch (%rax) # encoding: [0x0f,0x0d,0x00] // CHECK: prefetchw (%rax) # encoding: [0x0f,0x0d,0x08] +// CHECK: prefetchwt1 (%rax) # encoding: [0x0f,0x0d,0x10] prefetch (%rax) prefetchw (%rax) +prefetchwt1 (%rax) // CHECK: pf2iw %mm2, %mm1 # encoding: [0x0f,0x0f,0xca,0x1c] |

