[X86] Add prefetchwt1 instruction and overhaul priorities and isel enabling for prefetch instructions.

Previously prefetch was only considered legal if sse was enabled, but it should be supported with 3dnow as well. The prfchw flag now implies that at least some form of prefetch without the write hint is available, either the sse or 3dnow version. This is true even if 3dnow and sse are explicitly disabled. Similarly, the prefetchwt1 feature implies availability of prefetchw and the prefetcht0/1/2/nta instructions. This way we can support _MM_HINT_ET0 using prefetchw and _MM_HINT_ET1 with prefetchwt1. It's assumed that if we have levels for the write hint we would also have levels for the non-write hint, which is why we enable the sse prefetch instructions. I believe this behavior is consistent with gcc. I've updated prefetch.ll to test all of these combinations. llvm-svn: 321335
author: Craig Topper <craig.topper@intel.com> 2017-12-22 02:30:30 +0000
committer: Craig Topper <craig.topper@intel.com> 2017-12-22 02:30:30 +0000
commit: e268598dd3c8df79e2c2ca6a392fe5128bb276c8 (patch)
tree: 2aa934665b80961c82555a2dbf7458872135c3f9 /llvm/test/CodeGen
parent: 9befe89367d24c7f3bdae1f7659cb3654e571e11 (diff)
download: bcm5719-llvm-e268598dd3c8df79e2c2ca6a392fe5128bb276c8.tar.gz
bcm5719-llvm-e268598dd3c8df79e2c2ca6a392fe5128bb276c8.zip
1 files changed, 88 insertions, 14 deletions
diff --git a/llvm/test/CodeGen/X86/prefetch.ll b/llvm/test/CodeGen/X86/prefetch.ll
index 17a9ac994a7..839948174a4 100644
--- a/llvm/test/CodeGen/X86/prefetch.ll
+++ b/llvm/test/CodeGen/X86/prefetch.ll
@@ -1,27 +1,101 @@
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s
-; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s
-; RUN: llc < %s -mtriple=i686-- -mattr=+sse -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHW
-; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=SLM
-; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHW
-; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=NOPRFCHW
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+avx | FileCheck %s --check-prefix=SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+prfchw | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mcpu=slm | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 | FileCheck %s -check-prefix=PRFCHWSSE
+; RUN: llc < %s -mtriple=i686-- -mcpu=btver2 -mattr=-prfchw | FileCheck %s -check-prefix=SSE
+; RUN: llc < %s -mtriple=i686-- -mattr=+sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
+; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
+; RUN: llc < %s -mtriple=i686-- -mattr=-sse,+3dnow,+prefetchwt1 | FileCheck %s -check-prefix=PREFETCHWT1
+; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow | FileCheck %s -check-prefix=3DNOW
+; RUN: llc < %s -mtriple=i686-- -mattr=+3dnow,+prfchw | FileCheck %s -check-prefix=PRFCHW3DNOW
+
+; Rules:
+; 3dnow by itself gets you just the single prefetch instruction with no hints
+; sse provides prefetcht0/1/2/nta
+; supporting prefetchw, but not 3dnow, implicitly provides prefetcht0/1/2/nta regardless of sse setting as we need something to fall back to for the non-write hint.
+; supporting prefetchwt1 implies prefetcht0/1/2/nta and prefetchw regardless of other settings. This allows levels for non-write and gives us an instruction for write+T0
+; 3dnow prefetch instruction will only get used if you have no other prefetch instructions enabled
 
 ; rdar://10538297
 
 define void @t(i8* %ptr) nounwind  {
+; SSE-LABEL: t:
+; SSE:       # %bb.0: # %entry
+; SSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; SSE-NEXT:    prefetcht2 (%eax)
+; SSE-NEXT:    prefetcht1 (%eax)
+; SSE-NEXT:    prefetcht0 (%eax)
+; SSE-NEXT:    prefetchnta (%eax)
+; SSE-NEXT:    prefetcht2 (%eax)
+; SSE-NEXT:    prefetcht1 (%eax)
+; SSE-NEXT:    prefetcht0 (%eax)
+; SSE-NEXT:    prefetchnta (%eax)
+; SSE-NEXT:    retl
+;
+; PRFCHWSSE-LABEL: t:
+; PRFCHWSSE:       # %bb.0: # %entry
+; PRFCHWSSE-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; PRFCHWSSE-NEXT:    prefetcht2 (%eax)
+; PRFCHWSSE-NEXT:    prefetcht1 (%eax)
+; PRFCHWSSE-NEXT:    prefetcht0 (%eax)
+; PRFCHWSSE-NEXT:    prefetchnta (%eax)
+; PRFCHWSSE-NEXT:    prefetchw (%eax)
+; PRFCHWSSE-NEXT:    prefetchw (%eax)
+; PRFCHWSSE-NEXT:    prefetchw (%eax)
+; PRFCHWSSE-NEXT:    prefetchw (%eax)
+; PRFCHWSSE-NEXT:    retl
+;
+; PREFETCHWT1-LABEL: t:
+; PREFETCHWT1:       # %bb.0: # %entry
+; PREFETCHWT1-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; PREFETCHWT1-NEXT:    prefetcht2 (%eax)
+; PREFETCHWT1-NEXT:    prefetcht1 (%eax)
+; PREFETCHWT1-NEXT:    prefetcht0 (%eax)
+; PREFETCHWT1-NEXT:    prefetchnta (%eax)
+; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
+; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
+; PREFETCHWT1-NEXT:    prefetchw (%eax)
+; PREFETCHWT1-NEXT:    prefetchwt1 (%eax)
+; PREFETCHWT1-NEXT:    retl
+;
+; 3DNOW-LABEL: t:
+; 3DNOW:       # %bb.0: # %entry
+; 3DNOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    prefetch (%eax)
+; 3DNOW-NEXT:    retl
+;
+; PRFCHW3DNOW-LABEL: t:
+; PRFCHW3DNOW:       # %bb.0: # %entry
+; PRFCHW3DNOW-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetch (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    prefetchw (%eax)
+; PRFCHW3DNOW-NEXT:    retl
 entry:
-; CHECK: prefetcht2
-; CHECK: prefetcht1
-; CHECK: prefetcht0
-; CHECK: prefetchnta
-; PRFCHW: prefetchw
-; NOPRFCHW-NOT: prefetchw
-; SLM: prefetchw
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 1, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 2, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 3, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 0, i32 0, i32 1 )
+	tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 1, i32 1 )
+	tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 2, i32 1 )
 	tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 3, i32 1 )
+	tail call void @llvm.prefetch( i8* %ptr, i32 1, i32 0, i32 1 )
 	ret void
 }
 
-declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind 
+declare void @llvm.prefetch(i8*, i32, i32, i32) nounwind
author	Craig Topper <craig.topper@intel.com>	2017-12-22 02:30:30 +0000
committer	Craig Topper <craig.topper@intel.com>	2017-12-22 02:30:30 +0000
commit	e268598dd3c8df79e2c2ca6a392fe5128bb276c8 (patch)
tree	2aa934665b80961c82555a2dbf7458872135c3f9 /llvm/test/CodeGen
parent	9befe89367d24c7f3bdae1f7659cb3654e571e11 (diff)
download	bcm5719-llvm-e268598dd3c8df79e2c2ca6a392fe5128bb276c8.tar.gz bcm5719-llvm-e268598dd3c8df79e2c2ca6a392fe5128bb276c8.zip