summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-10-17 17:27:06 +0000
committerAndrea Di Biagio <Andrea_DiBiagio@sn.scee.net>2014-10-17 17:27:06 +0000
commitc48cb86f05ef43082f8ed4055219b2e6724e21ab (patch)
tree4b91ad257057ea5c99a3ed7408d4643b541bdb8e
parente8aab7480c2550b51fd60d6eb102b361e70931d7 (diff)
downloadbcm5719-llvm-c48cb86f05ef43082f8ed4055219b2e6724e21ab.tar.gz
bcm5719-llvm-c48cb86f05ef43082f8ed4055219b2e6724e21ab.zip
[X86] Fix missed selection of non-temporal store of zero vector.
When the input to a store instruction was a zero vector, the backend always selected a normal vector store regardless of the non-temporal hint. This is fixed by this patch. This fixes PR19370. llvm-svn: 220054
-rw-r--r--llvm/lib/Target/X86/X86InstrSSE.td8
-rw-r--r--llvm/test/CodeGen/X86/nontemporal-2.ll31
2 files changed, 39 insertions, 0 deletions
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index 4b283a2988a..3874c1968b5 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -3939,6 +3939,14 @@ def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
PS, Requires<[HasSSE2]>;
} // SchedRW = [WriteStore]
+let Predicates = [HasAVX, NoVLX] in {
+ def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (VMOVNTPSmr addr:$dst, VR128:$src)>;
+}
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (MOVNTPSmr addr:$dst, VR128:$src)>;
+
} // AddedComplexity
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/X86/nontemporal-2.ll b/llvm/test/CodeGen/X86/nontemporal-2.ll
new file mode 100644
index 00000000000..9d0cb9a5edf
--- /dev/null
+++ b/llvm/test/CodeGen/X86/nontemporal-2.ll
@@ -0,0 +1,31 @@
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7 | FileCheck %s -check-prefix=CHECK -check-prefix=SSE
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=corei7-avx | FileCheck %s -check-prefix=CHECK -check-prefix=AVX
+
+
+; Make sure that we generate non-temporal stores for the test cases below.
+
+define void @test1(<4 x float>* %dst) {
+; CHECK-LABEL: test1:
+; SSE: movntps
+; AVX: vmovntps
+ store <4 x float> zeroinitializer, <4 x float>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test2(<4 x i32>* %dst) {
+; CHECK-LABEL: test2:
+; SSE: movntps
+; AVX: vmovntps
+ store <4 x i32> zeroinitializer, <4 x i32>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+define void @test3(<2 x double>* %dst) {
+; CHECK-LABEL: test3:
+; SSE: movntps
+; AVX: vmovntps
+ store <2 x double> zeroinitializer, <2 x double>* %dst, align 16, !nontemporal !1
+ ret void
+}
+
+!1 = metadata !{i32 1}
OpenPOWER on IntegriCloud