summaryrefslogtreecommitdiffstats
path: root/llvm/lib
diff options
context:
space:
mode:
authorAhmed Bougacha <ahmed.bougacha@gmail.com>2015-09-02 23:25:39 +0000
committerAhmed Bougacha <ahmed.bougacha@gmail.com>2015-09-02 23:25:39 +0000
commitb03ea02479c82430b4149609e0ac3e0490d5ca12 (patch)
tree1eba0a16380443680a0e0d9fd40640fbd1b7cb8e /llvm/lib
parent78425200ee5a51df88ed62eafc51ae70a5ecc112 (diff)
downloadbcm5719-llvm-b03ea02479c82430b4149609e0ac3e0490d5ca12.tar.gz
bcm5719-llvm-b03ea02479c82430b4149609e0ac3e0490d5ca12.zip
[X86] Require 32-byte alignment for 32-byte VMOVNTs.
We used to accept (and even test, and generate) 16-byte alignment for 32-byte nontemporal stores, but they require 32-byte alignment, per SDM. Found by inspection. Instead of hardcoding 16 in the patfrag, check for natural alignment. Also fix the autoupgrade and the various tests. Also, use explicit -mattr instead of -mcpu: I stared at the output several minutes wondering why I get 2x movntps for the unaligned case (which is the ideal output, but needs some work: see FIXME), until I remembered corei7-avx implies +slow-unaligned-mem-32. llvm-svn: 246733
Diffstat (limited to 'llvm/lib')
-rw-r--r--llvm/lib/IR/AutoUpgrade.cpp2
-rw-r--r--llvm/lib/Target/X86/X86InstrFragmentsSIMD.td6
2 files changed, 5 insertions, 3 deletions
diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp
index aeefa38f74d..71448feb9ab 100644
--- a/llvm/lib/IR/AutoUpgrade.cpp
+++ b/llvm/lib/IR/AutoUpgrade.cpp
@@ -370,7 +370,7 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
"cast");
StoreInst *SI = Builder.CreateStore(Arg1, BC);
SI->setMetadata(M->getMDKindID("nontemporal"), Node);
- SI->setAlignment(16);
+ SI->setAlignment(32);
// Remove intrinsic.
CI->eraseFromParent();
diff --git a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
index b8ab1feed9e..19bf986c33c 100644
--- a/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
+++ b/llvm/lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -626,12 +626,14 @@ def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
(nontemporalstore node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() >= 16;
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ return St->getAlignment() >= St->getMemoryVT().getStoreSize();
}]>;
def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
(nontemporalstore node:$val, node:$ptr), [{
- return cast<StoreSDNode>(N)->getAlignment() < 16;
+ StoreSDNode *St = cast<StoreSDNode>(N);
+ return St->getAlignment() < St->getMemoryVT().getStoreSize();
}]>;
def mgatherv4i32 : PatFrag<(ops node:$src1, node:$src2, node:$src3),
OpenPOWER on IntegriCloud