summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- llvm/include/llvm/CodeGen/TargetLowering.h 10
-rw-r--r-- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp 2
-rw-r--r-- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h 2
-rw-r--r-- llvm/lib/Target/X86/X86ISelLowering.h 6
-rw-r--r-- llvm/test/CodeGen/X86/vector-trunc-widen.ll 4
5 files changed, 15 insertions, 9 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d00cc1675cd..ad17fd84558 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -435,10 +435,12 @@ public:
return false;
}
- /// Allow store merging after legalization in addition to before legalization.
- /// This may catch stores that do not exist earlier (eg, stores created from
- /// intrinsics).
- virtual bool mergeStoresAfterLegalization() const { return true; }
+ /// Allow store merging for the specified type after legalization in addition
+ /// to before legalization. This may transform stores that do not exist
+ /// earlier (for example, stores created from intrinsics).
+ virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
+ return true;
+ }
/// Returns if it's reasonable to merge stores to MemVT size.
virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1d1699ce589..33ef68c2f1f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16085,7 +16085,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Always perform this optimization before types are legal. If the target
// prefers, also try this after legalization to catch stores that were created
// by intrinsics or other nodes.
- if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+ if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
while (true) {
// There can be multiple store sequences on the same chain.
// Keep trying to merge store sequences until we are unable to do so
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 74d5d80ee68..a17f5dae576 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -234,7 +234,7 @@ public:
// MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
// MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
// now.
- bool mergeStoresAfterLegalization() const override { return false; }
+ bool mergeStoresAfterLegalization(EVT) const override { return false; }
bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
return true;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7eed866614a..42b5b06268a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -799,7 +799,11 @@ namespace llvm {
/// This method returns the name of a target specific DAG node.
const char *getTargetNodeName(unsigned Opcode) const override;
- bool mergeStoresAfterLegalization() const override { return true; }
+ /// Do not merge vector stores after legalization because that may conflict
+ /// with x86-specific store splitting optimizations.
+ bool mergeStoresAfterLegalization(EVT MemVT) const override {
+ return !MemVT.isVector();
+ }
bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
const SelectionDAG &DAG) const override;
diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
index 1eff810f757..54ebdbe026a 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
@@ -2076,8 +2076,8 @@ define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, <8 x i16>
; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
; AVX2-NEXT: shlq $4, %rdi
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rsi,%rdi)
+; AVX2-NEXT: vmovdqu %xmm0, (%rsi,%rdi)
+; AVX2-NEXT: vmovdqu %xmm1, 16(%rsi,%rdi)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
;
OpenPOWER on IntegriCloud