diff options
-rw-r--r-- | llvm/include/llvm/CodeGen/TargetLowering.h | 10 | ||||
-rw-r--r-- | llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 | ||||
-rw-r--r-- | llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 | ||||
-rw-r--r-- | llvm/lib/Target/X86/X86ISelLowering.h | 6 | ||||
-rw-r--r-- | llvm/test/CodeGen/X86/vector-trunc-widen.ll | 4 |
5 files changed, 15 insertions, 9 deletions
diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index d00cc1675cd..ad17fd84558 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -435,10 +435,12 @@ public:
     return false;
   }
 
-  /// Allow store merging after legalization in addition to before legalization.
-  /// This may catch stores that do not exist earlier (eg, stores created from
-  /// intrinsics).
-  virtual bool mergeStoresAfterLegalization() const { return true; }
+  /// Allow store merging for the specified type after legalization in addition
+  /// to before legalization. This may transform stores that do not exist
+  /// earlier (for example, stores created from intrinsics).
+  virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
+    return true;
+  }
 
   /// Returns if it's reasonable to merge stores to MemVT size.
   virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 1d1699ce589..33ef68c2f1f 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16085,7 +16085,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   // Always perform this optimization before types are legal. If the target
   // prefers, also try this after legalization to catch stores that were created
   // by intrinsics or other nodes.
-  if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) {
+  if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) {
     while (true) {
       // There can be multiple store sequences on the same chain.
       // Keep trying to merge store sequences until we are unable to do so
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
index 74d5d80ee68..a17f5dae576 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h
@@ -234,7 +234,7 @@ public:
   // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges;
   // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for
   // now.
-  bool mergeStoresAfterLegalization() const override { return false; }
+  bool mergeStoresAfterLegalization(EVT) const override { return false; }
 
   bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override {
     return true;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
index 7eed866614a..42b5b06268a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -799,7 +799,11 @@ namespace llvm {
     /// This method returns the name of a target specific DAG node.
     const char *getTargetNodeName(unsigned Opcode) const override;
 
-    bool mergeStoresAfterLegalization() const override { return true; }
+    /// Do not merge vector stores after legalization because that may conflict
+    /// with x86-specific store splitting optimizations.
+    bool mergeStoresAfterLegalization(EVT MemVT) const override {
+      return !MemVT.isVector();
+    }
 
     bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT,
                           const SelectionDAG &DAG) const override;
diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
index 1eff810f757..54ebdbe026a 100644
--- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll
+++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll
@@ -2076,8 +2076,8 @@ define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, <8 x i16>
 ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1
 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3]
 ; AVX2-NEXT: shlq $4, %rdi
-; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
-; AVX2-NEXT: vmovdqu %ymm0, (%rsi,%rdi)
+; AVX2-NEXT: vmovdqu %xmm0, (%rsi,%rdi)
+; AVX2-NEXT: vmovdqu %xmm1, 16(%rsi,%rdi)
 ; AVX2-NEXT: vzeroupper
 ; AVX2-NEXT: retq
 ;