summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorCraig Topper <craig.topper@intel.com>2018-12-19 18:45:57 +0000
committerCraig Topper <craig.topper@intel.com>2018-12-19 18:45:57 +0000
commit291470347a2e4d7b6aa0ecb854e61f074cc80590 (patch)
tree863ad5aa3a1e7bc8f606bd51a76e2254e60aff9a
parent4b7396e25f7354c0abf2fc59c3b30b59c38a3c25 (diff)
downloadbcm5719-llvm-291470347a2e4d7b6aa0ecb854e61f074cc80590.tar.gz
bcm5719-llvm-291470347a2e4d7b6aa0ecb854e61f074cc80590.zip
[X86] Fix assertion failure in X86AvoidSFBPass
Fixes https://bugs.llvm.org/show_bug.cgi?id=38743. The function removeRedundantBlockingStores is supposed to remove any blocking stores that are contained in one another in BlockingStoresDispSizeMap. However, it currently looks only at the previous entry, which misses some cases and results in an assertion failure. This patch refines the function to check all previous layouts until it finds an uncontained one, so that all redundant stores are removed. Patch by Pengfei Wang. Differential Revision: https://reviews.llvm.org/D55642 llvm-svn: 349660
-rw-r--r--llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp27
-rw-r--r--llvm/test/CodeGen/X86/pr38743.ll94
2 files changed, 108 insertions, 13 deletions
diff --git a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
index 2850baf7a65..627a6cb1451 100644
--- a/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
+++ b/llvm/lib/Target/X86/X86AvoidStoreForwardingBlocks.cpp
@@ -641,21 +641,22 @@ removeRedundantBlockingStores(DisplacementSizeMap &BlockingStoresDispSizeMap) {
if (BlockingStoresDispSizeMap.size() <= 1)
return;
- int64_t PrevDisp = BlockingStoresDispSizeMap.begin()->first;
- unsigned PrevSize = BlockingStoresDispSizeMap.begin()->second;
- SmallVector<int64_t, 2> ForRemoval;
- for (auto DispSizePair = std::next(BlockingStoresDispSizeMap.begin());
- DispSizePair != BlockingStoresDispSizeMap.end(); ++DispSizePair) {
- int64_t CurrDisp = DispSizePair->first;
- unsigned CurrSize = DispSizePair->second;
- if (CurrDisp + CurrSize <= PrevDisp + PrevSize) {
- ForRemoval.push_back(PrevDisp);
+ SmallVector<std::pair<int64_t, unsigned>, 0> DispSizeStack;
+ for (auto DispSizePair : BlockingStoresDispSizeMap) {
+ int64_t CurrDisp = DispSizePair.first;
+ unsigned CurrSize = DispSizePair.second;
+ while (DispSizeStack.size()) {
+ int64_t PrevDisp = DispSizeStack.back().first;
+ unsigned PrevSize = DispSizeStack.back().second;
+ if (CurrDisp + CurrSize > PrevDisp + PrevSize)
+ break;
+ DispSizeStack.pop_back();
}
- PrevDisp = CurrDisp;
- PrevSize = CurrSize;
+ DispSizeStack.push_back(DispSizePair);
}
- for (auto Disp : ForRemoval)
- BlockingStoresDispSizeMap.erase(Disp);
+ BlockingStoresDispSizeMap.clear();
+ for (auto Disp : DispSizeStack)
+ BlockingStoresDispSizeMap.insert(Disp);
}
bool X86AvoidSFBPass::runOnMachineFunction(MachineFunction &MF) {
diff --git a/llvm/test/CodeGen/X86/pr38743.ll b/llvm/test/CodeGen/X86/pr38743.ll
new file mode 100644
index 00000000000..ac5d48ef5f5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr38743.ll
@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+%0 = type { %1 }
+%1 = type { %2 }
+%2 = type { %3 }
+%3 = type { %4 }
+%4 = type { %5 }
+%5 = type { i64, i64, i8* }
+%6 = type { %7, [23 x i8] }
+%7 = type { i8 }
+
+@.str.16 = external dso_local unnamed_addr constant [16 x i8], align 1
+@.str.17 = external dso_local unnamed_addr constant [12 x i8], align 1
+@.str.18 = external dso_local unnamed_addr constant [15 x i8], align 1
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #0
+
+define void @pr38743() #1 align 2 {
+; CHECK-LABEL: pr38743:
+; CHECK: # %bb.0: # %bb
+; CHECK-NEXT: cmpl $3, %eax
+; CHECK-NEXT: je .LBB0_4
+; CHECK-NEXT: # %bb.1: # %bb
+; CHECK-NEXT: cmpl $1, %eax
+; CHECK-NEXT: je .LBB0_2
+; CHECK-NEXT: # %bb.3: # %bb5
+; CHECK-NEXT: movzwl .str.17+{{.*}}(%rip), %eax
+; CHECK-NEXT: movw %ax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq {{.*}}(%rip), %rax
+; CHECK-NEXT: jmp .LBB0_5
+; CHECK-NEXT: .LBB0_4: # %bb8
+; CHECK-NEXT: movq .str.18+{{.*}}(%rip), %rax
+; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq {{.*}}(%rip), %rax
+; CHECK-NEXT: jmp .LBB0_5
+; CHECK-NEXT: .LBB0_2: # %bb2
+; CHECK-NEXT: movq .str.16+{{.*}}(%rip), %rax
+; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq {{.*}}(%rip), %rax
+; CHECK-NEXT: .LBB0_5: # %bb12
+; CHECK-NEXT: movq %rax, -{{[0-9]+}}(%rsp)
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rax
+; CHECK-NEXT: movq %rax, (%rax)
+; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %al
+; CHECK-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
+; CHECK-NEXT: movzwl -{{[0-9]+}}(%rsp), %edx
+; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %esi
+; CHECK-NEXT: movb -{{[0-9]+}}(%rsp), %dil
+; CHECK-NEXT: movb %al, (%rax)
+; CHECK-NEXT: movq %rcx, 1(%rax)
+; CHECK-NEXT: movw %dx, 9(%rax)
+; CHECK-NEXT: movl %esi, 11(%rax)
+; CHECK-NEXT: movb %dil, 15(%rax)
+; CHECK-NEXT: retq
+bb:
+ %tmp = alloca %0, align 16
+ %tmp1 = bitcast %0* %tmp to i8*
+ switch i32 undef, label %bb11 [
+ i32 1, label %bb2
+ i32 4, label %bb5
+ i32 2, label %bb5
+ i32 3, label %bb8
+ ]
+
+bb2: ; preds = %bb
+ %tmp3 = bitcast %0* %tmp to %6*
+ %tmp4 = getelementptr inbounds %6, %6* %tmp3, i64 0, i32 1, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp4, i8* align 1 getelementptr inbounds ([16 x i8], [16 x i8]* @.str.16, i64 0, i64 0), i64 15, i1 false)
+ br label %bb12
+
+bb5: ; preds = %bb, %bb
+ %tmp6 = bitcast %0* %tmp to %6*
+ %tmp7 = getelementptr inbounds %6, %6* %tmp6, i64 0, i32 1, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp7, i8* align 1 getelementptr inbounds ([12 x i8], [12 x i8]* @.str.17, i64 0, i64 0), i64 10, i1 false)
+ br label %bb12
+
+bb8: ; preds = %bb
+ %tmp9 = bitcast %0* %tmp to %6*
+ %tmp10 = getelementptr inbounds %6, %6* %tmp9, i64 0, i32 1, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 1 %tmp10, i8* align 1 getelementptr inbounds ([15 x i8], [15 x i8]* @.str.18, i64 0, i64 0), i64 14, i1 false)
+ br label %bb12
+
+bb11: ; preds = %bb
+ unreachable
+
+bb12: ; preds = %bb8, %bb5, %bb2
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 8 undef, i8* nonnull align 16 %tmp1, i64 24, i1 false) #2
+ ret void
+}
+
+attributes #0 = { argmemonly nounwind }
+attributes #1 = { "target-features"="+sse,+sse2,+sse3,+sse4.2" }
+attributes #2 = { nounwind }
OpenPOWER on IntegriCloud