diff options
| author | Dehao Chen <dehao@google.com> | 2016-09-09 18:42:35 +0000 |
|---|---|---|
| committer | Dehao Chen <dehao@google.com> | 2016-09-09 18:42:35 +0000 |
| commit | 22ce5eb051591b828b1ce4238624b6e95d334a5b (patch) | |
| tree | 7620ab9e27ab4e5ee5e3fa1d5baf39f4f14ae3d3 /llvm | |
| parent | a5edf655af475d80b83e9b685aea896f0ee2ad18 (diff) | |
| download | bcm5719-llvm-22ce5eb051591b828b1ce4238624b6e95d334a5b.tar.gz bcm5719-llvm-22ce5eb051591b828b1ce4238624b6e95d334a5b.zip | |
Do not widen load for different variable in GVN.
Summary:
Widening loads in GVN happens too early, because it blocks later optimizations such as PRE and LICM.
https://llvm.org/bugs/show_bug.cgi?id=29110
The SPECCPU2006 benchmark impact of this patch:
Reference: o2_nopatch
(1): o2_patched
Benchmark Base:Reference (1)
-------------------------------------------------------
spec/2006/fp/C++/444.namd 25.2 -0.08%
spec/2006/fp/C++/447.dealII 45.92 +1.05%
spec/2006/fp/C++/450.soplex 41.7 -0.26%
spec/2006/fp/C++/453.povray 35.65 +1.68%
spec/2006/fp/C/433.milc 23.79 +0.42%
spec/2006/fp/C/470.lbm 41.88 -1.12%
spec/2006/fp/C/482.sphinx3 47.94 +1.67%
spec/2006/int/C++/471.omnetpp 22.46 -0.36%
spec/2006/int/C++/473.astar 21.19 +0.24%
spec/2006/int/C++/483.xalancbmk 36.09 -0.11%
spec/2006/int/C/400.perlbench 33.28 +1.35%
spec/2006/int/C/401.bzip2 22.76 -0.04%
spec/2006/int/C/403.gcc 32.36 +0.12%
spec/2006/int/C/429.mcf 41.04 -0.41%
spec/2006/int/C/445.gobmk 26.94 +0.04%
spec/2006/int/C/456.hmmer 24.5 -0.20%
spec/2006/int/C/458.sjeng 28 -0.46%
spec/2006/int/C/462.libquantum 55.25 +0.27%
spec/2006/int/C/464.h264ref 45.87 +0.72%
geometric mean +0.23%
For most benchmarks it is a wash, but we do see stable improvements on some, e.g. 447, 453, 482, and 400.
Reviewers: davidxl, hfinkel, dberlin, sanjoy, reames
Subscribers: gberry, junbuml
Differential Revision: https://reviews.llvm.org/D24096
llvm-svn: 281074
Diffstat (limited to 'llvm')
| -rw-r--r-- | llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 38 | ||||
| -rw-r--r-- | llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll | 5 | ||||
| -rw-r--r-- | llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll | 11 | ||||
| -rw-r--r-- | llvm/test/Transforms/GVN/PRE/rle.ll | 9 | ||||
| -rw-r--r-- | llvm/test/Transforms/GVN/big-endian.ll | 6 | ||||
| -rw-r--r-- | llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll | 4 | ||||
| -rw-r--r-- | llvm/test/Transforms/GVN/pr25440.ll | 4 |
7 files changed, 23 insertions, 54 deletions
diff --git a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index fe0f6a2d261..bd3f7906084 100644 --- a/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -234,26 +234,6 @@ MemDepResult MemoryDependenceResults::getCallSiteDependencyFrom( return MemDepResult::getNonFuncLocal(); } -/// Return true if LI is a load that would fully overlap MemLoc if done as -/// a wider legal integer load. -/// -/// MemLocBase, MemLocOffset are lazily computed here the first time the -/// base/offs of memloc is needed. -static bool isLoadLoadClobberIfExtendedToFullWidth(const MemoryLocation &MemLoc, - const Value *&MemLocBase, - int64_t &MemLocOffs, - const LoadInst *LI) { - const DataLayout &DL = LI->getModule()->getDataLayout(); - - // If we haven't already computed the base/offset of MemLoc, do so now. - if (!MemLocBase) - MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, DL); - - unsigned Size = MemoryDependenceResults::getLoadLoadClobberFullWidthSize( - MemLocBase, MemLocOffs, MemLoc.Size, LI); - return Size != 0; -} - unsigned MemoryDependenceResults::getLoadLoadClobberFullWidthSize( const Value *MemLocBase, int64_t MemLocOffs, unsigned MemLocSize, const LoadInst *LI) { @@ -410,9 +390,6 @@ MemoryDependenceResults::getInvariantGroupPointerDependency(LoadInst *LI, MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( const MemoryLocation &MemLoc, bool isLoad, BasicBlock::iterator ScanIt, BasicBlock *BB, Instruction *QueryInst, unsigned *Limit) { - - const Value *MemLocBase = nullptr; - int64_t MemLocOffset = 0; bool isInvariantLoad = false; if (!Limit) { @@ -550,21 +527,8 @@ MemDepResult MemoryDependenceResults::getSimplePointerDependencyFrom( AliasResult R = AA.alias(LoadLoc, MemLoc); if (isLoad) { - if (R == NoAlias) { - // If this is an over-aligned integer load (for example, - // "load i8* %P, align 4") see if it would obviously overlap with 
the - // queried location if widened to a larger load (e.g. if the queried - // location is 1 byte at P+1). If so, return it as a load/load - // clobber result, allowing the client to decide to widen the load if - // it wants to. - if (IntegerType *ITy = dyn_cast<IntegerType>(LI->getType())) { - if (LI->getAlignment() * 8 > ITy->getPrimitiveSizeInBits() && - isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, - MemLocOffset, LI)) - return MemDepResult::getClobber(Inst); - } + if (R == NoAlias) continue; - } // Must aliased loads are defs of each other. if (R == MustAlias) diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll index b5ef8f20689..052d56d21c0 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll @@ -34,7 +34,7 @@ define void @end_test_widening_bad() { ret void } -;; Accessing bytes 4 and 5. Ok to widen to i16. +;; Accessing bytes 4 and 5. No widen to i16. define i32 @test_widening_ok(i8* %P) nounwind ssp noredzone sanitize_address { entry: @@ -45,7 +45,8 @@ entry: %add = add nsw i32 %conv, %conv2 ret i32 %add ; CHECK: @test_widening_ok -; CHECK: __asan_report_load2 +; CHECK: __asan_report_load1 +; CHECK: __asan_report_load1 ; CHECK-NOT: __asan_report ; CHECK: end_test_widening_ok } diff --git a/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll b/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll index 1e5e0912cd7..65826fb9213 100644 --- a/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll +++ b/llvm/test/Transforms/GVN/PRE/load-pre-nonlocal.ll @@ -51,15 +51,16 @@ for.end: } ; %1 is partially redundant if %0 can be widened to a 64-bit load. +; But we should not widen %0 to 64-bit load. 
; CHECK-LABEL: define i32 @overaligned_load ; CHECK: if.then: -; CHECK: %0 = load i64 -; CHECK: [[LSHR:%[0-9]+]] = lshr i64 %0, 32, !dbg [[LSHR_LOC:![0-9]+]] -; CHECK: trunc i64 [[LSHR]] to i32 +; CHECK-NOT: %0 = load i64 +; CHECK-NOT: [[LSHR:%[0-9]+]] = lshr i64 %0, 32, !dbg [[LSHR_LOC:![0-9]+]] +; CHECK-NOT: trunc i64 [[LSHR]] to i32 ; CHECK: if.end: -; CHECK-NOT: %1 = load i32, i32* -; CHECK: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}}) +; CHECK: %1 = load i32, i32* +; CHECK-NOT: [[LSHR_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}}) define i32 @overaligned_load(i32 %a, i32* nocapture %b) !dbg !13 { entry: diff --git a/llvm/test/Transforms/GVN/PRE/rle.ll b/llvm/test/Transforms/GVN/PRE/rle.ll index 3f42135b409..121fe6ebb3d 100644 --- a/llvm/test/Transforms/GVN/PRE/rle.ll +++ b/llvm/test/Transforms/GVN/PRE/rle.ll @@ -624,6 +624,7 @@ if.end: ;;===----------------------------------------------------------------------===;; ;; Load Widening +;; We explicitly choose NOT to widen. And are testing to make sure we don't. ;;===----------------------------------------------------------------------===;; %widening1 = type { i32, i8, i8, i8, i8 } @@ -640,7 +641,8 @@ entry: ret i32 %add ; CHECK-LABEL: @test_widening1( ; CHECK-NOT: load -; CHECK: load i16, i16* +; CHECK: load i8, i8* +; CHECK: load i8, i8* ; CHECK-NOT: load ; CHECK: ret i32 } @@ -664,7 +666,10 @@ entry: ret i32 %add3 ; CHECK-LABEL: @test_widening2( ; CHECK-NOT: load -; CHECK: load i32, i32* +; CHECK: load i8, i8* +; CHECK: load i8, i8* +; CHECK: load i8, i8* +; CHECK: load i8, i8* ; CHECK-NOT: load ; CHECK: ret i32 } diff --git a/llvm/test/Transforms/GVN/big-endian.ll b/llvm/test/Transforms/GVN/big-endian.ll index faaf3ef465b..f6b7374c17c 100644 --- a/llvm/test/Transforms/GVN/big-endian.ll +++ b/llvm/test/Transforms/GVN/big-endian.ll @@ -7,9 +7,9 @@ target triple = "powerpc64-unknown-linux-gnu" ;; loads reusing a load value. 
define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) { ; CHECK-LABEL: @test1 -; CHECK: [[V1:%.*]] = load i16, i16* %{{.*}} -; CHECK: [[V2:%.*]] = lshr i16 [[V1]], 8 -; CHECK: trunc i16 [[V2]] to i1 +; CHECK-NOT: [[V1:%.*]] = load i16, i16* %{{.*}} +; CHECK-NOT: [[V2:%.*]] = lshr i16 [[V1]], 8 +; CHECK-NOT: trunc i16 [[V2]] to i1 %valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0 %valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8 diff --git a/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll b/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll index 2e790db1b2a..a83d7b62d39 100644 --- a/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll +++ b/llvm/test/Transforms/GVN/no_speculative_loads_with_asan.ll @@ -25,9 +25,7 @@ define i32 @TestNoAsan() { } ; CHECK-LABEL: @TestNoAsan -; CHECK: %[[LOAD:[^ ]+]] = load i32 -; CHECK: {{.*}} = ashr i32 %[[LOAD]] -; CHECK-NOT: {{.*}} = phi +; CHECK: ret i32 0 define i32 @TestAsan() sanitize_address { %1 = tail call noalias i8* @_Znam(i64 2) diff --git a/llvm/test/Transforms/GVN/pr25440.ll b/llvm/test/Transforms/GVN/pr25440.ll index 14e2c30f04b..21ccc4e77b6 100644 --- a/llvm/test/Transforms/GVN/pr25440.ll +++ b/llvm/test/Transforms/GVN/pr25440.ll @@ -19,7 +19,7 @@ bb0: ; preds = %land.lhs.true, %entry %x.tr = phi %struct.a* [ %x, %entry ], [ null, %land.lhs.true ] %code1 = getelementptr inbounds %struct.a, %struct.a* %x.tr, i32 0, i32 0 %0 = load i16, i16* %code1, align 4 -; CHECK: load i32, i32* +; CHECK: load i16, i16* %conv = zext i16 %0 to i32 switch i32 %conv, label %if.end.50 [ i32 43, label %cleanup @@ -38,7 +38,7 @@ if.then.26: ; preds = %if.then.5 cond.false: ; preds = %if.then.26 ; CHECK: cond.false: -; CHECK-NOT: load +; CHECK: load i16 %mode = getelementptr inbounds %struct.a, %struct.a* %x.tr.lcssa163, i32 0, i32 1 %bf.load = load i16, i16* %mode, align 2 %bf.shl = shl i16 %bf.load, 8 |

