diff options
| author | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
|---|---|---|
| committer | Eric Christopher <echristo@gmail.com> | 2019-04-17 04:52:47 +0000 |
| commit | cee313d288a4faf0355d76fb6e0e927e211d08a5 (patch) | |
| tree | d386075318d761197779a96e5d8fc0dc7b06342b /llvm/test/Transforms/Inline | |
| parent | c3d6a929fdd92fd06d4304675ade8d7210ee711a (diff) | |
| download | bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.tar.gz bcm5719-llvm-cee313d288a4faf0355d76fb6e0e927e211d08a5.zip | |
Revert "Temporarily Revert "Add basic loop fusion pass.""
The reversion apparently deleted the test/Transforms directory.
Will be re-reverting again.
llvm-svn: 358552
Diffstat (limited to 'llvm/test/Transforms/Inline')
188 files changed, 17222 insertions, 0 deletions
diff --git a/llvm/test/Transforms/Inline/2003-09-14-InlineValue.ll b/llvm/test/Transforms/Inline/2003-09-14-InlineValue.ll new file mode 100644 index 00000000000..4f1f61b4c07 --- /dev/null +++ b/llvm/test/Transforms/Inline/2003-09-14-InlineValue.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -inline -disable-output + +declare i32 @External() + +define internal i32 @Callee() { + %I = call i32 @External( ) ; <i32> [#uses=2] + %J = add i32 %I, %I ; <i32> [#uses=1] + ret i32 %J +} + +define i32 @Caller() personality i32 (...)* @__gxx_personality_v0 { + %V = invoke i32 @Callee( ) + to label %Ok unwind label %Bad ; <i32> [#uses=1] + +Ok: ; preds = %0 + ret i32 %V + +Bad: ; preds = %0 + %exn = landingpad {i8*, i32} + cleanup + ret i32 0 +} + +declare i32 @__gxx_personality_v0(...) + diff --git a/llvm/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll b/llvm/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll new file mode 100644 index 00000000000..9a5fcaeea7d --- /dev/null +++ b/llvm/test/Transforms/Inline/2003-09-22-PHINodeInlineFail.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -inline -disable-output + +define i32 @main() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @__main( ) + to label %LongJmpBlkPost unwind label %LongJmpBlkPre + +LongJmpBlkPost: + ret i32 0 + +LongJmpBlkPre: + %i.3 = phi i32 [ 0, %entry ] + %exn = landingpad {i8*, i32} + cleanup + ret i32 0 +} + +define void @__main() { + ret void +} + +declare i32 @__gxx_personality_v0(...) 
diff --git a/llvm/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll b/llvm/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll new file mode 100644 index 00000000000..2311cdab518 --- /dev/null +++ b/llvm/test/Transforms/Inline/2003-09-22-PHINodesInExceptionDest.ll @@ -0,0 +1,32 @@ +; RUN: opt < %s -inline -disable-output + +define i32 @main() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @__main( ) + to label %Call2Invoke unwind label %LongJmpBlkPre + +Call2Invoke: ; preds = %entry + br label %exit + +LongJmpBlkPre: ; preds = %Call2Invoke, %entry + %i.3 = phi i32 [ 0, %entry ] + %exn = landingpad {i8*, i32} + cleanup + br label %exit + +exit: + ret i32 0 +} + +define void @__main() { + call void @__llvm_getGlobalCtors( ) + call void @__llvm_getGlobalDtors( ) + ret void +} + +declare i32 @__gxx_personality_v0(...) + +declare void @__llvm_getGlobalCtors() + +declare void @__llvm_getGlobalDtors() + diff --git a/llvm/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll b/llvm/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll new file mode 100644 index 00000000000..ce7d1fb3276 --- /dev/null +++ b/llvm/test/Transforms/Inline/2003-09-22-PHINodesInNormalInvokeDest.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -inline -disable-output + +define i32 @main() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @__main( ) + to label %else unwind label %RethrowExcept + +else: ; preds = %LJDecisionBB, %entry + %i.2 = phi i32 [ 36, %entry ], [ %i.2, %LJDecisionBB ] ; <i32> [#uses=1] + br label %LJDecisionBB + +LJDecisionBB: ; preds = %else + br label %else + +RethrowExcept: ; preds = %entry + %exn = landingpad {i8*, i32} + cleanup + ret i32 0 +} + +define void @__main() { + ret void +} + +declare i32 @__gxx_personality_v0(...) 
diff --git a/llvm/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll b/llvm/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll new file mode 100644 index 00000000000..4a80d37c097 --- /dev/null +++ b/llvm/test/Transforms/Inline/2003-10-13-AllocaDominanceProblem.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -inline -disable-output + +define i32 @reload() { +reloadentry: + br label %A + +A: ; preds = %reloadentry + call void @callee( ) + ret i32 0 +} + +define internal void @callee() { +entry: + %X = alloca i8, i32 0 ; <i8*> [#uses=0] + %Y = bitcast i32 0 to i32 ; <i32> [#uses=1] + %Z = alloca i8, i32 %Y ; <i8*> [#uses=0] + ret void +} + diff --git a/llvm/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll b/llvm/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll new file mode 100644 index 00000000000..62a7594ba43 --- /dev/null +++ b/llvm/test/Transforms/Inline/2004-04-15-InlineDeletesCall.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -inline -disable-output + +; Inlining the first call caused the inliner function to delete the second +; call. Then the inliner tries to inline the second call, which no longer +; exists. 
+ +define internal void @Callee1() { + unreachable +} + +define void @Callee2() { + ret void +} + +define void @caller() { + call void @Callee1( ) + call void @Callee2( ) + ret void +} + diff --git a/llvm/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll b/llvm/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll new file mode 100644 index 00000000000..fabad30bb5a --- /dev/null +++ b/llvm/test/Transforms/Inline/2004-04-20-InlineLinkOnce.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -inline -prune-eh -disable-output + +define linkonce void @caller() { + call void @callee( ) + ret void +} + +define linkonce void @callee() { + ret void +} + diff --git a/llvm/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll b/llvm/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll new file mode 100644 index 00000000000..866327f64a8 --- /dev/null +++ b/llvm/test/Transforms/Inline/2004-10-17-InlineFunctionWithoutReturn.ll @@ -0,0 +1,11 @@ +; RUN: opt < %s -inline -disable-output + +define i32 @test() { + unreachable +} + +define i32 @caller() { + %X = call i32 @test( ) ; <i32> [#uses=1] + ret i32 %X +} + diff --git a/llvm/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll b/llvm/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll new file mode 100644 index 00000000000..415495eb515 --- /dev/null +++ b/llvm/test/Transforms/Inline/2006-01-14-CallGraphUpdate.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -inline -prune-eh -disable-output + + %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>" = type { %"struct.std::locale::facet" } + %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >" = type { i32 (...)**, i32*, i32*, i32*, i32*, i32*, i32*, %"struct.std::locale" } + %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" } + %"struct.std::ios_base::_Callback_list" 
= type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } + %"struct.std::ios_base::_Words" = type { i8*, i32 } + %"struct.std::locale" = type { %"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } + %"struct.std::locale::facet" = type { i32 (...)**, i32 } + %"struct.std::ostreambuf_iterator<wchar_t,std::char_traits<wchar_t> >" = type { %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >"*, i32 } + +define void @_ZNKSt7num_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE6do_putES3_RSt8ios_basewl(%"struct.std::ostreambuf_iterator<wchar_t,std::char_traits<wchar_t> >"* %agg.result, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"* %this, %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >"* %__s.0__, i32 %__s.1__, %"struct.std::ios_base"* %__io, i32 %__fill, i32 %__v) { +entry: + tail call fastcc void @_ZNKSt7num_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE13_M_insert_intIlEES3_S3_RSt8ios_basewT_( ) + ret void +} + +define fastcc void @_ZNKSt7num_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEE13_M_insert_intIlEES3_S3_RSt8ios_basewT_() { +entry: + %tmp.38 = shl i32 0, 3 ; <i32> [#uses=1] + %tmp.39 = alloca i8, i32 %tmp.38 ; <i8*> [#uses=0] + ret void +} + diff --git a/llvm/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll b/llvm/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll new file mode 100644 index 00000000000..a0ddacdbd5b --- /dev/null +++ b/llvm/test/Transforms/Inline/2006-07-12-InlinePruneCGUpdate.ll @@ -0,0 +1,840 @@ +; RUN: opt < %s -inline -prune-eh -disable-output +; PR827 +@_ZTV8CRjii = internal global [1 x i32 (...)*] [ i32 (...)* @_ZN8CRjii12NlFeeEPN5Jr7sE ] ; <[1 x i32 (...)*]*> [#uses=0] + +define internal i32 @_ZN8CRjii12NlFeeEPN5Jr7sE(...) 
{ +entry: + br i1 false, label %cond_true, label %cond_false179 + +cond_true: ; preds = %entry + br label %bb9 + +bb: ; preds = %cond_true14 + br label %bb9 + +bb9: ; preds = %bb, %cond_true + br i1 false, label %cond_true14, label %cond_false + +cond_true14: ; preds = %bb9 + br label %bb + +cond_false: ; preds = %bb9 + br label %bb15 + +cond_next: ; No predecessors! + br label %bb15 + +bb15: ; preds = %cond_next, %cond_false + br label %bb24 + +bb17: ; preds = %cond_true29 + br label %bb24 + +bb24: ; preds = %bb17, %bb15 + br i1 false, label %cond_true29, label %cond_false30 + +cond_true29: ; preds = %bb24 + br label %bb17 + +cond_false30: ; preds = %bb24 + br label %bb32 + +cond_next31: ; No predecessors! + br label %bb32 + +bb32: ; preds = %cond_next31, %cond_false30 + br label %bb41 + +bb34: ; preds = %cond_true46 + br label %bb41 + +bb41: ; preds = %bb34, %bb32 + br i1 false, label %cond_true46, label %cond_false47 + +cond_true46: ; preds = %bb41 + br label %bb34 + +cond_false47: ; preds = %bb41 + br label %bb49 + +cond_next48: ; No predecessors! + br label %bb49 + +bb49: ; preds = %cond_next48, %cond_false47 + br label %bb58 + +bb51: ; preds = %cond_true63 + br label %bb58 + +bb58: ; preds = %bb51, %bb49 + br i1 false, label %cond_true63, label %cond_false64 + +cond_true63: ; preds = %bb58 + br label %bb51 + +cond_false64: ; preds = %bb58 + br label %bb66 + +cond_next65: ; No predecessors! + br label %bb66 + +bb66: ; preds = %cond_next65, %cond_false64 + br label %bb76 + +bb68: ; preds = %cond_true81 + br label %bb76 + +bb76: ; preds = %bb68, %bb66 + br i1 false, label %cond_true81, label %cond_false82 + +cond_true81: ; preds = %bb76 + br label %bb68 + +cond_false82: ; preds = %bb76 + br label %bb84 + +cond_next83: ; No predecessors! 
+ br label %bb84 + +bb84: ; preds = %cond_next83, %cond_false82 + br label %bb94 + +bb86: ; preds = %cond_true99 + br label %bb94 + +bb94: ; preds = %bb86, %bb84 + br i1 false, label %cond_true99, label %cond_false100 + +cond_true99: ; preds = %bb94 + br label %bb86 + +cond_false100: ; preds = %bb94 + br label %bb102 + +cond_next101: ; No predecessors! + br label %bb102 + +bb102: ; preds = %cond_next101, %cond_false100 + br label %bb112 + +bb104: ; preds = %cond_true117 + br label %bb112 + +bb112: ; preds = %bb104, %bb102 + br i1 false, label %cond_true117, label %cond_false118 + +cond_true117: ; preds = %bb112 + br label %bb104 + +cond_false118: ; preds = %bb112 + br label %bb120 + +cond_next119: ; No predecessors! + br label %bb120 + +bb120: ; preds = %cond_next119, %cond_false118 + br label %bb130 + +bb122: ; preds = %cond_true135 + br label %bb130 + +bb130: ; preds = %bb122, %bb120 + br i1 false, label %cond_true135, label %cond_false136 + +cond_true135: ; preds = %bb130 + br label %bb122 + +cond_false136: ; preds = %bb130 + br label %bb138 + +cond_next137: ; No predecessors! + br label %bb138 + +bb138: ; preds = %cond_next137, %cond_false136 + br label %bb148 + +bb140: ; preds = %cond_true153 + call fastcc void @_Zjrf1( ) + br label %bb148 + +bb148: ; preds = %bb140, %bb138 + br i1 false, label %cond_true153, label %cond_false154 + +cond_true153: ; preds = %bb148 + br label %bb140 + +cond_false154: ; preds = %bb148 + br label %bb156 + +cond_next155: ; No predecessors! + br label %bb156 + +bb156: ; preds = %cond_next155, %cond_false154 + br label %bb166 + +bb158: ; preds = %cond_true171 + br label %bb166 + +bb166: ; preds = %bb158, %bb156 + br i1 false, label %cond_true171, label %cond_false172 + +cond_true171: ; preds = %bb166 + br label %bb158 + +cond_false172: ; preds = %bb166 + br label %bb174 + +cond_next173: ; No predecessors! 
+ br label %bb174 + +bb174: ; preds = %cond_next173, %cond_false172 + br label %cleanup + +cleanup: ; preds = %bb174 + br label %finally + +finally: ; preds = %cleanup + br label %cond_next180 + +cond_false179: ; preds = %entry + br label %cond_next180 + +cond_next180: ; preds = %cond_false179, %finally + br label %return + +return: ; preds = %cond_next180 + ret i32 0 +} + +define internal fastcc void @_Zjrf2() { +entry: + br label %bb3 + +bb: ; preds = %cond_true + br label %bb3 + +bb3: ; preds = %bb, %entry + %tmp5 = load i8*, i8** null ; <i8*> [#uses=1] + %tmp = icmp ne i8* null, %tmp5 ; <i1> [#uses=1] + br i1 %tmp, label %cond_true, label %cond_false + +cond_true: ; preds = %bb3 + br label %bb + +cond_false: ; preds = %bb3 + br label %bb6 + +cond_next: ; No predecessors! + br label %bb6 + +bb6: ; preds = %cond_next, %cond_false + br label %return + +return: ; preds = %bb6 + ret void +} + +define internal fastcc void @_Zjrf3() { +entry: + call fastcc void @_Zjrf2( ) + br label %return + +return: ; preds = %entry + ret void +} + +define internal fastcc void @_Zjrf4() { +entry: + br label %bb6 + +bb: ; preds = %cond_true + br label %bb6 + +bb6: ; preds = %bb, %entry + br i1 false, label %cond_true, label %cond_false + +cond_true: ; preds = %bb6 + br label %bb + +cond_false: ; preds = %bb6 + br label %bb8 + +cond_next: ; No predecessors! 
+ br label %bb8 + +bb8: ; preds = %cond_next, %cond_false + br i1 false, label %cond_true9, label %cond_false12 + +cond_true9: ; preds = %bb8 + call fastcc void @_Zjrf3( ) + br label %cond_next13 + +cond_false12: ; preds = %bb8 + br label %cond_next13 + +cond_next13: ; preds = %cond_false12, %cond_true9 + br label %return + +return: ; preds = %cond_next13 + ret void +} + +define internal fastcc void @_Zjrf5() { +entry: + call fastcc void @_Zjrf4( ) + br label %return + +return: ; preds = %entry + ret void +} + +define internal fastcc void @_Zjrf6() { +entry: + call fastcc void @_Zjrf5( ) + br label %return + +return: ; preds = %entry + ret void +} + +define internal fastcc void @_Zjrf7() { +entry: + br label %cleanup + +cleanup: ; preds = %entry + br label %finally + +finally: ; preds = %cleanup + call fastcc void @_Zjrf6( ) + br label %cleanup9 + +cleanup9: ; preds = %finally + br label %finally8 + +finally8: ; preds = %cleanup9 + br label %cleanup11 + +cleanup11: ; preds = %finally8 + br label %finally10 + +finally10: ; preds = %cleanup11 + br label %finally23 + +finally23: ; preds = %finally10 + br label %return + +return: ; preds = %finally23 + ret void +} + +define internal fastcc void @_Zjrf11() { +entry: + br label %bb7 + +bb: ; preds = %cond_true + br label %bb7 + +bb7: ; preds = %bb, %entry + br i1 false, label %cond_true, label %cond_false + +cond_true: ; preds = %bb7 + br label %bb + +cond_false: ; preds = %bb7 + br label %bb9 + +cond_next: ; No predecessors! + br label %bb9 + +bb9: ; preds = %cond_next, %cond_false + br label %return + ; No predecessors! 
+ br i1 false, label %cond_true12, label %cond_false15 + +cond_true12: ; preds = %0 + call fastcc void @_Zjrf3( ) + br label %cond_next16 + +cond_false15: ; preds = %0 + br label %cond_next16 + +cond_next16: ; preds = %cond_false15, %cond_true12 + br label %return + +return: ; preds = %cond_next16, %bb9 + ret void +} + +define internal fastcc void @_Zjrf9() { +entry: + call fastcc void @_Zjrf11( ) + br label %return + +return: ; preds = %entry + ret void +} + +define internal fastcc void @_Zjrf10() { +entry: + call fastcc void @_Zjrf9( ) + br label %return + +return: ; preds = %entry + ret void +} + +define internal fastcc void @_Zjrf8() { +entry: + br i1 false, label %cond_true, label %cond_false201 + +cond_true: ; preds = %entry + br i1 false, label %cond_true36, label %cond_false + +cond_true36: ; preds = %cond_true + br label %cleanup + +cleanup: ; preds = %cond_true36 + br label %finally + +finally: ; preds = %cleanup + br label %cond_next189 + +cond_false: ; preds = %cond_true + br i1 false, label %cond_true99, label %cond_false137 + +cond_true99: ; preds = %cond_false + br label %cleanup136 + +cleanup136: ; preds = %cond_true99 + br label %finally135 + +finally135: ; preds = %cleanup136 + br label %cond_next + +cond_false137: ; preds = %cond_false + call fastcc void @_Zjrf10( ) + br label %cleanup188 + +cleanup188: ; preds = %cond_false137 + br label %finally187 + +finally187: ; preds = %cleanup188 + br label %cond_next + +cond_next: ; preds = %finally187, %finally135 + br label %cond_next189 + +cond_next189: ; preds = %cond_next, %finally + br label %cond_next202 + +cond_false201: ; preds = %entry + br label %cond_next202 + +cond_next202: ; preds = %cond_false201, %cond_next189 + br label %return + +return: ; preds = %cond_next202 + ret void +} + +define internal fastcc void @_Zjrf1() { +entry: + br label %bb492 + +bb: ; preds = %cond_true499 + br label %cleanup + +cleanup: ; preds = %bb + br label %finally + +finally: ; preds = %cleanup + br label 
%cleanup11 + +cleanup11: ; preds = %finally + br label %finally10 + +finally10: ; preds = %cleanup11 + br i1 false, label %cond_true, label %cond_false286 + +cond_true: ; preds = %finally10 + br label %cleanup26 + +cleanup26: ; preds = %cond_true + br label %finally25 + +finally25: ; preds = %cleanup26 + br label %bb30 + +bb27: ; preds = %cond_true37 + br label %bb30 + +bb30: ; preds = %bb27, %finally25 + br i1 false, label %cond_true37, label %cond_false + +cond_true37: ; preds = %bb30 + br label %bb27 + +cond_false: ; preds = %bb30 + br label %bb38 + +cond_next: ; No predecessors! + br label %bb38 + +bb38: ; preds = %cond_next, %cond_false + br label %bb148 + +bb40: ; preds = %cond_true156 + br label %bb139 + +bb41: ; preds = %cond_true142 + call fastcc void @_Zjrf7( ) + br label %bb105 + +bb44: ; preds = %cond_true112 + br label %bb74 + +bb66: ; preds = %cond_true80 + br label %bb74 + +bb74: ; preds = %bb66, %bb44 + br i1 false, label %cond_true80, label %cond_false81 + +cond_true80: ; preds = %bb74 + br label %bb66 + +cond_false81: ; preds = %bb74 + br label %bb83 + +cond_next82: ; No predecessors! + br label %bb83 + +bb83: ; preds = %cond_next82, %cond_false81 + br label %cleanup97 + +cleanup97: ; preds = %bb83 + br label %finally96 + +finally96: ; preds = %cleanup97 + br label %cleanup99 + +cleanup99: ; preds = %finally96 + br label %finally98 + +finally98: ; preds = %cleanup99 + br label %bb105 + +bb105: ; preds = %finally98, %bb41 + br i1 false, label %cond_true112, label %cond_false113 + +cond_true112: ; preds = %bb105 + br label %bb44 + +cond_false113: ; preds = %bb105 + br label %bb115 + +cond_next114: ; No predecessors! 
+ br label %bb115 + +bb115: ; preds = %cond_next114, %cond_false113 + br i1 false, label %cond_true119, label %cond_false123 + +cond_true119: ; preds = %bb115 + call fastcc void @_Zjrf8( ) + br label %cond_next124 + +cond_false123: ; preds = %bb115 + br label %cond_next124 + +cond_next124: ; preds = %cond_false123, %cond_true119 + br i1 false, label %cond_true131, label %cond_false132 + +cond_true131: ; preds = %cond_next124 + br label %cleanup135 + +cond_false132: ; preds = %cond_next124 + br label %cond_next133 + +cond_next133: ; preds = %cond_false132 + br label %cleanup136 + +cleanup135: ; preds = %cond_true131 + br label %done + +cleanup136: ; preds = %cond_next133 + br label %finally134 + +finally134: ; preds = %cleanup136 + br label %bb139 + +bb139: ; preds = %finally134, %bb40 + br i1 false, label %cond_true142, label %cond_false143 + +cond_true142: ; preds = %bb139 + br label %bb41 + +cond_false143: ; preds = %bb139 + br label %bb145 + +cond_next144: ; No predecessors! + br label %bb145 + +bb145: ; preds = %cond_next144, %cond_false143 + br label %bb148 + +bb148: ; preds = %bb145, %bb38 + br i1 false, label %cond_true156, label %cond_false157 + +cond_true156: ; preds = %bb148 + br label %bb40 + +cond_false157: ; preds = %bb148 + br label %bb159 + +cond_next158: ; No predecessors! + br label %bb159 + +bb159: ; preds = %cond_next158, %cond_false157 + br label %done + +done: ; preds = %bb159, %cleanup135 + br label %bb214 + +bb185: ; preds = %cond_true218 + br i1 false, label %cond_true193, label %cond_false206 + +cond_true193: ; preds = %bb185 + br label %cond_next211 + +cond_false206: ; preds = %bb185 + br label %cond_next211 + +cond_next211: ; preds = %cond_false206, %cond_true193 + br label %bb214 + +bb214: ; preds = %cond_next211, %done + br i1 false, label %cond_true218, label %cond_false219 + +cond_true218: ; preds = %bb214 + br label %bb185 + +cond_false219: ; preds = %bb214 + br label %bb221 + +cond_next220: ; No predecessors! 
+ br label %bb221 + +bb221: ; preds = %cond_next220, %cond_false219 + br i1 false, label %cond_true236, label %cond_false245 + +cond_true236: ; preds = %bb221 + br label %cond_next249 + +cond_false245: ; preds = %bb221 + br label %cond_next249 + +cond_next249: ; preds = %cond_false245, %cond_true236 + br i1 false, label %cond_true272, label %cond_false277 + +cond_true272: ; preds = %cond_next249 + br label %cond_next278 + +cond_false277: ; preds = %cond_next249 + br label %cond_next278 + +cond_next278: ; preds = %cond_false277, %cond_true272 + br label %cleanup285 + +cleanup285: ; preds = %cond_next278 + br label %finally284 + +finally284: ; preds = %cleanup285 + br label %cond_next287 + +cond_false286: ; preds = %finally10 + br label %cond_next287 + +cond_next287: ; preds = %cond_false286, %finally284 + br i1 false, label %cond_true317, label %cond_false319 + +cond_true317: ; preds = %cond_next287 + br label %cond_next321 + +cond_false319: ; preds = %cond_next287 + br label %cond_next321 + +cond_next321: ; preds = %cond_false319, %cond_true317 + br label %bb348 + +bb335: ; preds = %cond_true355 + br label %bb348 + +bb348: ; preds = %bb335, %cond_next321 + br i1 false, label %cond_true355, label %cond_false356 + +cond_true355: ; preds = %bb348 + br label %bb335 + +cond_false356: ; preds = %bb348 + br label %bb358 + +cond_next357: ; No predecessors! 
+ br label %bb358 + +bb358: ; preds = %cond_next357, %cond_false356 + br i1 false, label %cond_true363, label %cond_false364 + +cond_true363: ; preds = %bb358 + br label %bb388 + +cond_false364: ; preds = %bb358 + br label %cond_next365 + +cond_next365: ; preds = %cond_false364 + br i1 false, label %cond_true370, label %cond_false371 + +cond_true370: ; preds = %cond_next365 + br label %bb388 + +cond_false371: ; preds = %cond_next365 + br label %cond_next372 + +cond_next372: ; preds = %cond_false371 + br i1 false, label %cond_true385, label %cond_false386 + +cond_true385: ; preds = %cond_next372 + br label %bb388 + +cond_false386: ; preds = %cond_next372 + br label %cond_next387 + +cond_next387: ; preds = %cond_false386 + br label %bb389 + +bb388: ; preds = %cond_true385, %cond_true370, %cond_true363 + br label %bb389 + +bb389: ; preds = %bb388, %cond_next387 + br i1 false, label %cond_true392, label %cond_false443 + +cond_true392: ; preds = %bb389 + br label %bb419 + +bb402: ; preds = %cond_true425 + br i1 false, label %cond_true406, label %cond_false412 + +cond_true406: ; preds = %bb402 + br label %cond_next416 + +cond_false412: ; preds = %bb402 + br label %cond_next416 + +cond_next416: ; preds = %cond_false412, %cond_true406 + br label %bb419 + +bb419: ; preds = %cond_next416, %cond_true392 + br i1 false, label %cond_true425, label %cond_false426 + +cond_true425: ; preds = %bb419 + br label %bb402 + +cond_false426: ; preds = %bb419 + br label %bb428 + +cond_next427: ; No predecessors! + br label %bb428 + +bb428: ; preds = %cond_next427, %cond_false426 + br label %cond_next478 + +cond_false443: ; preds = %bb389 + br label %bb460 + +bb450: ; preds = %cond_true466 + br label %bb460 + +bb460: ; preds = %bb450, %cond_false443 + br i1 false, label %cond_true466, label %cond_false467 + +cond_true466: ; preds = %bb460 + br label %bb450 + +cond_false467: ; preds = %bb460 + br label %bb469 + +cond_next468: ; No predecessors! 
+ br label %bb469 + +bb469: ; preds = %cond_next468, %cond_false467 + br label %cond_next478 + +cond_next478: ; preds = %bb469, %bb428 + br label %cleanup485 + +cleanup485: ; preds = %cond_next478 + br label %finally484 + +finally484: ; preds = %cleanup485 + br label %cleanup487 + +cleanup487: ; preds = %finally484 + br label %finally486 + +finally486: ; preds = %cleanup487 + br label %cleanup489 + +cleanup489: ; preds = %finally486 + br label %finally488 + +finally488: ; preds = %cleanup489 + br label %bb492 + +bb492: ; preds = %finally488, %entry + br i1 false, label %cond_true499, label %cond_false500 + +cond_true499: ; preds = %bb492 + br label %bb + +cond_false500: ; preds = %bb492 + br label %bb502 + +cond_next501: ; No predecessors! + br label %bb502 + +bb502: ; preds = %cond_next501, %cond_false500 + br label %return + +return: ; preds = %bb502 + ret void +} + +define internal fastcc void @_ZSt26__unguarded_insertion_sortIN9__gnu_cxx17__normal_iteratorIPSsSt6vectorISsSaISsEEEEEvT_S7_() { +entry: + br label %bb12 + +bb: ; preds = %cond_true + br label %cleanup + +cleanup: ; preds = %bb + br label %finally + +finally: ; preds = %cleanup + br label %bb12 + +bb12: ; preds = %finally, %entry + br i1 false, label %cond_true, label %cond_false + +cond_true: ; preds = %bb12 + br label %bb + +cond_false: ; preds = %bb12 + br label %bb14 + +cond_next: ; No predecessors! 
+ br label %bb14 + +bb14: ; preds = %cond_next, %cond_false + br label %return + +return: ; preds = %bb14 + ret void +} diff --git a/llvm/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll b/llvm/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll new file mode 100644 index 00000000000..b4d630d8e38 --- /dev/null +++ b/llvm/test/Transforms/Inline/2006-11-09-InlineCGUpdate-2.ll @@ -0,0 +1,252 @@ +; RUN: opt < %s -inline -prune-eh -disable-output +; PR993 +target datalayout = "e-p:32:32" +target triple = "i386-unknown-openbsd3.9" + %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* } + %"struct.__gnu_cxx::char_producer<char>" = type { i32 (...)** } + %struct.__sFILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, i8*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 } + %struct.__sbuf = type { i8*, i32 } + %"struct.std::__basic_file<char>" = type { %struct.__sFILE*, i1 } + %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>" = type { %"struct.std::locale::facet" } + %"struct.std::bad_alloc" = type { %"struct.__gnu_cxx::char_producer<char>" } + %"struct.std::basic_filebuf<char,std::char_traits<char> >" = type { %"struct.std::basic_streambuf<char,std::char_traits<char> >", i32, %"struct.std::__basic_file<char>", i32, %union.__mbstate_t, %union.__mbstate_t, i8*, i32, i1, i1, i1, i1, i8, i8*, i8*, i1, %"struct.std::codecvt<char,char,__mbstate_t>"*, i8*, i32, i8*, i8* } + %"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i1, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"* } + 
%"struct.std::basic_iostream<char,std::char_traits<char> >" = type { %"struct.std::locale::facet", %"struct.__gnu_cxx::char_producer<char>", %"struct.std::basic_ios<char,std::char_traits<char> >" } + %"struct.std::basic_ofstream<char,std::char_traits<char> >" = type { %"struct.__gnu_cxx::char_producer<char>", %"struct.std::basic_filebuf<char,std::char_traits<char> >", %"struct.std::basic_ios<char,std::char_traits<char> >" } + %"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" } + %"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" } + %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" } + %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" } + %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 } + %"struct.std::codecvt<char,char,__mbstate_t>" = type { %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>", i32* } + %"struct.std::ctype<char>" = type { %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>", i32*, i1, i32*, i32*, i32* } + %"struct.std::domain_error" = type { %"struct.std::logic_error" } + %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %struct.__sbuf, [8 x %struct.__sbuf], i32, %struct.__sbuf*, %"struct.std::locale" } + %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } + %"struct.std::ios_base::_Words" = type { i8*, i32 } + %"struct.std::locale" = type { 
%"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } + %"struct.std::locale::facet" = type { i32 (...)**, i32 } + %"struct.std::logic_error" = type { %"struct.__gnu_cxx::char_producer<char>", %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" } + %union.__mbstate_t = type { i64, [120 x i8] } +@.str_1 = external global [17 x i8] ; <[17 x i8]*> [#uses=0] +@.str_9 = external global [24 x i8] ; <[24 x i8]*> [#uses=0] + +define void @main() { +entry: + call fastcc void @_ZNSt14basic_ofstreamIcSt11char_traitsIcEE4openEPKcSt13_Ios_Openmode( ) + ret void +} + +define fastcc void @_ZNSt14basic_ofstreamIcSt11char_traitsIcEE4openEPKcSt13_Ios_Openmode() { +entry: + %tmp.6 = icmp eq %"struct.std::basic_filebuf<char,std::char_traits<char> >"* null, null ; <i1> [#uses=1] + br i1 %tmp.6, label %then, label %UnifiedReturnBlock + +then: ; preds = %entry + tail call fastcc void @_ZNSt9basic_iosIcSt11char_traitsIcEE8setstateESt12_Ios_Iostate( ) + ret void + +UnifiedReturnBlock: ; preds = %entry + ret void +} + +define fastcc void @_ZN10__cxxabiv111__terminateEPFvvE() { +entry: + unreachable +} + +define void @_ZNSdD0Ev() { +entry: + unreachable +} + +define void @_ZThn8_NSdD1Ev() { +entry: + ret void +} + +define void @_ZNSt13basic_filebufIcSt11char_traitsIcEED0Ev() { +entry: + ret void +} + +define void @_ZNSt13basic_filebufIcSt11char_traitsIcEE9pbackfailEi() { +entry: + unreachable +} + +define fastcc void @_ZNSoD2Ev() { +entry: + unreachable +} + +define fastcc void @_ZNSt9basic_iosIcSt11char_traitsIcEED2Ev() { +entry: + unreachable +} + +define fastcc void @_ZNSt9basic_iosIcSt11char_traitsIcEE8setstateESt12_Ios_Iostate() { +entry: + tail call fastcc void @_ZSt19__throw_ios_failurePKc( ) + ret void +} + +declare fastcc void @_ZNSaIcED1Ev() + +define fastcc void @_ZNSsC1EPKcRKSaIcE() { +entry: + tail call fastcc void 
@_ZNSs16_S_construct_auxIPKcEEPcT_S3_RKSaIcE12__false_type( ) + unreachable +} + +define fastcc void @_ZSt14__convert_to_vIyEvPKcRT_RSt12_Ios_IostateRKPii() { +entry: + ret void +} + +define fastcc void @_ZNSt7num_getIcSt19istreambuf_iteratorIcSt11char_traitsIcEEEC1Ej() { +entry: + ret void +} + +define fastcc void @_ZSt19__throw_ios_failurePKc() { +entry: + call fastcc void @_ZNSsC1EPKcRKSaIcE( ) + unreachable +} + +define void @_GLOBAL__D__ZSt23lexicographical_compareIPKaS1_EbT_S2_T0_S3_() { +entry: + ret void +} + +define void @_ZNSt9bad_allocD1Ev() { +entry: + unreachable +} + +define fastcc void @_ZSt19__throw_logic_errorPKc() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke fastcc void @_ZNSt11logic_errorC1ERKSs( ) + to label %try_exit.0 unwind label %try_catch.0 + +try_catch.0: ; preds = %entry + %exn = landingpad {i8*, i32} + catch i8* null + resume { i8*, i32 } %exn + +try_exit.0: ; preds = %entry + unreachable +} + +define fastcc void @_ZNSt11logic_errorC1ERKSs() { +entry: + call fastcc void @_ZNSsC1ERKSs( ) + ret void +} + +define void @_ZNSt12domain_errorD1Ev() { +entry: + unreachable +} + +define fastcc void @_ZSt20__throw_length_errorPKc() { +entry: + call fastcc void @_ZNSt12length_errorC1ERKSs( ) + unreachable +} + +define fastcc void @_ZNSt12length_errorC1ERKSs() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke fastcc void @_ZNSsC1ERKSs( ) + to label %_ZNSt11logic_errorC2ERKSs.exit unwind label %invoke_catch.i + +invoke_catch.i: ; preds = %entry + %exn = landingpad {i8*, i32} + catch i8* null + resume { i8*, i32 } %exn + +_ZNSt11logic_errorC2ERKSs.exit: ; preds = %entry + ret void +} + +define fastcc void @_ZNSs4_Rep9_S_createEjRKSaIcE() { +entry: + call fastcc void @_ZSt20__throw_length_errorPKc( ) + unreachable +} + +define fastcc void @_ZNSs12_S_constructIN9__gnu_cxx17__normal_iteratorIPcSsEEEES2_T_S4_RKSaIcESt20forward_iterator_tag() { +entry: + unreachable +} + +define fastcc void 
@_ZNSs16_S_construct_auxIPKcEEPcT_S3_RKSaIcE12__false_type() { +entry: + br i1 false, label %then.1.i, label %endif.1.i + +then.1.i: ; preds = %entry + call fastcc void @_ZSt19__throw_logic_errorPKc( ) + br label %endif.1.i + +endif.1.i: ; preds = %then.1.i, %entry + call fastcc void @_ZNSs4_Rep9_S_createEjRKSaIcE( ) + unreachable +} + +define fastcc void @_ZNSsC1ERKSs() personality i32 (...)* @__gxx_personality_v0 { +entry: + call fastcc void @_ZNSs4_Rep7_M_grabERKSaIcES2_( ) + invoke fastcc void @_ZNSaIcEC1ERKS_( ) + to label %invoke_cont.1 unwind label %invoke_catch.1 + +invoke_catch.1: ; preds = %entry + %exn = landingpad {i8*, i32} + catch i8* null + call fastcc void @_ZNSaIcED1Ev( ) + resume { i8*, i32 } %exn + +invoke_cont.1: ; preds = %entry + call fastcc void @_ZNSaIcEC2ERKS_( ) + ret void +} + +define fastcc void @_ZNSaIcEC1ERKS_() { +entry: + ret void +} + +define fastcc void @_ZNSs7replaceEN9__gnu_cxx17__normal_iteratorIPcSsEES2_jc() { +entry: + ret void +} + +define fastcc void @_ZNSs4_Rep7_M_grabERKSaIcES2_() { +entry: + br i1 false, label %else.i, label %cond_true + +cond_true: ; preds = %entry + ret void + +else.i: ; preds = %entry + tail call fastcc void @_ZNSs4_Rep9_S_createEjRKSaIcE( ) + unreachable +} + +define fastcc void @_ZNSaIcEC2ERKS_() { +entry: + ret void +} + +define fastcc void @_ZN9__gnu_cxx12__pool_allocILb1ELi0EE8allocateEj() { +entry: + ret void +} + +define fastcc void @_ZN9__gnu_cxx12__pool_allocILb1ELi0EE9_S_refillEj() { +entry: + unreachable +} + +declare i32 @__gxx_personality_v0(...) 
diff --git a/llvm/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll b/llvm/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll new file mode 100644 index 00000000000..8a613e534c7 --- /dev/null +++ b/llvm/test/Transforms/Inline/2006-11-09-InlineCGUpdate.ll @@ -0,0 +1,343 @@ +; RUN: opt < %s -inline -prune-eh -disable-output +; PR992 +target datalayout = "e-p:32:32" +target triple = "i686-pc-linux-gnu" + %struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i32, [52 x i8] } + %struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 } + %"struct.__cxxabiv1::__array_type_info" = type { %"struct.std::type_info" } + %"struct.__cxxabiv1::__si_class_type_info" = type { %"struct.__cxxabiv1::__array_type_info", %"struct.__cxxabiv1::__array_type_info"* } + %"struct.__gnu_cxx::_Rope_rep_alloc_base<char,std::allocator<char>, true>" = type { i32 } + %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" = type { i8* } + %"struct.__gnu_cxx::__normal_iterator<const wchar_t*,std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > >" = type { i32* } + %"struct.__gnu_cxx::char_producer<char>" = type { i32 (...)** } + %"struct.__gnu_cxx::stdio_sync_filebuf<char,std::char_traits<char> >" = type { %"struct.std::basic_streambuf<char,std::char_traits<char> >", %struct._IO_FILE*, i32 } + %"struct.__gnu_cxx::stdio_sync_filebuf<wchar_t,std::char_traits<wchar_t> >" = type { %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >", %struct._IO_FILE*, i32 } + %struct.__locale_struct = type { [13 x %struct.locale_data*], i16*, i32*, i32*, [13 x i8*] } + %struct.__mbstate_t = type { i32, %"struct.__gnu_cxx::_Rope_rep_alloc_base<char,std::allocator<char>, true>" } + %struct.locale_data = type opaque + %"struct.std::__basic_file<char>" = type { %struct._IO_FILE*, 
i1 } + %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>" = type { %"struct.std::locale::facet" } + %"struct.std::basic_filebuf<char,std::char_traits<char> >" = type { %"struct.std::basic_streambuf<char,std::char_traits<char> >", i32, %"struct.std::__basic_file<char>", i32, %struct.__mbstate_t, %struct.__mbstate_t, i8*, i32, i1, i1, i1, i1, i8, i8*, i8*, i1, %"struct.std::codecvt<char,char,__mbstate_t>"*, i8*, i32, i8*, i8* } + %"struct.std::basic_filebuf<wchar_t,std::char_traits<wchar_t> >" = type { %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >", i32, %"struct.std::__basic_file<char>", i32, %struct.__mbstate_t, %struct.__mbstate_t, i32*, i32, i1, i1, i1, i1, i32, i32*, i32*, i1, %"struct.std::codecvt<char,char,__mbstate_t>"*, i8*, i32, i8*, i8* } + %"struct.std::basic_fstream<char,std::char_traits<char> >" = type { { %"struct.std::locale::facet", %"struct.__gnu_cxx::char_producer<char>" }, %"struct.std::basic_filebuf<char,std::char_traits<char> >", %"struct.std::basic_ios<char,std::char_traits<char> >" } + %"struct.std::basic_fstream<wchar_t,std::char_traits<wchar_t> >" = type { { %"struct.std::locale::facet", %"struct.__gnu_cxx::char_producer<char>" }, %"struct.std::basic_filebuf<wchar_t,std::char_traits<wchar_t> >", %"struct.std::basic_ios<wchar_t,std::char_traits<wchar_t> >" } + %"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i1, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"* } + %"struct.std::basic_ios<wchar_t,std::char_traits<wchar_t> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<wchar_t,std::char_traits<wchar_t> >"*, i32, i1, %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >"*, 
%"struct.std::codecvt<char,char,__mbstate_t>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"*, %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>"* } + %"struct.std::basic_iostream<wchar_t,std::char_traits<wchar_t> >" = type { %"struct.std::locale::facet", %"struct.__gnu_cxx::char_producer<char>", %"struct.std::basic_ios<wchar_t,std::char_traits<wchar_t> >" } + %"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" } + %"struct.std::basic_ostream<wchar_t,std::char_traits<wchar_t> >" = type { i32 (...)**, %"struct.std::basic_ios<wchar_t,std::char_traits<wchar_t> >" } + %"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" } + %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >" = type { i32 (...)**, i32*, i32*, i32*, i32*, i32*, i32*, %"struct.std::locale" } + %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" = type { %"struct.__gnu_cxx::__normal_iterator<char*,std::basic_string<char, std::char_traits<char>, std::allocator<char> > >" } + %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep" = type { %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" } + %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >::_Rep_base" = type { i32, i32, i32 } + %"struct.std::basic_string<wchar_t,std::char_traits<wchar_t>,std::allocator<wchar_t> >" = type { %"struct.__gnu_cxx::__normal_iterator<const wchar_t*,std::basic_string<wchar_t, std::char_traits<wchar_t>, std::allocator<wchar_t> > >" } + %"struct.std::codecvt<char,char,__mbstate_t>" = type { %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>", %struct.__locale_struct* } + %"struct.std::collate<char>" = type { %"struct.std::locale::facet", %struct.__locale_struct* } + 
%"struct.std::collate_byname<char>" = type { %"struct.std::collate<char>" } + %"struct.std::ctype<char>" = type { %"struct.std::__codecvt_abstract_base<char,char,__mbstate_t>", %struct.__locale_struct*, i1, i32*, i32*, i16* } + %"struct.std::ctype_byname<char>" = type { %"struct.std::ctype<char>" } + %"struct.std::domain_error" = type { %"struct.std::logic_error" } + %"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" } + %"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 } + %"struct.std::ios_base::_Words" = type { i8*, i32 } + %"struct.std::istreambuf_iterator<char,std::char_traits<char> >" = type { %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, i32 } + %"struct.std::istreambuf_iterator<wchar_t,std::char_traits<wchar_t> >" = type { %"struct.std::basic_streambuf<wchar_t,std::char_traits<wchar_t> >"*, i32 } + %"struct.std::locale" = type { %"struct.std::locale::_Impl"* } + %"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** } + %"struct.std::locale::facet" = type { i32 (...)**, i32 } + %"struct.std::logic_error" = type { %"struct.__gnu_cxx::char_producer<char>", %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >" } + %"struct.std::type_info" = type { i32 (...)**, i8* } +@.str_11 = external global [42 x i8] ; <[42 x i8]*> [#uses=0] +@.str_9 = external global [24 x i8] ; <[24 x i8]*> [#uses=0] +@.str_1 = external global [17 x i8] ; <[17 x i8]*> [#uses=0] + +define void @main() { +entry: + tail call fastcc void @_ZNSolsEi( ) + ret void +} + +define fastcc void @_ZNSolsEi() { +entry: + %tmp.22 = icmp eq i32 0, 0 ; <i1> [#uses=1] + br i1 %tmp.22, label %else, label %then + +then: ; 
preds = %entry + ret void + +else: ; preds = %entry + tail call fastcc void @_ZNSolsEl( ) + ret void +} + +define void @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_() { +entry: + ret void +} + +define fastcc void @_ZNSt9basic_iosIcSt11char_traitsIcEE8setstateESt12_Ios_Iostate() { +entry: + tail call fastcc void @_ZSt19__throw_ios_failurePKc( ) + ret void +} + +define fastcc void @_ZNSo3putEc() { +entry: + ret void +} + +define fastcc void @_ZNSolsEl() { +entry: + %tmp.21.i = icmp eq %"struct.std::basic_ostream<char,std::char_traits<char> >"* null, null ; <i1> [#uses=1] + br i1 %tmp.21.i, label %endif.0.i, label %shortcirc_next.i + +shortcirc_next.i: ; preds = %entry + ret void + +endif.0.i: ; preds = %entry + call fastcc void @_ZNSt9basic_iosIcSt11char_traitsIcEE8setstateESt12_Ios_Iostate( ) + ret void +} + +define fastcc void @_ZSt19__throw_ios_failurePKc() { +entry: + call fastcc void @_ZNSsC1EPKcRKSaIcE( ) + ret void +} + +define fastcc void @_ZNSt8ios_baseD2Ev() { +entry: + unreachable +} + +define void @_ZN9__gnu_cxx18stdio_sync_filebufIwSt11char_traitsIwEE5uflowEv() { +entry: + unreachable +} + +define void @_ZN9__gnu_cxx18stdio_sync_filebufIcSt11char_traitsIcEED1Ev() { +entry: + unreachable +} + +define void @_ZNSt15basic_streambufIcSt11char_traitsIcEE6setbufEPci() { +entry: + ret void +} + +define fastcc void @_ZSt9use_facetISt5ctypeIcEERKT_RKSt6locale() { +entry: + ret void +} + +declare fastcc void @_ZNSaIcED1Ev() + +define fastcc void @_ZSt19__throw_logic_errorPKc() { +entry: + call fastcc void @_ZNSt11logic_errorC1ERKSs( ) + ret void +} + +define fastcc void @_ZNSs4_Rep9_S_createEjRKSaIcE() { +entry: + br i1 false, label %then.0, label %endif.0 + +then.0: ; preds = %entry + call fastcc void @_ZSt20__throw_length_errorPKc( ) + ret void + +endif.0: ; preds = %entry + ret void +} + +define fastcc void @_ZSt20__throw_length_errorPKc() { +entry: + call fastcc void @_ZNSt12length_errorC1ERKSs( ) + ret void +} + +define fastcc void 
@_ZNSs16_S_construct_auxIPKcEEPcT_S3_RKSaIcE12__false_type() { +entry: + br i1 false, label %then.1.i, label %endif.1.i + +then.1.i: ; preds = %entry + call fastcc void @_ZSt19__throw_logic_errorPKc( ) + ret void + +endif.1.i: ; preds = %entry + call fastcc void @_ZNSs4_Rep9_S_createEjRKSaIcE( ) + unreachable +} + +define fastcc void @_ZNSsC1ERKSs() personality i32 (...)* @__gxx_personality_v0 { +entry: + call fastcc void @_ZNSs4_Rep7_M_grabERKSaIcES2_( ) + invoke fastcc void @_ZNSaIcEC1ERKS_( ) + to label %invoke_cont.1 unwind label %invoke_catch.1 + +invoke_catch.1: ; preds = %entry + %exn = landingpad {i8*, i32} + catch i8* null + call fastcc void @_ZNSaIcED1Ev( ) + resume { i8*, i32 } %exn + +invoke_cont.1: ; preds = %entry + call fastcc void @_ZNSaIcEC2ERKS_( ) + ret void +} + +define fastcc void @_ZNSs7reserveEj() { +entry: + ret void +} + +define fastcc void @_ZNSaIcEC1ERKS_() { +entry: + ret void +} + +define fastcc void @_ZNSs4_Rep7_M_grabERKSaIcES2_() { +entry: + br i1 false, label %else.i, label %cond_true + +cond_true: ; preds = %entry + ret void + +else.i: ; preds = %entry + tail call fastcc void @_ZNSs4_Rep9_S_createEjRKSaIcE( ) + ret void +} + +define fastcc void @_ZNSsC1EPKcRKSaIcE() { +entry: + tail call fastcc void @_ZNSs16_S_construct_auxIPKcEEPcT_S3_RKSaIcE12__false_type( ) + unreachable +} + +define fastcc void @_ZNSaIcEC2ERKS_() { +entry: + ret void +} + +define void @_ZNSt7num_putIwSt19ostreambuf_iteratorIwSt11char_traitsIwEEED1Ev() { +entry: + unreachable +} + +define void @_ZNSt14collate_bynameIcED1Ev() { +entry: + unreachable +} + +define void @_ZNKSt7num_getIcSt19istreambuf_iteratorIcSt11char_traitsIcEEE6do_getES3_S3_RSt8ios_baseRSt12_Ios_IostateRy() { +entry: + ret void +} + +define void @_ZNSt23__codecvt_abstract_baseIcc11__mbstate_tED1Ev() { +entry: + unreachable +} + +define void @_ZNSt12ctype_bynameIcED0Ev() { +entry: + unreachable +} + +define fastcc void @_ZNSt8messagesIwEC1Ej() { +entry: + ret void +} + +define fastcc void 
@_ZSt14__convert_to_vIlEvPKcRT_RSt12_Ios_IostateRKP15__locale_structi() { +entry: + ret void +} + +define fastcc void @_ZNSt8time_getIwSt19istreambuf_iteratorIwSt11char_traitsIwEEEC1Ej() { +entry: + ret void +} + +define fastcc void @_ZNSt8time_getIcSt19istreambuf_iteratorIcSt11char_traitsIcEEEC1Ej() { +entry: + ret void +} + +define fastcc void @_ZNKSt7num_getIwSt19istreambuf_iteratorIwSt11char_traitsIwEEE16_M_extract_floatES3_S3_RSt8ios_baseRSt12_Ios_IostateRSs() { +entry: + unreachable +} + +define fastcc void @_ZNSbIwSt11char_traitsIwESaIwEE4swapERS2_() { +entry: + ret void +} + +define void @_ZNSt14basic_iostreamIwSt11char_traitsIwEED0Ev() { +entry: + unreachable +} + +define void @_ZNSt15basic_streambufIcSt11char_traitsIcEE9showmanycEv() { +entry: + ret void +} + +define void @_ZNSt9exceptionD0Ev() { +entry: + unreachable +} + +define fastcc void @_ZNSt11logic_errorC1ERKSs() { +entry: + call fastcc void @_ZNSsC1ERKSs( ) + ret void +} + +define fastcc void @_ZNSt11logic_errorD2Ev() { +entry: + unreachable +} + +define fastcc void @_ZNSt12length_errorC1ERKSs() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke fastcc void @_ZNSsC1ERKSs( ) + to label %_ZNSt11logic_errorC2ERKSs.exit unwind label %invoke_catch.i + +invoke_catch.i: ; preds = %entry + %exn = landingpad {i8*, i32} + catch i8* null + resume { i8*, i32 } %exn + +_ZNSt11logic_errorC2ERKSs.exit: ; preds = %entry + ret void +} + +define void @_ZNK10__cxxabiv120__si_class_type_info20__do_find_public_srcEiPKvPKNS_17__class_type_infoES2_() { +entry: + ret void +} + +define fastcc void @_ZNSbIwSt11char_traitsIwESaIwEE16_S_construct_auxIPKwEEPwT_S7_RKS1_12__false_type() { +entry: + ret void +} + +define void @_ZTv0_n12_NSt13basic_fstreamIwSt11char_traitsIwEED1Ev() { +entry: + ret void +} + +define void @_ZNSt13basic_fstreamIcSt11char_traitsIcEED1Ev() { +entry: + unreachable +} + +define fastcc void @_ZNSt5ctypeIcEC1EPKtbj() { +entry: + ret void +} + +declare i32 @__gxx_personality_v0(...) 
diff --git a/llvm/test/Transforms/Inline/2007-04-15-InlineEH.ll b/llvm/test/Transforms/Inline/2007-04-15-InlineEH.ll new file mode 100644 index 00000000000..482c4efcf09 --- /dev/null +++ b/llvm/test/Transforms/Inline/2007-04-15-InlineEH.ll @@ -0,0 +1,69 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; PR1335 + +target triple = "i686-pc-linux-gnu" + +declare i32 @__gxx_personality_v0(...) + +declare void @a() + +declare void @b() + +declare void @c() + +define void @f() { +; CHECK-LABEL: define void @f() +entry: + call void asm "rdtsc\0A\09movl %eax, $0\0A\09movl %edx, $1", "=*imr,=*imr,~{dirflag},~{fpsr},~{flags},~{dx},~{ax}"( i32* null, i32* null ) nounwind +; CHECK: call void asm + unreachable +} + +define void @g() personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: define void @g() personality i32 (...)* @__gxx_personality_v0 +entry: + invoke void @a() to label %invcont1 unwind label %cleanup +; CHECK-NOT: {{call|invoke}} +; CHECK: invoke void @a() + +invcont1: + invoke void @b() to label %invcont2 unwind label %cleanup +; CHECK-NOT: {{call|invoke}} +; CHECK: invoke void @b() + +invcont2: + invoke void @c() to label %invcont3 unwind label %cleanup +; CHECK-NOT: {{call|invoke}} +; CHECK: invoke void @c() + +invcont3: + invoke void @f() to label %invcont4 unwind label %cleanup +; CHECK-NOT: {{call|invoke}} +; CHECK: call void asm +; CHECK-NOT: {{call|invoke}} + +invcont4: + ret void + +cleanup: + %ex = landingpad {i8*, i32} cleanup + resume { i8*, i32 } %ex +} + +define void @h() { +; CHECK-LABEL: define void @h() personality i32 (...)* @__gxx_personality_v0 +entry: + call void @g() +; CHECK-NOT: {{call|invoke}} +; CHECK: invoke void @a() +; CHECK-NOT: {{call|invoke}} +; CHECK: invoke void @b() +; CHECK-NOT: {{call|invoke}} +; CHECK: invoke void @c() +; CHECK-NOT: {{call|invoke}} +; CHECK: call void asm +; CHECK-NOT: {{call|invoke}} + + ret void +} diff --git 
a/llvm/test/Transforms/Inline/2007-06-25-WeakInline.ll b/llvm/test/Transforms/Inline/2007-06-25-WeakInline.ll new file mode 100644 index 00000000000..064cda6632e --- /dev/null +++ b/llvm/test/Transforms/Inline/2007-06-25-WeakInline.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +; 'bar' can be overridden at link-time, don't inline it. +define weak void @bar() { +; CHECK-LABEL: define weak void @bar() +entry: + ret void +} + +define void @foo() { +; CHECK-LABEL: define void @foo() +entry: + tail call void @bar() +; CHECK: tail call void @bar() + ret void +} + diff --git a/llvm/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll b/llvm/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll new file mode 100644 index 00000000000..2930aec3dbf --- /dev/null +++ b/llvm/test/Transforms/Inline/2007-12-19-InlineNoUnwind.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +declare i1 @extern() + +define internal i32 @test() { +; CHECK-NOT: define .* @test() +entry: + %n = call i1 @extern() + br i1 %n, label %r, label %u + +r: + ret i32 0 + +u: + unreachable +} + +define i32 @caller() { +; CHECK-LABEL: define i32 @caller() +entry: + %X = call i32 @test() nounwind +; CHECK-NOT: call i32 @test() +; CHECK: call i1 @extern() #0 +; CHECK: br i1 %{{.*}}, label %[[R:.*]], label %[[U:.*]] + +; CHECK: [[U]]: +; CHECK: unreachable + +; CHECK: [[R]]: + ret i32 %X +; CHECK: ret i32 0 +} + +; CHECK: attributes #0 = { nounwind } diff --git a/llvm/test/Transforms/Inline/2008-09-02-NoInline.ll b/llvm/test/Transforms/Inline/2008-09-02-NoInline.ll new file mode 100644 index 00000000000..902b53bfe9b --- /dev/null +++ b/llvm/test/Transforms/Inline/2008-09-02-NoInline.ll @@ -0,0 +1,17 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +define i32 @fn2() noinline { +; CHECK-LABEL: define i32 @fn2() 
+entry: + ret i32 1 +} + +define i32 @fn3() { +; CHECK-LABEL: define i32 @fn3() +entry: + %r = call i32 @fn2() +; CHECK: call i32 @fn2() + + ret i32 %r +} diff --git a/llvm/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll b/llvm/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll new file mode 100644 index 00000000000..2c4341c7e35 --- /dev/null +++ b/llvm/test/Transforms/Inline/2009-01-08-NoInlineDynamicAlloca.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; Do not inline calls with variable-sized alloca. + +@q = common global i8* null + +define i8* @a(i32 %i) nounwind { +; CHECK-LABEL: define i8* @a +entry: + %i_addr = alloca i32 + %retval = alloca i8* + %p = alloca i8* + %"alloca point" = bitcast i32 0 to i32 + store i32 %i, i32* %i_addr + %0 = load i32, i32* %i_addr, align 4 + %1 = alloca i8, i32 %0 + store i8* %1, i8** %p, align 4 + %2 = load i8*, i8** %p, align 4 + store i8* %2, i8** @q, align 4 + br label %return + +return: + %retval1 = load i8*, i8** %retval + ret i8* %retval1 +} + +define void @b(i32 %i) nounwind { +; CHECK-LABEL: define void @b +entry: + %i_addr = alloca i32 + %"alloca point" = bitcast i32 0 to i32 + store i32 %i, i32* %i_addr + %0 = load i32, i32* %i_addr, align 4 + %1 = call i8* @a(i32 %0) nounwind +; CHECK: call i8* @a + br label %return + +return: + ret void +} diff --git a/llvm/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll b/llvm/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll new file mode 100644 index 00000000000..8d8f20feb73 --- /dev/null +++ b/llvm/test/Transforms/Inline/2009-01-13-RecursiveInlineCrash.ll @@ -0,0 +1,293 @@ +; RUN: opt < %s -inline -argpromotion -disable-output +; ModuleID = '<stdin>' +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + %struct.quad_struct = type { i32, 
i32, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct*, %struct.quad_struct* } +@NumNodes = external global i32 ; <i32*> [#uses=0] +@"\01LC" = external constant [43 x i8] ; <[43 x i8]*> [#uses=0] +@"\01LC1" = external constant [19 x i8] ; <[19 x i8]*> [#uses=0] +@"\01LC2" = external constant [17 x i8] ; <[17 x i8]*> [#uses=0] + +declare i32 @dealwithargs(i32, i8** nocapture) nounwind + +declare i32 @atoi(i8*) + +define internal fastcc i32 @adj(i32 %d, i32 %ct) nounwind readnone { +entry: + switch i32 %d, label %return [ + i32 0, label %bb + i32 1, label %bb10 + i32 2, label %bb5 + i32 3, label %bb15 + ] + +bb: ; preds = %entry + switch i32 %ct, label %bb3 [ + i32 1, label %return + i32 0, label %return + ] + +bb3: ; preds = %bb + ret i32 0 + +bb5: ; preds = %entry + switch i32 %ct, label %bb8 [ + i32 3, label %return + i32 2, label %return + ] + +bb8: ; preds = %bb5 + ret i32 0 + +bb10: ; preds = %entry + switch i32 %ct, label %bb13 [ + i32 1, label %return + i32 3, label %return + ] + +bb13: ; preds = %bb10 + ret i32 0 + +bb15: ; preds = %entry + switch i32 %ct, label %bb18 [ + i32 2, label %return + i32 0, label %return + ] + +bb18: ; preds = %bb15 + ret i32 0 + +return: ; preds = %bb15, %bb15, %bb10, %bb10, %bb5, %bb5, %bb, %bb, %entry + ret i32 1 +} + +declare fastcc i32 @reflect(i32, i32) nounwind readnone + +declare i32 @CountTree(%struct.quad_struct* nocapture) nounwind readonly + +define internal fastcc %struct.quad_struct* @child(%struct.quad_struct* nocapture %tree, i32 %ct) nounwind readonly { +entry: + switch i32 %ct, label %bb5 [ + i32 0, label %bb1 + i32 1, label %bb + i32 2, label %bb3 + i32 3, label %bb2 + ] + +bb: ; preds = %entry + %0 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 3 ; <%struct.quad_struct**> [#uses=1] + %1 = load %struct.quad_struct*, %struct.quad_struct** %0, align 4 ; <%struct.quad_struct*> [#uses=1] + ret %struct.quad_struct* %1 + +bb1: ; preds = %entry + %2 = 
getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 2 ; <%struct.quad_struct**> [#uses=1] + %3 = load %struct.quad_struct*, %struct.quad_struct** %2, align 4 ; <%struct.quad_struct*> [#uses=1] + ret %struct.quad_struct* %3 + +bb2: ; preds = %entry + %4 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 5 ; <%struct.quad_struct**> [#uses=1] + %5 = load %struct.quad_struct*, %struct.quad_struct** %4, align 4 ; <%struct.quad_struct*> [#uses=1] + ret %struct.quad_struct* %5 + +bb3: ; preds = %entry + %6 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 4 ; <%struct.quad_struct**> [#uses=1] + %7 = load %struct.quad_struct*, %struct.quad_struct** %6, align 4 ; <%struct.quad_struct*> [#uses=1] + ret %struct.quad_struct* %7 + +bb5: ; preds = %entry + ret %struct.quad_struct* null +} + +define internal fastcc %struct.quad_struct* @gtequal_adj_neighbor(%struct.quad_struct* nocapture %tree, i32 %d) nounwind readonly { +entry: + %0 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 6 ; <%struct.quad_struct**> [#uses=1] + %1 = load %struct.quad_struct*, %struct.quad_struct** %0, align 4 ; <%struct.quad_struct*> [#uses=4] + %2 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 1 ; <i32*> [#uses=1] + %3 = load i32, i32* %2, align 4 ; <i32> [#uses=2] + %4 = icmp eq %struct.quad_struct* %1, null ; <i1> [#uses=1] + br i1 %4, label %bb3, label %bb + +bb: ; preds = %entry + %5 = call fastcc i32 @adj(i32 %d, i32 %3) nounwind ; <i32> [#uses=1] + %6 = icmp eq i32 %5, 0 ; <i1> [#uses=1] + br i1 %6, label %bb3, label %bb1 + +bb1: ; preds = %bb + %7 = call fastcc %struct.quad_struct* @gtequal_adj_neighbor(%struct.quad_struct* %1, i32 %d) nounwind ; <%struct.quad_struct*> [#uses=1] + br label %bb3 + +bb3: ; preds = %bb1, %bb, %entry + %q.0 = phi %struct.quad_struct* [ %7, %bb1 ], [ %1, %bb ], [ %1, %entry ] ; <%struct.quad_struct*> [#uses=4] + %8 = icmp eq 
%struct.quad_struct* %q.0, null ; <i1> [#uses=1] + br i1 %8, label %bb7, label %bb4 + +bb4: ; preds = %bb3 + %9 = getelementptr %struct.quad_struct, %struct.quad_struct* %q.0, i32 0, i32 0 ; <i32*> [#uses=1] + %10 = load i32, i32* %9, align 4 ; <i32> [#uses=1] + %11 = icmp eq i32 %10, 2 ; <i1> [#uses=1] + br i1 %11, label %bb5, label %bb7 + +bb5: ; preds = %bb4 + %12 = call fastcc i32 @reflect(i32 %d, i32 %3) nounwind ; <i32> [#uses=1] + %13 = call fastcc %struct.quad_struct* @child(%struct.quad_struct* %q.0, i32 %12) nounwind ; <%struct.quad_struct*> [#uses=1] + ret %struct.quad_struct* %13 + +bb7: ; preds = %bb4, %bb3 + ret %struct.quad_struct* %q.0 +} + +declare fastcc i32 @sum_adjacent(%struct.quad_struct* nocapture, i32, i32, i32) nounwind readonly + +define i32 @perimeter(%struct.quad_struct* nocapture %tree, i32 %size) nounwind readonly { +entry: + %0 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 0 ; <i32*> [#uses=1] + %1 = load i32, i32* %0, align 4 ; <i32> [#uses=1] + %2 = icmp eq i32 %1, 2 ; <i1> [#uses=1] + br i1 %2, label %bb, label %bb2 + +bb: ; preds = %entry + %3 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 4 ; <%struct.quad_struct**> [#uses=1] + %4 = load %struct.quad_struct*, %struct.quad_struct** %3, align 4 ; <%struct.quad_struct*> [#uses=1] + %5 = sdiv i32 %size, 2 ; <i32> [#uses=1] + %6 = call i32 @perimeter(%struct.quad_struct* %4, i32 %5) nounwind ; <i32> [#uses=1] + %7 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 5 ; <%struct.quad_struct**> [#uses=1] + %8 = load %struct.quad_struct*, %struct.quad_struct** %7, align 4 ; <%struct.quad_struct*> [#uses=1] + %9 = sdiv i32 %size, 2 ; <i32> [#uses=1] + %10 = call i32 @perimeter(%struct.quad_struct* %8, i32 %9) nounwind ; <i32> [#uses=1] + %11 = add i32 %10, %6 ; <i32> [#uses=1] + %12 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 3 ; <%struct.quad_struct**> [#uses=1] + %13 = load 
%struct.quad_struct*, %struct.quad_struct** %12, align 4 ; <%struct.quad_struct*> [#uses=1] + %14 = sdiv i32 %size, 2 ; <i32> [#uses=1] + %15 = call i32 @perimeter(%struct.quad_struct* %13, i32 %14) nounwind ; <i32> [#uses=1] + %16 = add i32 %15, %11 ; <i32> [#uses=1] + %17 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 2 ; <%struct.quad_struct**> [#uses=1] + %18 = load %struct.quad_struct*, %struct.quad_struct** %17, align 4 ; <%struct.quad_struct*> [#uses=1] + %19 = sdiv i32 %size, 2 ; <i32> [#uses=1] + %20 = call i32 @perimeter(%struct.quad_struct* %18, i32 %19) nounwind ; <i32> [#uses=1] + %21 = add i32 %20, %16 ; <i32> [#uses=1] + ret i32 %21 + +bb2: ; preds = %entry + %22 = getelementptr %struct.quad_struct, %struct.quad_struct* %tree, i32 0, i32 0 ; <i32*> [#uses=1] + %23 = load i32, i32* %22, align 4 ; <i32> [#uses=1] + %24 = icmp eq i32 %23, 0 ; <i1> [#uses=1] + br i1 %24, label %bb3, label %bb23 + +bb3: ; preds = %bb2 + %25 = call fastcc %struct.quad_struct* @gtequal_adj_neighbor(%struct.quad_struct* %tree, i32 0) nounwind ; <%struct.quad_struct*> [#uses=4] + %26 = icmp eq %struct.quad_struct* %25, null ; <i1> [#uses=1] + br i1 %26, label %bb8, label %bb4 + +bb4: ; preds = %bb3 + %27 = getelementptr %struct.quad_struct, %struct.quad_struct* %25, i32 0, i32 0 ; <i32*> [#uses=1] + %28 = load i32, i32* %27, align 4 ; <i32> [#uses=1] + %29 = icmp eq i32 %28, 1 ; <i1> [#uses=1] + br i1 %29, label %bb8, label %bb6 + +bb6: ; preds = %bb4 + %30 = getelementptr %struct.quad_struct, %struct.quad_struct* %25, i32 0, i32 0 ; <i32*> [#uses=1] + %31 = load i32, i32* %30, align 4 ; <i32> [#uses=1] + %32 = icmp eq i32 %31, 2 ; <i1> [#uses=1] + br i1 %32, label %bb7, label %bb8 + +bb7: ; preds = %bb6 + %33 = call fastcc i32 @sum_adjacent(%struct.quad_struct* %25, i32 3, i32 2, i32 %size) nounwind ; <i32> [#uses=1] + br label %bb8 + +bb8: ; preds = %bb7, %bb6, %bb4, %bb3 + %retval1.1 = phi i32 [ 0, %bb6 ], [ %33, %bb7 ], [ %size, %bb4 ], [ 
%size, %bb3 ] ; <i32> [#uses=3] + %34 = call fastcc %struct.quad_struct* @gtequal_adj_neighbor(%struct.quad_struct* %tree, i32 1) nounwind ; <%struct.quad_struct*> [#uses=4] + %35 = icmp eq %struct.quad_struct* %34, null ; <i1> [#uses=1] + br i1 %35, label %bb10, label %bb9 + +bb9: ; preds = %bb8 + %36 = getelementptr %struct.quad_struct, %struct.quad_struct* %34, i32 0, i32 0 ; <i32*> [#uses=1] + %37 = load i32, i32* %36, align 4 ; <i32> [#uses=1] + %38 = icmp eq i32 %37, 1 ; <i1> [#uses=1] + br i1 %38, label %bb10, label %bb11 + +bb10: ; preds = %bb9, %bb8 + %39 = add i32 %retval1.1, %size ; <i32> [#uses=1] + br label %bb13 + +bb11: ; preds = %bb9 + %40 = getelementptr %struct.quad_struct, %struct.quad_struct* %34, i32 0, i32 0 ; <i32*> [#uses=1] + %41 = load i32, i32* %40, align 4 ; <i32> [#uses=1] + %42 = icmp eq i32 %41, 2 ; <i1> [#uses=1] + br i1 %42, label %bb12, label %bb13 + +bb12: ; preds = %bb11 + %43 = call fastcc i32 @sum_adjacent(%struct.quad_struct* %34, i32 2, i32 0, i32 %size) nounwind ; <i32> [#uses=1] + %44 = add i32 %43, %retval1.1 ; <i32> [#uses=1] + br label %bb13 + +bb13: ; preds = %bb12, %bb11, %bb10 + %retval1.2 = phi i32 [ %retval1.1, %bb11 ], [ %44, %bb12 ], [ %39, %bb10 ] ; <i32> [#uses=3] + %45 = call fastcc %struct.quad_struct* @gtequal_adj_neighbor(%struct.quad_struct* %tree, i32 2) nounwind ; <%struct.quad_struct*> [#uses=4] + %46 = icmp eq %struct.quad_struct* %45, null ; <i1> [#uses=1] + br i1 %46, label %bb15, label %bb14 + +bb14: ; preds = %bb13 + %47 = getelementptr %struct.quad_struct, %struct.quad_struct* %45, i32 0, i32 0 ; <i32*> [#uses=1] + %48 = load i32, i32* %47, align 4 ; <i32> [#uses=1] + %49 = icmp eq i32 %48, 1 ; <i1> [#uses=1] + br i1 %49, label %bb15, label %bb16 + +bb15: ; preds = %bb14, %bb13 + %50 = add i32 %retval1.2, %size ; <i32> [#uses=1] + br label %bb18 + +bb16: ; preds = %bb14 + %51 = getelementptr %struct.quad_struct, %struct.quad_struct* %45, i32 0, i32 0 ; <i32*> [#uses=1] + %52 = load i32, i32* %51, 
align 4 ; <i32> [#uses=1] + %53 = icmp eq i32 %52, 2 ; <i1> [#uses=1] + br i1 %53, label %bb17, label %bb18 + +bb17: ; preds = %bb16 + %54 = call fastcc i32 @sum_adjacent(%struct.quad_struct* %45, i32 0, i32 1, i32 %size) nounwind ; <i32> [#uses=1] + %55 = add i32 %54, %retval1.2 ; <i32> [#uses=1] + br label %bb18 + +bb18: ; preds = %bb17, %bb16, %bb15 + %retval1.3 = phi i32 [ %retval1.2, %bb16 ], [ %55, %bb17 ], [ %50, %bb15 ] ; <i32> [#uses=3] + %56 = call fastcc %struct.quad_struct* @gtequal_adj_neighbor(%struct.quad_struct* %tree, i32 3) nounwind ; <%struct.quad_struct*> [#uses=4] + %57 = icmp eq %struct.quad_struct* %56, null ; <i1> [#uses=1] + br i1 %57, label %bb20, label %bb19 + +bb19: ; preds = %bb18 + %58 = getelementptr %struct.quad_struct, %struct.quad_struct* %56, i32 0, i32 0 ; <i32*> [#uses=1] + %59 = load i32, i32* %58, align 4 ; <i32> [#uses=1] + %60 = icmp eq i32 %59, 1 ; <i1> [#uses=1] + br i1 %60, label %bb20, label %bb21 + +bb20: ; preds = %bb19, %bb18 + %61 = add i32 %retval1.3, %size ; <i32> [#uses=1] + ret i32 %61 + +bb21: ; preds = %bb19 + %62 = getelementptr %struct.quad_struct, %struct.quad_struct* %56, i32 0, i32 0 ; <i32*> [#uses=1] + %63 = load i32, i32* %62, align 4 ; <i32> [#uses=1] + %64 = icmp eq i32 %63, 2 ; <i1> [#uses=1] + br i1 %64, label %bb22, label %bb23 + +bb22: ; preds = %bb21 + %65 = call fastcc i32 @sum_adjacent(%struct.quad_struct* %56, i32 1, i32 3, i32 %size) nounwind ; <i32> [#uses=1] + %66 = add i32 %65, %retval1.3 ; <i32> [#uses=1] + ret i32 %66 + +bb23: ; preds = %bb21, %bb2 + %retval1.0 = phi i32 [ 0, %bb2 ], [ %retval1.3, %bb21 ] ; <i32> [#uses=1] + ret i32 %retval1.0 +} + +declare i32 @main(i32, i8** nocapture) noreturn nounwind + +declare i32 @printf(i8*, ...) 
nounwind + +declare void @exit(i32) noreturn nounwind + +declare fastcc i32 @CheckOutside(i32, i32) nounwind readnone + +declare fastcc i32 @CheckIntersect(i32, i32, i32) nounwind readnone + +declare %struct.quad_struct* @MakeTree(i32, i32, i32, i32, i32, %struct.quad_struct*, i32, i32) nounwind diff --git a/llvm/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll b/llvm/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll new file mode 100644 index 00000000000..c8629ea22eb --- /dev/null +++ b/llvm/test/Transforms/Inline/2009-05-07-CallUsingSelfCrash.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -inline -disable-output +; PR4123 + %struct.S0 = type <{ i32 }> + %struct.S1 = type <{ i8, i8, i8, i8, %struct.S0 }> + %struct.S2 = type <{ %struct.S1, i32 }> + +define void @func_113(%struct.S1* noalias nocapture sret %agg.result, i8 signext %p_114) noreturn nounwind { +entry: + unreachable + +for.inc: ; preds = %for.inc + %call48 = call fastcc signext i8 @safe_sub_func_uint8_t_u_u(i8 signext %call48) ; <i8> [#uses=1] + br label %for.inc +} + +define fastcc signext i8 @safe_sub_func_uint8_t_u_u(i8 signext %_ui1) nounwind readnone { +entry: + ret i8 %_ui1 +} + diff --git a/llvm/test/Transforms/Inline/2010-05-12-ValueMap.ll b/llvm/test/Transforms/Inline/2010-05-12-ValueMap.ll new file mode 100644 index 00000000000..f452907efd0 --- /dev/null +++ b/llvm/test/Transforms/Inline/2010-05-12-ValueMap.ll @@ -0,0 +1,28 @@ +; RUN: opt -inline -mergefunc -disable-output < %s + +; This tests for a bug where the inliner kept the functions in a ValueMap after +; it had completed and a ModulePass started to run. LLVM would crash deleting +; a function that was still a key in the ValueMap. 
+ +define internal fastcc void @list_Cdr1918() nounwind inlinehint { + unreachable +} + +define internal fastcc void @list_PairSecond1927() nounwind inlinehint { + call fastcc void @list_Cdr1918() nounwind inlinehint + unreachable +} + +define internal fastcc void @list_Cdr3164() nounwind inlinehint { + unreachable +} + +define internal fastcc void @list_Nconc3167() nounwind inlinehint { + call fastcc void @list_Cdr3164() nounwind inlinehint + unreachable +} + +define void @term_Equal() nounwind { + call fastcc void @list_Cdr3164() nounwind inlinehint + unreachable +} diff --git a/llvm/test/Transforms/Inline/AArch64/binop.ll b/llvm/test/Transforms/Inline/AArch64/binop.ll new file mode 100644 index 00000000000..051528991e4 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/binop.ll @@ -0,0 +1,291 @@ +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -o - < %s -inline-threshold=0 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +declare void @pad() +@glbl = external global i32 + +define i32 @outer_add1(i32 %a) { +; CHECK-LABEL: @outer_add1( +; CHECK-NOT: call i32 @add + %C = call i32 @add(i32 %a, i32 0) + ret i32 %C +} + +define i32 @outer_add2(i32 %a) { +; CHECK-LABEL: @outer_add2( +; CHECK-NOT: call i32 @add + %C = call i32 @add(i32 0, i32 %a) + ret i32 %C +} + +define i32 @add(i32 %a, i32 %b) { + %add = add i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %add +} + + + +define i32 @outer_sub1(i32 %a) { +; CHECK-LABEL: @outer_sub1( +; CHECK-NOT: call i32 @sub1 + %C = call i32 @sub1(i32 %a, i32 0) + ret i32 %C +} + +define i32 @sub1(i32 %a, i32 %b) { + %sub = sub i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %sub +} + + +define i32 @outer_sub2(i32 %a) { +; CHECK-LABEL: @outer_sub2( +; CHECK-NOT: call i32 @sub2 + %C = call i32 @sub2(i32 %a) + ret i32 %C +} + +define i32 @sub2(i32 %a) { + %sub = sub i32 %a, %a + call void @pad() + ret i32 %sub 
+} + + + +define i32 @outer_mul1(i32 %a) { +; CHECK-LABEL: @outer_mul1( +; CHECK-NOT: call i32 @mul + %C = call i32 @mul(i32 %a, i32 0) + ret i32 %C +} + +define i32 @outer_mul2(i32 %a) { +; CHECK-LABEL: @outer_mul2( +; CHECK-NOT: call i32 @mul + %C = call i32 @mul(i32 %a, i32 1) + ret i32 %C +} + +define i32 @mul(i32 %a, i32 %b) { + %mul = mul i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %mul +} + + + +define i32 @outer_div1(i32 %a) { +; CHECK-LABEL: @outer_div1( +; CHECK-NOT: call i32 @div1 + %C = call i32 @div1(i32 0, i32 %a) + ret i32 %C +} + +define i32 @outer_div2(i32 %a) { +; CHECK-LABEL: @outer_div2( +; CHECK-NOT: call i32 @div1 + %C = call i32 @div1(i32 %a, i32 1) + ret i32 %C +} + +define i32 @div1(i32 %a, i32 %b) { + %div = sdiv i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %div +} + + +define i32 @outer_div3(i32 %a) { +; CHECK-LABEL: @outer_div3( +; CHECK-NOT: call i32 @div + %C = call i32 @div2(i32 %a) + ret i32 %C +} + +define i32 @div2(i32 %a) { + %div = sdiv i32 %a, %a + call void @pad() + ret i32 %div +} + + + +define i32 @outer_rem1(i32 %a) { +; CHECK-LABEL: @outer_rem1( +; CHECK-NOT: call i32 @rem + %C = call i32 @rem1(i32 0, i32 %a) + ret i32 %C +} + +define i32 @outer_rem2(i32 %a) { +; CHECK-LABEL: @outer_rem2( +; CHECK-NOT: call i32 @rem + %C = call i32 @rem1(i32 %a, i32 1) + ret i32 %C +} + +define i32 @rem1(i32 %a, i32 %b) { + %rem = urem i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %rem +} + + +define i32 @outer_rem3(i32 %a) { +; CHECK-LABEL: @outer_rem3( +; CHECK-NOT: call i32 @rem + %C = call i32 @rem2(i32 %a) + ret i32 %C +} + +define i32 @rem2(i32 %a) { + %rem = urem i32 %a, %a + call void @pad() + ret i32 %rem +} + + + +define i32 @outer_shl1(i32 %a) { +; CHECK-LABEL: @outer_shl1( +; CHECK-NOT: call i32 @shl + %C = call i32 @shl(i32 %a, i32 0) + ret i32 %C +} + +define i32 @shl(i32 %a, i32 %b) { + %shl = shl i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 
%shl +} + + + +define i32 @outer_shr1(i32 %a) { +; CHECK-LABEL: @outer_shr1( +; CHECK-NOT: call i32 @shr + %C = call i32 @shr(i32 %a, i32 0) + ret i32 %C +} + +define i32 @shr(i32 %a, i32 %b) { + %shr = ashr i32 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i32 %shr +} + + + +define i1 @outer_and1(i1 %a) { +; CHECK-LABEL: @outer_and1( +; CHECK-NOT: call i1 @and1 + %c = call i1 @and1(i1 %a, i1 false) + ret i1 %c +} + +define i1 @outer_and2(i1 %a) { +; CHECK-LABEL: @outer_and2( +; CHECK-NOT: call i1 @and1 + %c = call i1 @and1(i1 %a, i1 true) + ret i1 %c +} + +define i1 @and1(i1 %a, i1 %b) { + %and = and i1 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i1 %and +} + + +define i1 @outer_and3(i1 %a) { +; CHECK-LABEL: @outer_and3( +; CHECK-NOT: call i1 @and2 + %c = call i1 @and2(i1 %a) + ret i1 %c +} + +define i1 @and2(i1 %a) { + %and = and i1 %a, %a + call void @pad() + ret i1 %and +} + + + +define i1 @outer_or1(i1 %a) { +; CHECK-LABEL: @outer_or1( +; CHECK-NOT: call i1 @or1 + %c = call i1 @or1(i1 %a, i1 false) + ret i1 %c +} + +define i1 @outer_or2(i1 %a) { +; CHECK-LABEL: @outer_or2( +; CHECK-NOT: call i1 @or1 + %c = call i1 @or1(i1 %a, i1 true) + ret i1 %c +} + +define i1 @or1(i1 %a, i1 %b) { + %or = or i1 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i1 %or +} + + +define i1 @outer_or3(i1 %a) { +; CHECK-LABEL: @outer_or3( +; CHECK-NOT: call i1 @or2 + %c = call i1 @or2(i1 %a) + ret i1 %c +} + +define i1 @or2(i1 %a) { + %or = or i1 %a, %a + call void @pad() + ret i1 %or +} + + + +define i1 @outer_xor1(i1 %a) { +; CHECK-LABEL: @outer_xor1( +; CHECK-NOT: call i1 @xor + %c = call i1 @xor1(i1 %a, i1 false) + ret i1 %c +} + +define i1 @xor1(i1 %a, i1 %b) { + %xor = xor i1 %a, %b + call void @pad() + store i32 0, i32* @glbl + ret i1 %xor +} + + +define i1 @outer_xor3(i1 %a) { +; CHECK-LABEL: @outer_xor3( +; CHECK-NOT: call i1 @xor + %c = call i1 @xor2(i1 %a) + ret i1 %c +} + +define i1 @xor2(i1 %a) { + %xor = xor i1 %a, %a + call void 
@pad() + ret i1 %xor +} diff --git a/llvm/test/Transforms/Inline/AArch64/ext.ll b/llvm/test/Transforms/Inline/AArch64/ext.ll new file mode 100644 index 00000000000..04095c04ee8 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/ext.ll @@ -0,0 +1,249 @@ +; REQUIRES: asserts +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +define i32 @outer1(i32* %ptr, i32 %i) { + %C = call i32 @inner1(i32* %ptr, i32 %i) + ret i32 %C +} + +; sext can be folded into gep. +; CHECK: Analyzing call of inner1 +; CHECK: NumInstructionsSimplified: 3 +; CHECK: NumInstructions: 4 +define i32 @inner1(i32* %ptr, i32 %i) { + %E = sext i32 %i to i64 + %G = getelementptr inbounds i32, i32* %ptr, i64 %E + %L = load i32, i32* %G + ret i32 %L +} + +define i32 @outer2(i32* %ptr, i32 %i) { + %C = call i32 @inner2(i32* %ptr, i32 %i) + ret i32 %C +} + +; zext from i32 to i64 is free. +; CHECK: Analyzing call of inner2 +; CHECK: NumInstructionsSimplified: 3 +; CHECK: NumInstructions: 4 +define i32 @inner2(i32* %ptr, i32 %i) { + %E = zext i32 %i to i64 + %G = getelementptr inbounds i32, i32* %ptr, i64 %E + %L = load i32, i32* %G + ret i32 %L +} + +define i32 @outer3(i32* %ptr, i16 %i) { + %C = call i32 @inner3(i32* %ptr, i16 %i) + ret i32 %C +} + +; zext can be folded into gep. +; CHECK: Analyzing call of inner3 +; CHECK: NumInstructionsSimplified: 3 +; CHECK: NumInstructions: 4 +define i32 @inner3(i32* %ptr, i16 %i) { + %E = zext i16 %i to i64 + %G = getelementptr inbounds i32, i32* %ptr, i64 %E + %L = load i32, i32* %G + ret i32 %L +} + +define i16 @outer4(i8* %ptr) { + %C = call i16 @inner4(i8* %ptr) + ret i16 %C +} + +; It is an ExtLoad. 
+; CHECK: Analyzing call of inner4 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i16 @inner4(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i16 + ret i16 %E +} + +define i16 @outer5(i8* %ptr) { + %C = call i16 @inner5(i8* %ptr) + ret i16 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner5 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i16 @inner5(i8* %ptr) { + %L = load i8, i8* %ptr + %E = sext i8 %L to i16 + ret i16 %E +} + +define i32 @outer6(i8* %ptr) { + %C = call i32 @inner6(i8* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner6 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner6(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i32 + ret i32 %E +} + +define i32 @outer7(i8* %ptr) { + %C = call i32 @inner7(i8* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner7 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner7(i8* %ptr) { + %L = load i8, i8* %ptr + %E = sext i8 %L to i32 + ret i32 %E +} + +define i32 @outer8(i16* %ptr) { + %C = call i32 @inner8(i16* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner8 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner8(i16* %ptr) { + %L = load i16, i16* %ptr + %E = zext i16 %L to i32 + ret i32 %E +} + +define i32 @outer9(i16* %ptr) { + %C = call i32 @inner9(i16* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner9 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner9(i16* %ptr) { + %L = load i16, i16* %ptr + %E = sext i16 %L to i32 + ret i32 %E +} + +define i64 @outer10(i8* %ptr) { + %C = call i64 @inner10(i8* %ptr) + ret i64 %C +} + +; It is an ExtLoad. 
+; CHECK: Analyzing call of inner10 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner10(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i64 + ret i64 %E +} + +define i64 @outer11(i8* %ptr) { + %C = call i64 @inner11(i8* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner11 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner11(i8* %ptr) { + %L = load i8, i8* %ptr + %E = sext i8 %L to i64 + ret i64 %E +} + +define i64 @outer12(i16* %ptr) { + %C = call i64 @inner12(i16* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner12 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner12(i16* %ptr) { + %L = load i16, i16* %ptr + %E = zext i16 %L to i64 + ret i64 %E +} + +define i64 @outer13(i16* %ptr) { + %C = call i64 @inner13(i16* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner13 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner13(i16* %ptr) { + %L = load i16, i16* %ptr + %E = sext i16 %L to i64 + ret i64 %E +} + +define i64 @outer14(i32* %ptr) { + %C = call i64 @inner14(i32* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner14 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner14(i32* %ptr) { + %L = load i32, i32* %ptr + %E = zext i32 %L to i64 + ret i64 %E +} + +define i64 @outer15(i32* %ptr) { + %C = call i64 @inner15(i32* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner15 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner15(i32* %ptr) { + %L = load i32, i32* %ptr + %E = sext i32 %L to i64 + ret i64 %E +} + +define i64 @outer16(i32 %V1, i64 %V2) { + %C = call i64 @inner16(i32 %V1, i64 %V2) + ret i64 %C +} + +; sext can be folded into shl. 
+; CHECK: Analyzing call of inner16 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 4 +define i64 @inner16(i32 %V1, i64 %V2) { + %E = sext i32 %V1 to i64 + %S = shl i64 %E, 3 + %A = add i64 %V2, %S + ret i64 %A +} diff --git a/llvm/test/Transforms/Inline/AArch64/gep-cost.ll b/llvm/test/Transforms/Inline/AArch64/gep-cost.ll new file mode 100644 index 00000000000..7d191d37f1f --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/gep-cost.ll @@ -0,0 +1,51 @@ +; REQUIRES: asserts +; RUN: opt -inline -mtriple=aarch64--linux-gnu -mcpu=kryo -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +define void @outer1([4 x i32]* %ptr, i32 %i) { + call void @inner1([4 x i32]* %ptr, i32 %i) + ret void +} + +define void @outer2([4 x i32]* %ptr, i32 %i) { + call void @inner2([4 x i32]* %ptr, i32 %i) + ret void +} + +define void @outer3([4 x i32]* %ptr, i32 %j) { + call void @inner3([4 x i32]* %ptr, i32 0, i32 %j) + ret void +} + +; The gep in inner1() is reg+reg, which is a legal addressing mode for AArch64. +; Thus, both the gep and ret can be simplified. +; CHECK: Analyzing call of inner1 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 2 +define void @inner1([4 x i32]* %ptr, i32 %i) { + %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 0, i32 %i + ret void +} + +; The gep in inner2() is reg+imm+reg, which is not a legal addressing mode for +; AArch64. Thus, only the ret can be simplified and not the gep. +; CHECK: Analyzing call of inner2 +; CHECK: NumInstructionsSimplified: 1 +; CHECK: NumInstructions: 2 +define void @inner2([4 x i32]* %ptr, i32 %i) { + %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 1, i32 %i + ret void +} + +; The gep in inner3() is reg+reg because %i is a known constant from the +; callsite. This case is a legal addressing mode for AArch64. 
Thus, both the +; gep and ret can be simplified. +; CHECK: Analyzing call of inner3 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 2 +define void @inner3([4 x i32]* %ptr, i32 %i, i32 %j) { + %G = getelementptr inbounds [4 x i32], [4 x i32]* %ptr, i32 %i, i32 %j + ret void +} diff --git a/llvm/test/Transforms/Inline/AArch64/inline-target-attr.ll b/llvm/test/Transforms/Inline/AArch64/inline-target-attr.ll new file mode 100644 index 00000000000..af87ff6e740 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/inline-target-attr.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -S -inline | FileCheck %s +; RUN: opt < %s -mtriple=aarch64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s +; Check that we only inline when we have compatible target attributes. + +define i32 @foo() #0 { +entry: + %call = call i32 (...) @baz() + ret i32 %call +; CHECK-LABEL: foo +; CHECK: call i32 (...) @baz() +} +declare i32 @baz(...) #0 + +define i32 @bar() #1 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: bar +; CHECK: call i32 (...) @baz() +} + +define i32 @qux() #0 { +entry: + %call = call i32 @bar() + ret i32 %call +; CHECK-LABEL: qux +; CHECK: call i32 @bar() +} + +define i32 @strict_align() #2 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: strict_align +; CHECK: call i32 (...) 
@baz() +} + +attributes #0 = { "target-cpu"="generic" "target-features"="+crc,+neon" } +attributes #1 = { "target-cpu"="generic" "target-features"="+crc,+neon,+crypto" } +attributes #2 = { "target-cpu"="generic" "target-features"="+crc,+neon,+strict-align" } diff --git a/llvm/test/Transforms/Inline/AArch64/lit.local.cfg b/llvm/test/Transforms/Inline/AArch64/lit.local.cfg new file mode 100644 index 00000000000..7184443994b --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AArch64' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/Inline/AArch64/logical-and-or.ll b/llvm/test/Transforms/Inline/AArch64/logical-and-or.ll new file mode 100644 index 00000000000..e9235971562 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/logical-and-or.ll @@ -0,0 +1,94 @@ +; REQUIRES: asserts +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +; FIXME: Once the 'or' or 'and' is simplified the second compare is dead, but +; the inline cost model has already added the cost. 
+ +define i1 @outer1(i32 %a) { + %C = call i1 @inner1(i32 0, i32 %a) + ret i1 %C +} + +; CHECK: Analyzing call of inner1 +; CHECK: NumInstructionsSimplified: 3 +; CHECK: NumInstructions: 4 +define i1 @inner1(i32 %a, i32 %b) { + %tobool = icmp eq i32 %a, 0 ; Simplifies to true + %tobool1 = icmp eq i32 %b, 0 ; Should be dead once 'or' is simplified + %or.cond = or i1 %tobool, %tobool1 ; Simplifies to true + ret i1 %or.cond ; Simplifies to ret i1 true +} + +define i1 @outer2(i32 %a) { + %C = call i1 @inner2(i32 1, i32 %a) + ret i1 %C +} + +; CHECK: Analyzing call of inner2 +; CHECK: NumInstructionsSimplified: 3 +; CHECK: NumInstructions: 4 +define i1 @inner2(i32 %a, i32 %b) { + %tobool = icmp eq i32 %a, 0 ; Simplifies to false + %tobool1 = icmp eq i32 %b, 0 ; Should be dead once 'and' is simplified + %and.cond = and i1 %tobool, %tobool1 ; Simplifies to false + ret i1 %and.cond ; Simplifies to ret i1 false +} + + +define i32 @outer3(i32 %a) { + %C = call i32 @inner3(i32 4294967295, i32 %a) + ret i32 %C +} + +; CHECK: Analyzing call of inner3 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 2 +define i32 @inner3(i32 %a, i32 %b) { + %or.cond = or i32 %a, %b ; Simplifies to 4294967295 + ret i32 %or.cond ; Simplifies to ret i32 4294967295 +} + + +define i32 @outer4(i32 %a) { + %C = call i32 @inner4(i32 0, i32 %a) + ret i32 %C +} + +; CHECK: Analyzing call of inner4 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 2 +define i32 @inner4(i32 %a, i32 %b) { + %and.cond = and i32 %a, %b ; Simplifies to 0 + ret i32 %and.cond ; Simplifies to ret i32 0 +} + +define i1 @outer5(i32 %a) { + %C = call i1 @inner5(i32 0, i32 %a) + ret i1 %C +} + +; CHECK: Analyzing call of inner5 +; CHECK: NumInstructionsSimplified: 4 +; CHECK: NumInstructions: 5 +define i1 @inner5(i32 %a, i32 %b) { + %tobool = icmp eq i32 %a, 0 ; Simplifies to true + %tobool1 = icmp eq i32 %b, 0 ; Should be dead once 'or' is simplified + %or.cond = or i1 %tobool, %tobool1 ; Simplifies 
to true + br i1 %or.cond, label %end, label %isfalse ; Simplifies to br label %end + +isfalse: ; This block is unreachable once inlined + call void @dead() + call void @dead() + call void @dead() + call void @dead() + call void @dead() + br label %end + +end: + ret i1 %or.cond ; Simplifies to ret i1 true +} + +declare void @dead() diff --git a/llvm/test/Transforms/Inline/AArch64/phi.ll b/llvm/test/Transforms/Inline/AArch64/phi.ll new file mode 100644 index 00000000000..63999f58b2c --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/phi.ll @@ -0,0 +1,504 @@ +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -o - < %s -inline-threshold=0 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +declare void @pad() +@glbl = external global i32 + +define i1 @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call i1 @inner1 + %C = call i1 @inner1() + ret i1 %C +} + +define i1 @inner1() { +entry: + br label %if_true + +if_true: + %phi = phi i1 [0, %entry], [%phi, %if_true] ; Simplified to 0 + br i1 %phi, label %if_true, label %exit + +exit: + store i32 0, i32* @glbl + store i32 1, i32* @glbl + store i32 2, i32* @glbl + store i32 3, i32* @glbl + store i32 4, i32* @glbl + ret i1 %phi +} + + +define i1 @outer2(i1 %val) { +; CHECK-LABEL: @outer2( +; CHECK: call i1 @inner2 + %C = call i1 @inner2(i1 %val) + ret i1 %C +} + +define i1 @inner2(i1 %val) { +entry: + br label %if_true + +if_true: + %phi = phi i1 [%val, %entry], [%phi, %if_true] ; Cannot be simplified to a constant + br i1 %phi, label %if_true, label %exit + +exit: + call void @pad() + ret i1 %phi +} + + +define i1 @outer3(i1 %cond) { +; CHECK-LABEL: @outer3( +; CHECK-NOT: call i1 @inner3 + %C = call i1 @inner3(i1 %cond) + ret i1 %C +} + +define i1 @inner3(i1 %cond) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [0, %entry], [0, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 
%phi, 0 + store i32 0, i32* @glbl + store i32 1, i32* @glbl + store i32 2, i32* @glbl + store i32 3, i32* @glbl + store i32 4, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer4(i1 %cond) { +; CHECK-LABEL: @outer4( +; CHECK-NOT: call i1 @inner4 + %C = call i1 @inner4(i1 %cond, i32 0) + ret i1 %C +} + +define i1 @inner4(i1 %cond, i32 %val) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [0, %entry], [%val, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp +} + + +define i1 @outer5_1(i1 %cond) { +; CHECK-LABEL: @outer5_1( +; CHECK-NOT: call i1 @inner5 + %C = call i1 @inner5(i1 %cond, i32 0, i32 0) + ret i1 %C +} + + +define i1 @outer5_2(i1 %cond) { +; CHECK-LABEL: @outer5_2( +; CHECK: call i1 @inner5 + %C = call i1 @inner5(i1 %cond, i32 0, i32 1) + ret i1 %C +} + +define i1 @inner5(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Can be simplified to a constant if %val1 and %val2 are the same constants + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer6(i1 %cond, i32 %val) { +; CHECK-LABEL: @outer6( +; CHECK-NOT: call i1 @inner6 + %C = call i1 @inner6(i1 true, i32 %val, i32 0) + ret i1 %C +} + +define i1 @inner6(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer7(i1 %cond, i32 %val) { +; CHECK-LABEL: @outer7( +; CHECK-NOT: call i1 @inner7 + %C = call i1 @inner7(i1 false, i32 0, i32 %val) + ret i1 %C +} + +define i1 @inner7(i1 %cond, i32 %val1, i32 %val2) { +entry: + br i1 %cond, label %if_true, label %exit 
+ +if_true: + br label %exit + +exit: + %phi = phi i32 [%val1, %entry], [%val2, %if_true] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i1 %cmp +} + + +define i1 @outer8_1() { +; CHECK-LABEL: @outer8_1( +; CHECK-NOT: call i1 @inner8 + %C = call i1 @inner8(i32 0) + ret i1 %C +} + + + +define i1 @outer8_2() { +; CHECK-LABEL: @outer8_2( +; CHECK-NOT: call i1 @inner8 + %C = call i1 @inner8(i32 3) + ret i1 %C +} + +define i1 @inner8(i32 %cond) { +entry: + switch i32 %cond, label %default [ i32 0, label %zero + i32 1, label %one + i32 2, label %two ] + +zero: + br label %exit + +one: + br label %exit + +two: + br label %exit + +default: + br label %exit + +exit: + %phi = phi i32 [0, %zero], [1, %one], [2, %two], [-1, %default] ; Can be simplified to a constant if the switch condition is known + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp +} + + +define i1 @outer9(i1 %cond) { +; CHECK-LABEL: @outer9( +; CHECK-NOT: call i1 @inner9 + %C = call i1 @inner9(i32 0, i1 %cond) + ret i1 %C +} + +define i1 @inner9(i32 %cond1, i1 %cond2) { +entry: + switch i32 %cond1, label %exit [ i32 0, label %zero + i32 1, label %one + i32 2, label %two ] + +zero: + br label %exit + +one: + br label %exit + +two: + br i1 %cond2, label %two_true, label %two_false + +two_true: + br label %exit + +two_false: + br label %exit + +exit: + %phi = phi i32 [0, %zero], [1, %one], [2, %two_true], [2, %two_false], [-1, %entry] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + store i32 0, i32* @glbl + ret i1 %cmp +} + + +define i32 @outer10(i1 %cond) { +; CHECK-LABEL: @outer10( +; CHECK-NOT: call i32 @inner10 + %A = alloca i32 + %C = call i32 @inner10(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner10(i1 %cond, i32* %A) { +entry: + br label %if_true + +if_true: + %phi = phi i32* [%A, %entry], [%phi, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + br i1 %cond, label %if_true, 
label %exit + +exit: + call void @pad() + ret i32 %load +} + + +define i32 @outer11(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer11( +; CHECK: call i32 @inner11 + %C = call i32 @inner11(i1 %cond, i32* %ptr) + ret i32 %C +} + +define i32 @inner11(i1 %cond, i32* %ptr) { +entry: + br label %if_true + +if_true: + %phi = phi i32* [%ptr, %entry], [%phi, %if_true] ; Cannot be simplified + %load = load i32, i32* %phi + br i1 %cond, label %if_true, label %exit + +exit: + call void @pad() + ret i32 %load +} + + +define i32 @outer12(i1 %cond) { +; CHECK-LABEL: @outer12( +; CHECK-NOT: call i32 @inner12 + %A = alloca i32 + %C = call i32 @inner12(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner12(i1 %cond, i32* %ptr) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr, %entry], [%ptr, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + ret i32 %load +} + + +define i32 @outer13(i1 %cond) { +; CHECK-LABEL: @outer13( +; CHECK-NOT: call i32 @inner13 + %A = alloca i32 + %C = call i32 @inner13(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner13(i1 %cond, i32* %ptr) { +entry: + %gep1 = getelementptr inbounds i32, i32* %ptr, i32 2 + %gep2 = getelementptr inbounds i32, i32* %ptr, i32 1 + br i1 %cond, label %if_true, label %exit + +if_true: + %gep3 = getelementptr inbounds i32, i32* %gep2, i32 1 + br label %exit + +exit: + %phi = phi i32* [%gep1, %entry], [%gep3, %if_true] ; Simplified to %gep1 + %load = load i32, i32* %phi + call void @pad() + ret i32 %load +} + + +define i32 @outer14(i1 %cond) { +; CHECK-LABEL: @outer14( +; CHECK: call i32 @inner14 + %A1 = alloca i32 + %A2 = alloca i32 + %C = call i32 @inner14(i1 %cond, i32* %A1, i32* %A2) + ret i32 %C +} + +define i32 @inner14(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Cannot be simplified + %load = 
load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + ret i32 %load +} + + +define i32 @outer15(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer15( +; CHECK-NOT: call i32 @inner15 + %A = alloca i32 + %C = call i32 @inner15(i1 true, i32* %ptr, i32* %A) + ret i32 %C +} + +define i32 @inner15(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %load +} + + +define i32 @outer16(i1 %cond, i32* %ptr) { +; CHECK-LABEL: @outer16( +; CHECK-NOT: call i32 @inner16 + %A = alloca i32 + %C = call i32 @inner16(i1 false, i32* %A, i32* %ptr) + ret i32 %C +} + +define i32 @inner16(i1 %cond, i32* %ptr1, i32* %ptr2) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr1, %entry], [%ptr2, %if_true] ; Simplified to %A + %load = load i32, i32* %phi + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %load +} + + +define i1 @outer17(i1 %cond) { +; CHECK-LABEL: @outer17( +; CHECK: call i1 @inner17 + %A = alloca i32 + %C = call i1 @inner17(i1 %cond, i32* %A) + ret i1 %C +} + +define i1 @inner17(i1 %cond, i32* %ptr) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [null, %entry], [%ptr, %if_true] ; Cannot be mapped to a constant + %cmp = icmp eq i32* %phi, null + call void @pad() + ret i1 %cmp +} + + +define i1 @outer18(i1 %cond) { +; CHECK-LABEL: @outer18( +; CHECK-NOT: call i1 @inner18 + %C = call i1 @inner18(i1 %cond, i1 true) + ret i1 %C +} + +define i1 @inner18(i1 %cond1, i1 %cond2) { +entry: + br i1 %cond1, label %block1, label %block2 + +block1: + br i1 %cond2, label %block3, label %block4 + +block2: + br i1 %cond2, label %block5, label %block4 + +block3: + %phi = phi i32 [0, 
%block1], [1, %block4], [0, %block5] ; Simplified to 0 + %cmp = icmp eq i32 %phi, 0 + call void @pad() + ret i1 %cmp + +block4: ; Unreachable block + br label %block3 + +block5: + br label %block3 +} + + +define i1 @outer19(i1 %cond) { +; CHECK-LABEL: @outer19( +; CHECK: call i1 @inner19 + %A = alloca i32 + %C = call i1 @inner19(i1 %cond, i32* %A) + ret i1 %C +} + +define i1 @inner19(i1 %cond, i32* %ptr) { +entry: + br i1 %cond, label %if_true, label %exit + +if_true: + br label %exit + +exit: + %phi = phi i32* [%ptr, %entry], [null, %if_true] ; Cannot be mapped to a constant + %cmp = icmp eq i32* %phi, null + call void @pad() + ret i1 %cmp +} diff --git a/llvm/test/Transforms/Inline/AArch64/select.ll b/llvm/test/Transforms/Inline/AArch64/select.ll new file mode 100644 index 00000000000..fd5929dab4e --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/select.ll @@ -0,0 +1,251 @@ +; RUN: opt -inline -mtriple=aarch64--linux-gnu -S -o - < %s -inline-threshold=0 | FileCheck %s + +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64--linux-gnu" + +declare void @pad() +@glbl = external global i32 + +define i32 @outer1(i1 %cond) { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call i32 @inner1 + %C = call i32 @inner1(i1 %cond, i32 1) + ret i32 %C +} + +define i32 @inner1(i1 %cond, i32 %val) { + %select = select i1 %cond, i32 1, i32 %val ; Simplified to 1 + call void @pad() + store i32 0, i32* @glbl + ret i32 %select ; Simplifies to ret i32 1 +} + + +define i32 @outer2(i32 %val) { +; CHECK-LABEL: @outer2( +; CHECK-NOT: call i32 @inner2 + %C = call i32 @inner2(i1 true, i32 %val) + ret i32 %C +} + +define i32 @inner2(i1 %cond, i32 %val) { + %select = select i1 %cond, i32 1, i32 %val ; Simplifies to 1 + call void @pad() + store i32 0, i32* @glbl + ret i32 %select ; Simplifies to ret i32 1 +} + + +define i32 @outer3(i32 %val) { +; CHECK-LABEL: @outer3( +; CHECK-NOT: call i32 @inner3 + %C = call i32 @inner3(i1 false, i32 %val) + ret i32 
%C +} + +define i32 @inner3(i1 %cond, i32 %val) { + %select = select i1 %cond, i32 %val, i32 -1 ; Simplifies to -1 + call void @pad() + store i32 0, i32* @glbl + ret i32 %select ; Simplifies to ret i32 -1 +} + + +define i32 @outer4() { +; CHECK-LABEL: @outer4( +; CHECK-NOT: call i32 @inner4 + %C = call i32 @inner4(i1 true, i32 1, i32 -1) + ret i32 %C +} + +define i32 @inner4(i1 %cond, i32 %val1, i32 %val2) { + %select = select i1 %cond, i32 %val1, i32 %val2 ; Simplifies to 1 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %select ; Simplifies to ret i32 1 +} + + +define i1 @outer5() { +; CHECK-LABEL: @outer5( +; CHECK-NOT: call i1 @inner5 + %C = call i1 @inner5(i1 true, i1 true, i1 false) + ret i1 %C +} + +declare void @dead() + +define i1 @inner5(i1 %cond, i1 %val1, i1 %val2) { + %select = select i1 %cond, i1 %val1, i1 %val2 ; Simplifies to true + br i1 %select, label %exit, label %isfalse ; Simplifies to br label %exit + +isfalse: ; This block is unreachable once inlined + call void @dead() + br label %exit + +exit: + store i32 0, i32* @glbl + ret i1 %select ; Simplifies to ret i1 true +} + + +define i32 @outer6(i1 %cond) { +; CHECK-LABEL: @outer6( +; CHECK-NOT: call i32 @inner6 + %A = alloca i32 + %C = call i32 @inner6(i1 %cond, i32* %A) + ret i32 %C +} + +define i32 @inner6(i1 %cond, i32* %ptr) { + %G1 = getelementptr inbounds i32, i32* %ptr, i32 1 + %G2 = getelementptr inbounds i32, i32* %G1, i32 1 + %G3 = getelementptr inbounds i32, i32* %ptr, i32 2 + %select = select i1 %cond, i32* %G2, i32* %G3 ; Simplified to %A[2] + %load = load i32, i32* %select ; SROA'ed + call void @pad() + ret i32 %load ; Simplified +} + + +define i32 @outer7(i32* %ptr) { +; CHECK-LABEL: @outer7( +; CHECK-NOT: call i32 @inner7 + %A = alloca i32 + %C = call i32 @inner7(i1 true, i32* %A, i32* %ptr) + ret i32 %C +} + +define i32 @inner7(i1 %cond, i32* %p1, i32* %p2) { + %select = select i1 %cond, i32* %p1, i32* %p2 ; Simplifies to %A + %load = load i32, 
i32* %select ; SROA'ed + call void @pad() + store i32 0, i32* @glbl + ret i32 %load ; Simplified +} + + +define i32 @outer8(i32* %ptr) { +; CHECK-LABEL: @outer8( +; CHECK-NOT: call i32 @inner8 + %A = alloca i32 + %C = call i32 @inner8(i1 false, i32* %ptr, i32* %A) + ret i32 %C +} + +define i32 @inner8(i1 %cond, i32* %p1, i32* %p2) { + %select = select i1 %cond, i32* %p1, i32* %p2 ; Simplifies to %A + %load = load i32, i32* %select ; SROA'ed + call void @pad() + store i32 0, i32* @glbl + ret i32 %load ; Simplified +} + + +define <2 x i32> @outer9(<2 x i32> %val) { +; CHECK-LABEL: @outer9( +; CHECK-NOT: call <2 x i32> @inner9 + %C = call <2 x i32> @inner9(<2 x i1> <i1 true, i1 true>, <2 x i32> %val) + ret <2 x i32> %C +} + +define <2 x i32> @inner9(<2 x i1> %cond, <2 x i32> %val) { + %select = select <2 x i1> %cond, <2 x i32> <i32 1, i32 1>, <2 x i32> %val ; Simplifies to <1, 1> + call void @pad() + store i32 0, i32* @glbl + ret <2 x i32> %select ; Simplifies to ret <2 x i32> <1, 1> +} + + +define <2 x i32> @outer10(<2 x i32> %val) { +; CHECK-LABEL: @outer10( +; CHECK-NOT: call <2 x i32> @inner10 + %C = call <2 x i32> @inner10(<2 x i1> <i1 false, i1 false>, <2 x i32> %val) + ret <2 x i32> %C +} + +define <2 x i32> @inner10(<2 x i1> %cond, <2 x i32> %val) { + %select = select <2 x i1> %cond, < 2 x i32> %val, <2 x i32> <i32 -1, i32 -1> ; Simplifies to <-1, -1> + call void @pad() + store i32 0, i32* @glbl + ret <2 x i32> %select ; Simplifies to ret <2 x i32> <-1, -1> +} + + +define <2 x i32> @outer11() { +; CHECK-LABEL: @outer11( +; CHECK-NOT: call <2 x i32> @inner11 + %C = call <2 x i32> @inner11(<2 x i1> <i1 true, i1 false>) + ret <2 x i32> %C +} + +define <2 x i32> @inner11(<2 x i1> %cond) { + %select = select <2 x i1> %cond, <2 x i32> <i32 1, i32 1>, < 2 x i32> <i32 -1, i32 -1> ; Simplifies to <1, -1> + call void @pad() + ret <2 x i32> %select ; Simplifies to ret <2 x i32> <1, -1> +} + + +define i1 @outer12(i32* %ptr) { +; CHECK-LABEL: @outer12( +; CHECK-NOT: call 
i1 @inner12 + %C = call i1 @inner12(i1 true, i32* @glbl, i32* %ptr) + ret i1 %C +} + +define i1 @inner12(i1 %cond, i32* %ptr1, i32* %ptr2) { + %select = select i1 %cond, i32* %ptr1, i32* %ptr2 ; Simplified to @glbl + %cmp = icmp eq i32* %select, @glbl ; Simplified to true + call void @pad() + store i32 0, i32* @glbl + ret i1 %cmp ; Simplifies to ret i1 true +} + + +define <2 x i32> @outer13(<2 x i32> %val1, <2 x i32> %val2) { +; CHECK-LABEL: @outer13( +; CHECK: call <2 x i32> @inner13 + %C = call <2 x i32> @inner13(<2 x i1> <i1 true, i1 false>, <2 x i32> %val1, <2 x i32> %val2) + ret <2 x i32> %C +} + +define <2 x i32> @inner13(<2 x i1> %cond, <2 x i32> %val1, < 2 x i32> %val2) { + %select = select <2 x i1> %cond, <2 x i32> %val1, < 2 x i32> %val2 ; Cannot be Simplified + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret <2 x i32> %select ; Simplified +} + + +define i32 @outer14(i32 %val1, i32 %val2) { +; CHECK-LABEL: @outer14( +; CHECK-NOT: call i32 @inner14 + %C = call i32 @inner14(i1 true, i32 %val1, i32 %val2) + ret i32 %C +} + +define i32 @inner14(i1 %cond, i32 %val1, i32 %val2) { + %select = select i1 %cond, i32 %val1, i32 %val2 ; Simplified to %val1 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %select ; Simplifies to ret i32 %val1 +} + + +define i32 @outer15(i32 %val1, i32 %val2) { +; CHECK-LABEL: @outer15( +; CHECK-NOT: call i32 @inner15 + %C = call i32 @inner15(i1 false, i32 %val1, i32 %val2) + ret i32 %C +} + +define i32 @inner15(i1 %cond, i32 %val1, i32 %val2) { + %select = select i1 %cond, i32 %val1, i32 %val2 ; Simplified to %val2 + call void @pad() + store i32 0, i32* @glbl + store i32 1, i32* @glbl + ret i32 %select ; Simplifies to ret i32 %val2 +} diff --git a/llvm/test/Transforms/Inline/AArch64/switch.ll b/llvm/test/Transforms/Inline/AArch64/switch.ll new file mode 100644 index 00000000000..154956e2b75 --- /dev/null +++ b/llvm/test/Transforms/Inline/AArch64/switch.ll @@ -0,0 +1,160 @@ +; 
RUN: opt < %s -inline -inline-threshold=20 -S -mtriple=aarch64-none-linux | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S -mtriple=aarch64-none-linux | FileCheck %s + +define i32 @callee_range(i32 %a, i32* %P) { + switch i32 %a, label %sw.default [ + i32 0, label %sw.bb0 + i32 1000, label %sw.bb1 + i32 2000, label %sw.bb1 + i32 3000, label %sw.bb1 + i32 4000, label %sw.bb1 + i32 5000, label %sw.bb1 + i32 6000, label %sw.bb1 + i32 7000, label %sw.bb1 + i32 8000, label %sw.bb1 + i32 9000, label %sw.bb1 + ] + +sw.default: + store volatile i32 %a, i32* %P + br label %return +sw.bb0: + store volatile i32 %a, i32* %P + br label %return +sw.bb1: + store volatile i32 %a, i32* %P + br label %return +return: + ret i32 42 +} + +define i32 @caller_range(i32 %a, i32* %P) { +; CHECK-LABEL: @caller_range( +; CHECK: call i32 @callee_range + %r = call i32 @callee_range(i32 %a, i32* %P) + ret i32 %r +} + +define i32 @callee_bittest(i32 %a, i32* %P) { + switch i32 %a, label %sw.default [ + i32 0, label %sw.bb0 + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb0 + i32 4, label %sw.bb1 + i32 5, label %sw.bb2 + i32 6, label %sw.bb0 + i32 7, label %sw.bb1 + i32 8, label %sw.bb2 + ] + +sw.default: + store volatile i32 %a, i32* %P + br label %return + +sw.bb0: + store volatile i32 %a, i32* %P + br label %return + +sw.bb1: + store volatile i32 %a, i32* %P + br label %return + +sw.bb2: + br label %return + +return: + ret i32 42 +} + + +define i32 @caller_bittest(i32 %a, i32* %P) { +; CHECK-LABEL: @caller_bittest( +; CHECK-NOT: call i32 @callee_bittest + %r= call i32 @callee_bittest(i32 %a, i32* %P) + ret i32 %r +} + +define i32 @callee_jumptable(i32 %a, i32* %P) { + switch i32 %a, label %sw.default [ + i32 1001, label %sw.bb101 + i32 1002, label %sw.bb102 + i32 1003, label %sw.bb103 + i32 1004, label %sw.bb104 + i32 1005, label %sw.bb101 + i32 1006, label %sw.bb102 + i32 1007, label %sw.bb103 + i32 1008, label %sw.bb104 + i32 1009, label 
%sw.bb101 + i32 1010, label %sw.bb102 + i32 1011, label %sw.bb103 + i32 1012, label %sw.bb104 + ] + +sw.default: + br label %return + +sw.bb101: + store volatile i32 %a, i32* %P + br label %return + +sw.bb102: + store volatile i32 %a, i32* %P + br label %return + +sw.bb103: + store volatile i32 %a, i32* %P + br label %return + +sw.bb104: + store volatile i32 %a, i32* %P + br label %return + +return: + ret i32 42 +} + +define i32 @caller_jumptable(i32 %a, i32 %b, i32* %P) { +; CHECK-LABEL: @caller_jumptable( +; CHECK: call i32 @callee_jumptable + %r = call i32 @callee_jumptable(i32 %b, i32* %P) + ret i32 %r +} + + +define internal i32 @callee_negativeCost(i32 %t) { +entry: + switch i32 %t, label %sw.default [ + i32 1, label %sw.bb + i32 0, label %sw.bb1 + i32 42, label %sw.bb2 + i32 43, label %sw.bb3 + ] + +sw.bb: ; preds = %entry + br label %cleanup + +sw.bb1: ; preds = %entry + br label %cleanup + +sw.bb2: ; preds = %entry + br label %cleanup + +sw.bb3: ; preds = %entry + br label %cleanup + +sw.default: ; preds = %entry + br label %cleanup + +cleanup: ; preds = %sw.default, %sw.bb3, %sw.bb2, %sw.bb1, %sw.bb + %retval.0 = phi i32 [ 1, %sw.default ], [ 3, %sw.bb3 ], [ 2, %sw.bb2 ], [ 0, %sw.bb1 ], [ 0, %sw.bb ] + ret i32 %retval.0 +} + +define i32 @caller_negativeCost(i32 %t) { +; CHECK-LABEL: @caller_negativeCost( +; CHECK-NOT: call i32 @callee_negativeCost +entry: + %call = call i32 @callee_negativeCost(i32 %t) + ret i32 %call +} diff --git a/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-dx10-clamp.ll b/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-dx10-clamp.ll new file mode 100644 index 00000000000..f25904e66a4 --- /dev/null +++ b/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-dx10-clamp.ll @@ -0,0 +1,107 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +define i32 @func_default() #0 { + ret i32 0 +} + +define i32 @func_dx10_clamp_enabled() #1 
{ + ret i32 0 +} + +define i32 @func_dx10_clamp_disabled() #2 { + ret i32 0 +} + +; CHECK-LABEL: @default_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @dx10_clamp_enabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @dx10_clamp_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @dx10_clamp_enabled_call_dx10_clamp_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @dx10_clamp_enabled_call_dx10_clamp_enabled() #1 { + %call = call i32 @func_dx10_clamp_enabled() + ret i32 %call +} + +; CHECK-LABEL: @dx10_clamp_enabled_call_dx10_clamp_disabled( +; CHECK-NEXT: call i32 @func_dx10_clamp_disabled() +define i32 @dx10_clamp_enabled_call_dx10_clamp_disabled() #1 { + %call = call i32 @func_dx10_clamp_disabled() + ret i32 %call +} + +; CHECK-LABEL: @dx10_clamp_disabled_call_default( +; CHECK-NEXT: call i32 @func_default() +define i32 @dx10_clamp_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @dx10_clamp_disabled_call_dx10_clamp_enabled( +; CHECK-NEXT: call i32 @func_dx10_clamp_enabled() +define i32 @dx10_clamp_disabled_call_dx10_clamp_enabled() #2 { + %call = call i32 @func_dx10_clamp_enabled() + ret i32 %call +} + +; CHECK-LABEL: @dx10_clamp_disabled_call_dx10_clamp_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @dx10_clamp_disabled_call_dx10_clamp_disabled() #2 { + %call = call i32 @func_dx10_clamp_disabled() + ret i32 %call +} + +; Shader calling a compute function +; CHECK-LABEL: @amdgpu_ps_default_call_default( +; CHECK-NEXT: call i32 @func_default() +define amdgpu_ps i32 @amdgpu_ps_default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; Shader with dx10_clamp enabled calling a compute function. Default +; also implies ieee_mode, so this isn't inlinable. 
+; CHECK-LABEL: @amdgpu_ps_dx10_clamp_enabled_call_default( +; CHECK-NEXT: call i32 @func_default() +define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @amdgpu_ps_dx10_clamp_disabled_call_default( +; CHECK-NEXT: call i32 @func_default() +define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @amdgpu_ps_dx10_clamp_enabled_ieee_call_default( +; CHECK-NEXT: ret i32 0 +define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_enabled_ieee_call_default() #3 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @amdgpu_ps_dx10_clamp_disabled_ieee_call_default( +; CHECK-NEXT: call i32 @func_default() +define amdgpu_ps i32 @amdgpu_ps_dx10_clamp_disabled_ieee_call_default() #4 { + %call = call i32 @func_default() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "amdgpu-dx10-clamp"="true" } +attributes #2 = { nounwind "amdgpu-dx10-clamp"="false" } +attributes #3 = { nounwind "amdgpu-dx10-clamp"="true" "amdgpu-ieee"="true" } +attributes #4 = { nounwind "amdgpu-dx10-clamp"="false" "amdgpu-ieee"="true" } diff --git a/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-ieee.ll b/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-ieee.ll new file mode 100644 index 00000000000..cfb08a9ff73 --- /dev/null +++ b/llvm/test/Transforms/Inline/AMDGPU/inline-amdgpu-ieee.ll @@ -0,0 +1,90 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +define i32 @func_default() #0 { + ret i32 0 +} + +define i32 @func_ieee_enabled() #1 { + ret i32 0 +} + +define i32 @func_ieee_disabled() #2 { + ret i32 0 +} + +; CHECK-LABEL: @default_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: 
@ieee_enabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @ieee_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ieee_enabled_call_ieee_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @ieee_enabled_call_ieee_enabled() #1 { + %call = call i32 @func_ieee_enabled() + ret i32 %call +} + +; CHECK-LABEL: @ieee_enabled_call_ieee_disabled( +; CHECK-NEXT: call i32 @func_ieee_disabled() +define i32 @ieee_enabled_call_ieee_disabled() #1 { + %call = call i32 @func_ieee_disabled() + ret i32 %call +} + +; CHECK-LABEL: @ieee_disabled_call_default( +; CHECK-NEXT: call i32 @func_default() +define i32 @ieee_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ieee_disabled_call_ieee_enabled( +; CHECK-NEXT: call i32 @func_ieee_enabled() +define i32 @ieee_disabled_call_ieee_enabled() #2 { + %call = call i32 @func_ieee_enabled() + ret i32 %call +} + +; CHECK-LABEL: @ieee_disabled_call_ieee_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @ieee_disabled_call_ieee_disabled() #2 { + %call = call i32 @func_ieee_disabled() + ret i32 %call +} + +; Shader calling a compute function +; CHECK-LABEL: @amdgpu_ps_default_call_default( +; CHECK-NEXT: call i32 @func_default() +define amdgpu_ps i32 @amdgpu_ps_default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; Shader with ieee enabled calling a compute function +; CHECK-LABEL: @amdgpu_ps_ieee_enabled_call_default( +; CHECK-NEXT: ret i32 0 +define amdgpu_ps i32 @amdgpu_ps_ieee_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @amdgpu_ps_ieee_disabled_call_default( +; CHECK-NEXT: call i32 @func_default() +define amdgpu_ps i32 @amdgpu_ps_ieee_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "amdgpu-ieee"="true" } +attributes #2 = { nounwind "amdgpu-ieee"="false" } diff --git 
a/llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll b/llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll new file mode 100644 index 00000000000..87330c72d44 --- /dev/null +++ b/llvm/test/Transforms/Inline/AMDGPU/inline-target-cpu.ll @@ -0,0 +1,103 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +; CHECK-LABEL: @func_no_target_cpu( +define i32 @func_no_target_cpu() #0 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_call_no_target_cpu( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_call_no_target_cpu() #1 { + %call = call i32 @func_no_target_cpu() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_features_call_no_target_cpu( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_features_call_no_target_cpu() #2 { + %call = call i32 @func_no_target_cpu() + ret i32 %call +} + +; CHECK-LABEL: @fp32_denormals( +define i32 @fp32_denormals() #3 { + ret i32 0 +} + +; CHECK-LABEL: @no_fp32_denormals_call_f32_denormals( +; CHECK-NEXT: call i32 @fp32_denormals() +define i32 @no_fp32_denormals_call_f32_denormals() #4 { + %call = call i32 @fp32_denormals() + ret i32 %call +} + +; Make sure gfx9 can call unspecified functions because of movrel +; feature change. 
+; CHECK-LABEL: @gfx9_target_features_call_no_target_cpu( +; CHECK-NEXT: ret i32 0 +define i32 @gfx9_target_features_call_no_target_cpu() #5 { + %call = call i32 @func_no_target_cpu() + ret i32 %call +} + +define i32 @func_no_halfrate64ops() #6 { + ret i32 0 +} + +define i32 @func_with_halfrate64ops() #7 { + ret i32 0 +} + +; CHECK-LABEL: @call_func_without_halfrate64ops( +; CHECK-NEXT: ret i32 0 +define i32 @call_func_without_halfrate64ops() #7 { + %call = call i32 @func_no_halfrate64ops() + ret i32 %call +} + +; CHECK-LABEL: @call_func_with_halfrate64ops( +; CHECK-NEXT: ret i32 0 +define i32 @call_func_with_halfrate64ops() #6 { + %call = call i32 @func_with_halfrate64ops() + ret i32 %call +} + +define i32 @func_no_loadstoreopt() #8 { + ret i32 0 +} + +define i32 @func_with_loadstoreopt() #9 { + ret i32 0 +} + +; CHECK-LABEL: @call_func_without_loadstoreopt( +; CHECK-NEXT: ret i32 0 +define i32 @call_func_without_loadstoreopt() #9 { + %call = call i32 @func_no_loadstoreopt() + ret i32 %call +} + +define i32 @enable_codeobjectv3() #10 { + ret i32 999 +} + +; CHECK-LABEL: @disable_codeobjectv3_call_codeobjectv3( +; CHECK-NEXT: ret i32 999 +define i32 @disable_codeobjectv3_call_codeobjectv3() #11 { + %call = call i32 @enable_codeobjectv3() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "target-cpu"="fiji" } +attributes #2 = { nounwind "target-cpu"="fiji" "target-features"="+fp32-denormals" } +attributes #3 = { nounwind "target-features"="+fp32-denormals" } +attributes #4 = { nounwind "target-features"="-fp32-denormals" } +attributes #5 = { nounwind "target-cpu"="gfx900" } +attributes #6 = { nounwind "target-features"="-half-rate-64-ops" } +attributes #7 = { nounwind "target-features"="+half-rate-64-ops" } +attributes #8 = { nounwind "target-features"="-load-store-opt" } +attributes #9 = { nounwind "target-features"="+load-store-opt" } +attributes #10 = { nounwind "target-features"="+code-object-v3" } +attributes #11 = { nounwind 
"target-features"="-code-object-v3" } diff --git a/llvm/test/Transforms/Inline/AMDGPU/inline-target-feature-sram-ecc.ll b/llvm/test/Transforms/Inline/AMDGPU/inline-target-feature-sram-ecc.ll new file mode 100644 index 00000000000..d7aa65d753c --- /dev/null +++ b/llvm/test/Transforms/Inline/AMDGPU/inline-target-feature-sram-ecc.ll @@ -0,0 +1,70 @@ +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -inline < %s | FileCheck %s +; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -passes='cgscc(inline)' < %s | FileCheck %s + +; sram-ecc can be safely ignored when inlining, since no intrinsics +; or other directly exposed operations depend on it. + +define i32 @func_default() #0 { + ret i32 0 +} + +define i32 @func_ecc_enabled() #1 { + ret i32 0 +} + +define i32 @func_ecc_disabled() #2 { + ret i32 0 +} + +; CHECK-LABEL: @default_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @default_call_default() #0 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ecc_enabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_enabled_call_default() #1 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ecc_enabled_call_ecc_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_enabled_call_ecc_enabled() #1 { + %call = call i32 @func_ecc_enabled() + ret i32 %call +} + +; CHECK-LABEL: @ecc_enabled_call_ecc_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_enabled_call_ecc_disabled() #1 { + %call = call i32 @func_ecc_disabled() + ret i32 %call +} + +; CHECK-LABEL: @ecc_disabled_call_default( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_disabled_call_default() #2 { + %call = call i32 @func_default() + ret i32 %call +} + +; CHECK-LABEL: @ecc_disabled_call_ecc_enabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_disabled_call_ecc_enabled() #2 { + %call = call i32 @func_ecc_enabled() + ret i32 %call +} + +; CHECK-LABEL: @ecc_disabled_call_ecc_disabled( +; CHECK-NEXT: ret i32 0 +define i32 @ecc_disabled_call_ecc_disabled() #2 { + %call = call i32 
@func_ecc_disabled() + ret i32 %call +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind "target-features"="+sram-ecc" } +attributes #2 = { nounwind "target-features"="-sram-ecc" } diff --git a/llvm/test/Transforms/Inline/AMDGPU/lit.local.cfg b/llvm/test/Transforms/Inline/AMDGPU/lit.local.cfg new file mode 100644 index 00000000000..2a665f06be7 --- /dev/null +++ b/llvm/test/Transforms/Inline/AMDGPU/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'AMDGPU' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/Inline/ARM/inline-fp.ll b/llvm/test/Transforms/Inline/ARM/inline-fp.ll new file mode 100644 index 00000000000..be3dd2a93fd --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/inline-fp.ll @@ -0,0 +1,113 @@ +; RUN: opt -S -inline -mtriple=arm-eabi -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=NOFP +; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2 -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=FULLFP +; RUN: opt -S -inline -mtriple=arm-eabi -mattr=+vfp2,+fp-only-sp -pass-remarks=.* -pass-remarks-missed=.* < %s 2>&1 | FileCheck %s -check-prefix=SINGLEFP +; Make sure that soft float implementations are calculated as being more expensive +; to the inliner. 
+ +; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single not inlined into test_single because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single_cheap inlined into test_single_cheap with (cost=-15, threshold=75) +; NOFP-DAG: single_cheap inlined into test_single_cheap with (cost=-15015, threshold=75) +; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; NOFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) + +; FULLFP-DAG: single inlined into test_single with (cost=0, threshold=75) +; FULLFP-DAG: single inlined into test_single with (cost=-15000, threshold=75) +; FULLFP-DAG: single_cheap inlined into test_single_cheap with (cost=-15, threshold=75) +; FULLFP-DAG: single_cheap inlined into test_single_cheap with (cost=-15015, threshold=75) +; FULLFP-DAG: double inlined into test_double with (cost=0, threshold=75) +; FULLFP-DAG: double inlined into test_double with (cost=-15000, threshold=75) +; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; FULLFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) + +; SINGLEFP-DAG: single inlined into test_single with (cost=0, threshold=75) +; SINGLEFP-DAG: single inlined into test_single with (cost=-15000, threshold=75) +; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with (cost=-15, threshold=75) +; SINGLEFP-DAG: single_cheap inlined into test_single_cheap with (cost=-15015, threshold=75) +; SINGLEFP-DAG: double not inlined into test_double because too 
costly to inline (cost=125, threshold=75) +; SINGLEFP-DAG: double not inlined into test_double because too costly to inline (cost=125, threshold=75) +; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) +; SINGLEFP-DAG: single_force_soft not inlined into test_single_force_soft because too costly to inline (cost=125, threshold=75) + +define i32 @test_single(i32 %a, i8 %b, i32 %c, i8 %d) #0 { + %call = call float @single(i32 %a, i8 zeroext %b) + %call2 = call float @single(i32 %c, i8 zeroext %d) + ret i32 0 +} + +define i32 @test_single_cheap(i32 %a, i8 %b, i32 %c, i8 %d) #0 { + %call = call float @single_cheap(i32 %a, i8 zeroext %b) + %call2 = call float @single_cheap(i32 %c, i8 zeroext %d) + ret i32 0 +} + +define i32 @test_double(i32 %a, i8 %b, i32 %c, i8 %d) #0 { + %call = call double @double(i32 %a, i8 zeroext %b) + %call2 = call double @double(i32 %c, i8 zeroext %d) + ret i32 0 +} + +define i32 @test_single_force_soft(i32 %a, i8 %b, i32 %c, i8 %d) #1 { + %call = call float @single_force_soft(i32 %a, i8 zeroext %b) #1 + %call2 = call float @single_force_soft(i32 %c, i8 zeroext %d) #1 + ret i32 0 +} + +define internal float @single(i32 %response, i8 zeroext %value1) #0 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = sitofp i32 %sub to float + %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1) + %mul = fmul float %0, 2.620000e+03 + %conv2 = sitofp i32 %response to float + %sub3 = fsub float %conv2, %mul + %div = fdiv float %sub3, %mul + ret float %div +} + +define internal float @single_cheap(i32 %response, i8 zeroext %value1) #0 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = bitcast i32 %sub to float + %conv2 = bitcast i32 %response to float + %0 = tail call float @llvm.pow.f32(float %conv2, float %conv1) + %1 = tail call float @llvm.pow.f32(float %0, float %0) + %2 = tail call float 
@llvm.pow.f32(float %1, float %1) + ret float %2 +} + +define internal double @double(i32 %response, i8 zeroext %value1) #0 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = sitofp i32 %sub to double + %0 = tail call double @llvm.pow.f64(double 0x3FF028F5C0000000, double %conv1) + %mul = fmul double %0, 2.620000e+03 + %conv2 = sitofp i32 %response to double + %sub3 = fsub double %conv2, %mul + %div = fdiv double %sub3, %mul + ret double %div +} + +define internal float @single_force_soft(i32 %response, i8 zeroext %value1) #1 { +entry: + %conv = zext i8 %value1 to i32 + %sub = add nsw i32 %conv, -1 + %conv1 = sitofp i32 %sub to float + %0 = tail call float @llvm.pow.f32(float 0x3FF028F5C0000000, float %conv1) + %mul = fmul float %0, 2.620000e+03 + %conv2 = sitofp i32 %response to float + %sub3 = fsub float %conv2, %mul + %div = fdiv float %sub3, %mul + ret float %div +} + +declare float @llvm.pow.f32(float, float) optsize minsize +declare double @llvm.pow.f64(double, double) optsize minsize + +attributes #0 = { optsize } +attributes #1 = { optsize "use-soft-float"="true" "target-features"="+soft-float" } diff --git a/llvm/test/Transforms/Inline/ARM/inline-target-attr.ll b/llvm/test/Transforms/Inline/ARM/inline-target-attr.ll new file mode 100644 index 00000000000..5bbecd20352 --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/inline-target-attr.ll @@ -0,0 +1,60 @@ +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -inline | FileCheck %s +; RUN: opt < %s -mtriple=arm-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s +; Check that we only inline when we have compatible target attributes. +; ARM has implemented a target attribute that will verify that the attribute +; sets are compatible. + +define i32 @foo() #0 { +entry: + %call = call i32 (...) @baz() + ret i32 %call +; CHECK-LABEL: foo +; CHECK: call i32 (...) @baz() +} +declare i32 @baz(...) 
#0 + +define i32 @bar() #1 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: bar +; CHECK: call i32 (...) @baz() +} + +define i32 @qux() #0 { +entry: + %call = call i32 @bar() + ret i32 %call +; CHECK-LABEL: qux +; CHECK: call i32 @bar() +} + +define i32 @thumb_fn() #2 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: thumb_fn +; CHECK: call i32 @foo +} + +define i32 @strict_align() #3 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: strict_align +; CHECK: call i32 (...) @baz() +} + +define i32 @soft_float_fn() #4 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: soft_float_fn +; CHECK: call i32 @foo +} + +attributes #0 = { "target-cpu"="generic" "target-features"="+dsp,+neon" } +attributes #1 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16" } +attributes #2 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+thumb-mode" } +attributes #3 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+strict-align" } +attributes #4 = { "target-cpu"="generic" "target-features"="+dsp,+neon,+fp16,+soft-float" } diff --git a/llvm/test/Transforms/Inline/ARM/lit.local.cfg b/llvm/test/Transforms/Inline/ARM/lit.local.cfg new file mode 100644 index 00000000000..236e1d34416 --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/lit.local.cfg @@ -0,0 +1,2 @@ +if not 'ARM' in config.root.targets: + config.unsupported = True diff --git a/llvm/test/Transforms/Inline/ARM/loop-add.ll b/llvm/test/Transforms/Inline/ARM/loop-add.ll new file mode 100644 index 00000000000..a4717bc95b7 --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/loop-add.ll @@ -0,0 +1,95 @@ +; RUN: opt -inline %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; CHECK-LABEL: void @doCalls +define void @doCalls(i8* nocapture %p1, i8* nocapture %p2, i32 %n) #0 { +entry: + %div = lshr i32 %n, 1 +; CHECK: call void @LoopCall + tail call void 
@LoopCall(i8* %p1, i8* %p2, i32 %div) #0 + + %div2 = lshr i32 %n, 2 +; CHECK: call void @LoopCall + tail call void @LoopCall(i8* %p1, i8* %p2, i32 %div2) #0 + +; CHECK-NOT: call void @LoopCall + tail call void @LoopCall(i8* %p2, i8* %p1, i32 0) #0 + +; CHECK-NOT: call void @LoopCall_internal + tail call void @LoopCall_internal(i8* %p1, i8* %p2, i32 %div2) #0 + + %div3 = lshr i32 %n, 4 +; CHECK-NOT: call void @SimpleCall + tail call void @SimpleCall(i8* %p2, i8* %p1, i32 %div3) #0 + ret void +} + +; CHECK-LABEL: define void @LoopCall +define void @LoopCall(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + %c = icmp ne i32 %num, 0 + br i1 %c, label %while.cond, label %while.end + +while.cond: ; preds = %while.body, %entry + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %while.body ] + %p_dest.0 = phi i8* [ %dest, %entry ], [ %incdec.ptr2, %while.body ] + %p_source.0 = phi i8* [ %source, %entry ], [ %incdec.ptr, %while.body ] + %cmp = icmp eq i32 %num.addr.0, 0 + br i1 %cmp, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %incdec.ptr = getelementptr inbounds i8, i8* %p_source.0, i32 1 + %0 = load i8, i8* %p_source.0, align 1 + %1 = trunc i32 %num.addr.0 to i8 + %conv1 = add i8 %0, %1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %p_dest.0, i32 1 + store i8 %conv1, i8* %p_dest.0, align 1 + %dec = add i32 %num.addr.0, -1 + br label %while.cond + +while.end: ; preds = %while.cond + ret void +} + +; CHECK-LABEL-NOT: define void @LoopCall_internal +define internal void @LoopCall_internal(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + %c = icmp ne i32 %num, 0 + br i1 %c, label %while.cond, label %while.end + +while.cond: ; preds = %while.body, %entry + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %while.body ] + %p_dest.0 = phi i8* [ %dest, %entry ], [ %incdec.ptr2, %while.body ] + %p_source.0 = phi i8* [ %source, %entry ], [ %incdec.ptr, %while.body ] + %cmp = icmp eq i32 
%num.addr.0, 0 + br i1 %cmp, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %incdec.ptr = getelementptr inbounds i8, i8* %p_source.0, i32 1 + %0 = load i8, i8* %p_source.0, align 1 + %1 = trunc i32 %num.addr.0 to i8 + %conv1 = add i8 %0, %1 + %incdec.ptr2 = getelementptr inbounds i8, i8* %p_dest.0, i32 1 + store i8 %conv1, i8* %p_dest.0, align 1 + %dec = add i32 %num.addr.0, -1 + br label %while.cond + +while.end: ; preds = %while.cond + ret void +} + +; CHECK-LABEL: define void @SimpleCall +define void @SimpleCall(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + %arrayidx = getelementptr inbounds i8, i8* %source, i32 %num + %0 = load i8, i8* %arrayidx, align 1 + %1 = xor i8 %0, 127 + %arrayidx2 = getelementptr inbounds i8, i8* %dest, i32 %num + store i8 %1, i8* %arrayidx2, align 1 + ret void +} + +attributes #0 = { minsize optsize } + diff --git a/llvm/test/Transforms/Inline/ARM/loop-memcpy.ll b/llvm/test/Transforms/Inline/ARM/loop-memcpy.ll new file mode 100644 index 00000000000..3b3625c6027 --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/loop-memcpy.ll @@ -0,0 +1,87 @@ +; RUN: opt -inline %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; CHECK-LABEL: define void @matcpy +define void @matcpy(i8* %dest, i8* %source, i32 %num) #0 { +entry: + %0 = ptrtoint i8* %dest to i32 + %1 = ptrtoint i8* %source to i32 + %2 = xor i32 %0, %1 + %3 = and i32 %2, 3 + %cmp = icmp eq i32 %3, 0 + br i1 %cmp, label %if.then, label %if.else20 + +if.then: ; preds = %entry + %sub = sub i32 0, %0 + %and2 = and i32 %sub, 3 + %add = or i32 %and2, 4 + %cmp3 = icmp ugt i32 %add, %num + br i1 %cmp3, label %if.else, label %if.then4 + +if.then4: ; preds = %if.then + %sub5 = sub i32 %num, %and2 + %shr = and i32 %sub5, -4 + %sub7 = sub i32 %sub5, %shr + %tobool = icmp eq i32 %and2, 0 + br i1 %tobool, label %if.end, label %if.then8 + +if.then8: ; 
preds = %if.then4 +; CHECK: call fastcc void @memcpy + call fastcc void @memcpy(i8* %dest, i8* %source, i32 %and2) #0 + %add.ptr = getelementptr inbounds i8, i8* %dest, i32 %and2 + %add.ptr9 = getelementptr inbounds i8, i8* %source, i32 %and2 + br label %if.end + +if.end: ; preds = %if.then4, %if.then8 + %p_dest.0 = phi i8* [ %add.ptr, %if.then8 ], [ %dest, %if.then4 ] + %p_source.0 = phi i8* [ %add.ptr9, %if.then8 ], [ %source, %if.then4 ] + %tobool14 = icmp eq i32 %sub7, 0 + br i1 %tobool14, label %if.end22, label %if.then15 + +if.then15: ; preds = %if.end + %add.ptr13 = getelementptr inbounds i8, i8* %p_source.0, i32 %shr + %add.ptr11 = getelementptr inbounds i8, i8* %p_dest.0, i32 %shr +; CHECK: call fastcc void @memcpy + call fastcc void @memcpy(i8* %add.ptr11, i8* %add.ptr13, i32 %sub7) #0 + br label %if.end22 + +if.else: ; preds = %if.then + call fastcc void @memcpy(i8* %dest, i8* %source, i32 %num) #0 + br label %if.end22 + +if.else20: ; preds = %entry + call fastcc void @memcpy(i8* %dest, i8* %source, i32 %num) #0 + br label %if.end22 + +if.end22: ; preds = %if.then15, %if.end, %if.else, %if.else20 + ret void +} + +; CHECK-LABEL: define internal void @memcpy +define internal void @memcpy(i8* nocapture %dest, i8* nocapture readonly %source, i32 %num) #0 { +entry: + br label %while.cond + +while.cond: ; preds = %while.body, %entry + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %while.body ] + %p_dest.0 = phi i8* [ %dest, %entry ], [ %incdec.ptr1, %while.body ] + %p_source.0 = phi i8* [ %source, %entry ], [ %incdec.ptr, %while.body ] + %cmp = icmp eq i32 %num.addr.0, 0 + br i1 %cmp, label %while.end, label %while.body + +while.body: ; preds = %while.cond + %incdec.ptr = getelementptr inbounds i8, i8* %p_source.0, i32 1 + %0 = load i8, i8* %p_source.0, align 1 + %incdec.ptr1 = getelementptr inbounds i8, i8* %p_dest.0, i32 1 + store i8 %0, i8* %p_dest.0, align 1 + %dec = add i32 %num.addr.0, -1 + br label %while.cond + +while.end: ; preds = %while.cond + ret 
void +} + +attributes #0 = { minsize optsize } + diff --git a/llvm/test/Transforms/Inline/ARM/loop-noinline.ll b/llvm/test/Transforms/Inline/ARM/loop-noinline.ll new file mode 100644 index 00000000000..8438d16b03e --- /dev/null +++ b/llvm/test/Transforms/Inline/ARM/loop-noinline.ll @@ -0,0 +1,49 @@ +; RUN: opt -inline %s -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7m-arm-none-eabi" + +; Check we don't inline loops at -Oz. They tend to be larger than we +; expect. + +; CHECK: define i8* @H +@digits = constant [16 x i8] c"0123456789ABCDEF", align 1 +define i8* @H(i8* %p, i32 %val, i32 %num) #0 { +entry: + br label %do.body + +do.body: ; preds = %do.body, %entry + %p.addr.0 = phi i8* [ %p, %entry ], [ %incdec.ptr, %do.body ] + %val.addr.0 = phi i32 [ %val, %entry ], [ %shl, %do.body ] + %num.addr.0 = phi i32 [ %num, %entry ], [ %dec, %do.body ] + %shr = lshr i32 %val.addr.0, 28 + %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* @digits, i32 0, i32 %shr + %0 = load i8, i8* %arrayidx, align 1 + %incdec.ptr = getelementptr inbounds i8, i8* %p.addr.0, i32 1 + store i8 %0, i8* %p.addr.0, align 1 + %shl = shl i32 %val.addr.0, 4 + %dec = add i32 %num.addr.0, -1 + %tobool = icmp eq i32 %dec, 0 + br i1 %tobool, label %do.end, label %do.body + +do.end: ; preds = %do.body + %scevgep = getelementptr i8, i8* %p, i32 %num + ret i8* %scevgep +} + +define nonnull i8* @call1(i8* %p, i32 %val, i32 %num) #0 { +entry: +; CHECK: tail call i8* @H + %call = tail call i8* @H(i8* %p, i32 %val, i32 %num) #0 + ret i8* %call +} + +define nonnull i8* @call2(i8* %p, i32 %val) #0 { +entry: +; CHECK: tail call i8* @H + %call = tail call i8* @H(i8* %p, i32 %val, i32 32) #0 + ret i8* %call +} + +attributes #0 = { minsize optsize } + diff --git a/llvm/test/Transforms/Inline/PR4909.ll b/llvm/test/Transforms/Inline/PR4909.ll new file mode 100644 index 00000000000..86b005c8a15 --- /dev/null +++ 
b/llvm/test/Transforms/Inline/PR4909.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -partial-inliner -disable-output +; RUN: opt < %s -passes=partial-inliner -disable-output + +define i32 @f() { +entry: + br label %return + +return: ; preds = %entry + ret i32 undef +} + +define i32 @g() { +entry: + %0 = call i32 @f() + ret i32 %0 +} diff --git a/llvm/test/Transforms/Inline/PowerPC/ext.ll b/llvm/test/Transforms/Inline/PowerPC/ext.ll new file mode 100644 index 00000000000..f7a409467b2 --- /dev/null +++ b/llvm/test/Transforms/Inline/PowerPC/ext.ll @@ -0,0 +1,140 @@ +; REQUIRES: asserts +; RUN: opt -inline -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s + +target datalayout = "E-m:e-i64:64-n32:64" +target triple = "powerpc64le-ibm-linux-gnu" + +define i16 @outer1(i8* %ptr) { + %C = call i16 @inner1(i8* %ptr) + ret i16 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner1 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i16 @inner1(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i16 + ret i16 %E +} + +define i32 @outer2(i8* %ptr) { + %C = call i32 @inner2(i8* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner2 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner2(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i32 + ret i32 %E +} + +define i32 @outer3(i16* %ptr) { + %C = call i32 @inner3(i16* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner3 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner3(i16* %ptr) { + %L = load i16, i16* %ptr + %E = zext i16 %L to i32 + ret i32 %E +} + +define i32 @outer4(i16* %ptr) { + %C = call i32 @inner4(i16* %ptr) + ret i32 %C +} + +; It is an ExtLoad. 
+; CHECK: Analyzing call of inner4 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner4(i16* %ptr) { + %L = load i16, i16* %ptr + %E = sext i16 %L to i32 + ret i32 %E +} + +define i64 @outer5(i8* %ptr) { + %C = call i64 @inner5(i8* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner5 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner5(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i64 + ret i64 %E +} + +define i64 @outer6(i16* %ptr) { + %C = call i64 @inner6(i16* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner6 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner6(i16* %ptr) { + %L = load i16, i16* %ptr + %E = zext i16 %L to i64 + ret i64 %E +} + +define i64 @outer7(i16* %ptr) { + %C = call i64 @inner7(i16* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner7 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner7(i16* %ptr) { + %L = load i16, i16* %ptr + %E = sext i16 %L to i64 + ret i64 %E +} + +define i64 @outer8(i32* %ptr) { + %C = call i64 @inner8(i32* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner8 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner8(i32* %ptr) { + %L = load i32, i32* %ptr + %E = zext i32 %L to i64 + ret i64 %E +} + +define i64 @outer9(i32* %ptr) { + %C = call i64 @inner9(i32* %ptr) + ret i64 %C +} + +; It is an ExtLoad. 
+; CHECK: Analyzing call of inner9 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner9(i32* %ptr) { + %L = load i32, i32* %ptr + %E = sext i32 %L to i64 + ret i64 %E +} diff --git a/llvm/test/Transforms/Inline/PowerPC/lit.local.cfg b/llvm/test/Transforms/Inline/PowerPC/lit.local.cfg new file mode 100644 index 00000000000..5d33887ff0a --- /dev/null +++ b/llvm/test/Transforms/Inline/PowerPC/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'PowerPC' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/Inline/X86/ext.ll b/llvm/test/Transforms/Inline/X86/ext.ll new file mode 100644 index 00000000000..bffda385279 --- /dev/null +++ b/llvm/test/Transforms/Inline/X86/ext.ll @@ -0,0 +1,201 @@ +; REQUIRES: asserts +; RUN: opt -inline -mtriple=x86_64-unknown-unknown -S -debug-only=inline-cost < %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-unknown" + +define i32 @outer1(i32* %ptr, i32 %i) { + %C = call i32 @inner1(i32* %ptr, i32 %i) + ret i32 %C +} + +; zext from i32 to i64 is free. +; CHECK: Analyzing call of inner1 +; CHECK: NumInstructionsSimplified: 3 +; CHECK: NumInstructions: 4 +define i32 @inner1(i32* %ptr, i32 %i) { + %E = zext i32 %i to i64 + %G = getelementptr inbounds i32, i32* %ptr, i64 %E + %L = load i32, i32* %G + ret i32 %L +} + +define i16 @outer2(i8* %ptr) { + %C = call i16 @inner2(i8* %ptr) + ret i16 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner2 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i16 @inner2(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i16 + ret i16 %E +} + +define i16 @outer3(i8* %ptr) { + %C = call i16 @inner3(i8* %ptr) + ret i16 %C +} + +; It is an ExtLoad. 
+; CHECK: Analyzing call of inner3 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i16 @inner3(i8* %ptr) { + %L = load i8, i8* %ptr + %E = sext i8 %L to i16 + ret i16 %E +} + +define i32 @outer4(i8* %ptr) { + %C = call i32 @inner4(i8* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner4 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner4(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i32 + ret i32 %E +} + +define i32 @outer5(i8* %ptr) { + %C = call i32 @inner5(i8* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner5 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner5(i8* %ptr) { + %L = load i8, i8* %ptr + %E = sext i8 %L to i32 + ret i32 %E +} + +define i32 @outer6(i16* %ptr) { + %C = call i32 @inner6(i16* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner6 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner6(i16* %ptr) { + %L = load i16, i16* %ptr + %E = zext i16 %L to i32 + ret i32 %E +} + +define i32 @outer7(i16* %ptr) { + %C = call i32 @inner7(i16* %ptr) + ret i32 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner7 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i32 @inner7(i16* %ptr) { + %L = load i16, i16* %ptr + %E = sext i16 %L to i32 + ret i32 %E +} + +define i64 @outer8(i8* %ptr) { + %C = call i64 @inner8(i8* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner8 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner8(i8* %ptr) { + %L = load i8, i8* %ptr + %E = zext i8 %L to i64 + ret i64 %E +} + +define i64 @outer9(i8* %ptr) { + %C = call i64 @inner9(i8* %ptr) + ret i64 %C +} + +; It is an ExtLoad. 
+; CHECK: Analyzing call of inner9 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner9(i8* %ptr) { + %L = load i8, i8* %ptr + %E = sext i8 %L to i64 + ret i64 %E +} + +define i64 @outer10(i16* %ptr) { + %C = call i64 @inner10(i16* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner10 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner10(i16* %ptr) { + %L = load i16, i16* %ptr + %E = zext i16 %L to i64 + ret i64 %E +} + +define i64 @outer11(i16* %ptr) { + %C = call i64 @inner11(i16* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner11 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner11(i16* %ptr) { + %L = load i16, i16* %ptr + %E = sext i16 %L to i64 + ret i64 %E +} + +define i64 @outer12(i32* %ptr) { + %C = call i64 @inner12(i32* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner12 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner12(i32* %ptr) { + %L = load i32, i32* %ptr + %E = zext i32 %L to i64 + ret i64 %E +} + +define i64 @outer13(i32* %ptr) { + %C = call i64 @inner13(i32* %ptr) + ret i64 %C +} + +; It is an ExtLoad. +; CHECK: Analyzing call of inner13 +; CHECK: NumInstructionsSimplified: 2 +; CHECK: NumInstructions: 3 +define i64 @inner13(i32* %ptr) { + %L = load i32, i32* %ptr + %E = sext i32 %L to i64 + ret i64 %E +} diff --git a/llvm/test/Transforms/Inline/X86/inline-target-attr.ll b/llvm/test/Transforms/Inline/X86/inline-target-attr.ll new file mode 100644 index 00000000000..d0846352ff1 --- /dev/null +++ b/llvm/test/Transforms/Inline/X86/inline-target-attr.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -inline | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -S -passes='cgscc(inline)' | FileCheck %s +; Check that we only inline when we have compatible target attributes. 
+; X86 has implemented a target attribute that will verify that the attribute +; sets are compatible. + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i32 @foo() #0 { +entry: + %call = call i32 (...) @baz() + ret i32 %call +; CHECK-LABEL: foo +; CHECK: call i32 (...) @baz() +} +declare i32 @baz(...) #0 + +define i32 @bar() #1 { +entry: + %call = call i32 @foo() + ret i32 %call +; CHECK-LABEL: bar +; CHECK: call i32 (...) @baz() +} + +define i32 @qux() #0 { +entry: + %call = call i32 @bar() + ret i32 %call +; CHECK-LABEL: qux +; CHECK: call i32 @bar() +} + +attributes #0 = { "target-cpu"="x86-64" "target-features"="+sse,+sse2" } +attributes #1 = { "target-cpu"="x86-64" "target-features"="+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3" } diff --git a/llvm/test/Transforms/Inline/X86/inline-target-cpu-i686.ll b/llvm/test/Transforms/Inline/X86/inline-target-cpu-i686.ll new file mode 100644 index 00000000000..a0325441ed9 --- /dev/null +++ b/llvm/test/Transforms/Inline/X86/inline-target-cpu-i686.ll @@ -0,0 +1,15 @@ +; RUN: opt < %s -mtriple=i686-unknown-unknown -S -inline | FileCheck %s + +define i32 @func_target_cpu_nocona() #0 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_prescott_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_prescott_call_target_cpu_nocona() #1 { + %call = call i32 @func_target_cpu_nocona() + ret i32 %call +} + +attributes #0 = { nounwind "target-cpu"="nocona" } +attributes #1 = { nounwind "target-cpu"="prescott" } diff --git a/llvm/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll b/llvm/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll new file mode 100644 index 00000000000..fa04a77d4a5 --- /dev/null +++ b/llvm/test/Transforms/Inline/X86/inline-target-cpu-x86_64.ll @@ -0,0 +1,43 @@ +; RUN: opt < %s -mtriple=x86_64-unknown-unknown -S -inline | FileCheck %s + +define i32 @func_target_cpu_base() #0 { + ret i32 0 +} + +; CHECK-LABEL: 
@target_cpu_k8_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_k8_call_target_cpu_base() #1 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_nehalem_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_nehalem_call_target_cpu_base() #2 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +; CHECK-LABEL: @target_cpu_target_goldmont_call_target_cpu_base( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_goldmont_call_target_cpu_base() #3 { + %call = call i32 @func_target_cpu_base() + ret i32 %call +} + +define i32 @func_target_cpu_nocona() #4 { + ret i32 0 +} + +; CHECK-LABEL: @target_cpu_target_base_call_target_cpu_nocona( +; CHECK-NEXT: ret i32 0 +define i32 @target_cpu_target_base_call_target_cpu_nocona() #0 { + %call = call i32 @func_target_cpu_nocona() + ret i32 %call +} + +attributes #0 = { nounwind "target-cpu"="x86-64" } +attributes #1 = { nounwind "target-cpu"="k8" } +attributes #2 = { nounwind "target-cpu"="nehalem" } +attributes #3 = { nounwind "target-cpu"="goldmont" } +attributes #4 = { nounwind "target-cpu"="nocona" "target-features"="-sse3" } diff --git a/llvm/test/Transforms/Inline/X86/lit.local.cfg b/llvm/test/Transforms/Inline/X86/lit.local.cfg new file mode 100644 index 00000000000..e71f3cc4c41 --- /dev/null +++ b/llvm/test/Transforms/Inline/X86/lit.local.cfg @@ -0,0 +1,3 @@ +if not 'X86' in config.root.targets: + config.unsupported = True + diff --git a/llvm/test/Transforms/Inline/align.ll b/llvm/test/Transforms/Inline/align.ll new file mode 100644 index 00000000000..c91fe804617 --- /dev/null +++ b/llvm/test/Transforms/Inline/align.ll @@ -0,0 +1,98 @@ +; RUN: opt -inline -preserve-alignment-assumptions-during-inlining -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = 
"x86_64-unknown-linux-gnu" + +define void @hello(float* align 128 nocapture %a, float* nocapture readonly %c) #0 { +entry: + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 5 + store float %0, float* %arrayidx, align 4 + ret void +} + +define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +entry: + tail call void @hello(float* %a, float* %c) + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %0, float* %arrayidx, align 4 + ret void +} + +; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %ptrint = ptrtoint float* %a to i64 +; CHECK: %maskedptr = and i64 %ptrint, 127 +; CHECK: %maskcond = icmp eq i64 %maskedptr, 0 +; CHECK: call void @llvm.assume(i1 %maskcond) +; CHECK: %0 = load float, float* %c, align 4 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i, align 4 +; CHECK: %1 = load float, float* %c, align 4 +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: ret void +; CHECK: } + +define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 { +entry: + tail call void @hello(float* %a, float* %c) + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %0, float* %arrayidx, align 4 + ret void +} + +; CHECK: define void @fooa(float* nocapture align 128 %a, float* nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %0 = load float, float* %c, align 4 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i, align 4 +; CHECK: %1 = load float, float* %c, align 4 +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: ret void +; 
CHECK: } + +define void @hello2(float* align 128 nocapture %a, float* align 128 nocapture %b, float* nocapture readonly %c) #0 { +entry: + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 5 + store float %0, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %b, i64 8 + store float %0, float* %arrayidx1, align 4 + ret void +} + +define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +entry: + tail call void @hello2(float* %a, float* %b, float* %c) + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %0, float* %arrayidx, align 4 + ret void +} + +; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %ptrint = ptrtoint float* %a to i64 +; CHECK: %maskedptr = and i64 %ptrint, 127 +; CHECK: %maskcond = icmp eq i64 %maskedptr, 0 +; CHECK: call void @llvm.assume(i1 %maskcond) +; CHECK: %ptrint1 = ptrtoint float* %b to i64 +; CHECK: %maskedptr2 = and i64 %ptrint1, 127 +; CHECK: %maskcond3 = icmp eq i64 %maskedptr2, 0 +; CHECK: call void @llvm.assume(i1 %maskcond3) +; CHECK: %0 = load float, float* %c, align 4 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i, align 4 +; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 +; CHECK: store float %0, float* %arrayidx1.i, align 4 +; CHECK: %1 = load float, float* %c, align 4 +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: ret void +; CHECK: } + +attributes #0 = { nounwind uwtable } + diff --git a/llvm/test/Transforms/Inline/alloca-bonus.ll b/llvm/test/Transforms/Inline/alloca-bonus.ll new file mode 100644 index 00000000000..c5c2ce11cc5 --- /dev/null +++ b/llvm/test/Transforms/Inline/alloca-bonus.ll @@ -0,0 +1,162 @@ +; 
RUN: opt -inline < %s -S -o - -inline-threshold=8 | FileCheck %s +; RUN: opt -passes='cgscc(inline)' < %s -S -o - -inline-threshold=8 | FileCheck %s + +target datalayout = "p:32:32" + +declare void @llvm.lifetime.start.p0i8(i64 %size, i8* nocapture %ptr) + +@glbl = external global i32 + +define void @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call void @inner1 + %ptr = alloca i32 + call void @inner1(i32* %ptr) + ret void +} + +define void @inner1(i32 *%ptr) { + %A = load i32, i32* %ptr + store i32 0, i32* %ptr + %C = getelementptr inbounds i32, i32* %ptr, i32 0 + %D = getelementptr inbounds i32, i32* %ptr, i32 1 + %E = bitcast i32* %ptr to i8* + %F = select i1 false, i32* %ptr, i32* @glbl + call void @llvm.lifetime.start.p0i8(i64 0, i8* %E) + call void @extern() + ret void +} + +define void @outer2() { +; CHECK-LABEL: @outer2( +; CHECK: call void @inner2 + %ptr = alloca i32 + call void @inner2(i32* %ptr) + ret void +} + +; %D poisons this call, scalar-repl can't handle that instruction. +define void @inner2(i32 *%ptr) { + %A = load i32, i32* %ptr + store i32 0, i32* %ptr + %C = getelementptr inbounds i32, i32* %ptr, i32 0 + %D = getelementptr inbounds i32, i32* %ptr, i32 %A + %E = bitcast i32* %ptr to i8* + %F = select i1 false, i32* %ptr, i32* @glbl + call void @llvm.lifetime.start.p0i8(i64 0, i8* %E) + call void @extern() + ret void +} + +define void @outer3() { +; CHECK-LABEL: @outer3( +; CHECK-NOT: call void @inner3 + %ptr = alloca i32 + call void @inner3(i32* %ptr, i1 undef) + ret void +} + +define void @inner3(i32 *%ptr, i1 %x) { + %A = icmp eq i32* %ptr, null + %B = and i1 %x, %A + call void @extern() + br i1 %A, label %bb.true, label %bb.false +bb.true: + ; This block mustn't be counted in the inline cost. 
+ %t1 = load i32, i32* %ptr + %t2 = add i32 %t1, 1 + %t3 = add i32 %t2, 1 + %t4 = add i32 %t3, 1 + %t5 = add i32 %t4, 1 + %t6 = add i32 %t5, 1 + %t7 = add i32 %t6, 1 + %t8 = add i32 %t7, 1 + %t9 = add i32 %t8, 1 + %t10 = add i32 %t9, 1 + %t11 = add i32 %t10, 1 + %t12 = add i32 %t11, 1 + %t13 = add i32 %t12, 1 + %t14 = add i32 %t13, 1 + %t15 = add i32 %t14, 1 + %t16 = add i32 %t15, 1 + %t17 = add i32 %t16, 1 + %t18 = add i32 %t17, 1 + %t19 = add i32 %t18, 1 + %t20 = add i32 %t19, 1 + ret void +bb.false: + ret void +} + +define void @outer4(i32 %A) { +; CHECK-LABEL: @outer4( +; CHECK-NOT: call void @inner4 + %ptr = alloca i32 + call void @inner4(i32* %ptr, i32 %A) + ret void +} + +; %B poisons this call, scalar-repl can't handle that instruction. However, we +; still want to detect that the icmp and branch *can* be handled. +define void @inner4(i32 *%ptr, i32 %A) { + %B = getelementptr inbounds i32, i32* %ptr, i32 %A + %C = icmp eq i32* %ptr, null + call void @extern() + br i1 %C, label %bb.true, label %bb.false +bb.true: + ; This block mustn't be counted in the inline cost. + %t1 = load i32, i32* %ptr + %t2 = add i32 %t1, 1 + %t3 = add i32 %t2, 1 + %t4 = add i32 %t3, 1 + %t5 = add i32 %t4, 1 + %t6 = add i32 %t5, 1 + %t7 = add i32 %t6, 1 + %t8 = add i32 %t7, 1 + %t9 = add i32 %t8, 1 + %t10 = add i32 %t9, 1 + %t11 = add i32 %t10, 1 + %t12 = add i32 %t11, 1 + %t13 = add i32 %t12, 1 + %t14 = add i32 %t13, 1 + %t15 = add i32 %t14, 1 + %t16 = add i32 %t15, 1 + %t17 = add i32 %t16, 1 + %t18 = add i32 %t17, 1 + %t19 = add i32 %t18, 1 + %t20 = add i32 %t19, 1 + ret void +bb.false: + ret void +} + +define void @outer5() { +; CHECK-LABEL: @outer5( +; CHECK-NOT: call void @inner5 + %ptr = alloca i32 + call void @inner5(i1 false, i32* %ptr) + ret void +} + +; %D poisons this call, scalar-repl can't handle that instruction. However, if +; the flag is set appropriately, the poisoning instruction is inside of dead +; code, and so shouldn't be counted. 
+define void @inner5(i1 %flag, i32 *%ptr) { + %A = load i32, i32* %ptr + store i32 0, i32* %ptr + call void @extern() + %C = getelementptr inbounds i32, i32* %ptr, i32 0 + br i1 %flag, label %if.then, label %exit + +if.then: + %D = getelementptr inbounds i32, i32* %ptr, i32 %A + %E = bitcast i32* %ptr to i8* + %F = select i1 false, i32* %ptr, i32* @glbl + call void @llvm.lifetime.start.p0i8(i64 0, i8* %E) + ret void + +exit: + ret void +} + +declare void @extern() diff --git a/llvm/test/Transforms/Inline/alloca-dbgdeclare-merge.ll b/llvm/test/Transforms/Inline/alloca-dbgdeclare-merge.ll new file mode 100644 index 00000000000..26131342219 --- /dev/null +++ b/llvm/test/Transforms/Inline/alloca-dbgdeclare-merge.ll @@ -0,0 +1,101 @@ +; Test that alloca merging in the inliner places dbg.declare calls immediately +; after the merged alloca. Not at the end of the entry BB, and definitely not +; before the alloca. +; +; clang -g -S -emit-llvm -Xclang -disable-llvm-optzns +; +;__attribute__((always_inline)) void f() { +; char aaa[100]; +; aaa[10] = 1; +;} +; +;__attribute__((always_inline)) void g() { +; char bbb[100]; +; bbb[20] = 1; +;} +; +;void h() { +; f(); +; g(); +;} +; +; RUN: opt -always-inline -S < %s | FileCheck %s +; +; CHECK: define void @h() +; CHECK-NEXT: entry: +; CHECK-NEXT: %[[AI:.*]] = alloca [100 x i8] +; CHECK-NEXT: call void @llvm.dbg.declare(metadata [100 x i8]* %[[AI]], +; CHECK-NEXT: call void @llvm.dbg.declare(metadata [100 x i8]* %[[AI]], + + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: alwaysinline nounwind uwtable +define void @f() #0 !dbg !4 { +entry: + %aaa = alloca [100 x i8], align 16 + call void @llvm.dbg.declare(metadata [100 x i8]* %aaa, metadata !12, metadata !17), !dbg !18 + %arrayidx = getelementptr inbounds [100 x i8], [100 x i8]* %aaa, i64 0, i64 10, !dbg !19 + store i8 1, i8* %arrayidx, align 2, !dbg !20 + ret void, !dbg !21 +} + +; Function Attrs: 
nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: alwaysinline nounwind uwtable +define void @g() #0 !dbg !7 { +entry: + %bbb = alloca [100 x i8], align 16 + call void @llvm.dbg.declare(metadata [100 x i8]* %bbb, metadata !22, metadata !17), !dbg !23 + %arrayidx = getelementptr inbounds [100 x i8], [100 x i8]* %bbb, i64 0, i64 20, !dbg !24 + store i8 1, i8* %arrayidx, align 4, !dbg !25 + ret void, !dbg !26 +} + +; Function Attrs: nounwind uwtable +define void @h() #2 !dbg !8 { +entry: + call void @f(), !dbg !27 + call void @g(), !dbg !28 + ret void, !dbg !29 +} + +attributes #0 = { alwaysinline nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9, !10} +!llvm.ident = !{!11} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "../1.c", directory: "/code/llvm-git/build") +!2 = !{} +!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, retainedNodes: !2) +!5 = !DISubroutineType(types: !6) +!6 = !{null} +!7 = distinct 
!DISubprogram(name: "g", scope: !1, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, unit: !0, retainedNodes: !2) +!8 = distinct !DISubprogram(name: "h", scope: !1, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, scopeLine: 11, isOptimized: false, unit: !0, retainedNodes: !2) +!9 = !{i32 2, !"Dwarf Version", i32 4} +!10 = !{i32 2, !"Debug Info Version", i32 3} +!11 = !{!"clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)"} +!12 = !DILocalVariable(name: "aaa", scope: !4, file: !1, line: 2, type: !13) +!13 = !DICompositeType(tag: DW_TAG_array_type, baseType: !14, size: 800, align: 8, elements: !15) +!14 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char) +!15 = !{!16} +!16 = !DISubrange(count: 100) +!17 = !DIExpression() +!18 = !DILocation(line: 2, column: 8, scope: !4) +!19 = !DILocation(line: 3, column: 3, scope: !4) +!20 = !DILocation(line: 3, column: 11, scope: !4) +!21 = !DILocation(line: 4, column: 1, scope: !4) +!22 = !DILocalVariable(name: "bbb", scope: !7, file: !1, line: 7, type: !13) +!23 = !DILocation(line: 7, column: 8, scope: !7) +!24 = !DILocation(line: 8, column: 3, scope: !7) +!25 = !DILocation(line: 8, column: 11, scope: !7) +!26 = !DILocation(line: 9, column: 1, scope: !7) +!27 = !DILocation(line: 12, column: 3, scope: !8) +!28 = !DILocation(line: 13, column: 3, scope: !8) +!29 = !DILocation(line: 14, column: 1, scope: !8) diff --git a/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll new file mode 100644 index 00000000000..07e931d0d22 --- /dev/null +++ b/llvm/test/Transforms/Inline/alloca-dbgdeclare.ll @@ -0,0 +1,131 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +; struct A { +; int arg0; +; double arg1[2]; +; } a, b; +; +; void fn3(A p1) { +; if (p1.arg0) +; a = p1; +; } +; +; void fn4() { fn3(b); } +; +; void fn5() { +; while (1) +; fn4(); +; 
} +; ModuleID = 'test.cpp' +source_filename = "test/Transforms/Inline/alloca-dbgdeclare.ll" +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-apple-darwin" + +%struct.A = type { i32, [2 x double] } + +@a = global %struct.A zeroinitializer, align 8, !dbg !0 +@b = global %struct.A zeroinitializer, align 8, !dbg !12 + +; Function Attrs: nounwind +declare void @_Z3fn31A(%struct.A* nocapture readonly) #0 + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: argmemonly nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1) #2 + +; Function Attrs: nounwind +define void @_Z3fn4v() #0 !dbg !22 { +entry: +; Test that the dbg.declare is moved together with the alloca. +; CHECK: define void @_Z3fn5v() +; CHECK-NEXT: entry: +; CHECK-NEXT: %agg.tmp.sroa.3.i = alloca [20 x i8], align 4 +; CHECK-NEXT: call void @llvm.dbg.declare(metadata [20 x i8]* %agg.tmp.sroa.3.i, + %agg.tmp.sroa.3 = alloca [20 x i8], align 4 + tail call void @llvm.dbg.declare(metadata [20 x i8]* %agg.tmp.sroa.3, metadata !25, metadata !30), !dbg !31 + %agg.tmp.sroa.0.0.copyload = load i32, i32* getelementptr inbounds (%struct.A, %struct.A* @b, i64 0, i32 0), align 8, !dbg !33 + tail call void @llvm.dbg.value(metadata i32 %agg.tmp.sroa.0.0.copyload, metadata !25, metadata !34), !dbg !31 + %agg.tmp.sroa.3.0..sroa_idx = getelementptr inbounds [20 x i8], [20 x i8]* %agg.tmp.sroa.3, i64 0, i64 0, !dbg !33 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %agg.tmp.sroa.3.0..sroa_idx, i8* align 4 getelementptr (i8, i8* bitcast (%struct.A* @b to i8*), i64 4), i64 20, i1 false), !dbg !33 + tail call void @llvm.dbg.declare(metadata %struct.A* undef, metadata !25, metadata !35) #0, !dbg !31 + %tobool.i = icmp eq i32 %agg.tmp.sroa.0.0.copyload, 0, !dbg !36 + br i1 %tobool.i, label %_Z3fn31A.exit, label %if.then.i, !dbg !38 + +if.then.i: ; preds = %entry + store i32 
%agg.tmp.sroa.0.0.copyload, i32* getelementptr inbounds (%struct.A, %struct.A* @a, i64 0, i32 0), align 8, !dbg !39 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 getelementptr (i8, i8* bitcast (%struct.A* @a to i8*), i64 4), i8* align 4 %agg.tmp.sroa.3.0..sroa_idx, i64 20, i1 false), !dbg !39 + br label %_Z3fn31A.exit, !dbg !39 + +_Z3fn31A.exit: ; preds = %if.then.i, %entry + + ret void, !dbg !33 +} + +; Function Attrs: noreturn nounwind +define void @_Z3fn5v() #3 !dbg !40 { +entry: + br label %while.body, !dbg !41 + +while.body: ; preds = %while.body, %entry + call void @_Z3fn4v(), !dbg !42 + br label %while.body, !dbg !41 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { argmemonly nounwind } +attributes #3 = { noreturn nounwind } + +!llvm.dbg.cu = !{!14} +!llvm.module.flags = !{!19, !20} +!llvm.ident = !{!21} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = !DIGlobalVariable(name: "a", scope: null, file: !2, line: 4, type: !3, isLocal: false, isDefinition: true) +!2 = !DIFile(filename: "test.cpp", directory: "") +!3 = !DICompositeType(tag: DW_TAG_structure_type, name: "A", file: !2, line: 1, size: 192, align: 64, elements: !4, identifier: "_ZTS1A") +!4 = !{!5, !7} +!5 = !DIDerivedType(tag: DW_TAG_member, name: "arg0", scope: !3, file: !2, line: 2, baseType: !6, size: 32, align: 32) +!6 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!7 = !DIDerivedType(tag: DW_TAG_member, name: "arg1", scope: !3, file: !2, line: 3, baseType: !8, size: 128, align: 64, offset: 64) +!8 = !DICompositeType(tag: DW_TAG_array_type, baseType: !9, size: 128, align: 64, elements: !10) +!9 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float) +!10 = !{!11} +!11 = !DISubrange(count: 2) +!12 = !DIGlobalVariableExpression(var: !13, expr: !DIExpression()) +!13 = 
!DIGlobalVariable(name: "b", scope: null, file: !2, line: 4, type: !3, isLocal: false, isDefinition: true) +!14 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !15, producer: "clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !16, retainedTypes: !17, globals: !18, imports: !16) +!15 = !DIFile(filename: "<stdin>", directory: "") +!16 = !{} +!17 = !{!3} +!18 = !{!0, !12} +!19 = !{i32 2, !"Dwarf Version", i32 4} +!20 = !{i32 2, !"Debug Info Version", i32 3} +!21 = !{!"clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)"} +!22 = distinct !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", scope: !2, file: !2, line: 11, type: !23, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: true, unit: !14, retainedNodes: !16) +!23 = !DISubroutineType(types: !24) +!24 = !{null} +!25 = !DILocalVariable(name: "p1", arg: 1, scope: !26, file: !2, line: 6, type: !3) +!26 = distinct !DISubprogram(name: "fn3", linkageName: "_Z3fn31A", scope: !2, file: !2, line: 6, type: !27, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !14, retainedNodes: !29) +!27 = !DISubroutineType(types: !28) +!28 = !{null, !3} +!29 = !{!25} +!30 = !DIExpression(DW_OP_LLVM_fragment, 32, 160) +!31 = !DILocation(line: 6, scope: !26, inlinedAt: !32) +!32 = distinct !DILocation(line: 11, scope: !22) +!33 = !DILocation(line: 11, scope: !22) +!34 = !DIExpression(DW_OP_LLVM_fragment, 0, 32) +!35 = !DIExpression(DW_OP_deref) +!36 = !DILocation(line: 7, scope: !37, inlinedAt: !32) +!37 = distinct !DILexicalBlock(scope: !26, file: !2, line: 7) +!38 = !DILocation(line: 7, scope: !26, inlinedAt: !32) +!39 = !DILocation(line: 8, scope: !37, inlinedAt: !32) +!40 = distinct !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", scope: !2, file: !2, line: 13, type: !23, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, 
isOptimized: true, unit: !14, retainedNodes: !16) +!41 = !DILocation(line: 14, scope: !40) +!42 = !DILocation(line: 15, scope: !40) + diff --git a/llvm/test/Transforms/Inline/alloca-in-scc.ll b/llvm/test/Transforms/Inline/alloca-in-scc.ll new file mode 100644 index 00000000000..92649a6e073 --- /dev/null +++ b/llvm/test/Transforms/Inline/alloca-in-scc.ll @@ -0,0 +1,31 @@ +; RUN: opt < %s -inline | llvm-dis + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin10.0" + +define i32 @main(i32 %argc, i8** %argv) nounwind ssp { +entry: + call fastcc void @c() nounwind + unreachable +} + +define internal fastcc void @a() nounwind ssp { +entry: + %al = alloca [3 x i32], align 4 + %0 = getelementptr inbounds [3 x i32], [3 x i32]* %al, i32 0, i32 2 + + call fastcc void @c() nounwind + unreachable +} + +define internal fastcc void @b() nounwind ssp { +entry: + tail call fastcc void @a() nounwind ssp + unreachable +} + +define internal fastcc void @c() nounwind ssp { +entry: + call fastcc void @b() nounwind + unreachable +} diff --git a/llvm/test/Transforms/Inline/alloca-merge-align.ll b/llvm/test/Transforms/Inline/alloca-merge-align.ll new file mode 100644 index 00000000000..70b94f7b69a --- /dev/null +++ b/llvm/test/Transforms/Inline/alloca-merge-align.ll @@ -0,0 +1,104 @@ +; RUN: opt < %s -inline -S | FileCheck %s + +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.s = type { i32, i32 } + +define void @foo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 4 + %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0 + %0 = load i32, i32* %a1, align 4 + %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 4 + %b = 
getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1 + %1 = load i32, i32* %b, align 4 + %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4 + call void @bar(i32* %arrayidx) #2 + ret void +} + +define void @foo0(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32] + %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0 + %0 = load i32, i32* %a1, align 4 + %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 4 + %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1 + %1 = load i32, i32* %b, align 4 + %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4 + call void @bar(i32* %arrayidx) #2 + ret void +} + +define void @foo1(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 1 + %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0 + %0 = load i32, i32* %a1, align 4 + %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 4 + %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1 + %1 = load i32, i32* %b, align 4 + %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1 + store i32 %1, i32* %arrayidx2, align 4 + call void @bar(i32* %arrayidx) #2 + ret void +} + +declare void @bar(i32*) #1 + +define void @goo(%struct.s* byval nocapture readonly %a) { +entry: + %x = alloca [2 x i32], align 32 + %a1 = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 0 + %0 = load i32, i32* %a1, align 4 + %arrayidx = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 0 + store i32 %0, i32* %arrayidx, align 32 + %b = getelementptr inbounds %struct.s, %struct.s* %a, i64 0, i32 1 + %1 = load i32, i32* %b, align 4 + %arrayidx2 = getelementptr inbounds [2 x i32], [2 x i32]* %x, i64 0, i64 1 + store i32 %1, 
i32* %arrayidx2, align 4 + call void @bar(i32* %arrayidx) #2 + ret void +} + +; CHECK-LABEL: @main +; CHECK: alloca [2 x i32], align 32 +; CHECK-NOT: alloca [2 x i32] +; CHECK: ret i32 0 + +define signext i32 @main() { +entry: + %a = alloca i64, align 8 + %tmpcast = bitcast i64* %a to %struct.s* + store i64 0, i64* %a, align 8 + %a1 = bitcast i64* %a to i32* + store i32 1, i32* %a1, align 8 + call void @foo(%struct.s* byval %tmpcast) + store i32 2, i32* %a1, align 8 + call void @goo(%struct.s* byval %tmpcast) + ret i32 0 +} + +; CHECK-LABEL: @test0 +; CHECK: alloca [2 x i32], align 32 +; CHECK-NOT: alloca [2 x i32] +; CHECK: ret i32 0 + +define signext i32 @test0() { +entry: + %a = alloca i64, align 8 + %tmpcast = bitcast i64* %a to %struct.s* + store i64 0, i64* %a, align 8 + %a1 = bitcast i64* %a to i32* + store i32 1, i32* %a1, align 8 + call void @foo0(%struct.s* byval %tmpcast) + store i32 2, i32* %a1, align 8 + call void @goo(%struct.s* byval %tmpcast) + ret i32 0 +} diff --git a/llvm/test/Transforms/Inline/alloca_test.ll b/llvm/test/Transforms/Inline/alloca_test.ll new file mode 100644 index 00000000000..cd0713955a2 --- /dev/null +++ b/llvm/test/Transforms/Inline/alloca_test.ll @@ -0,0 +1,56 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; This test ensures that alloca instructions in the entry block for an inlined +; function are moved to the top of the function they are inlined into. 
+; +; RUN: opt -S -inline < %s | FileCheck %s +; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s + +define i32 @func(i32 %i) { + %X = alloca i32 + store i32 %i, i32* %X + ret i32 %i +} + +declare void @bar() + +define i32 @main(i32 %argc) { +; CHECK-LABEL: @main( +; CHECK-NEXT: Entry: +; CHECK-NEXT: [[X_I:%.*]] = alloca i32 +; +Entry: + call void @bar( ) + %X = call i32 @func( i32 7 ) + %Y = add i32 %X, %argc + ret i32 %Y +} + +; https://llvm.org/bugs/show_bug.cgi?id=27277 +; Don't assume that the size is a ConstantInt (an undef value is also a constant). + +define void @PR27277(i32 %p1) { +; CHECK-LABEL: @PR27277( +; CHECK-NEXT: [[VLA:%.*]] = alloca double, i32 %p1 +; CHECK-NEXT: call void @PR27277(i32 undef) +; CHECK-NEXT: ret void +; + %vla = alloca double, i32 %p1 + call void @PR27277(i32 undef) + ret void +} + +; Don't assume that the size is a ConstantInt (a ConstExpr is also a constant). + +@GV = common global i32* null + +define void @PR27277_part2(i32 %p1) { +; CHECK-LABEL: @PR27277_part2( +; CHECK-NEXT: [[VLA:%.*]] = alloca double, i32 %p1 +; CHECK-NEXT: call void @PR27277_part2(i32 ptrtoint (i32** @GV to i32)) +; CHECK-NEXT: ret void +; + %vla = alloca double, i32 %p1 + call void @PR27277_part2(i32 ptrtoint (i32** @GV to i32)) + ret void +} + diff --git a/llvm/test/Transforms/Inline/always-inline.ll b/llvm/test/Transforms/Inline/always-inline.ll new file mode 100644 index 00000000000..791eb94779b --- /dev/null +++ b/llvm/test/Transforms/Inline/always-inline.ll @@ -0,0 +1,318 @@ +; RUN: opt < %s -inline-threshold=0 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL +; +; Ensure the threshold has no impact on these decisions. 
+; RUN: opt < %s -inline-threshold=20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL +; RUN: opt < %s -inline-threshold=-20000000 -always-inline -S | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-CALL +; +; The new pass manager doesn't re-use any threshold based infrastructure for +; the always inliner, but test that we get the correct result. The new PM +; always inliner also doesn't support inlining call-site alwaysinline +; annotations. It isn't clear that this is a reasonable use case for +; 'alwaysinline'. +; RUN: opt < %s -passes=always-inline -S | FileCheck %s --check-prefix=CHECK + +define internal i32 @inner1() alwaysinline { +; CHECK-NOT: @inner1( + ret i32 1 +} +define i32 @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call +; CHECK: ret + + %r = call i32 @inner1() + ret i32 %r +} + +; The always inliner can't DCE arbitrary internal functions. PR2945 +define internal i32 @pr2945() nounwind { +; CHECK-LABEL: @pr2945( + ret i32 0 +} + +define internal void @inner2(i32 %N) alwaysinline { +; CHECK-NOT: @inner2( + %P = alloca i32, i32 %N + ret void +} +define void @outer2(i32 %N) { +; The always inliner (unlike the normal one) should be willing to inline +; a function with a dynamic alloca into one without a dynamic alloca. +; rdar://6655932 +; +; CHECK-LABEL: @outer2( +; CHECK-NOT: call void @inner2 +; CHECK-NOT: call void @inner2 +; CHECK: ret void + + call void @inner2( i32 %N ) + ret void +} + +declare i32 @a() returns_twice +declare i32 @b() returns_twice + +; Cannot alwaysinline when that would introduce a returns_twice call. 
+define internal i32 @inner3() alwaysinline { +; CHECK-LABEL: @inner3( +entry: + %call = call i32 @a() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} +define i32 @outer3() { +entry: +; CHECK-LABEL: @outer3( +; CHECK-NOT: call i32 @a +; CHECK: ret + + %call = call i32 @inner3() + %add = add nsw i32 1, %call + ret i32 %add +} + +define internal i32 @inner4() alwaysinline returns_twice { +; CHECK-NOT: @inner4( +entry: + %call = call i32 @b() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @outer4() { +entry: +; CHECK-LABEL: @outer4( +; CHECK: call i32 @b() +; CHECK: ret + + %call = call i32 @inner4() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} + +; We can't inline this even though it has alwaysinline! +define internal i32 @inner5(i8* %addr) alwaysinline { +; CHECK-LABEL: @inner5( +entry: + indirectbr i8* %addr, [ label %one, label %two ] + +one: + ret i32 42 + +two: + ret i32 44 +} +define i32 @outer5(i32 %x) { +; CHECK-LABEL: @outer5( +; CHECK: call i32 @inner5 +; CHECK: ret + + %cmp = icmp slt i32 %x, 42 + %addr = select i1 %cmp, i8* blockaddress(@inner5, %one), i8* blockaddress(@inner5, %two) + %call = call i32 @inner5(i8* %addr) + ret i32 %call +} + +; We alwaysinline a function that calls itself recursively. +define internal void @inner6(i32 %x) alwaysinline { +; CHECK-LABEL: @inner6( +entry: + %icmp = icmp slt i32 %x, 0 + br i1 %icmp, label %return, label %bb + +bb: + %sub = sub nsw i32 %x, 1 + call void @inner6(i32 %sub) + ret void + +return: + ret void +} +define void @outer6() { +; CHECK-LABEL: @outer6( +; CHECK: call void @inner6(i32 42) +; CHECK: ret + +entry: + call void @inner6(i32 42) + ret void +} + +; This is not an alwaysinline function and is actually external.
+define i32 @inner7() { +; CHECK-LABEL: @inner7( + ret i32 1 +} +define i32 @outer7() { +; CHECK-CALL-LABEL: @outer7( +; CHECK-CALL-NOT: call +; CHECK-CALL: ret + + %r = call i32 @inner7() alwaysinline + ret i32 %r +} + +define internal float* @inner8(float* nocapture align 128 %a) alwaysinline { +; CHECK-NOT: @inner8( + ret float* %a +} +define float @outer8(float* nocapture %a) { +; CHECK-LABEL: @outer8( +; CHECK-NOT: call float* @inner8 +; CHECK: ret + + %inner_a = call float* @inner8(float* %a) + %f = load float, float* %inner_a, align 4 + ret float %f +} + + +; The 'inner9*' and 'outer9' functions are designed to check that we remove +; a function that is inlined by the always inliner even when it is used by +; a complex constant expression prior to being inlined. + +; The 'a' function gets used in a complex constant expression that, despite +; being constant folded, means it isn't dead. As a consequence it shouldn't be +; deleted. If it is, then the constant expression needs to become more complex +; to accurately test this scenario. +define internal void @inner9a(i1 %b) alwaysinline { +; CHECK-LABEL: @inner9a( +entry: + ret void +} + +define internal void @inner9b(i1 %b) alwaysinline { +; CHECK-NOT: @inner9b( +entry: + ret void +} + +declare void @dummy9(i1 %b) + +define void @outer9() { +; CHECK-LABEL: @outer9( +entry: + ; First we use @inner9a in a complex constant expression that may get folded + ; but won't get removed, and then we call it which will get inlined. Despite + ; this the function can't be deleted because of the constant expression + ; usage. + %sink = alloca i1 + store volatile i1 icmp eq (i64 ptrtoint (void (i1)* @inner9a to i64), i64 ptrtoint(void (i1)* @dummy9 to i64)), i1* %sink +; CHECK: store volatile + call void @inner9a(i1 false) +; CHECK-NOT: call void @inner9a + + ; Next we call @inner9b passing in a constant expression. 
This constant + ; expression will in fact be removed by inlining, so we should also be able + ; to delete the function. + call void @inner9b(i1 icmp eq (i64 ptrtoint (void (i1)* @inner9b to i64), i64 ptrtoint(void (i1)* @dummy9 to i64))) +; CHECK-NOT: @inner9b + + ret void +; CHECK: ret void +} + +; The 'inner10' and 'outer10' functions test a frustrating consequence of the +; current 'alwaysinline' semantic model. Because such functions are allowed to +; be external functions, it may be necessary to both inline all of their uses +; and leave them in the final output. These tests can be removed if and when +; we restrict alwaysinline further. +define void @inner10() alwaysinline { +; CHECK-LABEL: @inner10( +entry: + ret void +} + +define void @outer10() { +; CHECK-LABEL: @outer10( +entry: + call void @inner10() +; CHECK-NOT: call void @inner10 + + ret void +; CHECK: ret void +} + +; The 'inner11' and 'outer11' functions test another dimension of non-internal +; functions with alwaysinline. These functions use external linkages that we can +; actually remove safely and so we should. +define linkonce void @inner11a() alwaysinline { +; CHECK-NOT: @inner11a( +entry: + ret void +} + +define available_externally void @inner11b() alwaysinline { +; CHECK-NOT: @inner11b( +entry: + ret void +} + +define void @outer11() { +; CHECK-LABEL: @outer11( +entry: + call void @inner11a() + call void @inner11b() +; CHECK-NOT: call void @inner11a +; CHECK-NOT: call void @inner11b + + ret void +; CHECK: ret void +} + +; The 'inner12' and 'outer12' functions test that we don't remove functions +; which are part of a comdat group even if they otherwise seem dead.
+$comdat12 = comdat any + +define linkonce void @inner12() alwaysinline comdat($comdat12) { +; CHECK-LABEL: @inner12( + ret void +} + +define void @outer12() comdat($comdat12) { +; CHECK-LABEL: @outer12( +entry: + call void @inner12() +; CHECK-NOT: call void @inner12 + + ret void +; CHECK: ret void +} + +; The 'inner13*' and 'outer13' functions test that we do remove functions +; which are part of a comdat group where all of the members are removed during +; always inlining. +$comdat13 = comdat any + +define linkonce void @inner13a() alwaysinline comdat($comdat13) { +; CHECK-NOT: @inner13a( + ret void +} + +define linkonce void @inner13b() alwaysinline comdat($comdat13) { +; CHECK-NOT: @inner13b( + ret void +} + +define void @outer13() { +; CHECK-LABEL: @outer13( +entry: + call void @inner13a() + call void @inner13b() +; CHECK-NOT: call void @inner13a +; CHECK-NOT: call void @inner13b + + ret void +; CHECK: ret void +} + +define void @inner14() readnone nounwind { +; CHECK: define void @inner14 + ret void +} + +define void @outer14() { +; CHECK: call void @inner14 + call void @inner14() + ret void +} diff --git a/llvm/test/Transforms/Inline/arg-attr-propagation.ll b/llvm/test/Transforms/Inline/arg-attr-propagation.ll new file mode 100644 index 00000000000..3d18e8047e5 --- /dev/null +++ b/llvm/test/Transforms/Inline/arg-attr-propagation.ll @@ -0,0 +1,50 @@ +; RUN: opt -inline -S < %s | FileCheck %s + +; The callee guarantees that the pointer argument is nonnull and dereferenceable. +; That information should transfer to the caller. + +define i32 @callee(i32* dereferenceable(32) %t1) { +; CHECK-LABEL: @callee(i32* dereferenceable(32) %t1) +; CHECK-NEXT: [[T2:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2]] +; + %t2 = load i32, i32* %t1 + ret i32 %t2 +} + +; FIXME: All dereferenceability information is lost. +; The caller argument could be known nonnull and dereferenceable(32). 
+ +define i32 @caller1(i32* %t1) { +; CHECK-LABEL: @caller1(i32* %t1) +; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2_I]] +; + %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1) + ret i32 %t2 +} + +; The caller argument is nonnull, but that can be explicit. +; The dereferenceable amount could be increased. + +define i32 @caller2(i32* dereferenceable(31) %t1) { +; CHECK-LABEL: @caller2(i32* dereferenceable(31) %t1) +; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2_I]] +; + %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1) + ret i32 %t2 +} + +; The caller argument is nonnull, but that can be explicit. +; Make sure that we don't propagate a smaller dereferenceable amount. + +define i32 @caller3(i32* dereferenceable(33) %t1) { +; CHECK-LABEL: @caller3(i32* dereferenceable(33) %t1) +; CHECK-NEXT: [[T2_I:%.*]] = load i32, i32* %t1 +; CHECK-NEXT: ret i32 [[T2_I]] +; + %t2 = tail call i32 @callee(i32* dereferenceable(32) %t1) + ret i32 %t2 +} + diff --git a/llvm/test/Transforms/Inline/array-alloca.ll b/llvm/test/Transforms/Inline/array-alloca.ll new file mode 100644 index 00000000000..b71dafee00f --- /dev/null +++ b/llvm/test/Transforms/Inline/array-alloca.ll @@ -0,0 +1,37 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +%struct.A = type { i32 } + +define void @callee1(i32 %M) { +entry: + %vla = alloca i32, i32 %M, align 16 + ret void +} + +define void @callee2(i32 %M) { +entry: + %vla = alloca %struct.A, i32 %M, align 16 + ret void +} + +define void @callee3(i128 %M) { +entry: + %vla = alloca i32, i128 %M, align 16 + ret void +} + +; CHECK-LABEL: @caller +define void @caller() #0 { +entry: + call void @caller() +; CHECK-NOT: call void @callee1 + call void @callee1(i32 256) +; CHECK: call void @callee2 + call void @callee2(i32 4096) +; CHECK: call void @callee3 +; This is to test that there is no overflow in computing allocated size +; call void 
@callee3(i128 0x8000000000000000); + call void @callee3(i128 9223372036854775808); + ret void +} + diff --git a/llvm/test/Transforms/Inline/array_merge.ll b/llvm/test/Transforms/Inline/array_merge.ll new file mode 100644 index 00000000000..b2eafeb0456 --- /dev/null +++ b/llvm/test/Transforms/Inline/array_merge.ll @@ -0,0 +1,26 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; rdar://7173846 +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin10.0" + +define internal void @foo() nounwind ssp { +entry: + %A = alloca [100 x i32] + %B = alloca [100 x i32] + call void @bar([100 x i32]* %A, [100 x i32]* %B) nounwind + ret void +} + +declare void @bar([100 x i32]*, [100 x i32]*) + +define void @test() nounwind ssp { +entry: +; CHECK: @test() +; CHECK-NEXT: entry: +; CHECK-NEXT: %A.i = alloca +; CHECK-NEXT: %B.i = alloca +; CHECK-NOT: alloca + call void @foo() nounwind + call void @foo() nounwind + ret void +} diff --git a/llvm/test/Transforms/Inline/attributes.ll b/llvm/test/Transforms/Inline/attributes.ll new file mode 100644 index 00000000000..028f3b0f197 --- /dev/null +++ b/llvm/test/Transforms/Inline/attributes.ll @@ -0,0 +1,418 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define i32 @noattr_callee(i32 %i) { + ret i32 %i +} + +define i32 @sanitize_address_callee(i32 %i) sanitize_address { + ret i32 %i +} + +define i32 @sanitize_hwaddress_callee(i32 %i) sanitize_hwaddress { + ret i32 %i +} + +define i32 @sanitize_thread_callee(i32 %i) sanitize_thread { + ret i32 %i +} + +define i32 @sanitize_memory_callee(i32 %i) sanitize_memory { + ret i32 %i +} + +define i32 @safestack_callee(i32 %i) safestack { + ret i32 %i +} + +define i32 @slh_callee(i32 %i) 
speculative_load_hardening { + ret i32 %i +} + +define i32 @alwaysinline_callee(i32 %i) alwaysinline { + ret i32 %i +} + +define i32 @alwaysinline_sanitize_address_callee(i32 %i) alwaysinline sanitize_address { + ret i32 %i +} + +define i32 @alwaysinline_sanitize_hwaddress_callee(i32 %i) alwaysinline sanitize_hwaddress { + ret i32 %i +} + +define i32 @alwaysinline_sanitize_thread_callee(i32 %i) alwaysinline sanitize_thread { + ret i32 %i +} + +define i32 @alwaysinline_sanitize_memory_callee(i32 %i) alwaysinline sanitize_memory { + ret i32 %i +} + +define i32 @alwaysinline_safestack_callee(i32 %i) alwaysinline safestack { + ret i32 %i +} + + +; Check that: +; * noattr callee is inlined into noattr caller, +; * sanitize_(address|memory|thread) callee is not inlined into noattr caller, +; * alwaysinline callee is always inlined no matter what sanitize_* attributes are present. + +define i32 @test_no_sanitize_address(i32 %arg) { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_address_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_no_sanitize_address( +; CHECK-NEXT: @sanitize_address_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_no_sanitize_hwaddress(i32 %arg) { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_hwaddress_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_hwaddress_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_no_sanitize_hwaddress( +; CHECK-NEXT: @sanitize_hwaddress_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_no_sanitize_memory(i32 %arg) { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_memory_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_no_sanitize_memory( +; CHECK-NEXT: 
@sanitize_memory_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_no_sanitize_thread(i32 %arg) { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_thread_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_no_sanitize_thread( +; CHECK-NEXT: @sanitize_thread_callee +; CHECK-NEXT: ret i32 +} + + +; Check that: +; * noattr callee is not inlined into sanitize_(address|memory|thread) caller, +; * sanitize_(address|memory|thread) callee is inlined into the caller with the same attribute, +; * alwaysinline callee is always inlined no matter what sanitize_* attributes are present. + +define i32 @test_sanitize_address(i32 %arg) sanitize_address { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_address_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_address_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_sanitize_address( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_sanitize_hwaddress(i32 %arg) sanitize_hwaddress { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_hwaddress_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_hwaddress_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_sanitize_hwaddress( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_sanitize_memory(i32 %arg) sanitize_memory { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @sanitize_memory_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_memory_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_sanitize_memory( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_sanitize_thread(i32 %arg) sanitize_thread { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 
@sanitize_thread_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_sanitize_thread_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_sanitize_thread( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} + +define i32 @test_safestack(i32 %arg) safestack { + %x1 = call i32 @noattr_callee(i32 %arg) + %x2 = call i32 @safestack_callee(i32 %x1) + %x3 = call i32 @alwaysinline_callee(i32 %x2) + %x4 = call i32 @alwaysinline_safestack_callee(i32 %x3) + ret i32 %x4 +; CHECK-LABEL: @test_safestack( +; CHECK-NEXT: @noattr_callee +; CHECK-NEXT: ret i32 +} + +; Can inline a normal function into an SLH'ed function. +define i32 @test_caller_slh(i32 %i) speculative_load_hardening { +; CHECK-LABEL: @test_caller_slh( +; CHECK-SAME: ) [[SLH:.*]] { +; CHECK-NOT: call +; CHECK: ret i32 +entry: + %callee = call i32 @noattr_callee(i32 %i) + ret i32 %callee +} + +; Can inline a SLH'ed function into a normal one, propagating SLH. +define i32 @test_callee_slh(i32 %i) { +; CHECK-LABEL: @test_callee_slh( +; CHECK-SAME: ) [[SLH:.*]] { +; CHECK-NOT: call +; CHECK: ret i32 +entry: + %callee = call i32 @slh_callee(i32 %i) + ret i32 %callee +} + +; Check that a function doesn't get inlined if target-cpu strings don't match +; exactly. +define i32 @test_target_cpu_callee0(i32 %i) "target-cpu"="corei7" { + ret i32 %i +} + +define i32 @test_target_cpu0(i32 %i) "target-cpu"="corei7" { + %1 = call i32 @test_target_cpu_callee0(i32 %i) + ret i32 %1 +; CHECK-LABEL: @test_target_cpu0( +; CHECK-NOT: @test_target_cpu_callee0 +} + +define i32 @test_target_cpu_callee1(i32 %i) "target-cpu"="x86-64" { + ret i32 %i +} + +define i32 @test_target_cpu1(i32 %i) "target-cpu"="corei7" { + %1 = call i32 @test_target_cpu_callee1(i32 %i) + ret i32 %1 +; CHECK-LABEL: @test_target_cpu1( +; CHECK-NEXT: @test_target_cpu_callee1 +; CHECK-NEXT: ret i32 +} + +; Check that a function doesn't get inlined if target-features strings don't +; match exactly. 
+define i32 @test_target_features_callee0(i32 %i) "target-features"="+sse4.2" { + ret i32 %i +} + +define i32 @test_target_features0(i32 %i) "target-features"="+sse4.2" { + %1 = call i32 @test_target_features_callee0(i32 %i) + ret i32 %1 +; CHECK-LABEL: @test_target_features0( +; CHECK-NOT: @test_target_features_callee0 +} + +define i32 @test_target_features_callee1(i32 %i) "target-features"="+avx2" { + ret i32 %i +} + +define i32 @test_target_features1(i32 %i) "target-features"="+sse4.2" { + %1 = call i32 @test_target_features_callee1(i32 %i) + ret i32 %1 +; CHECK-LABEL: @test_target_features1( +; CHECK-NEXT: @test_target_features_callee1 +; CHECK-NEXT: ret i32 +} + +define i32 @less-precise-fpmad_callee0(i32 %i) "less-precise-fpmad"="false" { + ret i32 %i +; CHECK: @less-precise-fpmad_callee0(i32 %i) [[FPMAD_FALSE:#[0-9]+]] { +; CHECK-NEXT: ret i32 +} + +define i32 @less-precise-fpmad_callee1(i32 %i) "less-precise-fpmad"="true" { + ret i32 %i +; CHECK: @less-precise-fpmad_callee1(i32 %i) [[FPMAD_TRUE:#[0-9]+]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_less-precise-fpmad0(i32 %i) "less-precise-fpmad"="false" { + %1 = call i32 @less-precise-fpmad_callee0(i32 %i) + ret i32 %1 +; CHECK: @test_less-precise-fpmad0(i32 %i) [[FPMAD_FALSE]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_less-precise-fpmad1(i32 %i) "less-precise-fpmad"="false" { + %1 = call i32 @less-precise-fpmad_callee1(i32 %i) + ret i32 %1 +; CHECK: @test_less-precise-fpmad1(i32 %i) [[FPMAD_FALSE]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_less-precise-fpmad2(i32 %i) "less-precise-fpmad"="true" { + %1 = call i32 @less-precise-fpmad_callee0(i32 %i) + ret i32 %1 +; CHECK: @test_less-precise-fpmad2(i32 %i) [[FPMAD_FALSE]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_less-precise-fpmad3(i32 %i) "less-precise-fpmad"="true" { + %1 = call i32 @less-precise-fpmad_callee1(i32 %i) + ret i32 %1 +; CHECK: @test_less-precise-fpmad3(i32 %i) [[FPMAD_TRUE]] { +; CHECK-NEXT: ret i32 +} + +define i32 
@no-implicit-float_callee0(i32 %i) { + ret i32 %i +; CHECK: @no-implicit-float_callee0(i32 %i) { +; CHECK-NEXT: ret i32 +} + +define i32 @no-implicit-float_callee1(i32 %i) noimplicitfloat { + ret i32 %i +; CHECK: @no-implicit-float_callee1(i32 %i) [[NOIMPLICITFLOAT:#[0-9]+]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-implicit-float0(i32 %i) { + %1 = call i32 @no-implicit-float_callee0(i32 %i) + ret i32 %1 +; CHECK: @test_no-implicit-float0(i32 %i) { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-implicit-float1(i32 %i) { + %1 = call i32 @no-implicit-float_callee1(i32 %i) + ret i32 %1 +; CHECK: @test_no-implicit-float1(i32 %i) [[NOIMPLICITFLOAT]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-implicit-float2(i32 %i) noimplicitfloat { + %1 = call i32 @no-implicit-float_callee0(i32 %i) + ret i32 %1 +; CHECK: @test_no-implicit-float2(i32 %i) [[NOIMPLICITFLOAT]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-implicit-float3(i32 %i) noimplicitfloat { + %1 = call i32 @no-implicit-float_callee1(i32 %i) + ret i32 %1 +; CHECK: @test_no-implicit-float3(i32 %i) [[NOIMPLICITFLOAT]] { +; CHECK-NEXT: ret i32 +} + +; Check that no-jump-tables flag propagates from inlined callee to caller + +define i32 @no-use-jump-tables_callee0(i32 %i) { + ret i32 %i +; CHECK: @no-use-jump-tables_callee0(i32 %i) { +; CHECK-NEXT: ret i32 +} + +define i32 @no-use-jump-tables_callee1(i32 %i) "no-jump-tables"="true" { + ret i32 %i +; CHECK: @no-use-jump-tables_callee1(i32 %i) [[NOUSEJUMPTABLES:#[0-9]+]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-use-jump-tables0(i32 %i) { + %1 = call i32 @no-use-jump-tables_callee0(i32 %i) + ret i32 %1 +; CHECK: @test_no-use-jump-tables0(i32 %i) { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-use-jump-tables1(i32 %i) { + %1 = call i32 @no-use-jump-tables_callee1(i32 %i) + ret i32 %1 +; CHECK: @test_no-use-jump-tables1(i32 %i) [[NOUSEJUMPTABLES]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-use-jump-tables2(i32 %i) 
"no-jump-tables"="true" { + %1 = call i32 @no-use-jump-tables_callee0(i32 %i) + ret i32 %1 +; CHECK: @test_no-use-jump-tables2(i32 %i) [[NOUSEJUMPTABLES]] { +; CHECK-NEXT: ret i32 +} + +define i32 @test_no-use-jump-tables3(i32 %i) "no-jump-tables"="true" { + %1 = call i32 @no-use-jump-tables_callee1(i32 %i) + ret i32 %1 +; CHECK: @test_no-use-jump-tables3(i32 %i) [[NOUSEJUMPTABLES]] { +; CHECK-NEXT: ret i32 +} + +; Callee with "null-pointer-is-valid"="true" attribute should not be inlined +; into a caller without this attribute. +; Exception: alwaysinline callee can still be inlined but +; "null-pointer-is-valid"="true" should get copied to caller. + +define i32 @null-pointer-is-valid_callee0(i32 %i) "null-pointer-is-valid"="true" { + ret i32 %i +; CHECK: @null-pointer-is-valid_callee0(i32 %i) +; CHECK-NEXT: ret i32 +} + +define i32 @null-pointer-is-valid_callee1(i32 %i) alwaysinline "null-pointer-is-valid"="true" { + ret i32 %i +; CHECK: @null-pointer-is-valid_callee1(i32 %i) +; CHECK-NEXT: ret i32 +} + +define i32 @null-pointer-is-valid_callee2(i32 %i) { + ret i32 %i +; CHECK: @null-pointer-is-valid_callee2(i32 %i) +; CHECK-NEXT: ret i32 +} + +; No inlining since caller does not have "null-pointer-is-valid"="true" attribute. +define i32 @test_null-pointer-is-valid0(i32 %i) { + %1 = call i32 @null-pointer-is-valid_callee0(i32 %i) + ret i32 %1 +; CHECK: @test_null-pointer-is-valid0( +; CHECK: call i32 @null-pointer-is-valid_callee0 +; CHECK-NEXT: ret i32 +} + +; alwaysinline should force inlining even when caller does not have +; "null-pointer-is-valid"="true" attribute. However, the attribute should be +; copied to caller. 
+define i32 @test_null-pointer-is-valid1(i32 %i) "null-pointer-is-valid"="false" { + %1 = call i32 @null-pointer-is-valid_callee1(i32 %i) + ret i32 %1 +; CHECK: @test_null-pointer-is-valid1(i32 %i) [[NULLPOINTERISVALID:#[0-9]+]] { +; CHECK-NEXT: ret i32 +} + +; Can inline since both caller and callee have "null-pointer-is-valid"="true" +; attribute. +define i32 @test_null-pointer-is-valid2(i32 %i) "null-pointer-is-valid"="true" { + %1 = call i32 @null-pointer-is-valid_callee2(i32 %i) + ret i32 %1 +; CHECK: @test_null-pointer-is-valid2(i32 %i) [[NULLPOINTERISVALID]] { +; CHECK-NEXT: ret i32 +} + +; CHECK: attributes [[SLH]] = { speculative_load_hardening } +; CHECK: attributes [[FPMAD_FALSE]] = { "less-precise-fpmad"="false" } +; CHECK: attributes [[FPMAD_TRUE]] = { "less-precise-fpmad"="true" } +; CHECK: attributes [[NOIMPLICITFLOAT]] = { noimplicitfloat } +; CHECK: attributes [[NOUSEJUMPTABLES]] = { "no-jump-tables"="true" } +; CHECK: attributes [[NULLPOINTERISVALID]] = { "null-pointer-is-valid"="true" } diff --git a/llvm/test/Transforms/Inline/basictest.ll b/llvm/test/Transforms/Inline/basictest.ll new file mode 100644 index 00000000000..f34ed084113 --- /dev/null +++ b/llvm/test/Transforms/Inline/basictest.ll @@ -0,0 +1,117 @@ +; RUN: opt < %s -inline -sroa -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline,function(sroa))' -S | FileCheck %s +target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128" + +define i32 @test1f(i32 %i) { + ret i32 %i +} + +define i32 @test1(i32 %W) { + %X = call i32 @test1f(i32 7) + %Y = add i32 %X, %W + ret i32 %Y +; CHECK-LABEL: @test1( +; CHECK-NEXT: %Y = add i32 7, %W +; CHECK-NEXT: ret i32 %Y +} + + + +; rdar://7339069 + +%T = type { i32, i32 } + +; CHECK-NOT: @test2f( +define internal %T* @test2f(i1 %cond, %T* %P) { + br i1 %cond, label %T, label %F + +T: + %A = getelementptr %T, %T* %P, i32 0, i32 0 + store i32 42, i32* %A + ret %T* %P + +F: + ret %T* %P 
+} + +define i32 @test2(i1 %cond) { + %A = alloca %T + + %B = call %T* @test2f(i1 %cond, %T* %A) + %C = getelementptr %T, %T* %B, i32 0, i32 0 + %D = load i32, i32* %C + ret i32 %D + +; CHECK-LABEL: @test2( +; CHECK-NOT: = alloca +; CHECK: ret i32 +} + +declare void @barrier() noduplicate + +define internal i32 @f() { + call void @barrier() noduplicate + ret i32 1 +} + +define i32 @g() { + call void @barrier() noduplicate + ret i32 2 +} + +define internal i32 @h() { + call void @barrier() noduplicate + ret i32 3 +} + +define i32 @test3() { + %b = call i32 @f() + ret i32 %b +} + +; The call to @f cannot be inlined as there is another callsite +; calling @f, and @f contains a noduplicate call. +; +; The call to @g cannot be inlined as it has external linkage. +; +; The call to @h *can* be inlined. + +; CHECK-LABEL: @test( +define i32 @test() { +; CHECK: call i32 @f() + %a = call i32 @f() +; CHECK: call i32 @g() + %b = call i32 @g() +; CHECK-NOT: call i32 @h() + %c = call i32 @h() + + %d = add i32 %a, %b + %e = add i32 %d, %c + + ret i32 %e +; CHECK: } +} + +; Inliner shouldn't delete calls it can't inline, even if they're trivially dead +; CHECK-LABEL: @outer4( +define void @outer4(void ()* %inner4) { +entry: +; CHECK: call void %inner4() + call void %inner4() nounwind readnone + ret void +} + +declare void @inner5_inner() + +define void @inner5(void ()* %x) { + call void %x() nounwind readnone + ret void +} + +; Inliner shouldn't delete calls it can't inline, even if they're trivially dead and temporarily indirect +; CHECK-LABEL: @outer5( +define void @outer5() { +; CHECK: call void @inner5_inner( + call void @inner5(void ()* @inner5_inner) + ret void +} diff --git a/llvm/test/Transforms/Inline/bfi-update.ll b/llvm/test/Transforms/Inline/bfi-update.ll new file mode 100644 index 00000000000..94584e2e6ce --- /dev/null +++ b/llvm/test/Transforms/Inline/bfi-update.ll @@ -0,0 +1,93 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S 
-inline-threshold=50 -inline-cold-callsite-threshold=0 -hot-callsite-threshold=50 | FileCheck %s +; This tests incremental updates to caller's BFI as a callee gets inlined. +; In bottom-up inlining, first c->e inlining is considered and fails because +; e's size exceeds the threshold of 50. Then a->c inlining is considered and it +; succeeds. a's BFI is updated incrementally. As c's blocks get pruned, the +; block with label cond_false is removed and since the remaining code is +; straight-line a single block gets cloned into a. This block should get the +; maximum block frequency among the original blocks in c. If it gets the +; frequency of the block with label cond_true in @c, its frequency will be +; 1/10th of function a's entry block frequency, resulting in a callsite count of +; 2 (since a's entry count is 20) which means that a->e callsite will be +; considered cold and not inlined. + +@data = external global i32 +; CHECK-LABEL: define i32 @a( +define i32 @a(i32 %a1) !prof !21 { +; CHECK-NOT: call i32 @c +; CHECK-NOT: call i32 @e +; CHECK: ret +entry: + %cond = icmp sle i32 %a1, 1 + %a2 = call i32 @c(i32 1) + br label %exit +exit: + ret i32 %a2 +} + +declare void @ext(); + +; CHECK: @c(i32 %c1) !prof [[COUNT1:![0-9]+]] +define i32 @c(i32 %c1) !prof !23 { + call void @ext() + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false, !prof !25 + +cond_false: + br label %exit + +cond_true: + %c11 = call i32 @e(i32 %c1) + br label %exit +exit: + %c12 = phi i32 [ 0, %cond_false], [ %c11, %cond_true ] + ret i32 %c12 +} + + +; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]] +define i32 @e(i32 %c1) !prof !24 { + call void @ext() + call void @ext() + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false + +cond_false: + call void @ext() + %c2 = load i32, i32* @data, align 4 + %c3 = add i32 %c1, %c2 + %c4 = mul i32 %c3, %c2 + %c5 = add i32 %c4, %c2 + %c6 = mul i32 %c5, %c2 + %c7 = add i32 %c6, %c2 + %c8 = mul i32 %c7, %c2 + 
%c9 = add i32 %c8, %c2 + %c10 = mul i32 %c9, %c2 + ret i32 %c10 + +cond_true: + ret i32 0 +} + +; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 480} +; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 80} +!21 = !{!"function_entry_count", i64 20} +!23 = !{!"function_entry_count", i64 500} +!24 = !{!"function_entry_count", i64 100} +!25 = !{!"branch_weights", i32 1, i32 9} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 1000, i32 1} +!13 = !{i32 999000, i64 1000, i32 1} +!14 = !{i32 999999, i64 5, i32 2} diff --git a/llvm/test/Transforms/Inline/blockaddress.ll b/llvm/test/Transforms/Inline/blockaddress.ll new file mode 100644 index 00000000000..ab0f5adb20a --- /dev/null +++ b/llvm/test/Transforms/Inline/blockaddress.ll @@ -0,0 +1,51 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +; PR10162 + +; Make sure doit is not inlined since the blockaddress is taken +; which could be unsafe +; CHECK: store i8* blockaddress(@doit, %here), i8** %pptr, align 8 + +@i = global i32 1, align 4 +@ptr1 = common global i8* null, align 8 + +define void @doit(i8** nocapture %pptr, i32 %cond) nounwind uwtable { +entry: + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.end, label %here + +here: + store i8* blockaddress(@doit, %here), i8** %pptr, align 8 + br label %if.end + +if.end: + ret void +} + +define void @f(i32 %cond) nounwind uwtable { +entry: + call void @doit(i8** @ptr1, i32 %cond) + ret void +} + +; PR27233: We can inline @run into @init. Don't crash on it. 
+; +; CHECK-LABEL: define void @init +; CHECK: store i8* blockaddress(@run, %bb) +; CHECK-SAME: @run.bb +define void @init() { +entry: + call void @run() + ret void +} + +define void @run() { +entry: + store i8* blockaddress(@run, %bb), i8** getelementptr inbounds ([1 x i8*], [1 x i8*]* @run.bb, i64 0, i64 0), align 8 + ret void + +bb: + unreachable +} + +@run.bb = global [1 x i8*] zeroinitializer diff --git a/llvm/test/Transforms/Inline/byval-tail-call.ll b/llvm/test/Transforms/Inline/byval-tail-call.ll new file mode 100644 index 00000000000..8aafe7943f4 --- /dev/null +++ b/llvm/test/Transforms/Inline/byval-tail-call.ll @@ -0,0 +1,75 @@ +; RUN: opt < %s -basicaa -tailcallelim -inline -instcombine -dse -S | FileCheck %s +; RUN: opt < %s -aa-pipeline=basic-aa -passes='function(tailcallelim),cgscc(inline,function(instcombine,dse))' -S | FileCheck %s +; PR7272 + +; Calls that capture byval parameters cannot be marked as tail calls. Other +; tails that don't capture byval parameters can still be tail calls. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32" +target triple = "i386-pc-linux-gnu" + +declare void @ext(i32*) + +define void @bar(i32* byval %x) { + call void @ext(i32* %x) + ret void +} + +define void @foo(i32* %x) { +; CHECK-LABEL: define void @foo( +; CHECK: llvm.lifetime.start +; CHECK: store i32 %2, i32* %x + call void @bar(i32* byval %x) + ret void +} + +define internal void @qux(i32* byval %x) { + call void @ext(i32* %x) + tail call void @ext(i32* null) + ret void +} + +define void @frob(i32* %x) { +; CHECK-LABEL: define void @frob( +; CHECK: %[[POS:.*]] = alloca i32 +; CHECK: %[[VAL:.*]] = load i32, i32* %x +; CHECK: store i32 %[[VAL]], i32* %[[POS]] +; CHECK: {{^ *}}call void @ext(i32* nonnull %[[POS]] +; CHECK: tail call void @ext(i32* null) +; CHECK: ret void + tail call void @qux(i32* byval %x) + ret void +} + +; A byval parameter passed into a function which is passed out as byval does +; not block the call from being marked as tail. 
+ +declare void @ext2(i32* byval) + +define void @bar2(i32* byval %x) { + call void @ext2(i32* byval %x) + ret void +} + +define void @foobar(i32* %x) { +; CHECK-LABEL: define void @foobar( +; CHECK: %[[POS:.*]] = alloca i32 +; CHECK: %[[VAL:.*]] = load i32, i32* %x +; CHECK: store i32 %[[VAL]], i32* %[[POS]] +; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]] +; CHECK: ret void + tail call void @bar2(i32* byval %x) + ret void +} + +define void @barfoo() { +; CHECK-LABEL: define void @barfoo( +; CHECK: %[[POS:.*]] = alloca i32 +; CHECK: %[[VAL:.*]] = load i32, i32* %x +; CHECK: store i32 %[[VAL]], i32* %[[POS]] +; CHECK: tail call void @ext2(i32* byval nonnull %[[POS]] +; CHECK: ret void + %x = alloca i32 + tail call void @bar2(i32* byval %x) + ret void +} diff --git a/llvm/test/Transforms/Inline/byval.ll b/llvm/test/Transforms/Inline/byval.ll new file mode 100644 index 00000000000..ea2bec2cee3 --- /dev/null +++ b/llvm/test/Transforms/Inline/byval.ll @@ -0,0 +1,165 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +; The verifier does catch problems with inlining of byval arguments that has a +; different address space compared to the alloca. But running instcombine +; after inline used to trigger asserts unless we disallow such inlining. +; RUN: opt < %s -inline -instcombine -disable-output 2>/dev/null + +target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64" + +; Inlining a byval struct should cause an explicit copy into an alloca. + + %struct.ss = type { i32, i64 } +@.str = internal constant [10 x i8] c"%d, %lld\0A\00" ; <[10 x i8]*> [#uses=1] + +define internal void @f(%struct.ss* byval %b) nounwind { +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; <i32*> [#uses=2] + %tmp1 = load i32, i32* %tmp, align 4 ; <i32> [#uses=1] + %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1] + store i32 %tmp2, i32* %tmp, align 4 + ret void +} + +declare i32 @printf(i8*, ...) 
nounwind + +define i32 @test1() nounwind { +entry: + %S = alloca %struct.ss ; <%struct.ss*> [#uses=4] + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 ; <i64*> [#uses=1] + store i64 2, i64* %tmp4, align 4 + call void @f( %struct.ss* byval %S ) nounwind + ret i32 0 +; CHECK: @test1() +; CHECK: %S1 = alloca %struct.ss +; CHECK: %S = alloca %struct.ss +; CHECK: call void @llvm.memcpy +; CHECK: ret i32 0 +} + +; Inlining a byval struct should NOT cause an explicit copy +; into an alloca if the function is readonly + +define internal i32 @f2(%struct.ss* byval %b) nounwind readonly { +entry: + %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0 ; <i32*> [#uses=2] + %tmp1 = load i32, i32* %tmp, align 4 ; <i32> [#uses=1] + %tmp2 = add i32 %tmp1, 1 ; <i32> [#uses=1] + ret i32 %tmp2 +} + +define i32 @test2() nounwind { +entry: + %S = alloca %struct.ss ; <%struct.ss*> [#uses=4] + %tmp1 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 0 ; <i32*> [#uses=1] + store i32 1, i32* %tmp1, align 8 + %tmp4 = getelementptr %struct.ss, %struct.ss* %S, i32 0, i32 1 ; <i64*> [#uses=1] + store i64 2, i64* %tmp4, align 4 + %X = call i32 @f2( %struct.ss* byval %S ) nounwind + ret i32 %X +; CHECK: @test2() +; CHECK: %S = alloca %struct.ss +; CHECK-NOT: call void @llvm.memcpy +; CHECK: ret i32 +} + + +; Inlining a byval with an explicit alignment needs to use *at least* that +; alignment on the generated alloca. +; PR8769 +declare void @g3(%struct.ss* %p) + +define internal void @f3(%struct.ss* byval align 64 %b) nounwind { + call void @g3(%struct.ss* %b) ;; Could make alignment assumptions! + ret void +} + +define void @test3() nounwind { +entry: + %S = alloca %struct.ss, align 1 ;; May not be aligned. 
+ call void @f3( %struct.ss* byval align 64 %S) nounwind + ret void +; CHECK: @test3() +; CHECK: %S1 = alloca %struct.ss, align 64 +; CHECK: %S = alloca %struct.ss +; CHECK: call void @llvm.memcpy +; CHECK: call void @g3(%struct.ss* %S1) +; CHECK: ret void +} + + +; Inlining a byval struct should NOT cause an explicit copy +; into an alloca if the function is readonly, but should increase an alloca's +; alignment to satisfy an explicit alignment request. + +define internal i32 @f4(%struct.ss* byval align 64 %b) nounwind readonly { + call void @g3(%struct.ss* %b) + ret i32 4 +} + +define i32 @test4() nounwind { +entry: + %S = alloca %struct.ss, align 2 ; <%struct.ss*> [#uses=4] + %X = call i32 @f4( %struct.ss* byval align 64 %S ) nounwind + ret i32 %X +; CHECK: @test4() +; CHECK: %S = alloca %struct.ss, align 64 +; CHECK-NOT: call void @llvm.memcpy +; CHECK: call void @g3 +; CHECK: ret i32 4 +} + +%struct.S0 = type { i32 } + +@b = global %struct.S0 { i32 1 }, align 4 +@a = common global i32 0, align 4 + +define internal void @f5(%struct.S0* byval nocapture readonly align 4 %p) { +entry: + store i32 0, i32* getelementptr inbounds (%struct.S0, %struct.S0* @b, i64 0, i32 0), align 4 + %f2 = getelementptr inbounds %struct.S0, %struct.S0* %p, i64 0, i32 0 + %0 = load i32, i32* %f2, align 4 + store i32 %0, i32* @a, align 4 + ret void +} + +define i32 @test5() { +entry: + tail call void @f5(%struct.S0* byval align 4 @b) + %0 = load i32, i32* @a, align 4 + ret i32 %0 +; CHECK: @test5() +; CHECK: store i32 0, i32* getelementptr inbounds (%struct.S0, %struct.S0* @b, i64 0, i32 0), align 4 +; CHECK-NOT: load i32, i32* getelementptr inbounds (%struct.S0, %struct.S0* @b, i64 0, i32 0), align 4 +} + +; Inlining a byval struct that is in a different address space compared to the +; alloca address space is at the moment not expected. That would need +; adjustments inside the inlined function since the address space attribute of +; the inlined argument changes. 
+ +%struct.S1 = type { i32 } + +@d = addrspace(1) global %struct.S1 { i32 1 }, align 4 +@c = common addrspace(1) global i32 0, align 4 + +define internal void @f5_as1(%struct.S1 addrspace(1)* byval nocapture readonly align 4 %p) { +entry: + store i32 0, i32 addrspace(1)* getelementptr inbounds (%struct.S1, %struct.S1 addrspace(1)* @d, i64 0, i32 0), align 4 + %f2 = getelementptr inbounds %struct.S1, %struct.S1 addrspace(1)* %p, i64 0, i32 0 + %0 = load i32, i32 addrspace(1)* %f2, align 4 + store i32 %0, i32 addrspace(1)* @c, align 4 + ret void +} + +define i32 @test5_as1() { +entry: + tail call void @f5_as1(%struct.S1 addrspace(1)* byval align 4 @d) + %0 = load i32, i32 addrspace(1)* @c, align 4 + ret i32 %0 +; CHECK: @test5_as1() +; CHECK: call void @f5_as1 +} diff --git a/llvm/test/Transforms/Inline/byval_lifetime.ll b/llvm/test/Transforms/Inline/byval_lifetime.ll new file mode 100644 index 00000000000..4517e448018 --- /dev/null +++ b/llvm/test/Transforms/Inline/byval_lifetime.ll @@ -0,0 +1,26 @@ +; RUN: opt -S -inline < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +; By inlining foo, an alloca is created in main to hold the byval argument, so +; a lifetime marker should be generated as well by default. 
+ +%struct.foo = type { i32, [16 x i32] } + +@gFoo = global %struct.foo zeroinitializer, align 8 + +define i32 @foo(%struct.foo* byval align 8 %f, i32 %a) { +entry: + %a1 = getelementptr inbounds %struct.foo, %struct.foo* %f, i32 0, i32 1 + %arrayidx = getelementptr inbounds [16 x i32], [16 x i32]* %a1, i32 0, i32 %a + %tmp2 = load i32, i32* %arrayidx, align 1 + ret i32 %tmp2 +} + +define i32 @main(i32 %argc, i8** %argv) { +; CHECK-LABEL: @main +; CHECK: llvm.lifetime.start +; CHECK: memcpy +entry: + %call = call i32 @foo(%struct.foo* byval align 8 @gFoo, i32 %argc) + ret i32 %call +} diff --git a/llvm/test/Transforms/Inline/callgraph-update.ll b/llvm/test/Transforms/Inline/callgraph-update.ll new file mode 100644 index 00000000000..1a1799e5cfb --- /dev/null +++ b/llvm/test/Transforms/Inline/callgraph-update.ll @@ -0,0 +1,36 @@ +; RUN: opt < %s -inline -loop-rotate -verify-dom-info -verify-loop-info -disable-output +; PR3601 +declare void @solve() + +define internal fastcc void @read() { + br label %bb4 + +bb3: + br label %bb4 + +bb4: + call void @solve() + br i1 false, label %bb5, label %bb3 + +bb5: + unreachable +} + +define internal fastcc void @parse() { + call fastcc void @read() + ret void +} + +define void @main() personality i32 (...)* @__gxx_personality_v0 { + invoke fastcc void @parse() + to label %invcont unwind label %lpad + +invcont: + unreachable + +lpad: + %exn = landingpad {i8*, i32} + cleanup + unreachable +} +declare i32 @__gxx_personality_v0(...) 
diff --git a/llvm/test/Transforms/Inline/casts.ll b/llvm/test/Transforms/Inline/casts.ll new file mode 100644 index 00000000000..6354a531bc0 --- /dev/null +++ b/llvm/test/Transforms/Inline/casts.ll @@ -0,0 +1,18 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +define i32 @testByte(i8 %X) { +entry: + %tmp = icmp ne i8 %X, 0 + %tmp.i = zext i1 %tmp to i32 + ret i32 %tmp.i +} + +define i32 @main() { +; CHECK-LABEL: define i32 @main() +entry: + %rslt = call i32 @testByte(i8 123) +; CHECK-NOT: call + ret i32 %rslt +; CHECK: ret i32 1 +} diff --git a/llvm/test/Transforms/Inline/cfg_preserve_test.ll b/llvm/test/Transforms/Inline/cfg_preserve_test.ll new file mode 100644 index 00000000000..c5b6b1bffc1 --- /dev/null +++ b/llvm/test/Transforms/Inline/cfg_preserve_test.ll @@ -0,0 +1,20 @@ +; This test ensures that inlining an "empty" function does not destroy the CFG +; +; RUN: opt < %s -inline -S | FileCheck %s + +define i32 @func(i32 %i) { + ret i32 %i +} + + +define i32 @main() { +; CHECK-LABEL: define i32 @main() +entry: + %X = call i32 @func(i32 7) +; CHECK-NOT: call +; CHECK-NOT: br + + ret i32 %X +; CHECK: ret i32 7 +} + diff --git a/llvm/test/Transforms/Inline/cgscc-cycle.ll b/llvm/test/Transforms/Inline/cgscc-cycle.ll new file mode 100644 index 00000000000..bc3bdc99fff --- /dev/null +++ b/llvm/test/Transforms/Inline/cgscc-cycle.ll @@ -0,0 +1,232 @@ +; This test contains extremely tricky call graph structures for the inliner to +; handle correctly. They form cycles where the inliner introduces code that is +; immediately or can eventually be transformed back into the original code. And +; each step changes the call graph and so will trigger iteration. This requires +; some out-of-band way to prevent infinitely re-inlining and re-transforming the +; code. 
+; +; RUN: opt < %s -passes='cgscc(inline,function(sroa,instcombine))' -inline-threshold=50 -S | FileCheck %s + + +; The `test1_*` collection of functions form a directly cycling pattern. + +define void @test1_a(i8** %ptr) { +; CHECK-LABEL: define void @test1_a( +entry: + call void @test1_b(i8* bitcast (void (i8*, i1, i32)* @test1_b to i8*), i1 false, i32 0) +; Inlining and simplifying this call will reliably produce the exact same call, +; over and over again. However, each inlining increments the count, and so we +; expect this test case to stop after one round of inlining with a final +; argument of '1'. +; CHECK-NOT: call +; CHECK: call void @test1_b(i8* bitcast (void (i8*, i1, i32)* @test1_b to i8*), i1 false, i32 1) +; CHECK-NOT: call + + ret void +} + +define void @test1_b(i8* %arg, i1 %flag, i32 %inline_count) { +; CHECK-LABEL: define void @test1_b( +entry: + %a = alloca i8* + store i8* %arg, i8** %a +; This alloca and store should remain through any optimization. +; CHECK: %[[A:.*]] = alloca +; CHECK: store i8* %arg, i8** %[[A]] + + br i1 %flag, label %bb1, label %bb2 + +bb1: + call void @test1_a(i8** %a) noinline + br label %bb2 + +bb2: + %cast = bitcast i8** %a to void (i8*, i1, i32)** + %p = load void (i8*, i1, i32)*, void (i8*, i1, i32)** %cast + %inline_count_inc = add i32 %inline_count, 1 + call void %p(i8* %arg, i1 %flag, i32 %inline_count_inc) +; And we should continue to load and call indirectly through optimization. 
+; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i1, i32)** +; CHECK: %[[P:.*]] = load void (i8*, i1, i32)*, void (i8*, i1, i32)** %[[CAST]] +; CHECK: call void %[[P]]( + + ret void +} + +define void @test2_a(i8** %ptr) { +; CHECK-LABEL: define void @test2_a( +entry: + call void @test2_b(i8* bitcast (void (i8*, i8*, i1, i32)* @test2_b to i8*), i8* bitcast (void (i8*, i8*, i1, i32)* @test2_c to i8*), i1 false, i32 0) +; Inlining and simplifying this call will reliably produce the exact same call, +; but only after doing two rounds of inlining, first from @test2_b then +; @test2_c. We check the exact number of inlining rounds before we cut off to +; break the cycle by inspecting the last parameter that gets incremented with +; each inlined function body. +; CHECK-NOT: call +; CHECK: call void @test2_b(i8* bitcast (void (i8*, i8*, i1, i32)* @test2_b to i8*), i8* bitcast (void (i8*, i8*, i1, i32)* @test2_c to i8*), i1 false, i32 2) +; CHECK-NOT: call + ret void +} + +define void @test2_b(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count) { +; CHECK-LABEL: define void @test2_b( +entry: + %a = alloca i8* + store i8* %arg2, i8** %a +; This alloca and store should remain through any optimization. +; CHECK: %[[A:.*]] = alloca +; CHECK: store i8* %arg2, i8** %[[A]] + + br i1 %flag, label %bb1, label %bb2 + +bb1: + call void @test2_a(i8** %a) noinline + br label %bb2 + +bb2: + %p = load i8*, i8** %a + %cast = bitcast i8* %p to void (i8*, i8*, i1, i32)* + %inline_count_inc = add i32 %inline_count, 1 + call void %cast(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count_inc) +; And we should continue to load and call indirectly through optimization. 
+; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i8*, i1, i32)** +; CHECK: %[[P:.*]] = load void (i8*, i8*, i1, i32)*, void (i8*, i8*, i1, i32)** %[[CAST]] +; CHECK: call void %[[P]]( + + ret void +} + +define void @test2_c(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count) { +; CHECK-LABEL: define void @test2_c( +entry: + %a = alloca i8* + store i8* %arg1, i8** %a +; This alloca and store should remain through any optimization. +; CHECK: %[[A:.*]] = alloca +; CHECK: store i8* %arg1, i8** %[[A]] + + br i1 %flag, label %bb1, label %bb2 + +bb1: + call void @test2_a(i8** %a) noinline + br label %bb2 + +bb2: + %p = load i8*, i8** %a + %cast = bitcast i8* %p to void (i8*, i8*, i1, i32)* + %inline_count_inc = add i32 %inline_count, 1 + call void %cast(i8* %arg1, i8* %arg2, i1 %flag, i32 %inline_count_inc) +; And we should continue to load and call indirectly through optimization. +; CHECK: %[[CAST:.*]] = bitcast i8** %[[A]] to void (i8*, i8*, i1, i32)** +; CHECK: %[[P:.*]] = load void (i8*, i8*, i1, i32)*, void (i8*, i8*, i1, i32)** %[[CAST]] +; CHECK: call void %[[P]]( + + ret void +} + +; Another infinite inlining case. The initial callgraph is like following: +; +; test3_a <---> test3_b +; | ^ +; v | +; test3_c <---> test3_d +; +; For all the call edges in the call graph, only test3_c and test3_d can be +; inlined into test3_a, and no other call edge can be inlined. +; +; After test3_c is inlined into test3_a, the original call edge test3_a->test3_c +; will be removed, a new call edge will be added and the call graph becomes: +; +; test3_a <---> test3_b +; \ ^ +; v / +; test3_c <---> test3_d +; But test3_a, test3_b, test3_c and test3_d still belong to the same SCC. 
+; +; Then after test3_a->test3_d is inlined, when test3_a->test3_d is converted to +; a ref edge, the original SCC will be split into two: {test3_c, test3_d} and +; {test3_a, test3_b}, immediately after the newly added ref edge +; test3_a->test3_c will be converted to a call edge, and the two SCCs will be +; merged into the original one again. During this cycle, the original SCC will +; be added into UR.CWorklist again and this creates an infinite loop. + +@a = global i64 0 +@b = global i64 0 + +define void @test3_c(i32 %i) { +entry: + %cmp = icmp eq i32 %i, 5 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call i64 @random() + %t0 = load i64, i64* @a + %add = add nsw i64 %t0, %call + store i64 %add, i64* @a + br label %if.end + +if.end: ; preds = %entry, %if.then + tail call void @test3_d(i32 %i) + %t6 = load i64, i64* @a + %add85 = add nsw i64 %t6, 1 + store i64 %add85, i64* @a + ret void +} + +declare i64 @random() + +define void @test3_d(i32 %i) { +entry: + %cmp = icmp eq i32 %i, 5 + br i1 %cmp, label %if.end, label %if.then + +if.then: ; preds = %entry + %call = tail call i64 @random() + %t0 = load i64, i64* @a + %add = add nsw i64 %t0, %call + store i64 %add, i64* @a + br label %if.end + +if.end: ; preds = %entry, %if.then + tail call void @test3_c(i32 %i) + tail call void @test3_b() + %t6 = load i64, i64* @a + %add79 = add nsw i64 %t6, 3 + store i64 %add79, i64* @a + ret void +} + +; Function Attrs: noinline +define void @test3_b() #0 { +entry: + tail call void @test3_a() + %t0 = load i64, i64* @a + %add = add nsw i64 %t0, 2 + store i64 %add, i64* @a + ret void +} + +; Check test3_c is inlined into test3_a once and only once. 
+; CHECK-LABEL: @test3_a( +; CHECK: tail call void @test3_b() +; CHECK-NEXT: tail call void @test3_d(i32 5) +; CHECK-NEXT: %[[LD1:.*]] = load i64, i64* @a +; CHECK-NEXT: %[[ADD1:.*]] = add nsw i64 %[[LD1]], 1 +; CHECK-NEXT: store i64 %[[ADD1]], i64* @a +; CHECK-NEXT: %[[LD2:.*]] = load i64, i64* @b +; CHECK-NEXT: %[[ADD2:.*]] = add nsw i64 %[[LD2]], 5 +; CHECK-NEXT: store i64 %[[ADD2]], i64* @b +; CHECK-NEXT: ret void + +; Function Attrs: noinline +define void @test3_a() #0 { +entry: + tail call void @test3_b() + tail call void @test3_c(i32 5) + %t0 = load i64, i64* @b + %add = add nsw i64 %t0, 5 + store i64 %add, i64* @b + ret void +} + +attributes #0 = { noinline } diff --git a/llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll b/llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll new file mode 100644 index 00000000000..5a429bc3a4f --- /dev/null +++ b/llvm/test/Transforms/Inline/cgscc-incremental-invalidate.ll @@ -0,0 +1,206 @@ +; Test for a subtle bug when computing analyses during inlining and mutating +; the SCC structure. Without care, this can fail to invalidate analyses. +; +; RUN: opt < %s -passes='cgscc(inline,function(verify<domtree>))' -debug-pass-manager -S 2>&1 | FileCheck %s + +; First we check that the passes run in the way we expect. Otherwise this test +; may stop testing anything. +; +; CHECK-LABEL: Starting llvm::Module pass manager run. 
+; CHECK: Running pass: InlinerPass on (test1_f, test1_g, test1_h) +; CHECK: Running analysis: DominatorTreeAnalysis on test1_f +; CHECK: Running analysis: DominatorTreeAnalysis on test1_g +; CHECK: Invalidating all non-preserved analyses for: (test1_f) +; CHECK: Invalidating all non-preserved analyses for: test1_f +; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_f +; CHECK: Invalidating analysis: LoopAnalysis on test1_f +; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_f +; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_f +; CHECK: Invalidating all non-preserved analyses for: (test1_g, test1_h) +; CHECK: Invalidating all non-preserved analyses for: test1_g +; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_g +; CHECK: Invalidating analysis: LoopAnalysis on test1_g +; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_g +; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_g +; CHECK: Invalidating all non-preserved analyses for: test1_h +; CHECK: Invalidating analysis: DominatorTreeAnalysis on test1_h +; CHECK: Invalidating analysis: LoopAnalysis on test1_h +; CHECK: Invalidating analysis: BranchProbabilityAnalysis on test1_h +; CHECK: Invalidating analysis: BlockFrequencyAnalysis on test1_h +; CHECK-NOT: Invalidating analysis: +; CHECK: Starting llvm::Function pass manager run. +; CHECK-NEXT: Running pass: DominatorTreeVerifierPass on test1_g +; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_g +; CHECK-NEXT: Finished llvm::Function pass manager run. +; CHECK-NOT: Invalidating analysis: +; CHECK: Starting llvm::Function pass manager run. +; CHECK-NEXT: Running pass: DominatorTreeVerifierPass on test1_h +; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_h +; CHECK-NEXT: Finished llvm::Function pass manager run. 
+; CHECK-NOT: Invalidating analysis: +; CHECK: Running pass: DominatorTreeVerifierPass on test1_f +; CHECK-NEXT: Running analysis: DominatorTreeAnalysis on test1_f + +; An external function used to control branches. +declare i1 @flag() +; CHECK-LABEL: declare i1 @flag() + +; The utility function with interesting control flow that gets inlined below to +; perturb the dominator tree. +define internal void @callee() { +entry: + %ptr = alloca i8 + %flag = call i1 @flag() + br i1 %flag, label %then, label %else + +then: + store volatile i8 42, i8* %ptr + br label %return + +else: + store volatile i8 -42, i8* %ptr + br label %return + +return: + ret void +} + +; The 'test1_' prefixed functions work to carefully test that incrementally +; reducing an SCC in the inliner cannot accidentally leave stale function +; analysis results due to failing to invalidate them for all the functions. + +; The inliner visits this last function. It can't actually break any cycles +; here, but because we visit this function we compute fresh analyses for it. +; These analyses are then invalidated when we inline callee disrupting the +; CFG, and it is important that they be freed. +define void @test1_h() { +; CHECK-LABEL: define void @test1_h() +entry: + call void @test1_g() +; CHECK: call void @test1_g() + + ; Pull interesting CFG into this function. + call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} + +; We visit this function second and here we inline the edge to 'test1_f' +; separating it into its own SCC. The current SCC is now just 'test1_g' and +; 'test1_h'. +define void @test1_g() { +; CHECK-LABEL: define void @test1_g() +entry: + ; This edge gets inlined away. + call void @test1_f() +; CHECK-NOT: call void @test1_f() +; CHECK: call void @test1_g() + + ; We force this edge to survive inlining. + call void @test1_h() noinline +; CHECK: call void @test1_h() + + ; Pull interesting CFG into this function.
+ call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} + +; We visit this function first in the inliner, and while we inline callee +; perturbing the CFG, we don't inline anything else and the SCC structure +; remains intact. +define void @test1_f() { +; CHECK-LABEL: define void @test1_f() +entry: + ; We force this edge to survive inlining. + call void @test1_g() noinline +; CHECK: call void @test1_g() + + ; Pull interesting CFG into this function. + call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} + +; The 'test2_' prefixed code works to carefully trigger forming an SCC with +; a dominator tree for one of the functions but not the other and without even +; a function analysis manager proxy for the SCC that things get merged into. +; Without proper handling when updating the call graph this will find a stale +; dominator tree. + +@test2_global = external global i32, align 4 + +define void @test2_hoge(i1 (i32*)* %arg) { +; CHECK-LABEL: define void @test2_hoge( +bb: + %tmp2 = call zeroext i1 %arg(i32* @test2_global) +; CHECK: call zeroext i1 %arg( + br label %bb3 + +bb3: + %tmp5 = call zeroext i1 %arg(i32* @test2_global) +; CHECK: call zeroext i1 %arg( + br i1 %tmp5, label %bb3, label %bb6 + +bb6: + ret void +} + +define zeroext i1 @test2_widget(i32* %arg) { +; CHECK-LABEL: define zeroext i1 @test2_widget( +bb: + %tmp1 = alloca i8, align 1 + %tmp2 = alloca i32, align 4 + call void @test2_quux() +; CHECK-NOT: call +; +; CHECK: call zeroext i1 @test2_widget(i32* @test2_global) +; CHECK-NEXT: br label %[[NEW_BB:.*]] +; +; CHECK: [[NEW_BB]]: +; CHECK-NEXT: call zeroext i1 @test2_widget(i32* @test2_global) +; +; CHECK: {{.*}}: + + call void @test2_hoge.1(i32* %arg) +; CHECK-NEXT: call void @test2_hoge.1( + + %tmp4 = call zeroext i1 @test2_barney(i32* %tmp2) + %tmp5 = zext i1 %tmp4 to i32 + store i32 %tmp5, i32* %tmp2, align 4 + %tmp6 = call zeroext i1 @test2_barney(i32* null) + call void
@test2_ham(i8* %tmp1) +; CHECK: call void @test2_ham( + + call void @test2_quux() +; CHECK-NOT: call +; +; CHECK: call zeroext i1 @test2_widget(i32* @test2_global) +; CHECK-NEXT: br label %[[NEW_BB:.*]] +; +; CHECK: [[NEW_BB]]: +; CHECK-NEXT: call zeroext i1 @test2_widget(i32* @test2_global) +; +; CHECK: {{.*}}: + ret i1 true +; CHECK-NEXT: ret i1 true +} + +define internal void @test2_quux() { +; CHECK-NOT: @test2_quux +bb: + call void @test2_hoge(i1 (i32*)* @test2_widget) + ret void +} + +declare void @test2_hoge.1(i32*) + +declare zeroext i1 @test2_barney(i32*) + +declare void @test2_ham(i8*) diff --git a/llvm/test/Transforms/Inline/cgscc-invalidate.ll b/llvm/test/Transforms/Inline/cgscc-invalidate.ll new file mode 100644 index 00000000000..69d84f65e25 --- /dev/null +++ b/llvm/test/Transforms/Inline/cgscc-invalidate.ll @@ -0,0 +1,104 @@ +; This test tries to ensure that the inliner successfully invalidates function +; analyses after inlining into the function body. +; +; The strategy for these tests is to compute domtree over all the functions, +; then run the inliner, and then verify the domtree. Then we can arrange the +; inline to disturb the domtree (easy) and detect any stale cached entries in +; the verifier. We do the initial computation both *inside* the CGSCC walk and +; in a pre-step to make sure both work. +; +; RUN: opt < %s -passes='function(require<domtree>),cgscc(inline,function(verify<domtree>))' -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(function(require<domtree>),inline,function(verify<domtree>))' -S | FileCheck %s + +; An external function used to control branches. +declare i1 @flag() +; CHECK-LABEL: declare i1 @flag() + +; The utility function with interesting control flow that gets inlined below to +; perturb the dominator tree. 
+define internal void @callee() { +; CHECK-LABEL: @callee +entry: + %ptr = alloca i8 + %flag = call i1 @flag() + br i1 %flag, label %then, label %else + +then: + store volatile i8 42, i8* %ptr + br label %return + +else: + store volatile i8 -42, i8* %ptr + br label %return + +return: + ret void +} + + +; The 'test1_' prefixed functions test the basic scenario of inlining +; destroying dominator tree. + +define void @test1_caller() { +; CHECK-LABEL: define void @test1_caller() +entry: + call void @callee() +; CHECK-NOT: @callee + ret void +; CHECK: ret void +} + + +; The 'test2_' prefixed functions test the scenario of not inlining preserving +; dominators. + +define void @test2_caller() { +; CHECK-LABEL: define void @test2_caller() +entry: + call void @callee() noinline +; CHECK: call void @callee + ret void +; CHECK: ret void +} + + +; The 'test3_' prefixed functions test the scenario of not inlining preserving +; dominators after splitting an SCC into two smaller SCCs. + +; This function ends up split into a separate SCC, which can cause its analyses +; to become stale if the splitting doesn't properly invalidate things. Also, as +; a consequence of being split out, test3_f is too large to inline by the time +; we get here. +define void @test3_g() { +; CHECK-LABEL: define void @test3_g() +entry: + ; Create the second edge in the SCC cycle. + call void @test3_f() +; CHECK: call void @test3_f() + + ; Pull interesting CFG into this function. + call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} + +; The second function gets visited first and we end up inlining everything we +; can into this routine. That splits test3_g into a separate SCC that is enqueued +; for later processing. +define void @test3_f() { +; CHECK-LABEL: define void @test3_f() +entry: + ; Create the first edge in the SCC cycle. + call void @test3_g() +; CHECK-NOT: @test3_g() +; CHECK: call void @test3_f() + + ; Pull interesting CFG into this function.
+ call void @callee() +; CHECK-NOT: call void @callee() + + ret void +; CHECK: ret void +} diff --git a/llvm/test/Transforms/Inline/cgscc-update.ll b/llvm/test/Transforms/Inline/cgscc-update.ll new file mode 100644 index 00000000000..b251a5d070c --- /dev/null +++ b/llvm/test/Transforms/Inline/cgscc-update.ll @@ -0,0 +1,184 @@ +; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs,inline)' -S | FileCheck %s +; This test runs the inliner and the function attribute deduction. It ensures +; that when the inliner mutates the call graph it correctly updates the CGSCC +; iteration so that we can compute refined function attributes. In this way it +; is leveraging function attribute computation to observe correct call graph +; updates. + +; Boring unknown external function call. +; CHECK: declare void @unknown() +declare void @unknown() + +; Sanity check: this should get annotated as readnone. +; CHECK: Function Attrs: nounwind readnone +; CHECK-NEXT: declare void @readnone() +declare void @readnone() readnone nounwind + +; The 'test1_' prefixed functions are designed to trigger forming a new direct +; call in the inlined body of the function. After that, we form a new SCC and +; using that can deduce precise function attrs. + +; This function should no longer exist. +; CHECK-NOT: @test1_f() +define internal void @test1_f(void()* %p) { +entry: + call void %p() + ret void +} + +; This function should have had 'readnone' deduced for its SCC. +; CHECK: Function Attrs: noinline nounwind readnone +; CHECK-NEXT: define void @test1_g() +define void @test1_g() noinline { +entry: + call void @test1_f(void()* @test1_h) + ret void +} + +; This function should have had 'readnone' deduced for its SCC. 
+; CHECK: Function Attrs: noinline nounwind readnone +; CHECK-NEXT: define void @test1_h() +define void @test1_h() noinline { +entry: + call void @test1_g() + call void @readnone() + ret void +} + + +; The 'test2_' prefixed functions are designed to trigger forming a new direct +; call due to RAUW-ing the returned value of a called function into the caller. +; This too should form a new SCC which can then be reasoned about to compute +; precise function attrs. + +; This function should no longer exist. +; CHECK-NOT: @test2_f() +define internal void()* @test2_f() { +entry: + ret void()* @test2_h +} + +; This function should have had 'readnone' deduced for its SCC. +; CHECK: Function Attrs: noinline nounwind readnone +; CHECK-NEXT: define void @test2_g() +define void @test2_g() noinline { +entry: + %p = call void()* @test2_f() + call void %p() + ret void +} + +; This function should have had 'readnone' deduced for its SCC. +; CHECK: Function Attrs: noinline nounwind readnone +; CHECK-NEXT: define void @test2_h() +define void @test2_h() noinline { +entry: + call void @test2_g() + call void @readnone() + ret void +} + + +; The 'test3_' prefixed functions are designed to inline in a way that causes +; call sites to become trivially dead during the middle of inlining callsites of +; a single function to make sure that the inliner does not get confused by this +; pattern. 
+ +; CHECK-NOT: @test3_maybe_unknown( +define internal void @test3_maybe_unknown(i1 %b) { +entry: + br i1 %b, label %then, label %exit + +then: + call void @unknown() + br label %exit + +exit: + ret void +} + +; CHECK-NOT: @test3_f( +define internal i1 @test3_f() { +entry: + ret i1 false +} + +; CHECK-NOT: @test3_g( +define internal i1 @test3_g(i1 %b) { +entry: + br i1 %b, label %then1, label %if2 + +then1: + call void @test3_maybe_unknown(i1 true) + br label %if2 + +if2: + %f = call i1 @test3_f() + br i1 %f, label %then2, label %exit + +then2: + call void @test3_maybe_unknown(i1 true) + br label %exit + +exit: + ret i1 false +} + +; FIXME: Currently the inliner doesn't successfully mark this as readnone +; because while it simplifies trivially dead CFGs when inlining callees it +; doesn't simplify the caller's trivially dead CFG and so we end with a dead +; block calling @unknown. +; CHECK-NOT: Function Attrs: readnone +; CHECK: define void @test3_h() +define void @test3_h() { +entry: + %g = call i1 @test3_g(i1 false) + br i1 %g, label %then, label %exit + +then: + call void @test3_maybe_unknown(i1 true) + br label %exit + +exit: + call void @test3_maybe_unknown(i1 false) + ret void +} + + +; The 'test4_' prefixed functions are designed to trigger forming a new direct +; call in the inlined body of the function similar to 'test1_'. However, after +; that we continue to inline another edge of the graph forcing us to do a more +; interesting call graph update for the new call edge. Eventually, we still +; form a new SCC and should use that to deduce precise function attrs. + +; This function should have had 'readnone' deduced for its SCC.
+; CHECK: Function Attrs: noinline nounwind readnone +; CHECK-NEXT: define void @test4_f1() +define void @test4_f1() noinline { +entry: + call void @test4_h() + ret void +} + +; CHECK-NOT: @test4_f2 +define internal void @test4_f2() { +entry: + call void @test4_f1() + ret void +} + +; CHECK-NOT: @test4_g +define internal void @test4_g(void()* %p) { +entry: + call void %p() + ret void +} + +; This function should have had 'readnone' deduced for its SCC. +; CHECK: Function Attrs: noinline nounwind readnone +; CHECK-NEXT: define void @test4_h() +define void @test4_h() noinline { +entry: + call void @test4_g(void()* @test4_f2) + ret void +} diff --git a/llvm/test/Transforms/Inline/clear-analyses.ll b/llvm/test/Transforms/Inline/clear-analyses.ll new file mode 100644 index 00000000000..4b1d37ca29a --- /dev/null +++ b/llvm/test/Transforms/Inline/clear-analyses.ll @@ -0,0 +1,32 @@ +; Test that when a pass like correlated-propagation populates an analysis such +; as LVI with references back into the IR of a function that the inliner will +; delete, this doesn't crash or go awry despite the inliner clearing the analyses +; separately from when it deletes the function. +; +; RUN: opt -debug-pass-manager -S < %s 2>&1 \ +; RUN: -passes='cgscc(inline,function(correlated-propagation))' \ +; RUN: | FileCheck %s +; +; CHECK-LABEL: Starting llvm::Module pass manager run. 
+; CHECK: Running pass: InlinerPass on (callee) +; CHECK: Running pass: CorrelatedValuePropagationPass on callee +; CHECK: Running analysis: LazyValueAnalysis +; CHECK: Running pass: InlinerPass on (caller) +; CHECK: Clearing all analysis results for: callee +; CHECK: Running pass: CorrelatedValuePropagationPass on caller +; CHECK: Running analysis: LazyValueAnalysis + +define internal i32 @callee(i32 %x) { +; CHECK-NOT: @callee +entry: + ret i32 %x +} + +define i32 @caller(i32 %x) { +; CHECK-LABEL: define i32 @caller +entry: + %call = call i32 @callee(i32 %x) +; CHECK-NOT: call + ret i32 %call +; CHECK: ret i32 %x +} diff --git a/llvm/test/Transforms/Inline/comdat-ipo.ll b/llvm/test/Transforms/Inline/comdat-ipo.ll new file mode 100644 index 00000000000..0b9ccb9ccc0 --- /dev/null +++ b/llvm/test/Transforms/Inline/comdat-ipo.ll @@ -0,0 +1,20 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +define i32 @caller() { +; CHECK-LABEL: @caller( +; CHECK-NEXT: %val2 = call i32 @linkonce_callee(i32 42) +; CHECK-NEXT: ret i32 %val2 + + %val = call i32 @odr_callee() + %val2 = call i32 @linkonce_callee(i32 %val); + ret i32 %val2 +} + +define linkonce_odr i32 @odr_callee() { + ret i32 42 +} + +define linkonce i32 @linkonce_callee(i32 %val) { + ret i32 %val +} diff --git a/llvm/test/Transforms/Inline/crash-lifetime-marker.ll b/llvm/test/Transforms/Inline/crash-lifetime-marker.ll new file mode 100644 index 00000000000..7196616521e --- /dev/null +++ b/llvm/test/Transforms/Inline/crash-lifetime-marker.ll @@ -0,0 +1,25 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +; InlineFunction would assert inside the loop that leaves lifetime markers if +; there was a zero-sized AllocaInst. Check that it doesn't assert and doesn't +; leave lifetime markers in that case.
+ +declare i32 @callee2(i8*) + +define i32 @callee1(i32 %count) { + %a0 = alloca i8, i32 %count, align 4 + %call0 = call i32 @callee2(i8* %a0) + ret i32 %call0 +} + +; CHECK-LABEL: define i32 @caller1( +; CHECK: [[ALLOCA:%[a-z0-9\.]+]] = alloca i8 +; CHECK-NOT: call void @llvm.lifetime.start.p0i8( +; CHECK: call i32 @callee2(i8* [[ALLOCA]]) +; CHECK-NOT: call void @llvm.lifetime.end.p0i8( + +define i32 @caller1(i32 %count) { + %call0 = call i32 @callee1(i32 0) + ret i32 %call0 +} diff --git a/llvm/test/Transforms/Inline/crash.ll b/llvm/test/Transforms/Inline/crash.ll new file mode 100644 index 00000000000..ec1c867bd05 --- /dev/null +++ b/llvm/test/Transforms/Inline/crash.ll @@ -0,0 +1,127 @@ +; RUN: opt < %s -inline -argpromotion -instcombine -disable-output + +; This test was failing because the inliner would inline @list_DeleteElement +; into @list_DeleteDuplicates and then into @inf_GetBackwardPartnerLits, +; turning the indirect call into a direct one. This allowed instcombine to see +; the bitcast and eliminate it, deleting the original call and introducing +; another one. This crashed the inliner because the new call was not in the +; callgraph. 
+ +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin10.0" + + +define void @list_DeleteElement(i32 (i8*, i8*)* nocapture %Test) nounwind ssp { +entry: + %0 = call i32 %Test(i8* null, i8* undef) nounwind + ret void +} + + +define void @list_DeleteDuplicates(i32 (i8*, i8*)* nocapture %Test) nounwind ssp { +foo: + call void @list_DeleteElement(i32 (i8*, i8*)* %Test) nounwind ssp + call fastcc void @list_Rplacd1284() nounwind ssp + unreachable + +} + +define internal i32 @inf_LiteralsHaveSameSubtermAndAreFromSameClause(i32* nocapture %L1, i32* nocapture %L2) nounwind readonly ssp { +entry: + unreachable +} + + +define internal fastcc void @inf_GetBackwardPartnerLits(i32* nocapture %Flags) nounwind ssp { +test: + call void @list_DeleteDuplicates(i32 (i8*, i8*)* bitcast (i32 (i32*, i32*)* @inf_LiteralsHaveSameSubtermAndAreFromSameClause to i32 (i8*, i8*)*)) nounwind + ret void +} + + +define void @inf_BackwardEmptySortPlusPlus() nounwind ssp { +entry: + call fastcc void @inf_GetBackwardPartnerLits(i32* null) nounwind ssp + unreachable +} + +define void @inf_BackwardWeakening() nounwind ssp { +entry: + call fastcc void @inf_GetBackwardPartnerLits(i32* null) nounwind ssp + unreachable +} + +declare fastcc void @list_Rplacd1284() nounwind ssp + + + + +;============================ +; PR5208 + +define void @AAA() personality i32 (...)* @__gxx_personality_v0 { +entry: + %A = alloca i8, i32 undef, align 1 + invoke fastcc void @XXX() + to label %invcont98 unwind label %lpad156 + +invcont98: + unreachable + +lpad156: + %exn = landingpad {i8*, i32} + cleanup + unreachable +} + +declare i32 @__gxx_personality_v0(...) 
+ +declare fastcc void @YYY() + +define internal fastcc void @XXX() personality i32 (...)* @__gxx_personality_v0 { +entry: + %B = alloca i8, i32 undef, align 1 + invoke fastcc void @YYY() + to label %bb260 unwind label %lpad + +bb260: + ret void + +lpad: + %exn = landingpad {i8*, i32} + cleanup + resume { i8*, i32 } %exn +} + + + +;; This exposed a crash handling devirtualized calls. +define void @f1(void ()* %f) ssp { +entry: + call void %f() + ret void +} + +define void @f4(i32 %size) ssp personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @f1(void ()* @f3) + to label %invcont3 unwind label %lpad18 + +invcont3: ; preds = %bb1 + ret void + +lpad18: ; preds = %invcont3, %bb1 + %exn = landingpad {i8*, i32} + cleanup + unreachable +} + +define void @f3() ssp { +entry: + unreachable +} + +declare void @f5() ssp + + + diff --git a/llvm/test/Transforms/Inline/crash2.ll b/llvm/test/Transforms/Inline/crash2.ll new file mode 100644 index 00000000000..e3a136010ee --- /dev/null +++ b/llvm/test/Transforms/Inline/crash2.ll @@ -0,0 +1,29 @@ +; RUN: opt -inline -sroa -max-cg-scc-iterations=1 -disable-output < %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.3" + +declare i8* @f1(i8*) ssp align 2 + +define linkonce_odr void @f2(i8* %t) inlinehint ssp { +entry: + unreachable +} + +define linkonce_odr void @f3(void (i8*)* %__f) ssp { +entry: + %__f_addr = alloca void (i8*)*, align 8 + store void (i8*)* %__f, void (i8*)** %__f_addr + + %0 = load void (i8*)*, void (i8*)** %__f_addr, align 8 + call void %0(i8* undef) + call i8* @f1(i8* undef) ssp + unreachable +} + +define linkonce_odr void @f4(i8* %this) ssp align 2 { +entry: + %0 = alloca i32 + call void @f3(void (i8*)* @f2) ssp + ret void +} + diff --git a/llvm/test/Transforms/Inline/debug-info-duplicate-calls.ll 
b/llvm/test/Transforms/Inline/debug-info-duplicate-calls.ll new file mode 100644 index 00000000000..6e0dbc19427 --- /dev/null +++ b/llvm/test/Transforms/Inline/debug-info-duplicate-calls.ll @@ -0,0 +1,121 @@ +; RUN: opt < %s -always-inline -S | FileCheck %s +; RUN: opt -passes='always-inline' -S < %s | FileCheck %s + +; Original input generated from clang -emit-llvm -S -c -mllvm -disable-llvm-optzns +; +; #define CALLS1 f2(); f2(); +; #define CALLS2 f4(); f4(); +; void f1(); +; inline __attribute__((always_inline)) void f2() { +; f1(); +; } +; inline __attribute__((always_inline)) void f3() { +; CALLS1 +; } +; inline __attribute__((always_inline)) void f4() { +; f3(); +; } +; void f() { +; CALLS2 +; } + +; There should be unique locations for all 4 of these instructions, correctly +; describing the inlining that has occurred, even in the face of duplicate call +; site locations. + +; The nomenclature used for the tags here is <function name>[cs<number>] where +; 'cs' is an abbreviation for 'call site' and the number indicates which call +; site from within the named function this is. 
(so, given the above inlining, we +; should have 4 calls to 'f1', two from the first call to f4 and two from the +; second call to f4) + +; CHECK: call void @_Z2f1v(), !dbg [[fcs1_f4_f3cs1_f2:![0-9]+]] +; CHECK: call void @_Z2f1v(), !dbg [[fcs1_f4_f3cs2_f2:![0-9]+]] +; CHECK: call void @_Z2f1v(), !dbg [[fcs2_f4_f3cs1_f2:![0-9]+]] +; CHECK: call void @_Z2f1v(), !dbg [[fcs2_f4_f3cs2_f2:![0-9]+]] + +; CHECK-DAG: [[F:![0-9]+]] = distinct !DISubprogram(name: "f" +; CHECK-DAG: [[F2:![0-9]+]] = distinct !DISubprogram(name: "f2" +; CHECK-DAG: [[F3:![0-9]+]] = distinct !DISubprogram(name: "f3" +; CHECK-DAG: [[F4:![0-9]+]] = distinct !DISubprogram(name: "f4" + +; CHECK-DAG: [[fcs1_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs1:![0-9]+]]) +; CHECK-DAG: [[fcs1_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4:![0-9]+]]) +; CHECK-DAG: [[fcs1_f4]] = {{.*}}, scope: [[F4]], inlinedAt: [[fcs1:![0-9]+]]) +; CHECK-DAG: [[fcs1]] = {{.*}}, scope: [[F]]) +; CHECK-DAG: [[fcs1_f4_f3cs2_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs2:![0-9]+]]) +; CHECK-DAG: [[fcs1_f4_f3cs2]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4]]) + +; CHECK-DAG: [[fcs2_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs2_f4_f3cs1:![0-9]+]]) +; CHECK-DAG: [[fcs2_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs2_f4:![0-9]+]]) +; CHECK-DAG: [[fcs2_f4]] = {{.*}}, scope: [[F4]], inlinedAt: [[fcs2:![0-9]+]]) +; CHECK-DAG: [[fcs2]] = {{.*}}, scope: [[F]]) +; CHECK-DAG: [[fcs2_f4_f3cs2_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs2_f4_f3cs2:![0-9]+]]) +; CHECK-DAG: [[fcs2_f4_f3cs2]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs2_f4]]) + +$_Z2f4v = comdat any + +$_Z2f3v = comdat any + +$_Z2f2v = comdat any + +; Function Attrs: uwtable +define void @_Z1fv() #0 !dbg !4 { +entry: + call void @_Z2f4v(), !dbg !13 + call void @_Z2f4v(), !dbg !13 + ret void, !dbg !14 +} + +; Function Attrs: alwaysinline inlinehint uwtable +define linkonce_odr void @_Z2f4v() #1 comdat !dbg !7 { +entry: + 
call void @_Z2f3v(), !dbg !15 + ret void, !dbg !16 +} + +; Function Attrs: alwaysinline inlinehint uwtable +define linkonce_odr void @_Z2f3v() #1 comdat !dbg !8 { +entry: + call void @_Z2f2v(), !dbg !17 + call void @_Z2f2v(), !dbg !17 + ret void, !dbg !18 +} + +; Function Attrs: alwaysinline inlinehint uwtable +define linkonce_odr void @_Z2f2v() #1 comdat !dbg !9 { +entry: + call void @_Z2f1v(), !dbg !19 + ret void, !dbg !20 +} + +declare void @_Z2f1v() #2 + +attributes #0 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { alwaysinline inlinehint uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!10, !11} +!llvm.ident = !{!12} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)", isOptimized: false, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo") +!2 = !{} +!4 = distinct !DISubprogram(name: "f", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 13, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo") +!6 = !DISubroutineType(types: 
!2) +!7 = distinct !DISubprogram(name: "f4", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, retainedNodes: !2) +!8 = distinct !DISubprogram(name: "f3", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, retainedNodes: !2) +!9 = distinct !DISubprogram(name: "f2", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, retainedNodes: !2) +!10 = !{i32 2, !"Dwarf Version", i32 4} +!11 = !{i32 2, !"Debug Info Version", i32 3} +!12 = !{!"clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)"} +!13 = !DILocation(line: 14, column: 3, scope: !4) +!14 = !DILocation(line: 15, column: 1, scope: !4) +!15 = !DILocation(line: 11, column: 3, scope: !7) +!16 = !DILocation(line: 12, column: 1, scope: !7) +!17 = !DILocation(line: 8, column: 3, scope: !8) +!18 = !DILocation(line: 9, column: 1, scope: !8) +!19 = !DILocation(line: 5, column: 3, scope: !9) +!20 = !DILocation(line: 6, column: 1, scope: !9) diff --git a/llvm/test/Transforms/Inline/debug-invoke.ll b/llvm/test/Transforms/Inline/debug-invoke.ll new file mode 100644 index 00000000000..a1c27b00ea5 --- /dev/null +++ b/llvm/test/Transforms/Inline/debug-invoke.ll @@ -0,0 +1,44 @@ +; RUN: opt < %s -always-inline -S | FileCheck %s + +; Test that the debug location is preserved when rewriting an inlined call as an invoke + +; CHECK: invoke void @test() +; CHECK-NEXT: to label {{.*}} unwind label {{.*}}, !dbg [[INL_LOC:!.*]] +; CHECK: [[SP:.*]] = distinct !DISubprogram( +; CHECK: [[INL_LOC]] = !DILocation(line: 1, scope: [[SP]], inlinedAt: [[INL_AT:.*]]) +; CHECK: [[INL_AT]] = distinct !DILocation(line: 2, scope: [[SP]]) + +declare void @test() +declare i32 @__gxx_personality_v0(...) 
+ +attributes #0 = { alwaysinline } +define void @inl() #0 { + call void @test(), !dbg !3 + ret void +} + +define void @caller() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { + invoke void @inl() + to label %cont unwind label %lpad, !dbg !4 + +cont: + ret void + +lpad: + landingpad { i8*, i32 } + cleanup + ret void +} + +!llvm.module.flags = !{!1} +!llvm.dbg.cu = !{!5} + +!1 = !{i32 2, !"Debug Info Version", i32 3} +!2 = distinct !DISubprogram(unit: !5) +!3 = !DILocation(line: 1, scope: !2) +!4 = !DILocation(line: 2, scope: !2) +!5 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", + file: !6, + isOptimized: true, flags: "-O2", + splitDebugFilename: "abc.debug", emissionKind: 2) +!6 = !DIFile(filename: "path/davidino", directory: "/path/to/dir") diff --git a/llvm/test/Transforms/Inline/delete-call.ll b/llvm/test/Transforms/Inline/delete-call.ll new file mode 100644 index 00000000000..7f30ffb306b --- /dev/null +++ b/llvm/test/Transforms/Inline/delete-call.ll @@ -0,0 +1,26 @@ +; REQUIRES: asserts +; RUN: opt -S -inline -stats < %s 2>&1 | FileCheck %s +; CHECK: Number of functions inlined + +; RUN: opt -S -inline -functionattrs -stats < %s 2>&1 | FileCheck -check-prefix=CHECK-FUNCTIONATTRS %s +; CHECK-FUNCTIONATTRS: Number of call sites deleted, not inlined + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32" +target triple = "i386-apple-darwin9.8" + +define internal i32 @test(i32 %x, i32 %y, i32 %z) nounwind { +entry: + %0 = add nsw i32 %y, %z ; <i32> [#uses=1] + %1 = mul i32 %0, %x ; <i32> [#uses=1] + %2 = mul i32 %y, %z ; <i32> [#uses=1] + %3 = add nsw i32 %1, %2 ; <i32> [#uses=1] + ret i32 %3 +} + +define i32 @test2() nounwind { +entry: + %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind ; <i32> [#uses=1] + ret i32 14 +} + + diff --git a/llvm/test/Transforms/Inline/deopt-bundles.ll 
b/llvm/test/Transforms/Inline/deopt-bundles.ll new file mode 100644 index 00000000000..3e3c52f7d2d --- /dev/null +++ b/llvm/test/Transforms/Inline/deopt-bundles.ll @@ -0,0 +1,203 @@ +; RUN: opt -S -always-inline < %s | FileCheck %s + +declare void @f() +declare i32 @g() +declare fastcc i32 @g.fastcc() + +define i32 @callee_0() alwaysinline { + entry: + call void @f() + ret i32 2 +} + +define i32 @caller_0() { +; CHECK-LABEL: @caller_0( + entry: +; CHECK: entry: +; CHECK-NEXT: call void @f() +; CHECK-NEXT: ret i32 2 + %x = call i32 @callee_0() [ "deopt"(i32 5) ] + ret i32 %x +} + +define i32 @callee_1() alwaysinline { + entry: + call void @f() [ "deopt"() ] + call void @f() [ "deopt"(i32 0, i32 1) ] + call void @f() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 2 +} + +define i32 @caller_1() { +; CHECK-LABEL: @caller_1( + entry: +; CHECK: entry: +; CHECK-NEXT: call void @f() [ "deopt"(i32 5) ] +; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1) ] +; CHECK-NEXT: call void @f() [ "deopt"(i32 5, i32 0, i32 1), "foo"(double 0.000000e+00) ] +; CHECK-NEXT: ret i32 2 + + %x = call i32 @callee_1() [ "deopt"(i32 5) ] + ret i32 %x +} + +define i32 @callee_2() alwaysinline { + entry: + %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 %v +} + +define i32 @caller_2(i32 %val) { +; CHECK-LABEL: @caller_2( + entry: +; CHECK: entry: +; CHECK-NEXT: [[RVAL:%[^ ]+]] = call i32 @g() [ "deopt"(i32 %val, i32 0, i32 1), "foo"(double 0.000000e+00) ] +; CHECK-NEXT: ret i32 [[RVAL]] + %x = call i32 @callee_2() [ "deopt"(i32 %val) ] + ret i32 %x +} + +define i32 @callee_3() alwaysinline { + entry: + %v = call i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 %v +} + +define i32 @caller_3() personality i8 3 { +; CHECK-LABEL: @caller_3( + entry: + %x = invoke i32 @callee_3() [ "deopt"(i32 7) ] to label %normal unwind label %unwind +; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ] + + normal: + ret i32 
%x + + unwind: + %cleanup = landingpad i8 cleanup + ret i32 101 +} + +define i32 @callee_4() alwaysinline personality i8 3 { + entry: + %v = invoke i32 @g() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind + + normal: + ret i32 %v + + unwind: + %cleanup = landingpad i8 cleanup + ret i32 100 +} + +define i32 @caller_4() { +; CHECK-LABEL: @caller_4( + entry: +; CHECK: invoke i32 @g() [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ] + %x = call i32 @callee_4() [ "deopt"(i32 7) ] + ret i32 %x +} + +define i32 @callee_5() alwaysinline personality i8 3 { + entry: + %v = invoke fastcc i32 @g.fastcc() #0 [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] to label %normal unwind label %unwind + + normal: + ret i32 %v + + unwind: + %cleanup = landingpad i8 cleanup + ret i32 100 +} + +define i32 @caller_5() { +; CHECK-LABEL: @caller_5( + entry: +; CHECK: invoke fastcc i32 @g.fastcc() #[[FOO_BAR_ATTR_IDX:[0-9]+]] [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ] + %x = call i32 @callee_5() [ "deopt"(i32 7) ] + ret i32 %x +} + +define i32 @callee_6() alwaysinline personality i8 3 { + entry: + %v = call fastcc i32 @g.fastcc() #0 [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 %v +} + +define i32 @caller_6() { +; CHECK-LABEL: @caller_6( + entry: +; CHECK: call fastcc i32 @g.fastcc() #[[FOO_BAR_ATTR_IDX]] [ "deopt"(i32 7, i32 0, i32 1), "foo"(double 0.000000e+00) ] + %x = call i32 @callee_6() [ "deopt"(i32 7) ] + ret i32 %x +} + +define i32 @callee_7(i1 %val) alwaysinline personality i8 3 { +; We want something that PruningFunctionCloner is not smart enough to +; recognize, but can be recognized by recursivelySimplifyInstruction. 
+ + entry: + br i1 %val, label %check, label %precheck + + precheck: + br label %check + + check: + %p = phi i1 [ %val, %entry ], [ true, %precheck ] + br i1 %p, label %do.not, label %do + + do.not: + ret i32 0 + + do: + %v = call fastcc i32 @g.fastcc() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + ret i32 %v +} + +define i32 @caller_7() { +; CHECK-LABEL: @caller_7( + entry: +; CHECK-NOT: call fastcc i32 @g.fastcc() +; CHECK: ret i32 0 + %x = call i32 @callee_7(i1 true) [ "deopt"(i32 7) ] + ret i32 %x +} + +define i32 @callee_8(i1 %val) alwaysinline personality i8 3 { +; We want something that PruningFunctionCloner is not smart enough to +; recognize, but can be recognized by recursivelySimplifyInstruction. + + entry: + br i1 %val, label %check, label %precheck + + precheck: + br label %check + + check: + %p = phi i1 [ %val, %entry ], [ true, %precheck ] + br i1 %p, label %do.not, label %do + + do.not: + ret i32 0 + + do: + %phi = phi i32 [ 0, %check ], [ %v, %do ] + %v = call fastcc i32 @g.fastcc() [ "deopt"(i32 0, i32 1), "foo"(double 0.0) ] + %ic = icmp eq i32 %v, 42 + br i1 %ic, label %do, label %done + + done: + ret i32 %phi +} + +define i32 @caller_8() { +; CHECK-LABEL: @caller_8( + entry: +; CHECK-NOT: call fastcc i32 @g.fastcc() +; CHECK: ret i32 0 + %x = call i32 @callee_8(i1 true) [ "deopt"(i32 7) ] + ret i32 %x +} + +attributes #0 = { "foo"="bar" } + +; CHECK: attributes #[[FOO_BAR_ATTR_IDX]] = { "foo"="bar" } diff --git a/llvm/test/Transforms/Inline/deoptimize-intrinsic-cconv.ll b/llvm/test/Transforms/Inline/deoptimize-intrinsic-cconv.ll new file mode 100644 index 00000000000..4e2c3fe4786 --- /dev/null +++ b/llvm/test/Transforms/Inline/deoptimize-intrinsic-cconv.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -always-inline < %s | FileCheck %s + +declare cc42 i32 @llvm.experimental.deoptimize.i32(...) + +define i32 @callee_with_coldcc() alwaysinline { + %v0 = call cc42 i32(...) 
@llvm.experimental.deoptimize.i32(i32 1) [ "deopt"() ] + ret i32 %v0 +} + +define void @caller_with_coldcc() { +; CHECK-LABEL: @caller_with_coldcc( +; CHECK-NEXT: call cc42 void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"() ] +; CHECK-NEXT: ret void + + %val = call i32 @callee_with_coldcc() + ret void +} + +; CHECK: declare cc42 void @llvm.experimental.deoptimize.isVoid(...) diff --git a/llvm/test/Transforms/Inline/deoptimize-intrinsic.ll b/llvm/test/Transforms/Inline/deoptimize-intrinsic.ll new file mode 100644 index 00000000000..3d84bfc8076 --- /dev/null +++ b/llvm/test/Transforms/Inline/deoptimize-intrinsic.ll @@ -0,0 +1,123 @@ +; RUN: opt -S -always-inline < %s | FileCheck %s + +declare i8 @llvm.experimental.deoptimize.i8(...) +declare i32 @llvm.experimental.deoptimize.i32(...) + +define i8 @callee(i1* %c) alwaysinline { + %c0 = load volatile i1, i1* %c + br i1 %c0, label %left, label %right + +left: + %c1 = load volatile i1, i1* %c + br i1 %c1, label %lleft, label %lright + +lleft: + %v0 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ] + ret i8 %v0 + +lright: + ret i8 10 + +right: + %c2 = load volatile i1, i1* %c + br i1 %c2, label %rleft, label %rright + +rleft: + %v1 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1, i32 300, float 500.0, <2 x i32*> undef) [ "deopt"(i32 1) ] + ret i8 %v1 + +rright: + %v2 = call i8(...) @llvm.experimental.deoptimize.i8() [ "deopt"(i32 1) ] + ret i8 %v2 +} + +define void @caller_0(i1* %c, i8* %ptr) { +; CHECK-LABEL: @caller_0( +entry: + %v = call i8 @callee(i1* %c) [ "deopt"(i32 2) ] + store i8 %v, i8* %ptr + ret void + +; CHECK: lleft.i: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"(i32 2, i32 1) ] +; CHECK-NEXT: ret void + +; CHECK: rleft.i: +; CHECK-NEXT: call void (...) 
@llvm.experimental.deoptimize.isVoid(i32 1, i32 300, float 5.000000e+02, <2 x i32*> undef) [ "deopt"(i32 2, i32 1) ] +; CHECK-NEXT: ret void + +; CHECK: rright.i: +; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 2, i32 1) ] +; CHECK-NEXT: ret void + +; CHECK: callee.exit: +; CHECK-NEXT: store i8 10, i8* %ptr +; CHECK-NEXT: ret void + +} + +define i32 @caller_1(i1* %c, i8* %ptr) personality i8 3 { +; CHECK-LABEL: @caller_1( +entry: + %v = invoke i8 @callee(i1* %c) [ "deopt"(i32 3) ] to label %normal + unwind label %unwind + +; CHECK: lleft.i: +; CHECK-NEXT: %0 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 1) [ "deopt"(i32 3, i32 1) ] +; CHECK-NEXT: ret i32 %0 + +; CHECK: rleft.i: +; CHECK-NEXT: %1 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 1, i32 300, float 5.000000e+02, <2 x i32*> undef) [ "deopt"(i32 3, i32 1) ] +; CHECK-NEXT: ret i32 %1 + +; CHECK: rright.i: +; CHECK-NEXT: %2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 3, i32 1) ] +; CHECK-NEXT: ret i32 %2 + +; CHECK: callee.exit: +; CHECK-NEXT: br label %normal + +; CHECK: normal: +; CHECK-NEXT: store i8 10, i8* %ptr +; CHECK-NEXT: ret i32 42 + +unwind: + %lp = landingpad i32 cleanup + ret i32 43 + +normal: + store i8 %v, i8* %ptr + ret i32 42 +} + +define i8 @callee_with_alloca() alwaysinline { + %t = alloca i8 + %v0 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i8* %t) ] + ret i8 %v0 +} + +define void @caller_with_lifetime() { +; CHECK-LABEL: @caller_with_lifetime( +; CHECK: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"(i8* %t.i) ] +; CHECK-NEXT: ret void + +entry: + call i8 @callee_with_alloca(); + ret void +} + +define i8 @callee_with_dynamic_alloca(i32 %n) alwaysinline { + %p = alloca i8, i32 %n + %v = call i8(...) 
@llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i8* %p) ] + ret i8 %v +} + +define void @caller_with_stacksaverestore(i32 %n) { +; CHECK-LABEL: void @caller_with_stacksaverestore( +; CHECK: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"(i8* %p.i) ] +; CHECK-NEXT: ret void + + %p = alloca i32, i32 %n + call i8 @callee_with_dynamic_alloca(i32 %n) + ret void +} diff --git a/llvm/test/Transforms/Inline/devirtualize-2.ll b/llvm/test/Transforms/Inline/devirtualize-2.ll new file mode 100644 index 00000000000..e2c1e7c01ae --- /dev/null +++ b/llvm/test/Transforms/Inline/devirtualize-2.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(devirt<4>(inline))' -S | FileCheck %s +; PR4834 + +define i32 @test1() { + %funcall1_ = call fastcc i32 ()* () @f1() + %executecommandptr1_ = call i32 %funcall1_() + ret i32 %executecommandptr1_ +} + +define internal fastcc i32 ()* @f1() nounwind readnone { + ret i32 ()* @f2 +} + +define internal i32 @f2() nounwind readnone { + ret i32 1 +} + +; CHECK: @test1() +; CHECK-NEXT: ret i32 1 + + + + + +declare i8* @f1a(i8*) ssp align 2 + +define internal i32 @f2a(i8* %t) inlinehint ssp { +entry: + ret i32 41 +} + +define internal i32 @f3a(i32 (i8*)* %__f) ssp { +entry: + %A = call i32 %__f(i8* undef) + ret i32 %A +} + +define i32 @test2(i8* %this) ssp align 2 { + %X = call i32 @f3a(i32 (i8*)* @f2a) ssp + ret i32 %X +} + +; CHECK-LABEL: @test2( +; CHECK-NEXT: ret i32 41 diff --git a/llvm/test/Transforms/Inline/devirtualize-3.ll b/llvm/test/Transforms/Inline/devirtualize-3.ll new file mode 100644 index 00000000000..2a0a6d7f65a --- /dev/null +++ b/llvm/test/Transforms/Inline/devirtualize-3.ll @@ -0,0 +1,79 @@ +; RUN: opt -basicaa -inline -S -sroa -gvn -instcombine < %s | FileCheck %s +; PR5009 + +; CHECK: define i32 @main() +; CHECK-NEXT: entry: +; CHECK-NEXT: call void @exit(i32 38) + +target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +%struct.cont_t = type { void (i8*, i32)*, i8* } +%struct.foo_sf_t = type { %struct.cont_t*, i32 } + +define i32 @main() nounwind ssp { +entry: + %cont = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=4] + %tmp = getelementptr inbounds %struct.cont_t, %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1] + %tmp1 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %cont, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=2] + store void (i8*, i32)* bitcast (void (%struct.cont_t*, i32)* @quit to void (i8*, i32)*), void (i8*, i32)** %tmp1 + %tmp2 = load void (i8*, i32)*, void (i8*, i32)** %tmp1 ; <void (i8*, i32)*> [#uses=1] + store void (i8*, i32)* %tmp2, void (i8*, i32)** %tmp + %tmp3 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %cont, i32 0, i32 1 ; <i8**> [#uses=1] + store i8* null, i8** %tmp3 + call void @foo(%struct.cont_t* %cont) + ret i32 0 +} + +define internal void @quit(%struct.cont_t* %cont, i32 %rcode) nounwind ssp { +entry: + call void @exit(i32 %rcode) noreturn + unreachable +} + +define internal void @foo(%struct.cont_t* %c) nounwind ssp { +entry: + %sf = alloca %struct.foo_sf_t, align 8 ; <%struct.foo_sf_t*> [#uses=3] + %next = alloca %struct.cont_t, align 8 ; <%struct.cont_t*> [#uses=3] + %tmp = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1] + store %struct.cont_t* %c, %struct.cont_t** %tmp + %tmp2 = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1] + store i32 2, i32* %tmp2 + %tmp4 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %next, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1] + store void (i8*, i32)* bitcast (void (%struct.foo_sf_t*, i32)* @foo2 to void (i8*, i32)*), void (i8*, i32)** %tmp4 + %tmp5 = 
getelementptr inbounds %struct.cont_t, %struct.cont_t* %next, i32 0, i32 1 ; <i8**> [#uses=1] + %conv = bitcast %struct.foo_sf_t* %sf to i8* ; <i8*> [#uses=1] + store i8* %conv, i8** %tmp5 + call void @bar(%struct.cont_t* %next, i32 14) + ret void +} + +define internal void @foo2(%struct.foo_sf_t* %sf, i32 %y) nounwind ssp { +entry: + %tmp1 = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1] + %tmp2 = load %struct.cont_t*, %struct.cont_t** %tmp1 ; <%struct.cont_t*> [#uses=1] + %tmp3 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %tmp2, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1] + %tmp4 = load void (i8*, i32)*, void (i8*, i32)** %tmp3 ; <void (i8*, i32)*> [#uses=1] + %tmp6 = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 0 ; <%struct.cont_t**> [#uses=1] + %tmp7 = load %struct.cont_t*, %struct.cont_t** %tmp6 ; <%struct.cont_t*> [#uses=1] + %conv = bitcast %struct.cont_t* %tmp7 to i8* ; <i8*> [#uses=1] + %tmp9 = getelementptr inbounds %struct.foo_sf_t, %struct.foo_sf_t* %sf, i32 0, i32 1 ; <i32*> [#uses=1] + %tmp10 = load i32, i32* %tmp9 ; <i32> [#uses=1] + %mul = mul i32 %tmp10, %y ; <i32> [#uses=1] + call void %tmp4(i8* %conv, i32 %mul) + ret void +} + +define internal void @bar(%struct.cont_t* %c, i32 %y) nounwind ssp { +entry: + %tmp1 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %c, i32 0, i32 0 ; <void (i8*, i32)**> [#uses=1] + %tmp2 = load void (i8*, i32)*, void (i8*, i32)** %tmp1 ; <void (i8*, i32)*> [#uses=1] + %tmp4 = getelementptr inbounds %struct.cont_t, %struct.cont_t* %c, i32 0, i32 1 ; <i8**> [#uses=1] + %tmp5 = load i8*, i8** %tmp4 ; <i8*> [#uses=1] + %add = add nsw i32 %y, 5 ; <i32> [#uses=1] + call void %tmp2(i8* %tmp5, i32 %add) + ret void +} + +declare void @exit(i32) noreturn + diff --git a/llvm/test/Transforms/Inline/devirtualize.ll b/llvm/test/Transforms/Inline/devirtualize.ll new file mode 100644 index 00000000000..561bb62ae64 --- 
/dev/null +++ b/llvm/test/Transforms/Inline/devirtualize.ll @@ -0,0 +1,182 @@ +; RUN: opt -S -Os < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + + +; Simple devirt testcase, requires iteration between inliner and GVN. +; rdar://6295824 +define i32 @foo(i32 ()** noalias %p, i64* noalias %q) nounwind ssp { +entry: + store i32 ()* @bar, i32 ()** %p + store i64 0, i64* %q + %tmp3 = load i32 ()*, i32 ()** %p ; <i32 ()*> [#uses=1] + %call = call i32 %tmp3() ; <i32> [#uses=1] + %X = add i32 %call, 4 + ret i32 %X + +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: store +; CHECK-NEXT: store +; CHECK-NEXT: ret i32 11 +} + +define internal i32 @bar() nounwind ssp { +entry: + ret i32 7 +} + + +;; More complex devirt case, from PR6724 +; CHECK: @_Z1gv() +; CHECK-NEXT: entry: +; CHECK-NEXT: ret i32 7 + +%0 = type { i8*, i8* } +%1 = type { i8*, i8*, i32, i32, i8*, i64, i8*, i64 } +%2 = type { i8*, i8*, i8* } +%struct.A = type { i8** } +%struct.B = type { i8** } +%struct.C = type { [16 x i8] } +%struct.D = type { [16 x i8] } + +@_ZTV1D = linkonce_odr constant [6 x i8*] [i8* null, i8* bitcast (%2* @_ZTI1D to i8*), i8* bitcast (i32 (%struct.C*)* @_ZN1D1fEv to i8*), i8* inttoptr (i64 -8 to i8*), i8* bitcast (%2* @_ZTI1D to i8*), i8* bitcast (i32 (%struct.C*)* @_ZThn8_N1D1fEv to i8*)] ; <[6 x i8*]*> [#uses=2] +@_ZTVN10__cxxabiv120__si_class_type_infoE = external global i8* ; <i8**> [#uses=1] +@_ZTS1D = linkonce_odr constant [3 x i8] c"1D\00" ; <[3 x i8]*> [#uses=1] +@_ZTVN10__cxxabiv121__vmi_class_type_infoE = external global i8* ; <i8**> [#uses=1] +@_ZTS1C = linkonce_odr constant [3 x i8] c"1C\00" ; <[3 x i8]*> [#uses=1] +@_ZTVN10__cxxabiv117__class_type_infoE = external global i8* ; <i8**> [#uses=1] +@_ZTS1A = linkonce_odr constant [3 x i8] c"1A\00" ; <[3 x i8]*> [#uses=1] +@_ZTI1A = 
linkonce_odr constant %0 { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1A, i32 0, i32 0) } ; <%0*> [#uses=1] +@_ZTS1B = linkonce_odr constant [3 x i8] c"1B\00" ; <[3 x i8]*> [#uses=1] +@_ZTI1B = linkonce_odr constant %0 { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1B, i32 0, i32 0) } ; <%0*> [#uses=1] +@_ZTI1C = linkonce_odr constant %1 { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv121__vmi_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1C, i32 0, i32 0), i32 0, i32 2, i8* bitcast (%0* @_ZTI1A to i8*), i64 2, i8* bitcast (%0* @_ZTI1B to i8*), i64 2050 } ; <%1*> [#uses=1] +@_ZTI1D = linkonce_odr constant %2 { i8* bitcast (i8** getelementptr inbounds (i8*, i8** @_ZTVN10__cxxabiv120__si_class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([3 x i8], [3 x i8]* @_ZTS1D, i32 0, i32 0), i8* bitcast (%1* @_ZTI1C to i8*) } ; <%2*> [#uses=1] +@_ZTV1C = linkonce_odr constant [6 x i8*] [i8* null, i8* bitcast (%1* @_ZTI1C to i8*), i8* bitcast (i32 (%struct.C*)* @_ZN1C1fEv to i8*), i8* inttoptr (i64 -8 to i8*), i8* bitcast (%1* @_ZTI1C to i8*), i8* bitcast (i32 (%struct.C*)* @_ZThn8_N1C1fEv to i8*)] ; <[6 x i8*]*> [#uses=2] +@_ZTV1B = linkonce_odr constant [3 x i8*] [i8* null, i8* bitcast (%0* @_ZTI1B to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1B1fEv to i8*)] ; <[3 x i8*]*> [#uses=1] +@_ZTV1A = linkonce_odr constant [3 x i8*] [i8* null, i8* bitcast (%0* @_ZTI1A to i8*), i8* bitcast (i32 (%struct.A*)* @_ZN1A1fEv to i8*)] ; <[3 x i8*]*> [#uses=1] + +define i32 @_Z1gv() ssp { +entry: + %d = alloca %struct.C, align 8 ; <%struct.C*> [#uses=2] + call void @_ZN1DC1Ev(%struct.C* %d) + %call = call i32 @_Z1fP1D(%struct.C* %d) ; <i32> [#uses=1] + %X = add i32 %call, 3 + ret i32 %X +} + 
+define linkonce_odr void @_ZN1DC1Ev(%struct.C* %this) inlinehint ssp align 2 { +entry: + call void @_ZN1DC2Ev(%struct.C* %this) + ret void +} + +define internal i32 @_Z1fP1D(%struct.C* %d) ssp { +entry: + %0 = icmp eq %struct.C* %d, null ; <i1> [#uses=1] + br i1 %0, label %cast.end, label %cast.notnull + +cast.notnull: ; preds = %entry + %1 = bitcast %struct.C* %d to i8* ; <i8*> [#uses=1] + %add.ptr = getelementptr i8, i8* %1, i64 8 ; <i8*> [#uses=1] + %2 = bitcast i8* %add.ptr to %struct.A* ; <%struct.A*> [#uses=1] + br label %cast.end + +cast.end: ; preds = %entry, %cast.notnull + %3 = phi %struct.A* [ %2, %cast.notnull ], [ null, %entry ] ; <%struct.A*> [#uses=2] + %4 = bitcast %struct.A* %3 to i32 (%struct.A*)*** ; <i32 (%struct.A*)***> [#uses=1] + %5 = load i32 (%struct.A*)**, i32 (%struct.A*)*** %4 ; <i32 (%struct.A*)**> [#uses=1] + %vfn = getelementptr inbounds i32 (%struct.A*)*, i32 (%struct.A*)** %5, i64 0 ; <i32 (%struct.A*)**> [#uses=1] + %6 = load i32 (%struct.A*)*, i32 (%struct.A*)** %vfn ; <i32 (%struct.A*)*> [#uses=1] + %call = call i32 %6(%struct.A* %3) ; <i32> [#uses=1] + ret i32 %call +} + +define linkonce_odr i32 @_ZN1D1fEv(%struct.C* %this) ssp align 2 { +entry: + ret i32 4 +} + +define linkonce_odr i32 @_ZThn8_N1D1fEv(%struct.C* %this) { +entry: + %0 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1] + %1 = getelementptr inbounds i8, i8* %0, i64 -8 ; <i8*> [#uses=1] + %2 = bitcast i8* %1 to %struct.C* ; <%struct.C*> [#uses=1] + %call = call i32 @_ZN1D1fEv(%struct.C* %2) ; <i32> [#uses=1] + ret i32 %call +} + +define linkonce_odr void @_ZN1DC2Ev(%struct.C* %this) inlinehint ssp align 2 { +entry: + call void @_ZN1CC2Ev(%struct.C* %this) + %0 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1] + %1 = getelementptr inbounds i8, i8* %0, i64 0 ; <i8*> [#uses=1] + %2 = bitcast i8* %1 to i8*** ; <i8***> [#uses=1] + store i8** getelementptr inbounds ([6 x i8*], [6 x i8*]* @_ZTV1D, i64 0, i64 2), i8*** %2 + %3 = bitcast %struct.C* %this to i8* ; <i8*> 
[#uses=1] + %4 = getelementptr inbounds i8, i8* %3, i64 8 ; <i8*> [#uses=1] + %5 = bitcast i8* %4 to i8*** ; <i8***> [#uses=1] + store i8** getelementptr inbounds ([6 x i8*], [6 x i8*]* @_ZTV1D, i64 0, i64 5), i8*** %5 + ret void +} + +define linkonce_odr void @_ZN1CC2Ev(%struct.C* %this) inlinehint ssp align 2 { +entry: + %0 = bitcast %struct.C* %this to %struct.A* ; <%struct.A*> [#uses=1] + call void @_ZN1AC2Ev(%struct.A* %0) + %1 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1] + %2 = getelementptr inbounds i8, i8* %1, i64 8 ; <i8*> [#uses=1] + %3 = bitcast i8* %2 to %struct.A* ; <%struct.A*> [#uses=1] + call void @_ZN1BC2Ev(%struct.A* %3) + %4 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1] + %5 = getelementptr inbounds i8, i8* %4, i64 0 ; <i8*> [#uses=1] + %6 = bitcast i8* %5 to i8*** ; <i8***> [#uses=1] + store i8** getelementptr inbounds ([6 x i8*], [6 x i8*]* @_ZTV1C, i64 0, i64 2), i8*** %6 + %7 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1] + %8 = getelementptr inbounds i8, i8* %7, i64 8 ; <i8*> [#uses=1] + %9 = bitcast i8* %8 to i8*** ; <i8***> [#uses=1] + store i8** getelementptr inbounds ([6 x i8*], [6 x i8*]* @_ZTV1C, i64 0, i64 5), i8*** %9 + ret void +} + +define linkonce_odr i32 @_ZN1C1fEv(%struct.C* %this) ssp align 2 { +entry: + ret i32 3 +} + +define linkonce_odr i32 @_ZThn8_N1C1fEv(%struct.C* %this) { +entry: + %0 = bitcast %struct.C* %this to i8* ; <i8*> [#uses=1] + %1 = getelementptr inbounds i8, i8* %0, i64 -8 ; <i8*> [#uses=1] + %2 = bitcast i8* %1 to %struct.C* ; <%struct.C*> [#uses=1] + %call = call i32 @_ZN1C1fEv(%struct.C* %2) ; <i32> [#uses=1] + ret i32 %call +} + +define linkonce_odr void @_ZN1AC2Ev(%struct.A* %this) inlinehint ssp align 2 { +entry: + %0 = bitcast %struct.A* %this to i8* ; <i8*> [#uses=1] + %1 = getelementptr inbounds i8, i8* %0, i64 0 ; <i8*> [#uses=1] + %2 = bitcast i8* %1 to i8*** ; <i8***> [#uses=1] + store i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1A, i64 0, i64 2), i8*** %2 + ret void 
+} + +define linkonce_odr void @_ZN1BC2Ev(%struct.A* %this) inlinehint ssp align 2 { +entry: + %0 = bitcast %struct.A* %this to i8* ; <i8*> [#uses=1] + %1 = getelementptr inbounds i8, i8* %0, i64 0 ; <i8*> [#uses=1] + %2 = bitcast i8* %1 to i8*** ; <i8***> [#uses=1] + store i8** getelementptr inbounds ([3 x i8*], [3 x i8*]* @_ZTV1B, i64 0, i64 2), i8*** %2 + ret void +} + +define linkonce_odr i32 @_ZN1B1fEv(%struct.A* %this) ssp align 2 { +entry: + ret i32 2 +} + +define linkonce_odr i32 @_ZN1A1fEv(%struct.A* %this) ssp align 2 { +entry: + ret i32 1 +} diff --git a/llvm/test/Transforms/Inline/dynamic_alloca_test.ll b/llvm/test/Transforms/Inline/dynamic_alloca_test.ll new file mode 100644 index 00000000000..1c17c7cd974 --- /dev/null +++ b/llvm/test/Transforms/Inline/dynamic_alloca_test.ll @@ -0,0 +1,45 @@ +; Test that functions with dynamic allocas get inlined in a case where +; naively inlining it would result in a miscompilation. +; Functions with dynamic allocas can only be inlined into functions that +; already have dynamic allocas. + +; RUN: opt < %s -inline -S | FileCheck %s +; +; FIXME: This test is xfailed because the inline cost rewrite disabled *all* +; inlining of functions which contain a dynamic alloca. It should be re-enabled +; once that functionality is restored. 
+; XFAIL: * + +declare void @ext(i32*) + +define internal void @callee(i32 %N) { + %P = alloca i32, i32 %N + call void @ext(i32* %P) + ret void +} + +define void @foo(i32 %N) { +; CHECK-LABEL: @foo( +; CHECK: alloca i32, i32 %{{.*}} +; CHECK: call i8* @llvm.stacksave() +; CHECK: alloca i32, i32 %{{.*}} +; CHECK: call void @ext +; CHECK: call void @llvm.stackrestore +; CHECK: ret + +entry: + %P = alloca i32, i32 %N + call void @ext(i32* %P) + br label %loop + +loop: + %count = phi i32 [ 0, %entry ], [ %next, %loop ] + %next = add i32 %count, 1 + call void @callee(i32 %N) + %cond = icmp eq i32 %count, 100000 + br i1 %cond, label %out, label %loop + +out: + ret void +} + diff --git a/llvm/test/Transforms/Inline/ephemeral.ll b/llvm/test/Transforms/Inline/ephemeral.ll new file mode 100644 index 00000000000..6261d4b39ea --- /dev/null +++ b/llvm/test/Transforms/Inline/ephemeral.ll @@ -0,0 +1,30 @@ +; RUN: opt -S -Oz %s | FileCheck %s + +@a = global i32 4 + +define i32 @inner() { + %a1 = load volatile i32, i32* @a + + ; Here are enough instructions to prevent inlining, but because they are used + ; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will + ; still happen). + %a2 = mul i32 %a1, %a1 + %a3 = sub i32 %a1, 5 + %a4 = udiv i32 %a3, -13 + %a5 = mul i32 %a4, %a4 + %a6 = add i32 %a5, %a5 + %ca = icmp sgt i32 %a6, -7 + tail call void @llvm.assume(i1 %ca) + + ret i32 %a1 +} + +; @inner() should be inlined for -Oz. 
+; CHECK-NOT: call i1 @inner +define i32 @outer() optsize { + %r = call i32 @inner() + ret i32 %r +} + +declare void @llvm.assume(i1) nounwind + diff --git a/llvm/test/Transforms/Inline/externally_available.ll b/llvm/test/Transforms/Inline/externally_available.ll new file mode 100644 index 00000000000..ba316f134cb --- /dev/null +++ b/llvm/test/Transforms/Inline/externally_available.ll @@ -0,0 +1,22 @@ +; RUN: opt < %s -inline -constprop -S | FileCheck %s + +define available_externally i32 @test_function() { +; CHECK-NOT: @test_function +entry: + ret i32 4 +} + + +define i32 @result() { +; CHECK-LABEL: define i32 @result() +entry: + %A = call i32 @test_function() +; CHECK-NOT: call +; CHECK-NOT: @test_function + + %B = add i32 %A, 1 + ret i32 %B +; CHECK: ret i32 5 +} + +; CHECK-NOT: @test_function diff --git a/llvm/test/Transforms/Inline/frameescape.ll b/llvm/test/Transforms/Inline/frameescape.ll new file mode 100644 index 00000000000..f2d4245cdd6 --- /dev/null +++ b/llvm/test/Transforms/Inline/frameescape.ll @@ -0,0 +1,45 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +; PR23216: We can't inline functions using llvm.localescape. + +declare void @llvm.localescape(...) +declare i8* @llvm.frameaddress(i32) +declare i8* @llvm.localrecover(i8*, i8*, i32) + +define internal void @foo(i8* %fp) { + %a.i8 = call i8* @llvm.localrecover(i8* bitcast (i32 ()* @bar to i8*), i8* %fp, i32 0) + %a = bitcast i8* %a.i8 to i32* + store i32 42, i32* %a + ret void +} + +define internal i32 @bar() { +entry: + %a = alloca i32 + call void (...) @llvm.localescape(i32* %a) + %fp = call i8* @llvm.frameaddress(i32 0) + tail call void @foo(i8* %fp) + %r = load i32, i32* %a + ret i32 %r +} + +; We even bail when someone marks it alwaysinline. +define internal i32 @bar_alwaysinline() alwaysinline { +entry: + %a = alloca i32 + call void (...) 
@llvm.localescape(i32* %a) + tail call void @foo(i8* null) + ret i32 0 +} + +define i32 @bazz() { +entry: + %r = tail call i32 @bar() + %r1 = tail call i32 @bar_alwaysinline() + ret i32 %r +} + +; CHECK: define i32 @bazz() +; CHECK: call i32 @bar() +; CHECK: call i32 @bar_alwaysinline() diff --git a/llvm/test/Transforms/Inline/function-count-update-2.ll b/llvm/test/Transforms/Inline/function-count-update-2.ll new file mode 100644 index 00000000000..702fa6292c2 --- /dev/null +++ b/llvm/test/Transforms/Inline/function-count-update-2.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s + +; This tests that the function count of a callee gets correctly updated after it +; has been inlined into two callsites. + +; CHECK: @callee() !prof [[COUNT:![0-9]+]] +define i32 @callee() !prof !1 { + ret i32 0 +} + +define i32 @caller1() !prof !2 { +; CHECK-LABEL: @caller1 +; CHECK-NOT: callee +; CHECK: ret + %i = call i32 @callee() + ret i32 %i +} + +define i32 @caller2() !prof !3 { +; CHECK-LABEL: @caller2 +; CHECK-NOT: callee +; CHECK: ret + %i = call i32 @callee() + ret i32 %i +} + +!llvm.module.flags = !{!0} +; CHECK: [[COUNT]] = !{!"function_entry_count", i64 0} +!0 = !{i32 1, !"MaxFunctionCount", i32 1000} +!1 = !{!"function_entry_count", i64 1000} +!2 = !{!"function_entry_count", i64 600} +!3 = !{!"function_entry_count", i64 400} + diff --git a/llvm/test/Transforms/Inline/function-count-update-3.ll b/llvm/test/Transforms/Inline/function-count-update-3.ll new file mode 100644 index 00000000000..215d64175fa --- /dev/null +++ b/llvm/test/Transforms/Inline/function-count-update-3.ll @@ -0,0 +1,78 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 | FileCheck %s + +; This tests that the function count of a function gets properly scaled after +; inlining a call chain leading to the function.
+; Function a calls c with count 200 (C1) +; Function c calls e with count 250 (C2) +; Entry count of e is 500 (C3) +; Entry count of c is 500 (C4) +; Function b calls c with count 300 (C5) +; c->e inlining does not happen since the cost exceeds threshold. +; c then inlined into a. +; e now gets inlined into a (through c) since the branch condition in e is now +; known and hence the cost gets reduced. +; Estimated count of a->e callsite = C2 * (C1 / C4) +; Estimated count of a->e callsite = 250 * (200 / 500) = 100 +; Remaining count of e = C3 - 100 = 500 - 100 = 400 +; Remaining count of c = C4 - C1 - C5 = 500 - 200 - 300 = 0 + +@data = external global i32 + +define i32 @a(i32 %a1) !prof !1 { + %a2 = call i32 @c(i32 %a1, i32 1) + ret i32 %a2 +} + +define i32 @b(i32 %b1) !prof !2 { + %b2 = call i32 @c(i32 %b1, i32 %b1) + ret i32 %b2 +} + +declare void @ext(); + +; CHECK: @c(i32 %c1, i32 %c100) !prof [[COUNT1:![0-9]+]] +define i32 @c(i32 %c1, i32 %c100) !prof !3 { + call void @ext() + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false + +cond_false: + ret i32 0 + +cond_true: + %c11 = call i32 @e(i32 %c100) + ret i32 %c11 +} + + +; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]] +define i32 @e(i32 %c1) !prof !4 { + %cond = icmp sle i32 %c1, 1 + br i1 %cond, label %cond_true, label %cond_false + +cond_false: + call void @ext() + %c2 = load i32, i32* @data, align 4 + %c3 = add i32 %c1, %c2 + %c4 = mul i32 %c3, %c2 + %c5 = add i32 %c4, %c2 + %c6 = mul i32 %c5, %c2 + %c7 = add i32 %c6, %c2 + %c8 = mul i32 %c7, %c2 + %c9 = add i32 %c8, %c2 + %c10 = mul i32 %c9, %c2 + ret i32 %c10 + +cond_true: + ret i32 0 +} + +!llvm.module.flags = !{!0} +; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 0} +; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 400} +!0 = !{i32 1, !"MaxFunctionCount", i32 5000} +!1 = !{!"function_entry_count", i64 200} +!2 = !{!"function_entry_count", i64 300} +!3 = !{!"function_entry_count", i64 500} +!4 = 
!{!"function_entry_count", i64 500} + diff --git a/llvm/test/Transforms/Inline/function-count-update.ll b/llvm/test/Transforms/Inline/function-count-update.ll new file mode 100644 index 00000000000..094ad5a2ae6 --- /dev/null +++ b/llvm/test/Transforms/Inline/function-count-update.ll @@ -0,0 +1,50 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s + +; This tests that the function counts of two callees get correctly updated after +; they have been inlined into two back-to-back callsites in a single basic block +; in the caller. The callees have the alwaysinline attribute and so they get +; inlined both with the regular inliner pass and the always inline pass. In +; both cases, the new count of each callee is the original count minus callsite +; count which is 200 (since the caller's entry count is 400 and the block +; containing the calls has a relative block frequency of 0.5). + +; CHECK: @callee1(i32 %n) #0 !prof [[COUNT1:![0-9]+]] +define i32 @callee1(i32 %n) #0 !prof !1 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false + +cond_true: + %r1 = add i32 %n, 1 + ret i32 %r1 +cond_false: + %r2 = add i32 %n, 2 + ret i32 %r2 +} + +; CHECK: @callee2(i32 %n) #0 !prof [[COUNT2:![0-9]+]] +define i32 @callee2(i32 %n) #0 !prof !2 { + %r1 = add i32 %n, 1 + ret i32 %r1 +} + +define i32 @caller(i32 %n) !prof !3 { + %cond = icmp sle i32 %n, 100 + br i1 %cond, label %cond_true, label %cond_false + +cond_true: + %i = call i32 @callee1(i32 %n) + %j = call i32 @callee2(i32 %i) + ret i32 %j +cond_false: + ret i32 0 +} + +!llvm.module.flags = !{!0} +; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 800} +; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 1800} +!0 = !{i32 1, !"MaxFunctionCount", i32 1000} +!1 = !{!"function_entry_count", i64 1000} +!2 = !{!"function_entry_count", i64 2000} +!3 = !{!"function_entry_count", i64 400} +attributes #0 = { alwaysinline } + diff --git
a/llvm/test/Transforms/Inline/guard-intrinsic.ll b/llvm/test/Transforms/Inline/guard-intrinsic.ll new file mode 100644 index 00000000000..76d683df6e9 --- /dev/null +++ b/llvm/test/Transforms/Inline/guard-intrinsic.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -always-inline < %s | FileCheck %s + +declare void @llvm.experimental.guard(i1, ...) + +define i8 @callee(i1* %c_ptr) alwaysinline { + %c = load volatile i1, i1* %c_ptr + call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1) [ "deopt"(i32 1) ] + ret i8 5 +} + +define void @caller_0(i1* %c, i8* %ptr) { +; CHECK-LABEL: @caller_0( +entry: +; CHECK: [[COND:%[^ ]+]] = load volatile i1, i1* %c +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND]], i32 1) [ "deopt"(i32 2, i32 1) ] +; CHECK-NEXT: store i8 5, i8* %ptr + + %v = call i8 @callee(i1* %c) [ "deopt"(i32 2) ] + store i8 %v, i8* %ptr + ret void +} + +define i32 @caller_1(i1* %c, i8* %ptr) personality i8 3 { +; CHECK-LABEL: @caller_1( +; CHECK: [[COND:%[^ ]+]] = load volatile i1, i1* %c +; CHECK-NEXT: call void (i1, ...) 
@llvm.experimental.guard(i1 [[COND]], i32 1) [ "deopt"(i32 3, i32 1) ] +; CHECK-NEXT: br label %normal +entry: + %v = invoke i8 @callee(i1* %c) [ "deopt"(i32 3) ] to label %normal + unwind label %unwind + +unwind: + %lp = landingpad i32 cleanup + ret i32 43 + +normal: + store i8 %v, i8* %ptr + ret i32 42 +} diff --git a/llvm/test/Transforms/Inline/gvn-inline-iteration.ll b/llvm/test/Transforms/Inline/gvn-inline-iteration.ll new file mode 100644 index 00000000000..b87c0609ea2 --- /dev/null +++ b/llvm/test/Transforms/Inline/gvn-inline-iteration.ll @@ -0,0 +1,23 @@ +; RUN: opt -basicaa -inline -gvn -S -max-cg-scc-iterations=1 < %s | FileCheck %s +; rdar://6295824 and PR6724 + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.0.0" + +define i32 @foo(i32 ()** noalias nocapture %p, i64* noalias nocapture %q) nounwind ssp { +entry: + store i32 ()* @bar, i32 ()** %p + store i64 0, i64* %q + %tmp3 = load i32 ()*, i32 ()** %p ; <i32 ()*> [#uses=1] + %call = tail call i32 %tmp3() nounwind ; <i32> [#uses=1] + ret i32 %call +} +; CHECK-LABEL: @foo( +; CHECK: ret i32 7 +; CHECK-LABEL: @bar( +; CHECK: ret i32 7 + +define internal i32 @bar() nounwind readnone ssp { +entry: + ret i32 7 +} diff --git a/llvm/test/Transforms/Inline/ignore-debug-info.ll b/llvm/test/Transforms/Inline/ignore-debug-info.ll new file mode 100644 index 00000000000..60e96777d93 --- /dev/null +++ b/llvm/test/Transforms/Inline/ignore-debug-info.ll @@ -0,0 +1,58 @@ +; RUN: opt < %s -S -inline -inline-threshold=2 | FileCheck %s +; RUN: opt < %s -S -strip-debug -inline -inline-threshold=2 | FileCheck %s +; RUN: opt < %s -S -passes='cgscc(inline)' -inline-threshold=2 | FileCheck %s +; RUN: opt < %s -S -strip-debug -passes='cgscc(inline)' -inline-threshold=2 | FileCheck %s +; +; The purpose of this test is to check that debug info doesn't influence +; inlining 
decisions. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 +declare void @llvm.dbg.value(metadata, metadata, metadata) #1 + +define <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) { +entry: + call void @llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6) + %mul = fmul <4 x float> %a, <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> + call void @llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6) + %mul1 = fmul <4 x float> %b, <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00> + call void @llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6) + %add = fadd <4 x float> %mul, %mul1 + ret <4 x float> %add +} + +define float @outer_vectors(<4 x float> %a, <4 x float> %b) { +; CHECK-LABEL: @outer_vectors( +; CHECK-NOT: call <4 x float> @inner_vectors( +; CHECK: ret float + +entry: + call void @llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6) + call void @llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6) + %call = call <4 x float> @inner_vectors(<4 x float> %a, <4 x float> %b) + call void @llvm.dbg.value(metadata i32 undef, metadata !DILocalVariable(scope: !6), metadata !DIExpression()), !dbg !DILocation(scope: !6) + %vecext = extractelement <4 x float> %call, i32 0 + %vecext1 = extractelement <4 x float> %call, i32 1 + %add = fadd float %vecext, %vecext1 + %vecext2 = extractelement <4 x float> 
%call, i32 2 + %add3 = fadd float %add, %vecext2 + %vecext4 = extractelement <4 x float> %call, i32 3 + %add5 = fadd float %add3, %vecext4 + ret float %add5 +} + +attributes #0 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "test.c", directory: "") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 1, !"Debug Info Version", i32 3} +!5 = !{!""} +!6 = distinct !DISubprogram(unit: !0) diff --git a/llvm/test/Transforms/Inline/inalloca-not-static.ll b/llvm/test/Transforms/Inline/inalloca-not-static.ll new file mode 100644 index 00000000000..74b5ecf420c --- /dev/null +++ b/llvm/test/Transforms/Inline/inalloca-not-static.ll @@ -0,0 +1,65 @@ +; RUN: opt -always-inline -S < %s | FileCheck %s +; RUN: opt -passes=always-inline -S < %s | FileCheck %s + +; We used to misclassify inalloca as a static alloca in the inliner. This only +; arose for alwaysinline functions, because the normal inliner refuses to +; inline such things. 
+ +; Generated using this C++ source: +; struct Foo { +; Foo(); +; Foo(const Foo &o); +; ~Foo(); +; int a; +; }; +; __forceinline void h(Foo o) {} +; __forceinline void g() { h(Foo()); } +; void f() { g(); } + +; ModuleID = 't.cpp' +source_filename = "t.cpp" +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i386-pc-windows-msvc19.0.24210" + +%struct.Foo = type { i32 } + +declare i8* @llvm.stacksave() +declare void @llvm.stackrestore(i8*) + +declare x86_thiscallcc %struct.Foo* @"\01??0Foo@@QAE@XZ"(%struct.Foo* returned) unnamed_addr +declare x86_thiscallcc void @"\01??1Foo@@QAE@XZ"(%struct.Foo*) unnamed_addr + +define void @f() { +entry: + call void @g() + ret void +} + +define internal void @g() alwaysinline { +entry: + %inalloca.save = call i8* @llvm.stacksave() + %argmem = alloca inalloca <{ %struct.Foo }>, align 4 + %0 = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %argmem, i32 0, i32 0 + %call = call x86_thiscallcc %struct.Foo* @"\01??0Foo@@QAE@XZ"(%struct.Foo* %0) + call void @h(<{ %struct.Foo }>* inalloca %argmem) + call void @llvm.stackrestore(i8* %inalloca.save) + ret void +} + +; Function Attrs: alwaysinline inlinehint nounwind +define internal void @h(<{ %struct.Foo }>* inalloca) alwaysinline { +entry: + %o = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %0, i32 0, i32 0 + call x86_thiscallcc void @"\01??1Foo@@QAE@XZ"(%struct.Foo* %o) + ret void +} + +; CHECK: define void @f() +; CHECK: %[[STACKSAVE:.*]] = call i8* @llvm.stacksave() +; CHECK: %[[ARGMEM:.*]] = alloca inalloca <{ %struct.Foo }>, align 4 +; CHECK: %[[GEP1:.*]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %[[ARGMEM]], i32 0, i32 0 +; CHECK: %[[CALL:.*]] = call x86_thiscallcc %struct.Foo* @"\01??0Foo@@QAE@XZ"(%struct.Foo* %[[GEP1]]) +; CHECK: %[[GEP2:.*]] = getelementptr inbounds <{ %struct.Foo }>, <{ %struct.Foo }>* %[[ARGMEM]], i32 0, i32 0 +; CHECK: call x86_thiscallcc void 
@"\01??1Foo@@QAE@XZ"(%struct.Foo* %[[GEP2]]) +; CHECK: call void @llvm.stackrestore(i8* %[[STACKSAVE]]) +; CHECK: ret void diff --git a/llvm/test/Transforms/Inline/infinite-loop-two-predecessors.ll b/llvm/test/Transforms/Inline/infinite-loop-two-predecessors.ll new file mode 100644 index 00000000000..aa07315eb08 --- /dev/null +++ b/llvm/test/Transforms/Inline/infinite-loop-two-predecessors.ll @@ -0,0 +1,32 @@ +; RUN: opt -S -o - %s -inline | FileCheck %s + +define void @f1() { +bb.0: + br i1 false, label %bb.2, label %bb.1 + +bb.1: ; preds = %bb.0 + br label %bb.2 + +bb.2: ; preds = %bb.0, %bb.1 + %tmp0 = phi i1 [ true, %bb.1 ], [ false, %bb.0 ] + br i1 %tmp0, label %bb.4, label %bb.3 + +bb.3: ; preds = %bb.3, %bb.3 + br i1 undef, label %bb.3, label %bb.3 + +bb.4: ; preds = %bb.2 + ret void +} + +define void @f2() { +bb.0: + call void @f1() + ret void +} + +; f1 should be inlined into f2 and simplified/collapsed to nothing. + +; CHECK-LABEL: define void @f2() { +; CHECK-NEXT: bb.0: +; CHECK-NEXT: ret void +; CHECK-NEXT: } diff --git a/llvm/test/Transforms/Inline/inline-assume.ll b/llvm/test/Transforms/Inline/inline-assume.ll new file mode 100644 index 00000000000..d8e2a26d896 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-assume.ll @@ -0,0 +1,32 @@ +; RUN: opt -inline -S -o - < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +%0 = type opaque +%struct.Foo = type { i32, %0* } + +; Test that we don't crash when inlining @bar (rdar://22521387). 
+define void @foo(%struct.Foo* align 4 %a) { +entry: + call fastcc void @bar(%struct.Foo* nonnull align 4 undef) + +; CHECK: call void @llvm.assume(i1 undef) +; CHECK: unreachable + + ret void +} + +define fastcc void @bar(%struct.Foo* align 4 %a) { +; CHECK-LABEL: @bar +entry: + %b = getelementptr inbounds %struct.Foo, %struct.Foo* %a, i32 0, i32 1 + br i1 undef, label %if.end, label %if.then.i.i + +if.then.i.i: + call void @llvm.assume(i1 undef) + unreachable + +if.end: + ret void +} + +declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/Inline/inline-brunch-funnel.ll b/llvm/test/Transforms/Inline/inline-brunch-funnel.ll new file mode 100644 index 00000000000..54c6600c038 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-brunch-funnel.ll @@ -0,0 +1,35 @@ +; Test that inliner skips @llvm.icall.branch.funnel +; RUN: opt < %s -inline -S | FileCheck %s + +target datalayout = "e-p:64:64" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.icall.branch.funnel(...) + +; CHECK-LABEL: define void @fn_musttail( +define void @fn_musttail() { + call void (...) @bf_musttail() + ; CHECK: call void (...) @bf_musttail( + ret void +} + +; CHECK-LABEL: define internal void @bf_musttail( +define internal void @bf_musttail(...) { + musttail call void (...) @llvm.icall.branch.funnel(...) + ; CHECK: musttail call void (...) @llvm.icall.branch.funnel( + ret void +} + +; CHECK-LABEL: define void @fn_musttail_always( +define void @fn_musttail_always() { + call void (...) @bf_musttail_always() + ; CHECK: call void (...) @bf_musttail_always( + ret void +} + +; CHECK-LABEL: define internal void @bf_musttail_always( +define internal void @bf_musttail_always(...) alwaysinline { + musttail call void (...) @llvm.icall.branch.funnel(...) + ; CHECK: musttail call void (...) 
@llvm.icall.branch.funnel( + ret void +} diff --git a/llvm/test/Transforms/Inline/inline-byval-bonus.ll b/llvm/test/Transforms/Inline/inline-byval-bonus.ll new file mode 100644 index 00000000000..785de04c3f1 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-byval-bonus.ll @@ -0,0 +1,194 @@ +; RUN: opt -S -inline -inline-threshold=275 < %s | FileCheck %s +; RUN: opt -S -passes='cgscc(inline)' -inline-threshold=275 < %s | FileCheck %s +; PR13095 + +; The performance of the c-ray benchmark largely depends on the inlining of a +; specific call to @ray_sphere. This test case is designed to verify that it's +; inlined at -O3. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.8.0" + +%struct.sphere = type { %struct.vec3, double, %struct.material, %struct.sphere* } +%struct.vec3 = type { double, double, double } +%struct.material = type { %struct.vec3, double, double } +%struct.ray = type { %struct.vec3, %struct.vec3 } +%struct.spoint = type { %struct.vec3, %struct.vec3, %struct.vec3, double } + +define i32 @caller(%struct.sphere* %i) { + %shadow_ray = alloca %struct.ray, align 8 + call void @fix(%struct.ray* %shadow_ray) + + %call = call i32 @ray_sphere(%struct.sphere* %i, %struct.ray* byval align 8 %shadow_ray, %struct.spoint* null) + ret i32 %call + +; CHECK-LABEL: @caller( +; CHECK-NOT: call i32 @ray_sphere +; CHECK: ret i32 +} + +declare void @fix(%struct.ray*) + +define i32 @ray_sphere(%struct.sphere* nocapture %sph, %struct.ray* nocapture byval align 8 %ray, %struct.spoint* %sp) nounwind uwtable ssp { + %1 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 1, i32 0 + %2 = load double, double* %1, align 8 + %3 = fmul double %2, %2 + %4 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 1, i32 1 + %5 = load double, double* %4, align 8 + %6 = fmul double %5, %5 + %7 = fadd 
double %3, %6 + %8 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 1, i32 2 + %9 = load double, double* %8, align 8 + %10 = fmul double %9, %9 + %11 = fadd double %7, %10 + %12 = fmul double %2, 2.000000e+00 + %13 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 0, i32 0 + %14 = load double, double* %13, align 8 + %15 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 0, i32 0 + %16 = load double, double* %15, align 8 + %17 = fsub double %14, %16 + %18 = fmul double %12, %17 + %19 = fmul double %5, 2.000000e+00 + %20 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 0, i32 1 + %21 = load double, double* %20, align 8 + %22 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 0, i32 1 + %23 = load double, double* %22, align 8 + %24 = fsub double %21, %23 + %25 = fmul double %19, %24 + %26 = fadd double %18, %25 + %27 = fmul double %9, 2.000000e+00 + %28 = getelementptr inbounds %struct.ray, %struct.ray* %ray, i64 0, i32 0, i32 2 + %29 = load double, double* %28, align 8 + %30 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 0, i32 2 + %31 = load double, double* %30, align 8 + %32 = fsub double %29, %31 + %33 = fmul double %27, %32 + %34 = fadd double %26, %33 + %35 = fmul double %16, %16 + %36 = fmul double %23, %23 + %37 = fadd double %35, %36 + %38 = fmul double %31, %31 + %39 = fadd double %37, %38 + %40 = fmul double %14, %14 + %41 = fadd double %40, %39 + %42 = fmul double %21, %21 + %43 = fadd double %42, %41 + %44 = fmul double %29, %29 + %45 = fadd double %44, %43 + %46 = fsub double -0.000000e+00, %16 + %47 = fmul double %14, %46 + %48 = fmul double %21, %23 + %49 = fsub double %47, %48 + %50 = fmul double %29, %31 + %51 = fsub double %49, %50 + %52 = fmul double %51, 2.000000e+00 + %53 = fadd double %52, %45 + %54 = getelementptr inbounds %struct.sphere, %struct.sphere* %sph, i64 0, i32 1 + %55 = load double, double* %54, align 8 + 
%56 = fmul double %55, %55 + %57 = fsub double %53, %56 + %58 = fmul double %34, %34 + %59 = fmul double %11, 4.000000e+00 + %60 = fmul double %59, %57 + %61 = fsub double %58, %60 + %62 = fcmp olt double %61, 0.000000e+00 + br i1 %62, label %130, label %63 + +; <label>:63 ; preds = %0 + %64 = tail call double @sqrt(double %61) nounwind readnone + %65 = fsub double -0.000000e+00, %34 + %66 = fsub double %64, %34 + %67 = fmul double %11, 2.000000e+00 + %68 = fdiv double %66, %67 + %69 = fsub double %65, %64 + %70 = fdiv double %69, %67 + %71 = fcmp olt double %68, 1.000000e-06 + %72 = fcmp olt double %70, 1.000000e-06 + %or.cond = and i1 %71, %72 + br i1 %or.cond, label %130, label %73 + +; <label>:73 ; preds = %63 + %74 = fcmp ogt double %68, 1.000000e+00 + %75 = fcmp ogt double %70, 1.000000e+00 + %or.cond1 = and i1 %74, %75 + br i1 %or.cond1, label %130, label %76 + +; <label>:76 ; preds = %73 + %77 = icmp eq %struct.spoint* %sp, null + br i1 %77, label %130, label %78 + +; <label>:78 ; preds = %76 + %t1.0 = select i1 %71, double %70, double %68 + %t2.0 = select i1 %72, double %t1.0, double %70 + %79 = fcmp olt double %t1.0, %t2.0 + %80 = select i1 %79, double %t1.0, double %t2.0 + %81 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 3 + store double %80, double* %81, align 8 + %82 = fmul double %80, %2 + %83 = fadd double %14, %82 + %84 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 0, i32 0 + store double %83, double* %84, align 8 + %85 = fmul double %5, %80 + %86 = fadd double %21, %85 + %87 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 0, i32 1 + store double %86, double* %87, align 8 + %88 = fmul double %9, %80 + %89 = fadd double %29, %88 + %90 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 0, i32 2 + store double %89, double* %90, align 8 + %91 = load double, double* %15, align 8 + %92 = fsub double %83, %91 + %93 = load double, double* %54, align 8 + %94 
= fdiv double %92, %93 + %95 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 1, i32 0 + store double %94, double* %95, align 8 + %96 = load double, double* %22, align 8 + %97 = fsub double %86, %96 + %98 = load double, double* %54, align 8 + %99 = fdiv double %97, %98 + %100 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 1, i32 1 + store double %99, double* %100, align 8 + %101 = load double, double* %30, align 8 + %102 = fsub double %89, %101 + %103 = load double, double* %54, align 8 + %104 = fdiv double %102, %103 + %105 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 1, i32 2 + store double %104, double* %105, align 8 + %106 = fmul double %2, %94 + %107 = fmul double %5, %99 + %108 = fadd double %106, %107 + %109 = fmul double %9, %104 + %110 = fadd double %108, %109 + %111 = fmul double %110, 2.000000e+00 + %112 = fmul double %94, %111 + %113 = fsub double %112, %2 + %114 = fsub double -0.000000e+00, %113 + %115 = fmul double %99, %111 + %116 = fsub double %115, %5 + %117 = fsub double -0.000000e+00, %116 + %118 = fmul double %104, %111 + %119 = fsub double %118, %9 + %120 = fsub double -0.000000e+00, %119 + %.06 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 2, i32 0 + %.18 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 2, i32 1 + %.210 = getelementptr inbounds %struct.spoint, %struct.spoint* %sp, i64 0, i32 2, i32 2 + %121 = fmul double %113, %113 + %122 = fmul double %116, %116 + %123 = fadd double %121, %122 + %124 = fmul double %119, %119 + %125 = fadd double %123, %124 + %126 = tail call double @sqrt(double %125) nounwind readnone + %127 = fdiv double %114, %126 + store double %127, double* %.06, align 8 + %128 = fdiv double %117, %126 + store double %128, double* %.18, align 8 + %129 = fdiv double %120, %126 + store double %129, double* %.210, align 8 + br label %130 + +; <label>:130 ; preds = %78, %76, %73, %63, %0 + %.0 = phi 
i32 [ 0, %0 ], [ 0, %73 ], [ 0, %63 ], [ 1, %76 ], [ 1, %78 ] + ret i32 %.0 +} + +declare double @sqrt(double) nounwind readnone diff --git a/llvm/test/Transforms/Inline/inline-cold-callee.ll b/llvm/test/Transforms/Inline/inline-cold-callee.ll new file mode 100644 index 00000000000..404c537b297 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-cold-callee.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -inline -inlinecold-threshold=0 -S | FileCheck %s + +; This tests that a cold callee gets the (lower) inlinecold-threshold even without +; Cold hint and does not get inlined because the cost exceeds the inlinecold-threshold. +; A callee with identical body does get inlined because cost fits within the +; inline-threshold + +define i32 @callee1(i32 %x) !prof !21 { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @callee2(i32 %x) !prof !22 { +; CHECK-LABEL: @callee2( + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @caller2(i32 %y1) !prof !22 { +; CHECK-LABEL: @caller2( +; CHECK: call i32 @callee2 +; CHECK-NOT: call i32 @callee1 +; CHECK: ret i32 %x3.i + %y2 = call i32 @callee2(i32 %y1) + %y3 = call i32 @callee1(i32 %y2) + ret i32 %y3 +} + +declare void @extern() + +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 100} +!22 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/llvm/test/Transforms/Inline/inline-cold-callsite-pgo.ll 
b/llvm/test/Transforms/Inline/inline-cold-callsite-pgo.ll new file mode 100644 index 00000000000..26ea8e50eaf --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-cold-callsite-pgo.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s + +; This tests that a cold callsite gets the inline-cold-callsite-threshold +; and does not get inlined. Another callsite to an identical callee that +; is not cold gets inlined because cost is below the inline-threshold. + +define i32 @callee1(i32 %x) !prof !21 { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @caller(i32 %n) !prof !22 { +; CHECK-LABEL: @caller( + %cond = icmp sle i32 %n, 100 + br i1 %cond, label %cond_true, label %cond_false, !prof !0 + +cond_true: +; CHECK-LABEL: cond_true: +; CHECK-NOT: call i32 @callee1 +; CHECK: ret i32 %x3.i + %i = call i32 @callee1(i32 %n) + ret i32 %i +cond_false: +; CHECK-LABEL: cond_false: +; CHECK: call i32 @callee1 +; CHECK: ret i32 %j + %j = call i32 @callee1(i32 %n) + ret i32 %j +} +declare void @extern() + +!0 = !{!"branch_weights", i32 200, i32 1} + +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 200} +!22 = !{!"function_entry_count", i64 200} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 1000, i32 1} +!13 = !{i32 999000, i64 1000, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/llvm/test/Transforms/Inline/inline-cold-callsite.ll b/llvm/test/Transforms/Inline/inline-cold-callsite.ll new file mode 100644 index 
00000000000..50dd55d62ed --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-cold-callsite.ll @@ -0,0 +1,47 @@ + +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=0 -S | FileCheck %s + +; This tests that a cold callsite gets the inline-cold-callsite-threshold +; and does not get inlined. Another callsite to an identical callee that +; is not cold gets inlined because cost is below the inline-threshold. + +define void @callee() { + call void @extern() + call void @extern() + ret void +} + +declare void @extern() +declare i1 @ext(i32) + +; CHECK-LABEL: caller +define i32 @caller(i32 %n) { +entry: + %cmp4 = icmp sgt i32 %n, 0 + br i1 %cmp4, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: + ret i32 0 + +for.body: + %i.05 = phi i32 [ %inc, %for.inc ], [ 0, %entry ] +; CHECK: %call = tail call + %call = tail call zeroext i1 @ext(i32 %i.05) +; CHECK-NOT: call void @callee +; CHECK-NEXT: call void @extern + call void @callee() + br i1 %call, label %cold, label %for.inc, !prof !0 + +cold: +; CHECK: call void @callee + call void @callee() + br label %for.inc + +for.inc: + %inc = add nuw nsw i32 %i.05, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body +} + + +!0 = !{!"branch_weights", i32 1, i32 2000} diff --git a/llvm/test/Transforms/Inline/inline-cold.ll b/llvm/test/Transforms/Inline/inline-cold.ll new file mode 100644 index 00000000000..e0e679ad403 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-cold.ll @@ -0,0 +1,188 @@ +; RUN: opt < %s -inline -S -inlinecold-threshold=25 | FileCheck %s +; Test that functions with attribute Cold are not inlined while the +; same function without attribute Cold will be inlined. 
+ +; RUN: opt < %s -inline -S -inline-threshold=600 | FileCheck %s -check-prefix=OVERRIDE +; The command line argument for inline-threshold should override +; the default cold threshold, so a cold function with size bigger +; than the default cold threshold (225) will be inlined. + +; RUN: opt < %s -inline -S | FileCheck %s -check-prefix=DEFAULT +; The same cold function will not be inlined with the default behavior. + +@a = global i32 4 + +; This function should be larger than the cold threshold (75), but smaller +; than the regular threshold. +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #0 { +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +; Function Attrs: nounwind cold readnone uwtable +define i32 @ColdFunction(i32 %a) #1 { +; CHECK-LABEL: @ColdFunction +; CHECK: ret +; OVERRIDE-LABEL: @ColdFunction +; OVERRIDE: ret +; DEFAULT-LABEL: @ColdFunction +; DEFAULT: ret +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %add = add i32 %x4, %a + ret i32 %add +} + +; This 
function should be larger than the default cold threshold (225). +define i32 @ColdFunction2(i32 %a) #1 { +; CHECK-LABEL: @ColdFunction2 +; CHECK: ret +; OVERRIDE-LABEL: @ColdFunction2 +; OVERRIDE: ret +; DEFAULT-LABEL: @ColdFunction2 +; DEFAULT: ret +entry: + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a7 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + + %a21 = load volatile i32, i32* @a + %x21 = add i32 %x12, %a21 + %a22 = load volatile i32, i32* @a + %x22 = add i32 %x21, %a22 + %a23 = load volatile i32, i32* @a + %x23 = add i32 %x22, %a23 + %a24 = load volatile i32, i32* @a + %x24 = add i32 %x23, %a24 + %a25 = load volatile i32, i32* @a + %x25 = add i32 %x24, %a25 + %a26 = load volatile i32, i32* @a + %x26 = add i32 %x25, %a26 + %a27 = load volatile i32, i32* @a + %x27 = add i32 %x26, %a27 + %a28 = load volatile i32, i32* @a + %x28 = add i32 %x27, %a28 + %a29 = load volatile i32, i32* @a + %x29 = add i32 %x28, %a29 + %a30 = load volatile i32, i32* @a + %x30 = add i32 %x29, %a30 + %a31 = load volatile i32, i32* @a + %x31 = add i32 %x30, %a31 + %a32 = load volatile i32, i32* @a + %x32 = add i32 %x31, %a32 + + %a41 = load volatile i32, i32* @a + %x41 = add i32 %x32, %a41 + %a42 = load volatile i32, i32* @a + %x42 = add i32 %x41, %a42 + %a43 = load volatile i32, i32* @a + %x43 = add i32 %x42, %a43 + %a44 = load volatile 
i32, i32* @a + %x44 = add i32 %x43, %a44 + %a45 = load volatile i32, i32* @a + %x45 = add i32 %x44, %a45 + %a46 = load volatile i32, i32* @a + %x46 = add i32 %x45, %a46 + %a47 = load volatile i32, i32* @a + %x47 = add i32 %x46, %a47 + %a48 = load volatile i32, i32* @a + %x48 = add i32 %x47, %a48 + %a49 = load volatile i32, i32* @a + %x49 = add i32 %x48, %a49 + %a50 = load volatile i32, i32* @a + %x50 = add i32 %x49, %a50 + %a51 = load volatile i32, i32* @a + %x51 = add i32 %x50, %a51 + %a52 = load volatile i32, i32* @a + %x52 = add i32 %x51, %a52 + + %add = add i32 %x52, %a + ret i32 %add +} + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #0 { +; CHECK-LABEL: @bar +; CHECK: call i32 @ColdFunction(i32 5) +; CHECK-NOT: call i32 @simpleFunction(i32 6) +; CHECK: call i32 @ColdFunction2(i32 5) +; CHECK: ret +; OVERRIDE-LABEL: @bar +; OVERRIDE-NOT: call i32 @ColdFunction(i32 5) +; OVERRIDE-NOT: call i32 @simpleFunction(i32 6) +; OVERRIDE-NOT: call i32 @ColdFunction2(i32 5) +; OVERRIDE: ret +; DEFAULT-LABEL: @bar +; DEFAULT-NOT: call i32 @ColdFunction(i32 5) +; DEFAULT-NOT: call i32 @simpleFunction(i32 6) +; DEFAULT: call i32 @ColdFunction2(i32 5) +; DEFAULT: ret +entry: + %0 = tail call i32 @ColdFunction(i32 5) + %1 = tail call i32 @simpleFunction(i32 6) + %2 = tail call i32 @ColdFunction2(i32 5) + %3 = add i32 %0, %1 + %add = add i32 %2, %3 + ret i32 %add +} + +declare void @extern() +attributes #0 = { nounwind readnone uwtable } +attributes #1 = { nounwind cold readnone uwtable } diff --git a/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll b/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll new file mode 100644 index 00000000000..b8d41abe1c3 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-constexpr-addrspacecast-argument.ll @@ -0,0 +1,31 @@ +; RUN: opt -S -inline < %s | FileCheck %s +; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s + +target datalayout = "e-p3:32:32-p4:64:64-n32" + 
+@lds = internal addrspace(3) global [64 x i64] zeroinitializer + +; CHECK-LABEL: @constexpr_addrspacecast_ptr_size_change( +; CHECK: load i64, i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*) +; CHECK-NEXT: br +define void @constexpr_addrspacecast_ptr_size_change() #0 { + %tmp0 = call i32 @foo(i64 addrspace(4)* addrspacecast (i64 addrspace(3)* getelementptr inbounds ([64 x i64], [64 x i64] addrspace(3)* @lds, i32 0, i32 0) to i64 addrspace(4)*)) #1 + ret void +} + +define i32 @foo(i64 addrspace(4)* %arg) #1 { +bb: + %tmp = getelementptr i64, i64 addrspace(4)* %arg, i64 undef + %tmp1 = load i64, i64 addrspace(4)* %tmp + br i1 undef, label %bb2, label %bb3 + +bb2: + store i64 0, i64 addrspace(4)* %tmp + br label %bb3 + +bb3: + unreachable +} + +attributes #0 = { nounwind } +attributes #1 = { alwaysinline nounwind } diff --git a/llvm/test/Transforms/Inline/inline-fast-math-flags.ll b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll new file mode 100644 index 00000000000..dc2f2e1837e --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-fast-math-flags.ll @@ -0,0 +1,35 @@ +; RUN: opt < %s -S -inline -inline-threshold=20 | FileCheck %s +; RUN: opt < %s -S -passes='cgscc(inline)' -inline-threshold=20 | FileCheck %s +; Check that we don't drop FastMathFlag when estimating inlining profitability. +; +; In this test we should inline 'foo' to 'boo', because it'll fold to a +; constant. 
+ +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" + +define float @foo(float* %a, float %b) { +entry: + %a0 = load float, float* %a, align 4 + %mul = fmul fast float %a0, %b + %tobool = fcmp une float %mul, 0.000000e+00 + br i1 %tobool, label %if.then, label %if.end + +if.then: ; preds = %entry + %a1 = load float, float* %a, align 8 + %arrayidx1 = getelementptr inbounds float, float* %a, i64 1 + %a2 = load float, float* %arrayidx1, align 4 + %add = fadd fast float %a1, %a2 + br label %if.end + +if.end: ; preds = %if.then, %entry + %storemerge = phi float [ %add, %if.then ], [ 1.000000e+00, %entry ] + ret float %storemerge +} + +; CHECK-LABEL: @boo +; CHECK-NOT: call float @foo +define float @boo(float* %a) { +entry: + %call = call float @foo(float* %a, float 0.000000e+00) + ret float %call +} diff --git a/llvm/test/Transforms/Inline/inline-funclets.ll b/llvm/test/Transforms/Inline/inline-funclets.ll new file mode 100644 index 00000000000..409310380f2 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-funclets.ll @@ -0,0 +1,676 @@ +; RUN: opt -inline -S %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S %s | FileCheck %s + +declare void @g() + + +;;; Test with a call in a funclet that needs to remain a call +;;; when inlined because the funclet doesn't unwind to caller. 
+;;; CHECK-LABEL: define void @test1( +define void @test1() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test1_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test1_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %exit unwind label %cleanup.inner +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[cleanup_inner:.+]] + +cleanup.inner: + %pad.inner = cleanuppad within none [] + call void @g() [ "funclet"(token %pad.inner) ] + cleanupret from %pad.inner unwind label %cleanup.outer +; CHECK: [[cleanup_inner]]: +; The call here needs to remain a call because pad.inner has a cleanupret +; that stays within the inlinee. +; CHECK-NEXT: %[[pad_inner:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: call void @g() [ "funclet"(token %[[pad_inner]]) ] +; CHECK-NEXT: cleanupret from %[[pad_inner]] unwind label %[[cleanup_outer:.+]] + +cleanup.outer: + %pad.outer = cleanuppad within none [] + call void @g() [ "funclet"(token %pad.outer) ] + cleanupret from %pad.outer unwind to caller +; CHECK: [[cleanup_outer]]: +; The call and cleanupret here need to be redirected to caller cleanup +; CHECK-NEXT: %[[pad_outer:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad_outer]]) ] +; CHECK-NEXT: unwind label %cleanup +; CHECK: cleanupret from %[[pad_outer]] unwind label %cleanup{{$}} + +exit: + ret void +} + + + +;;; Test with an "unwind to caller" catchswitch in a parent funclet +;;; that needs to remain "unwind to caller" because the parent +;;; doesn't unwind to caller. 
+;;; CHECK-LABEL: define void @test2( +define void @test2() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test2_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test2_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %exit unwind label %cleanup1 +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[cleanup1:.+]] + +cleanup1: + %outer = cleanuppad within none [] + invoke void @g() [ "funclet"(token %outer) ] + to label %ret1 unwind label %catchswitch +; CHECK: [[cleanup1]]: +; CHECK-NEXT: %[[outer:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[outer]]) ] +; CHECK-NEXT: unwind label %[[catchswitch:.+]] + +catchswitch: + %cs = catchswitch within %outer [label %catch] unwind to caller +; CHECK: [[catchswitch]]: +; The catchswitch here needs to remain "unwind to caller" since %outer +; has a cleanupret that remains within the inlinee. 
+; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[outer]] [label %[[catch:.+]]] unwind to caller + +catch: + %inner = catchpad within %cs [] + call void @g() [ "funclet"(token %inner) ] + catchret from %inner to label %ret1 +; CHECK: [[catch]]: +; The call here needs to remain a call since it too is within %outer +; CHECK: %[[inner:[^ ]+]] = catchpad within %[[cs]] +; CHECK-NEXT: call void @g() [ "funclet"(token %[[inner]]) ] + +ret1: + cleanupret from %outer unwind label %cleanup2 +; CHECK: cleanupret from %[[outer]] unwind label %[[cleanup2:.+]] + +cleanup2: + %later = cleanuppad within none [] + cleanupret from %later unwind to caller +; CHECK: [[cleanup2]]: +; The cleanupret here needs to get redirected to the caller cleanup +; CHECK-NEXT: %[[later:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: cleanupret from %[[later]] unwind label %cleanup{{$}} + +exit: + ret void +} + + +;;; Test with a call in a cleanup that has no definitive unwind +;;; destination, that must be rewritten to an invoke. +;;; CHECK-LABEL: define void @test3( +define void @test3() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test3_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test3_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %exit unwind label %cleanup +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[cleanup:.+]] + +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + unreachable +; CHECK: [[cleanup]]: +; The call must be rewritten to an invoke targeting the caller cleanup +; because it may well unwind to there. 
+; CHECK-NEXT: %[[pad:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[pad]]) ] +; CHECK-NEXT: unwind label %cleanup{{$}} + +exit: + ret void +} + + +;;; Test with a catchswitch in a cleanup that has no definitive +;;; unwind destination, that must be rewritten to unwind to the +;;; inlined invoke's unwind dest +;;; CHECK-LABEL: define void @test4( +define void @test4() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test4_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test4_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %exit unwind label %cleanup +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[cleanup:.+]] + +cleanup: + %clean = cleanuppad within none [] + invoke void @g() [ "funclet"(token %clean) ] + to label %unreachable unwind label %dispatch +; CHECK: [[cleanup]]: +; CHECK-NEXT: %[[clean:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[clean]]) ] +; CHECK-NEXT: unwind label %[[dispatch:.+]] + +dispatch: + %cs = catchswitch within %clean [label %catch] unwind to caller +; CHECK: [[dispatch]]: +; The catchswitch must be rewritten to unwind to %cleanup in the caller +; because it may well unwind to there. +; CHECK-NEXT: %[[cs:[^ ]+]] = catchswitch within %[[clean]] [label %[[catch:.+]]] unwind label %cleanup{{$}} + +catch: + catchpad within %cs [] + br label %unreachable +unreachable: + unreachable +exit: + ret void +} + + +;;; Test with multiple levels of nesting, and unwind dests +;;; that need to be inferred from ancestors, descendants, +;;; and cousins. 
+;;; CHECK-LABEL: define void @test5( +define void @test5() personality void ()* @g { +entry: +; CHECK-NEXT: entry: + invoke void @test5_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test5_inlinee() alwaysinline personality void ()* @g { +entry: + invoke void @g() + to label %cont unwind label %noinfo.root +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: to label %[[cont:[^ ]+]] unwind label %[[noinfo_root:.+]] + +noinfo.root: + %noinfo.root.pad = cleanuppad within none [] + call void @g() [ "funclet"(token %noinfo.root.pad) ] + invoke void @g() [ "funclet"(token %noinfo.root.pad) ] + to label %noinfo.root.cont unwind label %noinfo.left +; CHECK: [[noinfo_root]]: +; Nothing under "noinfo.root" has a definitive unwind destination, so +; we must assume all of it may actually unwind, and redirect unwinds +; to the cleanup in the caller. 
+; CHECK-NEXT: %[[noinfo_root_pad:[^ ]+]] = cleanuppad within none [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ] +; CHECK-NEXT: to label %[[next:[^ ]+]] unwind label %cleanup{{$}} +; CHECK: [[next]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ] +; CHECK-NEXT: to label %[[noinfo_root_cont:[^ ]+]] unwind label %[[noinfo_left:.+]] + +noinfo.left: + %noinfo.left.pad = cleanuppad within %noinfo.root.pad [] + invoke void @g() [ "funclet"(token %noinfo.left.pad) ] + to label %unreachable unwind label %noinfo.left.child +; CHECK: [[noinfo_left]]: +; CHECK-NEXT: %[[noinfo_left_pad:[^ ]+]] = cleanuppad within %[[noinfo_root_pad]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_pad]]) ] +; CHECK-NEXT: unwind label %[[noinfo_left_child:.+]] + +noinfo.left.child: + %noinfo.left.child.cs = catchswitch within %noinfo.left.pad [label %noinfo.left.child.catch] unwind to caller +; CHECK: [[noinfo_left_child]]: +; CHECK-NEXT: %[[noinfo_left_child_cs:[^ ]+]] = catchswitch within %[[noinfo_left_pad]] [label %[[noinfo_left_child_catch:[^ ]+]]] unwind label %cleanup{{$}} + +noinfo.left.child.catch: + %noinfo.left.child.pad = catchpad within %noinfo.left.child.cs [] + call void @g() [ "funclet"(token %noinfo.left.child.pad) ] + br label %unreachable +; CHECK: [[noinfo_left_child_catch]]: +; CHECK-NEXT: %[[noinfo_left_child_pad:[^ ]+]] = catchpad within %[[noinfo_left_child_cs]] [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_left_child_pad]]) ] +; CHECK-NEXT: unwind label %cleanup{{$}} + +noinfo.root.cont: + invoke void @g() [ "funclet"(token %noinfo.root.pad) ] + to label %unreachable unwind label %noinfo.right +; CHECK: [[noinfo_root_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_root_pad]]) ] +; CHECK-NEXT: unwind label %[[noinfo_right:.+]] + +noinfo.right: + %noinfo.right.cs = catchswitch within %noinfo.root.pad [label %noinfo.right.catch] unwind to caller +; CHECK: 
[[noinfo_right]]: +; CHECK-NEXT: %[[noinfo_right_cs:[^ ]+]] = catchswitch within %[[noinfo_root_pad]] [label %[[noinfo_right_catch:[^ ]+]]] unwind label %cleanup{{$}} + +noinfo.right.catch: + %noinfo.right.pad = catchpad within %noinfo.right.cs [] + invoke void @g() [ "funclet"(token %noinfo.right.pad) ] + to label %unreachable unwind label %noinfo.right.child +; CHECK: [[noinfo_right_catch]]: +; CHECK-NEXT: %[[noinfo_right_pad:[^ ]+]] = catchpad within %[[noinfo_right_cs]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_right_pad]]) ] +; CHECK-NEXT: unwind label %[[noinfo_right_child:.+]] + +noinfo.right.child: + %noinfo.right.child.pad = cleanuppad within %noinfo.right.pad [] + call void @g() [ "funclet"(token %noinfo.right.child.pad) ] + br label %unreachable +; CHECK: [[noinfo_right_child]]: +; CHECK-NEXT: %[[noinfo_right_child_pad:[^ ]+]] = cleanuppad within %[[noinfo_right_pad]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[noinfo_right_child_pad]]) ] +; CHECK-NEXT: unwind label %cleanup{{$}} + +cont: + invoke void @g() + to label %exit unwind label %implicit.root +; CHECK: [[cont]]: +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[implicit_root:.+]] + +implicit.root: + %implicit.root.pad = cleanuppad within none [] + call void @g() [ "funclet"(token %implicit.root.pad) ] + invoke void @g() [ "funclet"(token %implicit.root.pad) ] + to label %implicit.root.cont unwind label %implicit.left +; CHECK: [[implicit_root]]: +; There's an unwind edge to %internal in implicit.right, and we need to propagate that +; fact down to implicit.right.grandchild, up to implicit.root, and down to +; implicit.left.child.catch, leaving all calls and "unwind to caller" catchswitches +; alone so they don't conflict with the unwind edge in implicit.right +; CHECK-NEXT: %[[implicit_root_pad:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_root_pad]]) ] +; CHECK-NEXT: invoke void @g() [ "funclet"(token 
%[[implicit_root_pad]]) ] +; CHECK-NEXT: to label %[[implicit_root_cont:[^ ]+]] unwind label %[[implicit_left:.+]] + +implicit.left: + %implicit.left.pad = cleanuppad within %implicit.root.pad [] + invoke void @g() [ "funclet"(token %implicit.left.pad) ] + to label %unreachable unwind label %implicit.left.child +; CHECK: [[implicit_left]]: +; CHECK-NEXT: %[[implicit_left_pad:[^ ]+]] = cleanuppad within %[[implicit_root_pad:[^ ]+]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_left_pad]]) ] +; CHECK-NEXT: unwind label %[[implicit_left_child:.+]] + +implicit.left.child: + %implicit.left.child.cs = catchswitch within %implicit.left.pad [label %implicit.left.child.catch] unwind to caller +; CHECK: [[implicit_left_child]]: +; CHECK-NEXT: %[[implicit_left_child_cs:[^ ]+]] = catchswitch within %[[implicit_left_pad]] [label %[[implicit_left_child_catch:[^ ]+]]] unwind to caller + +implicit.left.child.catch: + %implicit.left.child.pad = catchpad within %implicit.left.child.cs [] + call void @g() [ "funclet"(token %implicit.left.child.pad) ] + br label %unreachable +; CHECK: [[implicit_left_child_catch]]: +; CHECK-NEXT: %[[implicit_left_child_pad:[^ ]+]] = catchpad within %[[implicit_left_child_cs]] +; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_left_child_pad]]) ] + +implicit.root.cont: + invoke void @g() [ "funclet"(token %implicit.root.pad) ] + to label %unreachable unwind label %implicit.right +; CHECK: [[implicit_root_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_root_pad]]) ] +; CHECK-NEXT: unwind label %[[implicit_right:.+]] + +implicit.right: + %implicit.right.cs = catchswitch within %implicit.root.pad [label %implicit.right.catch] unwind label %internal +; CHECK: [[implicit_right]]: +; This is the unwind edge (to %internal) whose existence needs to get propagated around the "implicit" tree +; CHECK-NEXT: %[[implicit_right_cs:[^ ]+]] = catchswitch within %[[implicit_root_pad]] [label %[[implicit_right_catch:[^ 
]+]]] unwind label %[[internal:.+]] + +implicit.right.catch: + %implicit.right.pad = catchpad within %implicit.right.cs [] + invoke void @g() [ "funclet"(token %implicit.right.pad) ] + to label %unreachable unwind label %implicit.right.child +; CHECK: [[implicit_right_catch]]: +; CHECK-NEXT: %[[implicit_right_pad:[^ ]+]] = catchpad within %[[implicit_right_cs]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_pad]]) ] +; CHECK-NEXT: unwind label %[[implicit_right_child:.+]] + +implicit.right.child: + %implicit.right.child.pad = cleanuppad within %implicit.right.pad [] + invoke void @g() [ "funclet"(token %implicit.right.child.pad) ] + to label %unreachable unwind label %implicit.right.grandchild +; CHECK: [[implicit_right_child]]: +; CHECK-NEXT: %[[implicit_right_child_pad:[^ ]+]] = cleanuppad within %[[implicit_right_pad]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[implicit_right_child_pad]]) ] +; CHECK-NEXT: unwind label %[[implicit_right_grandchild:.+]] + +implicit.right.grandchild: + %implicit.right.grandchild.cs = catchswitch within %implicit.right.child.pad [label %implicit.right.grandchild.catch] unwind to caller +; CHECK: [[implicit_right_grandchild]]: +; CHECK-NEXT: %[[implicit_right_grandchild_cs:[^ ]+]] = catchswitch within %[[implicit_right_child_pad]] [label %[[implicit_right_grandchild_catch:[^ ]+]]] unwind to caller + +implicit.right.grandchild.catch: + %implicit.right.grandhcild.pad = catchpad within %implicit.right.grandchild.cs [] + call void @g() [ "funclet"(token %implicit.right.grandhcild.pad) ] + br label %unreachable +; CHECK: [[implicit_right_grandchild_catch]]: +; CHECK-NEXT: %[[implicit_right_grandhcild_pad:[^ ]+]] = catchpad within %[[implicit_right_grandchild_cs]] +; CHECK-NEXT: call void @g() [ "funclet"(token %[[implicit_right_grandhcild_pad]]) ] + +internal: + %internal.pad = cleanuppad within none [] + call void @g() [ "funclet"(token %internal.pad) ] + cleanupret from %internal.pad unwind to caller +; 
CHECK: [[internal]]: +; internal is a cleanup with a "return to caller" cleanuppad; that needs to get redirected +; to %cleanup in the caller, and the call needs to get similarly rewritten to an invoke. +; CHECK-NEXT: %[[internal_pad:[^ ]+]] = cleanuppad within none +; CHECK-NEXT: invoke void @g() [ "funclet"(token %internal.pad.i) ] +; CHECK-NEXT: to label %[[next:[^ ]+]] unwind label %cleanup{{$}} +; CHECK: [[next]]: +; CHECK-NEXT: cleanupret from %[[internal_pad]] unwind label %cleanup{{$}} + +unreachable: + unreachable +exit: + ret void +} + +;;; Test with funclets that don't have information for themselves, but have +;;; descendants which unwind to other descendants (left.left unwinds to +;;; left.right, and right unwinds to far_right). Make sure that these local +;;; unwinds don't trip up processing of the ancestor nodes (left and root) that +;;; ultimately have no information. +;;; CHECK-LABEL: define void @test6( +define void @test6() personality void()* @ProcessCLRException { +entry: +; CHECK-NEXT: entry: + invoke void @test6_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test6_inlinee() alwaysinline personality void ()* @ProcessCLRException { +entry: + invoke void @g() + to label %exit unwind label %root + ; CHECK-NEXT: invoke void @g() + ; CHECK-NEXT: unwind label %[[root:.+]] +root: + %root.pad = cleanuppad within none [] + invoke void @g() [ "funclet"(token %root.pad) ] + to label %root.cont unwind label %left +; CHECK: [[root]]: +; CHECK-NEXT: %[[root_pad:.+]] = cleanuppad within none [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[root_pad]]) ] +; CHECK-NEXT: to label %[[root_cont:.+]] unwind label %[[left:.+]] + +left: + %left.cs = catchswitch within %root.pad [label %left.catch] unwind to caller +; CHECK: [[left]]: +; CHECK-NEXT: %[[left_cs:.+]] = catchswitch within 
%[[root_pad]] [label %[[left_catch:.+]]] unwind label %cleanup + +left.catch: + %left.cp = catchpad within %left.cs [] + call void @g() [ "funclet"(token %left.cp) ] + invoke void @g() [ "funclet"(token %left.cp) ] + to label %unreach unwind label %left.left +; CHECK: [[left_catch:.+]]: +; CHECK-NEXT: %[[left_cp:.+]] = catchpad within %[[left_cs]] [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[left_cp]]) ] +; CHECK-NEXT: to label %[[lc_cont:.+]] unwind label %cleanup +; CHECK: [[lc_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[left_cp]]) ] +; CHECK-NEXT: to label %[[unreach:.+]] unwind label %[[left_left:.+]] + +left.left: + %ll.pad = cleanuppad within %left.cp [] + cleanupret from %ll.pad unwind label %left.right +; CHECK: [[left_left]]: +; CHECK-NEXT: %[[ll_pad:.+]] = cleanuppad within %[[left_cp]] [] +; CHECK-NEXT: cleanupret from %[[ll_pad]] unwind label %[[left_right:.+]] + +left.right: + %lr.pad = cleanuppad within %left.cp [] + unreachable +; CHECK: [[left_right]]: +; CHECK-NEXT: %[[lr_pad:.+]] = cleanuppad within %[[left_cp]] [] +; CHECK-NEXT: unreachable + +root.cont: + call void @g() [ "funclet"(token %root.pad) ] + invoke void @g() [ "funclet"(token %root.pad) ] + to label %unreach unwind label %right +; CHECK: [[root_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[root_pad]]) ] +; CHECK-NEXT: to label %[[root_cont_cont:.+]] unwind label %cleanup +; CHECK: [[root_cont_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[root_pad]]) ] +; CHECK-NEXT: to label %[[unreach]] unwind label %[[right:.+]] + +right: + %right.pad = cleanuppad within %root.pad [] + invoke void @g() [ "funclet"(token %right.pad) ] + to label %unreach unwind label %right.child +; CHECK: [[right]]: +; CHECK-NEXT: %[[right_pad:.+]] = cleanuppad within %[[root_pad]] [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[right_pad]]) ] +; CHECK-NEXT: to label %[[unreach]] unwind label %[[right_child:.+]] + +right.child: + %rc.pad = cleanuppad 
within %right.pad [] + invoke void @g() [ "funclet"(token %rc.pad) ] + to label %unreach unwind label %far_right +; CHECK: [[right_child]]: +; CHECK-NEXT: %[[rc_pad:.+]] = cleanuppad within %[[right_pad]] [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[rc_pad]]) ] +; CHECK-NEXT: to label %[[unreach]] unwind label %[[far_right:.+]] + +far_right: + %fr.cs = catchswitch within %root.pad [label %fr.catch] unwind to caller +; CHECK: [[far_right]]: +; CHECK-NEXT: %[[fr_cs:.+]] = catchswitch within %[[root_pad]] [label %[[fr_catch:.+]]] unwind label %cleanup + +fr.catch: + %fr.cp = catchpad within %fr.cs [] + unreachable +; CHECK: [[fr_catch]]: +; CHECK-NEXT: %[[fr_cp:.+]] = catchpad within %[[fr_cs]] [] +; CHECK-NEXT: unreachable + +unreach: + unreachable +; CHECK: [[unreach]]: +; CHECK-NEXT: unreachable + +exit: + ret void +} + + +;;; Test with a no-info funclet (right) which has a cousin (left.left) that +;;; unwinds to another cousin (left.right); make sure we don't trip over this +;;; when propagating unwind destination info to "right". 
+;;; CHECK-LABEL: define void @test7( +define void @test7() personality void()* @ProcessCLRException { +entry: +; CHECK-NEXT: entry: + invoke void @test7_inlinee() + to label %exit unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test7_inlinee() alwaysinline personality void ()* @ProcessCLRException { +entry: + invoke void @g() + to label %exit unwind label %root +; CHECK-NEXT: invoke void @g() +; CHECK-NEXT: unwind label %[[root:.+]] + +root: + %root.cp = cleanuppad within none [] + invoke void @g() [ "funclet"(token %root.cp) ] + to label %root.cont unwind label %child +; CHECK: [[root]]: +; CHECK-NEXT: %[[root_cp:.+]] = cleanuppad within none [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[root_cp]]) ] +; CHECK-NEXT: to label %[[root_cont:.+]] unwind label %[[child:.+]] + +root.cont: + cleanupret from %root.cp unwind to caller +; CHECK: [[root_cont]]: +; CHECK-NEXT: cleanupret from %[[root_cp]] unwind label %cleanup + +child: + %child.cp = cleanuppad within %root.cp [] + invoke void @g() [ "funclet"(token %child.cp) ] + to label %child.cont unwind label %left +; CHECK: [[child]]: +; CHECK-NEXT: %[[child_cp:.+]] = cleanuppad within %[[root_cp]] [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[child_cp]]) ] +; CHECK-NEXT: to label %[[child_cont:.+]] unwind label %[[left:.+]] + +left: + %left.cp = cleanuppad within %child.cp [] + invoke void @g() [ "funclet"(token %left.cp) ] + to label %left.cont unwind label %left.left +; CHECK: [[left]]: +; CHECK-NEXT: %[[left_cp:.+]] = cleanuppad within %[[child_cp]] [] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[left_cp]]) ] +; CHECK-NEXT: to label %[[left_cont:.+]] unwind label %[[left_left:.+]] + +left.left: + %ll.cp = cleanuppad within %left.cp [] + cleanupret from %ll.cp unwind label %left.right +; CHECK: [[left_left]]: +; CHECK-NEXT: %[[ll_cp:.+]] = cleanuppad 
within %[[left_cp]] [] +; CHECK-NEXT: cleanupret from %[[ll_cp]] unwind label %[[left_right:.+]] + +left.cont: + invoke void @g() [ "funclet"(token %left.cp) ] + to label %unreach unwind label %left.right +; CHECK: [[left_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[left_cp]]) ] +; CHECK-NEXT: to label %[[unreach:.+]] unwind label %[[left_right]] + +left.right: + %lr.cp = cleanuppad within %left.cp [] + unreachable +; CHECK: [[left_right]]: +; CHECK-NEXT: %[[lr_cp:.+]] = cleanuppad within %[[left_cp]] [] +; CHECK-NEXT: unreachable + +child.cont: + invoke void @g() [ "funclet"(token %child.cp) ] + to label %unreach unwind label %right +; CHECK: [[child_cont]]: +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[child_cp]]) ] +; CHECK-NEXT: to label %[[unreach]] unwind label %[[right:.+]] + +right: + %right.cp = cleanuppad within %child.cp [] + call void @g() [ "funclet"(token %right.cp) ] + unreachable +; CHECK: [[right]]: +; CHECK-NEXT: %[[right_cp:.+]] = cleanuppad within %[[child_cp]] +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[right_cp]]) ] +; CHECK-NEXT: to label %[[right_cont:.+]] unwind label %cleanup +; CHECK: [[right_cont]]: +; CHECK-NEXT: unreachable + +unreach: + unreachable +; CHECK: [[unreach]]: +; CHECK-NEXT: unreachable + +exit: + ret void +} + +declare void @ProcessCLRException() + +; Make sure the logic doesn't get tripped up when the inlined invoke is +; itself within a funclet in the caller. 
+; CHECK-LABEL: define void @test8( +define void @test8() personality void ()* @ProcessCLRException { +entry: + invoke void @g() + to label %exit unwind label %callsite_parent +callsite_parent: + %callsite_parent.pad = cleanuppad within none [] +; CHECK: %callsite_parent.pad = cleanuppad within none + invoke void @test8_inlinee() [ "funclet"(token %callsite_parent.pad) ] + to label %ret unwind label %cleanup +ret: + cleanupret from %callsite_parent.pad unwind label %cleanup +cleanup: + %pad = cleanuppad within none [] + call void @g() [ "funclet"(token %pad) ] + cleanupret from %pad unwind to caller +exit: + ret void +} + +define void @test8_inlinee() alwaysinline personality void ()* @ProcessCLRException { +entry: + invoke void @g() + to label %exit unwind label %inlinee_cleanup +; CHECK-NEXT: invoke void @g() [ "funclet"(token %callsite_parent.pad) ] +; CHECK-NEXT: unwind label %[[inlinee_cleanup:.+]] + +inlinee_cleanup: + %inlinee.pad = cleanuppad within none [] + call void @g() [ "funclet"(token %inlinee.pad) ] + unreachable +; CHECK: [[inlinee_cleanup]]: +; CHECK-NEXT: %[[inlinee_pad:[^ ]+]] = cleanuppad within %callsite_parent.pad +; CHECK-NEXT: invoke void @g() [ "funclet"(token %[[inlinee_pad]]) ] +; CHECK-NEXT: unwind label %cleanup{{$}} + +exit: + ret void +} diff --git a/llvm/test/Transforms/Inline/inline-hot-callee.ll b/llvm/test/Transforms/Inline/inline-hot-callee.ll new file mode 100644 index 00000000000..dad57440063 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-hot-callee.ll @@ -0,0 +1,55 @@ +; RUN: opt < %s -inline -inline-threshold=0 -inlinehint-threshold=100 -S | FileCheck %s + +; This tests that a hot callee gets the (higher) inlinehint-threshold even +; without inline hints and gets inlined because the cost is less than +; inlinehint-threshold. A cold callee with identical body does not get inlined +; because cost exceeds the inline-threshold. This test is relevant only when the +; old pass manager is used. 
+ +define i32 @callee1(i32 %x) !prof !21 { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @callee2(i32 %x) !prof !22 { +; CHECK-LABEL: @callee2( + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @caller2(i32 %y1) !prof !22 { +; CHECK-LABEL: @caller2( +; CHECK: call i32 @callee2 +; CHECK-NOT: call i32 @callee1 +; CHECK: ret i32 %x3.i + %y2 = call i32 @callee2(i32 %y1) + %y3 = call i32 @callee1(i32 %y2) + ret i32 %y3 +} + +declare void @extern() + +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 300} +!22 = !{!"function_entry_count", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite-2.ll b/llvm/test/Transforms/Inline/inline-hot-callsite-2.ll new file mode 100644 index 00000000000..ccfe2f0b5de --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-hot-callsite-2.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=0 -inlinehint-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s + +; This tests that a callsite which is determined to be hot based on the caller's +; entry count and the callsite block frequency gets the hot-callsite-threshold. +; Another callsite with the same callee that is not hot does not get inlined +; because cost exceeds the inline-threshold. 
inlinehint-threshold is set to 0 +; to ensure callee's hotness is not used to boost the threshold. + +define i32 @callee1(i32 %x) !prof !21 { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + ret i32 %x3 +} + +define i32 @caller(i32 %n) !prof !22 { +; CHECK-LABEL: @caller( + %cond = icmp sle i32 %n, 100 + br i1 %cond, label %cond_true, label %cond_false, !prof !0 + +cond_true: +; CHECK-LABEL: cond_true: +; CHECK-NOT: call i32 @callee1 +; CHECK: ret i32 %x3.i + %i = call i32 @callee1(i32 %n) + ret i32 %i +cond_false: +; CHECK-LABEL: cond_false: +; CHECK: call i32 @callee1 +; CHECK: ret i32 %j + %j = call i32 @callee1(i32 %n) + ret i32 %j +} +declare void @extern() + +!0 = !{!"branch_weights", i32 64, i32 4} + +!llvm.module.flags = !{!1} +!21 = !{!"function_entry_count", i64 200} +!22 = !{!"function_entry_count", i64 200} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/llvm/test/Transforms/Inline/inline-hot-callsite.ll b/llvm/test/Transforms/Inline/inline-hot-callsite.ll new file mode 100644 index 00000000000..48fa3039741 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-hot-callsite.ll @@ -0,0 +1,61 @@ +; This tests that a hot callsite gets the (higher) inlinehint-threshold even +; without inline hints and gets inlined because the cost is less than +; inlinehint-threshold. 
A cold callee with identical body does not get inlined because +; cost exceeds the inline-threshold + +; RUN: opt < %s -inline -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s + +; Run this with the default O2 pipeline to test that profile summary analysis +; is available during inlining. +; RUN: opt < %s -passes='default<O2>' -inline-threshold=0 -hot-callsite-threshold=100 -S | FileCheck %s + +define i32 @callee1(i32 %x) { + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + call void @extern() + ret i32 %x3 +} + +define i32 @callee2(i32 %x) { +; CHECK-LABEL: @callee2( + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + call void @extern() + call void @extern() + ret i32 %x3 +} + +define i32 @caller2(i32 %y1) { +; CHECK-LABEL: @caller2( +; CHECK: call i32 @callee2 +; CHECK-NOT: call i32 @callee1 +; CHECK: ret i32 %x3.i + %y2 = call i32 @callee2(i32 %y1), !prof !22 + %y3 = call i32 @callee1(i32 %y2), !prof !21 + ret i32 %y3 +} + +declare void @extern() + +!llvm.module.flags = !{!1} +!21 = !{!"branch_weights", i64 300} +!22 = !{!"branch_weights", i64 1} + +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 1000} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 1000} +!8 = !{!"NumCounts", i64 3} +!9 = !{!"NumFunctions", i64 3} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} diff --git a/llvm/test/Transforms/Inline/inline-indirect.ll b/llvm/test/Transforms/Inline/inline-indirect.ll new file mode 100644 index 00000000000..f6eb528e065 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-indirect.ll @@ 
-0,0 +1,19 @@ +; RUN: opt < %s -inline -disable-output 2>/dev/null +; This test used to trigger an assertion in the assumption cache when +; inlining the indirect call +declare void @llvm.assume(i1) + +define void @foo() { + ret void +} + +define void @bar(void ()*) { + call void @llvm.assume(i1 true) + call void %0(); + ret void +} + +define void @baz() { + call void @bar(void ()* @foo) + ret void +} diff --git a/llvm/test/Transforms/Inline/inline-invoke-tail.ll b/llvm/test/Transforms/Inline/inline-invoke-tail.ll new file mode 100644 index 00000000000..d85ef50fff8 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-invoke-tail.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -inline -S | not grep "tail call void @llvm.memcpy.p0i8.p0i8.i32" +; PR3550 + +define internal void @foo(i32* %p, i32* %q) { +; CHECK-NOT: @foo +entry: + %pp = bitcast i32* %p to i8* + %qq = bitcast i32* %q to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i32(i8* %pp, i8* %qq, i32 4, i1 false) + ret void +} + +define i32 @main() personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: define i32 @main() personality i32 (...)* @__gxx_personality_v0 +entry: + %a = alloca i32 + %b = alloca i32 + store i32 1, i32* %a, align 4 + store i32 0, i32* %b, align 4 + invoke void @foo(i32* %a, i32* %b) + to label %invcont unwind label %lpad +; CHECK-NOT: invoke +; CHECK-NOT: @foo +; CHECK-NOT: tail +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32 +; CHECK: br + +invcont: + %retval = load i32, i32* %a, align 4 + ret i32 %retval + +lpad: + %exn = landingpad {i8*, i32} + catch i8* null + unreachable +} + +declare i32 @__gxx_personality_v0(...) 
+ +declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i1) nounwind diff --git a/llvm/test/Transforms/Inline/inline-invoke-with-asm-call.ll b/llvm/test/Transforms/Inline/inline-invoke-with-asm-call.ll new file mode 100644 index 00000000000..3b4ba19b774 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-invoke-with-asm-call.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +target triple = "x86_64-apple-darwin" + +; In inliner, we assume that inline asm does not throw. This testing case makes +; sure that the inliner does not convert "call asm" to "invoke asm". +; rdar://15317907 +; CHECK-LABEL: @caller +; Make sure we are generating "call asm" instead of "invoke asm". +; CHECK: call void asm +; CHECK-LABEL: @callee_with_asm +define void @caller() personality i8* bitcast (i32 (...)* @__objc_personality_v0 to i8*) { + br i1 undef, label %1, label %4 + +; <label>:1 + invoke void @callee_with_asm() + to label %4 unwind label %2 + +; <label>:2 + %3 = landingpad { i8*, i32 } + cleanup + resume { i8*, i32 } undef + +; <label>:4 + ret void +} + +define void @callee_with_asm() { + call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""() + ret void +} + +declare i32 @__objc_personality_v0(...) diff --git a/llvm/test/Transforms/Inline/inline-min-legal-vector-width.ll b/llvm/test/Transforms/Inline/inline-min-legal-vector-width.ll new file mode 100644 index 00000000000..ec727419f01 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-min-legal-vector-width.ll @@ -0,0 +1,44 @@ +; RUN: opt %s -inline -S | FileCheck %s + +define internal void @innerSmall() "min-legal-vector-width"="128" { + ret void +} + +define internal void @innerLarge() "min-legal-vector-width"="512" { + ret void +} + +define internal void @innerNoAttribute() { + ret void +} + +; We should not add an attribute during inlining. No attribute means unknown. 
+; Inlining doesn't change the fact that we don't know anything about this +; function. +define void @outerNoAttribute() { + call void @innerLarge() + ret void +} + +define void @outerConflictingAttributeSmall() "min-legal-vector-width"="128" { + call void @innerLarge() + ret void +} + +define void @outerConflictingAttributeLarge() "min-legal-vector-width"="512" { + call void @innerSmall() + ret void +} + +; We should remove the attribute after inlining since the callee's +; vector width requirements are unknown. +define void @outerAttribute() "min-legal-vector-width"="128" { + call void @innerNoAttribute() + ret void +} + +; CHECK: define void @outerNoAttribute() { +; CHECK: define void @outerConflictingAttributeSmall() #0 +; CHECK: define void @outerConflictingAttributeLarge() #0 +; CHECK: define void @outerAttribute() { +; CHECK: attributes #0 = { "min-legal-vector-width"="512" } diff --git a/llvm/test/Transforms/Inline/inline-optnone.ll b/llvm/test/Transforms/Inline/inline-optnone.ll new file mode 100644 index 00000000000..9b99c4558ea --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-optnone.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -inline -S | FileCheck %s + +; Test that functions with attribute optnone are not inlined. +; Also test that only functions with attribute alwaysinline are +; valid candidates for inlining if the caller has the optnone attribute. 
+ +; Function Attrs: alwaysinline nounwind readnone uwtable +define i32 @alwaysInlineFunction(i32 %a) #0 { +entry: + %mul = mul i32 %a, %a + ret i32 %mul +} + +; Function Attrs: nounwind readnone uwtable +define i32 @simpleFunction(i32 %a) #1 { +entry: + %add = add i32 %a, %a + ret i32 %add +} + +; Function Attrs: nounwind noinline optnone readnone uwtable +define i32 @OptnoneFunction(i32 %a) #2 { +entry: + %0 = tail call i32 @alwaysInlineFunction(i32 %a) + %1 = tail call i32 @simpleFunction(i32 %a) + %add = add i32 %0, %1 + ret i32 %add +} + +; CHECK-LABEL: @OptnoneFunction +; CHECK-NOT: call i32 @alwaysInlineFunction(i32 %a) +; CHECK: call i32 @simpleFunction(i32 %a) +; CHECK: ret + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #1 { +entry: + %0 = tail call i32 @OptnoneFunction(i32 5) + %1 = tail call i32 @simpleFunction(i32 6) + %add = add i32 %0, %1 + ret i32 %add +} + +; CHECK-LABEL: @bar +; CHECK: call i32 @OptnoneFunction(i32 5) +; CHECK-NOT: call i32 @simpleFunction(i32 6) +; CHECK: ret + + +attributes #0 = { alwaysinline nounwind readnone uwtable } +attributes #1 = { nounwind readnone uwtable } +attributes #2 = { nounwind noinline optnone readnone uwtable } diff --git a/llvm/test/Transforms/Inline/inline-optsize.ll b/llvm/test/Transforms/Inline/inline-optsize.ll new file mode 100644 index 00000000000..c7cd9b3189d --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-optsize.ll @@ -0,0 +1,47 @@ +; RUN: opt -S -Oz < %s | FileCheck %s -check-prefix=OZ +; RUN: opt -S -O2 < %s | FileCheck %s -check-prefix=O2 +; RUN: opt -S -Os < %s | FileCheck %s -check-prefix=OS + +; The inline threshold for a function with the optsize attribute is currently +; the same as the global inline threshold for -Os. Check that the optsize +; function attribute doesn't alter the function-specific inline threshold if the +; global inline threshold is lower (as for -Oz). 
+ +@a = global i32 4 + +; This function should be larger than the inline threshold for -Oz (25), but +; smaller than the inline threshold for optsize (75). +define i32 @inner() { + call void @extern() + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x3, %a5 + ret i32 %x5 +} + +; @inner() should be inlined for -O2 and -Os but not for -Oz. +; OZ: call +; O2-NOT: call +; OS-NOT: call +define i32 @outer() optsize { + %r = call i32 @inner() + ret i32 %r +} + +; @inner() should not be inlined for -O2, -Os and -Oz. +; OZ: call +; O2: call +; OS: call +define i32 @outer2() minsize { + %r = call i32 @inner() + ret i32 %r +} + +declare void @extern()
\ No newline at end of file diff --git a/llvm/test/Transforms/Inline/inline-probe-stack.ll b/llvm/test/Transforms/Inline/inline-probe-stack.ll new file mode 100644 index 00000000000..bddee16d30b --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-probe-stack.ll @@ -0,0 +1,20 @@ +; RUN: opt %s -inline -S | FileCheck %s + +define internal void @inner() "probe-stack"="__probestackinner" { + ret void +} + +define void @outerNoAttribute() { + call void @inner() + ret void +} + +define void @outerConflictingAttribute() "probe-stack"="__probestackouter" { + call void @inner() + ret void +} + +; CHECK: define void @outerNoAttribute() #0 +; CHECK: define void @outerConflictingAttribute() #1 +; CHECK: attributes #0 = { "probe-stack"="__probestackinner" } +; CHECK: attributes #1 = { "probe-stack"="__probestackouter" } diff --git a/llvm/test/Transforms/Inline/inline-remark.ll b/llvm/test/Transforms/Inline/inline-remark.ll new file mode 100644 index 00000000000..bfb78c9ee88 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-remark.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -inline -inline-remark-attribute --inline-threshold=0 -S | FileCheck %s + +; Test that the inliner adds inline remark attributes to non-inlined callsites. + +declare void @ext(); + +define void @foo() { + call void @bar(i1 true) + ret void +} + +define void @bar(i1 %p) { + br i1 %p, label %bb1, label %bb2 + +bb1: + call void @foo() + call void @ext() + ret void + +bb2: + call void @bar(i1 true) + ret void +} + +;; Test 1 - Add different inline remarks to similar callsites. +define void @test1() { +; CHECK-LABEL: @test1 +; CHECK-NEXT: call void @bar(i1 true) [[ATTR1:#[0-9]+]] +; CHECK-NEXT: call void @bar(i1 false) [[ATTR2:#[0-9]+]] + call void @bar(i1 true) + call void @bar(i1 false) + ret void +} + +define void @noop() { + ret void +} + +;; Test 2 - Printed InlineResult messages are followed by InlineCost. 
+define void @test2(i8*) { +; CHECK-LABEL: @test2 +; CHECK-NEXT: call void @noop() [[ATTR3:#[0-9]+]] [ "CUSTOM_OPERAND_BUNDLE"() ] +; CHECK-NEXT: ret void + call void @noop() ; extepected to be inlined + call void @noop() [ "CUSTOM_OPERAND_BUNDLE"() ] ; cannot be inlined because of unsupported operand bundle + ret void +} + +;; Test 3 - InlineResult messages come from llvm::isInlineViable() +define void @test3() { +; CHECK-LABEL: @test3 +; CHECK-NEXT: call void @test3() [[ATTR4:#[0-9]+]] +; CHECK-NEXT: ret void + call void @test3() alwaysinline + ret void +} + +; CHECK: attributes [[ATTR1]] = { "inline-remark"="(cost=25, threshold=0)" } +; CHECK: attributes [[ATTR2]] = { "inline-remark"="(cost=never): recursive" } +; CHECK: attributes [[ATTR3]] = { "inline-remark"="unsupported operand bundle; (cost={{.*}}, threshold={{.*}})" } +; CHECK: attributes [[ATTR4]] = { alwaysinline "inline-remark"="(cost=never): recursive call" } diff --git a/llvm/test/Transforms/Inline/inline-stack-probe-size.ll b/llvm/test/Transforms/Inline/inline-stack-probe-size.ll new file mode 100644 index 00000000000..d24da462d2e --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-stack-probe-size.ll @@ -0,0 +1,29 @@ +; RUN: opt %s -inline -S | FileCheck %s + +define internal void @innerSmall() "stack-probe-size"="4096" { + ret void +} + +define internal void @innerLarge() "stack-probe-size"="8192" { + ret void +} + +define void @outerNoAttribute() { + call void @innerSmall() + ret void +} + +define void @outerConflictingAttributeSmall() "stack-probe-size"="4096" { + call void @innerLarge() + ret void +} + +define void @outerConflictingAttributeLarge() "stack-probe-size"="8192" { + call void @innerSmall() + ret void +} + +; CHECK: define void @outerNoAttribute() #0 +; CHECK: define void @outerConflictingAttributeSmall() #0 +; CHECK: define void @outerConflictingAttributeLarge() #0 +; CHECK: attributes #0 = { "stack-probe-size"="4096" } diff --git a/llvm/test/Transforms/Inline/inline-tail.ll 
b/llvm/test/Transforms/Inline/inline-tail.ll new file mode 100644 index 00000000000..10b486c5154 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-tail.ll @@ -0,0 +1,219 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +; We have to apply the less restrictive TailCallKind of the call site being +; inlined and any call sites cloned into the caller. + +; No tail marker after inlining, since test_capture_c captures an alloca. +; CHECK: define void @test_capture_a( +; CHECK-NOT: tail +; CHECK: call void @test_capture_c( + +declare void @test_capture_c(i32*) +define internal void @test_capture_b(i32* %P) { + tail call void @test_capture_c(i32* %P) + ret void +} +define void @test_capture_a() { + %A = alloca i32 ; captured by test_capture_b + call void @test_capture_b(i32* %A) + ret void +} + +; No musttail marker after inlining, since the prototypes don't match. +; CHECK: define void @test_proto_mismatch_a( +; CHECK-NOT: musttail +; CHECK: call void @test_proto_mismatch_c( + +declare void @test_proto_mismatch_c(i32*) +define internal void @test_proto_mismatch_b(i32* %p) { + musttail call void @test_proto_mismatch_c(i32* %p) + ret void +} +define void @test_proto_mismatch_a() { + call void @test_proto_mismatch_b(i32* null) + ret void +} + +; After inlining through a musttail call site, we need to keep musttail markers +; to prevent unbounded stack growth. +; CHECK: define void @test_musttail_basic_a( +; CHECK: musttail call void @test_musttail_basic_c( + +declare void @test_musttail_basic_c(i32* %p) +define internal void @test_musttail_basic_b(i32* %p) { + musttail call void @test_musttail_basic_c(i32* %p) + ret void +} +define void @test_musttail_basic_a(i32* %p) { + musttail call void @test_musttail_basic_b(i32* %p) + ret void +} + +; Don't insert lifetime end markers here, the lifetime is trivially over due +; the return. 
+; CHECK: define void @test_byval_a( +; CHECK: musttail call void @test_byval_c( +; CHECK-NEXT: ret void + +declare void @test_byval_c(i32* byval %p) +define internal void @test_byval_b(i32* byval %p) { + musttail call void @test_byval_c(i32* byval %p) + ret void +} +define void @test_byval_a(i32* byval %p) { + musttail call void @test_byval_b(i32* byval %p) + ret void +} + +; Don't insert a stack restore, we're about to return. +; CHECK: define void @test_dynalloca_a( +; CHECK: call i8* @llvm.stacksave( +; CHECK: alloca i8, i32 %n +; CHECK: musttail call void @test_dynalloca_c( +; CHECK-NEXT: ret void + +declare void @escape(i8* %buf) +declare void @test_dynalloca_c(i32* byval %p, i32 %n) +define internal void @test_dynalloca_b(i32* byval %p, i32 %n) alwaysinline { + %buf = alloca i8, i32 %n ; dynamic alloca + call void @escape(i8* %buf) ; escape it + musttail call void @test_dynalloca_c(i32* byval %p, i32 %n) + ret void +} +define void @test_dynalloca_a(i32* byval %p, i32 %n) { + musttail call void @test_dynalloca_b(i32* byval %p, i32 %n) + ret void +} + +; We can't merge the returns. +; CHECK: define void @test_multiret_a( +; CHECK: musttail call void @test_multiret_c( +; CHECK-NEXT: ret void +; CHECK: musttail call void @test_multiret_d( +; CHECK-NEXT: ret void + +declare void @test_multiret_c(i1 zeroext %b) +declare void @test_multiret_d(i1 zeroext %b) +define internal void @test_multiret_b(i1 zeroext %b) { + br i1 %b, label %c, label %d +c: + musttail call void @test_multiret_c(i1 zeroext %b) + ret void +d: + musttail call void @test_multiret_d(i1 zeroext %b) + ret void +} +define void @test_multiret_a(i1 zeroext %b) { + musttail call void @test_multiret_b(i1 zeroext %b) + ret void +} + +; We have to avoid bitcast chains. 
+; CHECK: define i32* @test_retptr_a( +; CHECK: musttail call i8* @test_retptr_c( +; CHECK-NEXT: bitcast i8* {{.*}} to i32* +; CHECK-NEXT: ret i32* + +declare i8* @test_retptr_c() +define internal i16* @test_retptr_b() { + %rv = musttail call i8* @test_retptr_c() + %v = bitcast i8* %rv to i16* + ret i16* %v +} +define i32* @test_retptr_a() { + %rv = musttail call i16* @test_retptr_b() + %v = bitcast i16* %rv to i32* + ret i32* %v +} + +; Combine the last two cases: multiple returns with pointer bitcasts. +; CHECK: define i32* @test_multiptrret_a( +; CHECK: musttail call i8* @test_multiptrret_c( +; CHECK-NEXT: bitcast i8* {{.*}} to i32* +; CHECK-NEXT: ret i32* +; CHECK: musttail call i8* @test_multiptrret_d( +; CHECK-NEXT: bitcast i8* {{.*}} to i32* +; CHECK-NEXT: ret i32* + +declare i8* @test_multiptrret_c(i1 zeroext %b) +declare i8* @test_multiptrret_d(i1 zeroext %b) +define internal i16* @test_multiptrret_b(i1 zeroext %b) { + br i1 %b, label %c, label %d +c: + %c_rv = musttail call i8* @test_multiptrret_c(i1 zeroext %b) + %c_v = bitcast i8* %c_rv to i16* + ret i16* %c_v +d: + %d_rv = musttail call i8* @test_multiptrret_d(i1 zeroext %b) + %d_v = bitcast i8* %d_rv to i16* + ret i16* %d_v +} +define i32* @test_multiptrret_a(i1 zeroext %b) { + %rv = musttail call i16* @test_multiptrret_b(i1 zeroext %b) + %v = bitcast i16* %rv to i32* + ret i32* %v +} + +; Inline a musttail call site which contains a normal return and a musttail call. 
+; CHECK: define i32 @test_mixedret_a( +; CHECK: br i1 %b +; CHECK: musttail call i32 @test_mixedret_c( +; CHECK-NEXT: ret i32 +; CHECK: call i32 @test_mixedret_d(i1 zeroext %b) +; CHECK: add i32 1, +; CHECK-NOT: br +; CHECK: ret i32 + +declare i32 @test_mixedret_c(i1 zeroext %b) +declare i32 @test_mixedret_d(i1 zeroext %b) +define internal i32 @test_mixedret_b(i1 zeroext %b) { + br i1 %b, label %c, label %d +c: + %c_rv = musttail call i32 @test_mixedret_c(i1 zeroext %b) + ret i32 %c_rv +d: + %d_rv = call i32 @test_mixedret_d(i1 zeroext %b) + %d_rv1 = add i32 1, %d_rv + ret i32 %d_rv1 +} +define i32 @test_mixedret_a(i1 zeroext %b) { + %rv = musttail call i32 @test_mixedret_b(i1 zeroext %b) + ret i32 %rv +} + +declare i32 @donttailcall() + +define i32 @notail() { + %rv = notail call i32 @donttailcall() + ret i32 %rv +} + +; CHECK: @test_notail +; CHECK: notail call i32 @donttailcall +; CHECK: ret +define i32 @test_notail() { + %rv = tail call i32 @notail() + ret i32 %rv +} + +; PR31014: Inlining a musttail call through a notail call site should remove +; any tail marking, otherwise we break verifier invariants. + +declare void @do_ret(i32) + +define void @test_notail_inline_musttail(i32 %a) { + notail call void @inline_musttail(i32 %a) + musttail call void @do_ret(i32 %a) + ret void +} + +define internal void @inline_musttail(i32 %a) { + musttail call void @do_ret(i32 %a) + ret void +} + +; CHECK-LABEL: define void @test_notail_inline_musttail(i32 %a) +; CHECK: {{^ *}}call void @do_ret(i32 %a) +; CHECK: musttail call void @do_ret(i32 %a) +; CHECK: ret void diff --git a/llvm/test/Transforms/Inline/inline-threshold.ll b/llvm/test/Transforms/Inline/inline-threshold.ll new file mode 100644 index 00000000000..cb0c8e9fcc4 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-threshold.ll @@ -0,0 +1,89 @@ +; Test that -inline-threshold overrides thresholds derived from opt levels. 
+; RUN: opt < %s -O2 -inline-threshold=500 -S | FileCheck %s +; RUN: opt < %s -O3 -inline-threshold=500 -S | FileCheck %s +; RUN: opt < %s -Os -inline-threshold=500 -S | FileCheck %s +; RUN: opt < %s -Oz -inline-threshold=500 -S | FileCheck %s + +@a = global i32 4 + +define i32 @simpleFunction(i32 %a) #0 { +entry: + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %cmp = icmp eq i32 %a1, 0 + br i1 %cmp, label %if.then, label %if.else +if.then: + %a2 = load volatile i32, i32* @a + %x2_0 = add i32 %x1, %a2 + br label %if.else +if.else: + %x2 = phi i32 [ %x1, %entry ], [ %x2_0, %if.then ] + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a7 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %a13 = load volatile i32, i32* @a + %x13 = add i32 %x12, %a13 + %a14 = load volatile i32, i32* @a + %x14 = add i32 %x13, %a14 + %a15 = load volatile i32, i32* @a + %x15 = add i32 %x14, %a15 + %a16 = load volatile i32, i32* @a + %x16 = add i32 %x15, %a16 + %a17 = load volatile i32, i32* @a + %x17 = add i32 %x16, %a17 + %a18 = load volatile i32, i32* @a + %x18 = add i32 %x17, %a18 + %a19 = load volatile i32, i32* @a + %x19 = add i32 %x18, %a19 + %a20 = load volatile i32, i32* @a + %x20 = add i32 %x19, %a20 + %a21 = load volatile i32, i32* @a + %x21 = add i32 %x20, %a21 + %a22 = load volatile i32, i32* @a + %x22 = add i32 %x21, %a22 + %a23 = load volatile i32, i32* @a + %x23 = add i32 %x22, %a23 + %a24 = load volatile i32, i32* @a + %x24 = add i32 %x23, %a24 + %a25 = load 
volatile i32, i32* @a + %x25 = add i32 %x24, %a25 + %a26 = load volatile i32, i32* @a + %x26 = add i32 %x25, %a26 + %a27 = load volatile i32, i32* @a + %x27 = add i32 %x26, %a27 + %a28 = load volatile i32, i32* @a + %x28 = add i32 %x27, %a28 + %a29 = load volatile i32, i32* @a + %x29 = add i32 %x28, %a29 + %add = add i32 %x29, %a + ret i32 %add +} + +; Function Attrs: nounwind readnone uwtable +define i32 @bar(i32 %a) #0 { +; CHECK-LABEL: @bar +; CHECK-NOT: call i32 @simpleFunction(i32 6) +; CHECK: ret +entry: + %i = tail call i32 @simpleFunction(i32 6) + ret i32 %i +} + +attributes #0 = { nounwind readnone uwtable } diff --git a/llvm/test/Transforms/Inline/inline-varargs.ll b/llvm/test/Transforms/Inline/inline-varargs.ll new file mode 100644 index 00000000000..d229ef39d59 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-varargs.ll @@ -0,0 +1,120 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline,function(instcombine))' -S | FileCheck %s + +declare void @ext_method(i8*, i32) +declare signext i16 @vararg_fn(...) #0 +declare "cc 9" void @vararg_fn_cc9(i8* %p, ...) + +define linkonce_odr void @thunk(i8* %this, ...) { + %this_adj = getelementptr i8, i8* %this, i32 4 + musttail call void (i8*, ...) bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* nonnull %this_adj, ...) + ret void +} + +define void @thunk_caller(i8* %p) { + call void (i8*, ...) @thunk(i8* %p, i32 42) + ret void +} +; CHECK-LABEL: define void @thunk_caller(i8* %p) +; CHECK: call void (i8*, ...) bitcast (void (i8*, i32)* @ext_method to void (i8*, ...)*)(i8* nonnull %this_adj.i, i32 42) + +define signext i16 @test_callee_2(...) { + %res = musttail call signext i16 (...) @vararg_fn(...) #0 + ret i16 %res +} + +define void @test_caller_2(i8* %p, i8* %q, i16 %r) { + call signext i16 (...) @test_callee_2(i8* %p, i8* byval %q, i16 signext %r) + ret void +} +; CHECK-LABEL: define void @test_caller_2 +; CHECK: call signext i16 (...) 
@vararg_fn(i8* %p, i8* byval %q, i16 signext %r) [[FN_ATTRS:#[0-9]+]] + +define void @test_callee_3(i8* %p, ...) { + call signext i16 (...) @vararg_fn() + ret void +} + +define void @test_caller_3(i8* %p, i8* %q) { + call void (i8*, ...) @test_callee_3(i8* nonnull %p, i8* %q) + ret void +} +; CHECK-LABEL: define void @test_caller_3 +; CHECK: call signext i16 (...) @vararg_fn() + +define void @test_preserve_cc(i8* %p, ...) { + musttail call "cc 9" void (i8*, ...) @vararg_fn_cc9(i8* %p, ...) + ret void +} + +define void @test_caller_preserve_cc(i8* %p, i8* %q) { + call void (i8*, ...) @test_preserve_cc(i8* %p, i8* %q) + ret void +} +; CHECK-LABEL: define void @test_caller_preserve_cc +; CHECK: call "cc 9" void (i8*, ...) @vararg_fn_cc9(i8* %p, i8* %q) + +define internal i32 @varg_accessed(...) { +entry: + %vargs = alloca i8*, align 8 + %vargs.ptr = bitcast i8** %vargs to i8* + call void @llvm.va_start(i8* %vargs.ptr) + %va1 = va_arg i8** %vargs, i32 + call void @llvm.va_end(i8* %vargs.ptr) + ret i32 %va1 +} + +define internal i32 @varg_accessed_alwaysinline(...) alwaysinline { +entry: + %vargs = alloca i8*, align 8 + %vargs.ptr = bitcast i8** %vargs to i8* + call void @llvm.va_start(i8* %vargs.ptr) + %va1 = va_arg i8** %vargs, i32 + call void @llvm.va_end(i8* %vargs.ptr) + ret i32 %va1 +} + +define i32 @call_vargs() { + %res1 = call i32 (...) @varg_accessed(i32 10) + %res2 = call i32 (...) @varg_accessed_alwaysinline(i32 15) + %res = add i32 %res1, %res2 + ret i32 %res +} +; CHECK-LABEL: @call_vargs +; CHECK: %res1 = call i32 (...) @varg_accessed(i32 10) +; CHECK-NEXT: %res2 = call i32 (...) @varg_accessed_alwaysinline(i32 15) + +define void @caller_with_vastart(i8* noalias nocapture readnone %args, ...) 
{ +entry: + %ap = alloca i8*, align 4 + %ap.ptr = bitcast i8** %ap to i8* + %ap2 = alloca i8*, align 4 + %ap2.ptr = bitcast i8** %ap to i8* + call void @llvm.va_start(i8* nonnull %ap.ptr) + call fastcc void @callee_with_vaend(i8* nonnull %ap.ptr) + call void @llvm.va_start(i8* nonnull %ap2.ptr) + call fastcc void @callee_with_vaend_alwaysinline(i8* nonnull %ap2.ptr) + ret void +} + +define internal fastcc void @callee_with_vaend_alwaysinline(i8* %a) alwaysinline { +entry: + tail call void @llvm.va_end(i8* %a) + ret void +} + +define internal fastcc void @callee_with_vaend(i8* %a) { +entry: + tail call void @llvm.va_end(i8* %a) + ret void +} + +; CHECK-LABEL: @caller_with_vastart +; CHECK-NOT: @callee_with_vaend +; CHECK-NOT: @callee_with_vaend_alwaysinline + +declare void @llvm.va_start(i8*) +declare void @llvm.va_end(i8*) + +; CHECK: attributes [[FN_ATTRS]] = { "foo"="bar" } +attributes #0 = { "foo"="bar" } diff --git a/llvm/test/Transforms/Inline/inline-vla.ll b/llvm/test/Transforms/Inline/inline-vla.ll new file mode 100644 index 00000000000..88dfc2be87f --- /dev/null +++ b/llvm/test/Transforms/Inline/inline-vla.ll @@ -0,0 +1,39 @@ +; RUN: opt -S -inline %s -o - | FileCheck %s +; RUN: opt -S -passes='cgscc(inline)' %s -o - | FileCheck %s + +; Check that memcpy2 is completely inlined away. 
+; CHECK-NOT: memcpy2 + +@.str = private unnamed_addr constant [2 x i8] c"a\00", align 1 +@.str1 = private unnamed_addr constant [3 x i8] c"ab\00", align 1 + +; Function Attrs: nounwind ssp uwtable +define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 { +entry: + %data = alloca [2 x i8], align 1 + %arraydecay = getelementptr inbounds [2 x i8], [2 x i8]* %data, i64 0, i64 0 + call fastcc void @memcpy2(i8* %arraydecay, i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str, i64 0, i64 0), i64 1) + call fastcc void @memcpy2(i8* %arraydecay, i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str1, i64 0, i64 0), i64 2) + ret i32 0 +} + +; Function Attrs: inlinehint nounwind ssp uwtable +define internal fastcc void @memcpy2(i8* nocapture %dst, i8* nocapture readonly %src, i64 %size) #1 { +entry: + %vla = alloca i64, i64 %size, align 16 + %0 = bitcast i64* %vla to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %src, i64 %size, i1 false) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %0, i64 %size, i1 false) + ret void +} + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #2 + +attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { inlinehint nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.5.0 (trunk 205695) (llvm/trunk 205706)"} diff --git a/llvm/test/Transforms/Inline/inline_cleanup.ll b/llvm/test/Transforms/Inline/inline_cleanup.ll new file mode 100644 index 
00000000000..344d900c5a3 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_cleanup.ll @@ -0,0 +1,214 @@ +; Test that the inliner doesn't leave around dead allocas, and that it folds +; uncond branches away after it is done specializing. + +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +@A = weak global i32 0 ; <i32*> [#uses=1] +@B = weak global i32 0 ; <i32*> [#uses=1] +@C = weak global i32 0 ; <i32*> [#uses=1] + +define internal fastcc void @foo(i32 %X) { +entry: + %ALL = alloca i32, align 4 ; <i32*> [#uses=1] + %tmp1 = and i32 %X, 1 ; <i32> [#uses=1] + %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true + +cond_true: ; preds = %entry + store i32 1, i32* @A + br label %cond_next + +cond_next: ; preds = %cond_true, %entry + %tmp4 = and i32 %X, 2 ; <i32> [#uses=1] + %tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1] + br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5 + +cond_true5: ; preds = %cond_next + store i32 1, i32* @B + br label %cond_next7 + +cond_next7: ; preds = %cond_true5, %cond_next + %tmp10 = and i32 %X, 4 ; <i32> [#uses=1] + %tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1] + br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11 + +cond_true11: ; preds = %cond_next7 + store i32 1, i32* @C + br label %cond_next13 + +cond_next13: ; preds = %cond_true11, %cond_next7 + %tmp16 = and i32 %X, 8 ; <i32> [#uses=1] + %tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1] + br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17 + +cond_true17: ; preds = %cond_next13 + call void @ext( i32* %ALL ) + ret void + +UnifiedReturnBlock: ; preds = %cond_next13 + ret void +} + +declare void @ext(i32*) + +define void @test() { +; CHECK-LABEL: @test( +; CHECK-NOT: ret +; +; FIXME: This should be a CHECK-NOT, but currently we have a bug that causes us +; to not nuke unused allocas. 
+; CHECK: alloca +; CHECK-NOT: ret +; +; No branches should survive the inliner's cleanup. +; CHECK-NOT: br +; CHECK: ret void + +entry: + tail call fastcc void @foo( i32 1 ) + tail call fastcc void @foo( i32 2 ) + tail call fastcc void @foo( i32 3 ) + tail call fastcc void @foo( i32 8 ) + ret void +} + +declare void @f(i32 %x) + +define void @inner2(i32 %x, i32 %y, i32 %z, i1 %b) { +entry: + %cmp1 = icmp ne i32 %x, 0 + br i1 %cmp1, label %then1, label %end1 + +then1: + call void @f(i32 %x) + br label %end1 + +end1: + %x2 = and i32 %x, %z + %cmp2 = icmp sgt i32 %x2, 1 + br i1 %cmp2, label %then2, label %end2 + +then2: + call void @f(i32 %x2) + br label %end2 + +end2: + %y2 = or i32 %y, %z + %cmp3 = icmp sgt i32 %y2, 0 + br i1 %cmp3, label %then3, label %end3 + +then3: + call void @f(i32 %y2) + br label %end3 + +end3: + br i1 %b, label %end3.1, label %end3.2 + +end3.1: + %x3.1 = or i32 %x, 10 + br label %end3.3 + +end3.2: + %x3.2 = or i32 %x, 10 + br label %end3.3 + +end3.3: + %x3.3 = phi i32 [ %x3.1, %end3.1 ], [ %x3.2, %end3.2 ] + %cmp4 = icmp slt i32 %x3.3, 1 + br i1 %cmp4, label %then4, label %end4 + +then4: + call void @f(i32 %x3.3) + br label %end4 + +end4: + ret void +} + +define void @outer2(i32 %z, i1 %b) { +; Ensure that after inlining, none of the blocks with a call to @f actually +; make it through inlining. 
+; CHECK-LABEL: define void @outer2( +; CHECK-NOT: call +; CHECK: ret void + +entry: + call void @inner2(i32 0, i32 -1, i32 %z, i1 %b) + ret void +} + +define void @PR12470_inner(i16 signext %p1) nounwind uwtable { +entry: + br i1 undef, label %cond.true, label %cond.false + +cond.true: + br label %cond.end + +cond.false: + %conv = sext i16 %p1 to i32 + br label %cond.end + +cond.end: + %cond = phi i32 [ undef, %cond.true ], [ 0, %cond.false ] + %tobool = icmp eq i32 %cond, 0 + br i1 %tobool, label %if.end5, label %if.then + +if.then: + ret void + +if.end5: + ret void +} + +define void @PR12470_outer() { +; This previously crashed during inliner cleanup and folding inner return +; instructions. Check that we don't crash and we produce a function with a single +; return instruction due to merging the returns of the inlined function. +; CHECK-LABEL: define void @PR12470_outer( +; CHECK-NOT: call +; CHECK: ret void +; CHECK-NOT: ret void +; CHECK: } + +entry: + call void @PR12470_inner(i16 signext 1) + ret void +} + +define void @crasher_inner() nounwind uwtable { +entry: + br i1 false, label %for.end28, label %for.body6 + +for.body6: + br i1 undef, label %for.body6, label %for.cond12.for.inc26_crit_edge + +for.cond12.for.inc26_crit_edge: + br label %for.body6.1 + +for.end28: + ret void + +for.body6.1: + br i1 undef, label %for.body6.1, label %for.cond12.for.inc26_crit_edge.1 + +for.cond12.for.inc26_crit_edge.1: + br label %for.body6.2 + +for.body6.2: + br i1 undef, label %for.body6.2, label %for.cond12.for.inc26_crit_edge.2 + +for.cond12.for.inc26_crit_edge.2: + br label %for.end28 +} + +define void @crasher_outer() { +; CHECK-LABEL: @crasher_outer( +; CHECK-NOT: call +; CHECK: ret void +; CHECK-NOT: ret +; CHECK: } +entry: + tail call void @crasher_inner() + ret void +} diff --git a/llvm/test/Transforms/Inline/inline_constprop.ll b/llvm/test/Transforms/Inline/inline_constprop.ll new file mode 100644 index 00000000000..b07ec03d7d6 --- /dev/null +++ 
b/llvm/test/Transforms/Inline/inline_constprop.ll @@ -0,0 +1,347 @@ +; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S | FileCheck %s + +define internal i32 @callee1(i32 %A, i32 %B) { + %C = sdiv i32 %A, %B + ret i32 %C +} + +define i32 @caller1() { +; CHECK-LABEL: define i32 @caller1( +; CHECK-NEXT: ret i32 3 + + %X = call i32 @callee1( i32 10, i32 3 ) + ret i32 %X +} + +define i32 @caller2() { +; Check that we can constant-prop through instructions after inlining callee21 +; to get constants in the inlined callsite to callee22. +; FIXME: Currently, the threshold is fixed at 20 because we don't perform +; *recursive* cost analysis to realize that the nested call site will definitely +; inline and be cheap. We should eventually do that and lower the threshold here +; to 1. +; +; CHECK-LABEL: @caller2( +; CHECK-NOT: call void @callee2 +; CHECK: ret + + %x = call i32 @callee21(i32 42, i32 48) + ret i32 %x +} + +define i32 @callee21(i32 %x, i32 %y) { + %sub = sub i32 %y, %x + %result = call i32 @callee22(i32 %sub) + ret i32 %result +} + +declare i8* @getptr() + +define i32 @callee22(i32 %x) { + %icmp = icmp ugt i32 %x, 42 + br i1 %icmp, label %bb.true, label %bb.false +bb.true: + ; This block musn't be counted in the inline cost. + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + %x4 = add i32 %x3, 1 + %x5 = add i32 %x4, 1 + %x6 = add i32 %x5, 1 + %x7 = add i32 %x6, 1 + %x8 = add i32 %x7, 1 + + ret i32 %x8 +bb.false: + ret i32 %x +} + +define i32 @caller3() { +; Check that even if the expensive path is hidden behind several basic blocks, +; it doesn't count toward the inline cost when constant-prop proves those paths +; dead. 
+; +; CHECK-LABEL: @caller3( +; CHECK-NOT: call +; CHECK: ret i32 6 + +entry: + %x = call i32 @callee3(i32 42, i32 48) + ret i32 %x +} + +define i32 @callee3(i32 %x, i32 %y) { + %sub = sub i32 %y, %x + %icmp = icmp ugt i32 %sub, 42 + br i1 %icmp, label %bb.true, label %bb.false + +bb.true: + %icmp2 = icmp ult i32 %sub, 64 + br i1 %icmp2, label %bb.true.true, label %bb.true.false + +bb.true.true: + ; This block mustn't be counted in the inline cost. + %x1 = add i32 %x, 1 + %x2 = add i32 %x1, 1 + %x3 = add i32 %x2, 1 + %x4 = add i32 %x3, 1 + %x5 = add i32 %x4, 1 + %x6 = add i32 %x5, 1 + %x7 = add i32 %x6, 1 + %x8 = add i32 %x7, 1 + br label %bb.merge + +bb.true.false: + ; This block mustn't be counted in the inline cost. + %y1 = add i32 %y, 1 + %y2 = add i32 %y1, 1 + %y3 = add i32 %y2, 1 + %y4 = add i32 %y3, 1 + %y5 = add i32 %y4, 1 + %y6 = add i32 %y5, 1 + %y7 = add i32 %y6, 1 + %y8 = add i32 %y7, 1 + br label %bb.merge + +bb.merge: + %result = phi i32 [ %x8, %bb.true.true ], [ %y8, %bb.true.false ] + ret i32 %result + +bb.false: + ret i32 %sub +} + +declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b) + +define i8 @caller4(i8 %z) { +; Check that we can constant fold through intrinsics such as the +; overflow-detecting arithmetic intrinsics. These are particularly important +; as they are used heavily in standard library code and generic C++ code where +; the arguments are often constant but complete generality is required. +; +; CHECK-LABEL: @caller4( +; CHECK-NOT: call +; CHECK: ret i8 -1 + +entry: + %x = call i8 @callee4(i8 254, i8 14, i8 %z) + ret i8 %x +} + +define i8 @callee4(i8 %x, i8 %y, i8 %z) { + %uadd = call {i8, i1} @llvm.uadd.with.overflow.i8(i8 %x, i8 %y) + %o = extractvalue {i8, i1} %uadd, 1 + br i1 %o, label %bb.true, label %bb.false + +bb.true: + ret i8 -1 + +bb.false: + ; This block mustn't be counted in the inline cost. 
+ %z1 = add i8 %z, 1 + %z2 = add i8 %z1, 1 + %z3 = add i8 %z2, 1 + %z4 = add i8 %z3, 1 + %z5 = add i8 %z4, 1 + %z6 = add i8 %z5, 1 + %z7 = add i8 %z6, 1 + %z8 = add i8 %z7, 1 + ret i8 %z8 +} + +define i64 @caller5(i64 %y) { +; Check that we can round trip constants through various kinds of casts etc w/o +; losing track of the constant prop in the inline cost analysis. +; +; CHECK-LABEL: @caller5( +; CHECK-NOT: call +; CHECK: ret i64 -1 + +entry: + %x = call i64 @callee5(i64 42, i64 %y) + ret i64 %x +} + +define i64 @callee5(i64 %x, i64 %y) { + %inttoptr = inttoptr i64 %x to i8* + %bitcast = bitcast i8* %inttoptr to i32* + %ptrtoint = ptrtoint i32* %bitcast to i64 + %trunc = trunc i64 %ptrtoint to i32 + %zext = zext i32 %trunc to i64 + %cmp = icmp eq i64 %zext, 42 + br i1 %cmp, label %bb.true, label %bb.false + +bb.true: + ret i64 -1 + +bb.false: + ; This block mustn't be counted in the inline cost. + %y1 = add i64 %y, 1 + %y2 = add i64 %y1, 1 + %y3 = add i64 %y2, 1 + %y4 = add i64 %y3, 1 + %y5 = add i64 %y4, 1 + %y6 = add i64 %y5, 1 + %y7 = add i64 %y6, 1 + %y8 = add i64 %y7, 1 + ret i64 %y8 +} + +define float @caller6() { +; Check that we can constant-prop through fcmp instructions +; +; CHECK-LABEL: @caller6( +; CHECK-NOT: call +; CHECK: ret + %x = call float @callee6(float 42.0) + ret float %x +} + +define float @callee6(float %x) { + %icmp = fcmp ugt float %x, 42.0 + br i1 %icmp, label %bb.true, label %bb.false + +bb.true: + ; This block mustn't be counted in the inline cost. 
+ %x1 = fadd float %x, 1.0 + %x2 = fadd float %x1, 1.0 + %x3 = fadd float %x2, 1.0 + %x4 = fadd float %x3, 1.0 + %x5 = fadd float %x4, 1.0 + %x6 = fadd float %x5, 1.0 + %x7 = fadd float %x6, 1.0 + %x8 = fadd float %x7, 1.0 + ret float %x8 + +bb.false: + ret float %x +} + + + +define i32 @PR13412.main() { +; This is a somewhat complicated three layer subprogram that was reported to +; compute the wrong value for a branch due to assuming that an argument +; mid-inline couldn't be equal to another pointer. +; +; After inlining, the branch should point directly to the exit block, not to +; the intermediate block. +; CHECK: @PR13412.main +; CHECK: br i1 true, label %[[TRUE_DEST:.*]], label %[[FALSE_DEST:.*]] +; CHECK: [[FALSE_DEST]]: +; CHECK-NEXT: call void @PR13412.fail() +; CHECK: [[TRUE_DEST]]: +; CHECK-NEXT: ret i32 0 + +entry: + %i1 = alloca i64 + store i64 0, i64* %i1 + %arraydecay = bitcast i64* %i1 to i32* + %call = call i1 @PR13412.first(i32* %arraydecay, i32* %arraydecay) + br i1 %call, label %cond.end, label %cond.false + +cond.false: + call void @PR13412.fail() + br label %cond.end + +cond.end: + ret i32 0 +} + +define internal i1 @PR13412.first(i32* %a, i32* %b) { +entry: + %call = call i32* @PR13412.second(i32* %a, i32* %b) + %cmp = icmp eq i32* %call, %b + ret i1 %cmp +} + +declare void @PR13412.fail() + +define internal i32* @PR13412.second(i32* %a, i32* %b) { +entry: + %sub.ptr.lhs.cast = ptrtoint i32* %b to i64 + %sub.ptr.rhs.cast = ptrtoint i32* %a to i64 + %sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast + %sub.ptr.div = ashr exact i64 %sub.ptr.sub, 2 + %cmp = icmp ugt i64 %sub.ptr.div, 1 + br i1 %cmp, label %if.then, label %if.end3 + +if.then: + %0 = load i32, i32* %a + %1 = load i32, i32* %b + %cmp1 = icmp eq i32 %0, %1 + br i1 %cmp1, label %return, label %if.end3 + +if.end3: + br label %return + +return: + %retval.0 = phi i32* [ %b, %if.end3 ], [ %a, %if.then ] + ret i32* %retval.0 +} + +declare i32 @PR28802.external(i32 returned %p1) 
+ +define internal i32 @PR28802.callee() { +entry: + br label %cont + +cont: + %0 = phi i32 [ 0, %entry ] + %call = call i32 @PR28802.external(i32 %0) + ret i32 %call +} + +define i32 @PR28802() { +entry: + %call = call i32 @PR28802.callee() + ret i32 %call +} + +; CHECK-LABEL: define i32 @PR28802( +; CHECK: %[[call:.*]] = call i32 @PR28802.external(i32 0) +; CHECK: ret i32 %[[call]] + +define internal i32 @PR28848.callee(i32 %p2, i1 %c) { +entry: + br i1 %c, label %cond.end, label %cond.true + +cond.true: + br label %cond.end + +cond.end: + %cond = phi i32 [ 0, %cond.true ], [ %p2, %entry ] + %or = or i32 %cond, %p2 + ret i32 %or +} + +define i32 @PR28848() { +entry: + %call = call i32 @PR28848.callee(i32 0, i1 false) + ret i32 %call +} +; CHECK-LABEL: define i32 @PR28848( +; CHECK: ret i32 0 + +define internal void @callee7(i16 %param1, i16 %param2) { +entry: + br label %bb + +bb: + %phi = phi i16 [ %param2, %entry ] + %add = add i16 %phi, %param1 + ret void +} + +declare i16 @caller7.external(i16 returned) + +define void @caller7() { +bb1: + %call = call i16 @caller7.external(i16 1) + call void @callee7(i16 0, i16 %call) + ret void +} +; CHECK-LABEL: define void @caller7( +; CHECK: %call = call i16 @caller7.external(i16 1) +; CHECK-NEXT: ret void diff --git a/llvm/test/Transforms/Inline/inline_dbg_declare.ll b/llvm/test/Transforms/Inline/inline_dbg_declare.ll new file mode 100644 index 00000000000..74ab8537970 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_dbg_declare.ll @@ -0,0 +1,99 @@ +; RUN: opt < %s -S -inline | FileCheck %s +; RUN: opt < %s -S -passes='cgscc(inline)' | FileCheck %s +; +; The purpose of this test is to check that inline pass preserves debug info +; for variable using the dbg.declare intrinsic. 
+; +;; This test was generated by running this command: +;; clang.exe -S -O0 -emit-llvm -g foo.c +;; +;; foo.c +;; ========================== +;; float foo(float x) +;; { +;; return x; +;; } +;; +;; void bar(float *dst) +;; { +;; dst[0] = foo(dst[0]); +;; } +;; ========================== + +target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" +target triple = "i686-pc-windows-msvc" + +; Function Attrs: nounwind +define float @foo(float %x) #0 !dbg !4 { +entry: + %x.addr = alloca float, align 4 + store float %x, float* %x.addr, align 4 + call void @llvm.dbg.declare(metadata float* %x.addr, metadata !16, metadata !17), !dbg !18 + %0 = load float, float* %x.addr, align 4, !dbg !19 + ret float %0, !dbg !19 +} + +; Function Attrs: nounwind readnone +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; CHECK: define void @bar + +; Function Attrs: nounwind +define void @bar(float* %dst) #0 !dbg !9 { +entry: + +; CHECK: [[x_addr_i:%[a-zA-Z0-9.]+]] = alloca float, align 4 +; CHECK-NEXT: void @llvm.dbg.declare(metadata float* [[x_addr_i]], metadata [[m23:![0-9]+]], metadata !DIExpression()), !dbg [[m24:![0-9]+]] + + %dst.addr = alloca float*, align 4 + store float* %dst, float** %dst.addr, align 4 + call void @llvm.dbg.declare(metadata float** %dst.addr, metadata !20, metadata !17), !dbg !21 + %0 = load float*, float** %dst.addr, align 4, !dbg !22 + %arrayidx = getelementptr inbounds float, float* %0, i32 0, !dbg !22 + %1 = load float, float* %arrayidx, align 4, !dbg !22 + %call = call float @foo(float %1), !dbg !22 + +; CHECK-NOT: call float @foo + + %2 = load float*, float** %dst.addr, align 4, !dbg !22 + %arrayidx1 = getelementptr inbounds float, float* %2, i32 0, !dbg !22 + store float %call, float* %arrayidx1, align 4, !dbg !22 + ret void, !dbg !23 +} + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!13, !14} +!llvm.ident = !{!15} + +!0 = distinct !DICompileUnit(language: 
DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "foo.c", directory: "") +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 2, file: !1, scope: !5, type: !6, retainedNodes: !2) +!5 = !DIFile(filename: "foo.c", directory: "") +!6 = !DISubroutineType(types: !7) +!7 = !{!8, !8} +!8 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float) +!9 = distinct !DISubprogram(name: "bar", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !10, retainedNodes: !2) +!10 = !DISubroutineType(types: !11) +!11 = !{null, !12} +!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !8) +!13 = !{i32 2, !"Dwarf Version", i32 4} +!14 = !{i32 2, !"Debug Info Version", i32 3} +!15 = !{!"clang version 3.6.0 (trunk)"} +!16 = !DILocalVariable(name: "x", line: 1, arg: 1, scope: !4, file: !5, type: !8) +!17 = !DIExpression() +!18 = !DILocation(line: 1, column: 17, scope: !4) +!19 = !DILocation(line: 3, column: 5, scope: !4) +!20 = !DILocalVariable(name: "dst", line: 6, arg: 1, scope: !9, file: !5, type: !12) +!21 = !DILocation(line: 6, column: 17, scope: !9) +!22 = !DILocation(line: 8, column: 14, scope: !9) +!23 = !DILocation(line: 9, column: 1, scope: !9) + +; CHECK: [[FOO:![0-9]+]] = distinct !DISubprogram(name: "foo", +; CHECK: [[m23]] = !DILocalVariable(name: "x", arg: 1, scope: [[FOO]] +; CHECK: [[BAR:![0-9]+]] = distinct !DISubprogram(name: "bar", +; CHECK: [[m24]] = !DILocation(line: 1, column: 17, scope: [[FOO]], inlinedAt: [[CALL_SITE:![0-9]+]]) +; CHECK: [[CALL_SITE]] = distinct !DILocation(line: 8, column: 14, scope: [[BAR]]) diff --git a/llvm/test/Transforms/Inline/inline_dce.ll 
b/llvm/test/Transforms/Inline/inline_dce.ll new file mode 100644 index 00000000000..97d9f3f81e4 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_dce.ll @@ -0,0 +1,36 @@ +; This checks to ensure that the inline pass deletes functions if they get +; inlined into all of their callers. + +; RUN: opt < %s -inline -S | \ +; RUN: not grep @reallysmall + +define internal i32 @reallysmall(i32 %A) { +; CHECK-NOT: @reallysmall +entry: + ret i32 %A +} + +define void @caller1() { +; CHECK-LABEL: define void @caller1() +entry: + call i32 @reallysmall(i32 5) +; CHECK-NOT: call + ret void +} + +define void @caller2(i32 %A) { +; CHECK-LABEL: define void @caller2(i32 %A) +entry: + call i32 @reallysmall(i32 %A) +; CHECK-NOT: call + ret void +} + +define i32 @caller3(i32 %A) { +; CHECK-LABEL: define i32 @caller3(i32 %A) +entry: + %B = call i32 @reallysmall(i32 %A) +; CHECK-NOT: call + ret i32 %B +} + diff --git a/llvm/test/Transforms/Inline/inline_inv_group.ll b/llvm/test/Transforms/Inline/inline_inv_group.ll new file mode 100644 index 00000000000..c33048d7127 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_inv_group.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -inline -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define i8* @callee() alwaysinline { +; CHECK-LABEL: define i8* @callee() + %1 = call i8* @llvm.strip.invariant.group.p0i8(i8* null) + ret i8* %1 +} + +define i8* @caller() { +; CHECK-LABEL: define i8* @caller() +; CHECK-NEXT: call i8* @llvm.strip.invariant.group.p0i8(i8* null) + %1 = call i8* @callee() + ret i8* %1 +} + +declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git a/llvm/test/Transforms/Inline/inline_invoke.ll b/llvm/test/Transforms/Inline/inline_invoke.ll new file mode 100644 index 00000000000..2b34140aa5b --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_invoke.ll @@ -0,0 +1,349 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' 
-S | FileCheck %s + +; Test that the inliner correctly handles inlining into invoke sites +; by appending selectors and forwarding _Unwind_Resume directly to the +; enclosing landing pad. + +;; Test 0 - basic functionality. + +%struct.A = type { i8 } + +@_ZTIi = external constant i8* + +declare void @_ZN1AC1Ev(%struct.A*) + +declare void @_ZN1AD1Ev(%struct.A*) + +declare void @use(i32) nounwind + +declare void @opaque() + +declare i32 @llvm.eh.typeid.for(i8*) nounwind + +declare i32 @__gxx_personality_v0(...) + +declare i8* @__cxa_begin_catch(i8*) + +declare void @__cxa_end_catch() + +declare void @_ZSt9terminatev() + +define internal void @test0_in() alwaysinline uwtable ssp personality i32 (...)* @__gxx_personality_v0 { +entry: + %a = alloca %struct.A, align 1 + %b = alloca %struct.A, align 1 + call void @_ZN1AC1Ev(%struct.A* %a) + invoke void @_ZN1AC1Ev(%struct.A* %b) + to label %invoke.cont unwind label %lpad + +invoke.cont: + invoke void @_ZN1AD1Ev(%struct.A* %b) + to label %invoke.cont1 unwind label %lpad + +invoke.cont1: + call void @_ZN1AD1Ev(%struct.A* %a) + ret void + +lpad: + %exn = landingpad {i8*, i32} + cleanup + invoke void @_ZN1AD1Ev(%struct.A* %a) + to label %invoke.cont2 unwind label %terminate.lpad + +invoke.cont2: + resume { i8*, i32 } %exn + +terminate.lpad: + %exn1 = landingpad {i8*, i32} + catch i8* null + call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +define void @test0_out() uwtable ssp personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @test0_in() + to label %ret unwind label %lpad + +ret: + ret void + +lpad: ; preds = %entry + %exn = landingpad {i8*, i32} + catch i8* bitcast (i8** @_ZTIi to i8*) + %eh.exc = extractvalue { i8*, i32 } %exn, 0 + %eh.selector = extractvalue { i8*, i32 } %exn, 1 + %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %1 = icmp eq i32 %eh.selector, %0 + br i1 %1, label %catch, label %eh.resume + +catch: + %ignored = call i8* @__cxa_begin_catch(i8* 
%eh.exc) nounwind + call void @__cxa_end_catch() nounwind + br label %ret + +eh.resume: + resume { i8*, i32 } %exn +} + +; CHECK: define void @test0_out() +; CHECK: [[A:%.*]] = alloca %struct.A, +; CHECK: [[B:%.*]] = alloca %struct.A, +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[A]]) +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[B]]) +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[B]]) +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[A]]) +; CHECK: landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A]]) +; CHECK-NEXT: to label %[[LBL:[^\s]+]] unwind +; CHECK: [[LBL]]: +; CHECK-NEXT: br label %[[LPAD:[^\s]+]] +; CHECK: ret void +; CHECK: landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: br label %[[LPAD]] +; CHECK: [[LPAD]]: +; CHECK-NEXT: phi { i8*, i32 } [ +; CHECK-NEXT: extractvalue { i8*, i32 } +; CHECK-NEXT: extractvalue { i8*, i32 } +; CHECK-NEXT: call i32 @llvm.eh.typeid.for( + + +;; Test 1 - Correctly handle phis in outer landing pads. 
+ +define void @test1_out() uwtable ssp personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @test0_in() + to label %cont unwind label %lpad + +cont: + invoke void @test0_in() + to label %ret unwind label %lpad + +ret: + ret void + +lpad: + %x = phi i32 [ 0, %entry ], [ 1, %cont ] + %y = phi i32 [ 1, %entry ], [ 4, %cont ] + %exn = landingpad {i8*, i32} + catch i8* bitcast (i8** @_ZTIi to i8*) + %eh.exc = extractvalue { i8*, i32 } %exn, 0 + %eh.selector = extractvalue { i8*, i32 } %exn, 1 + %0 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind + %1 = icmp eq i32 %eh.selector, %0 + br i1 %1, label %catch, label %eh.resume + +catch: + %ignored = call i8* @__cxa_begin_catch(i8* %eh.exc) nounwind + call void @use(i32 %x) + call void @use(i32 %y) + call void @__cxa_end_catch() nounwind + br label %ret + +eh.resume: + resume { i8*, i32 } %exn +} + +; CHECK: define void @test1_out() +; CHECK: [[A2:%.*]] = alloca %struct.A, +; CHECK: [[B2:%.*]] = alloca %struct.A, +; CHECK: [[A1:%.*]] = alloca %struct.A, +; CHECK: [[B1:%.*]] = alloca %struct.A, +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[A1]]) +; CHECK-NEXT: unwind label %[[LPAD:[^\s]+]] +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[B1]]) +; CHECK-NEXT: unwind label %[[LPAD1:[^\s]+]] +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[B1]]) +; CHECK-NEXT: unwind label %[[LPAD1]] +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[A1]]) +; CHECK-NEXT: unwind label %[[LPAD]] + +; Inner landing pad from first inlining. 
+; CHECK: [[LPAD1]]: +; CHECK-NEXT: [[LPADVAL1:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A1]]) +; CHECK-NEXT: to label %[[RESUME1:[^\s]+]] unwind +; CHECK: [[RESUME1]]: +; CHECK-NEXT: br label %[[LPAD_JOIN1:[^\s]+]] + +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[A2]]) +; CHECK-NEXT: unwind label %[[LPAD]] +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[B2]]) +; CHECK-NEXT: unwind label %[[LPAD2:[^\s]+]] +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[B2]]) +; CHECK-NEXT: unwind label %[[LPAD2]] +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[A2]]) +; CHECK-NEXT: unwind label %[[LPAD]] + +; Inner landing pad from second inlining. +; CHECK: [[LPAD2]]: +; CHECK-NEXT: [[LPADVAL2:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: invoke void @_ZN1AD1Ev(%struct.A* [[A2]]) +; CHECK-NEXT: to label %[[RESUME2:[^\s]+]] unwind +; CHECK: [[RESUME2]]: +; CHECK-NEXT: br label %[[LPAD_JOIN2:[^\s]+]] + +; CHECK: ret void + +; CHECK: [[LPAD]]: +; CHECK-NEXT: [[X:%.*]] = phi i32 [ 0, %entry ], [ 0, {{%.*}} ], [ 1, %cont ], [ 1, {{%.*}} ] +; CHECK-NEXT: [[Y:%.*]] = phi i32 [ 1, %entry ], [ 1, {{%.*}} ], [ 4, %cont ], [ 4, {{%.*}} ] +; CHECK-NEXT: [[LPADVAL:%.*]] = landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: br label %[[LPAD_JOIN2]] + +; CHECK: [[LPAD_JOIN2]]: +; CHECK-NEXT: [[XJ2:%.*]] = phi i32 [ [[X]], %[[LPAD]] ], [ 1, %[[RESUME2]] ] +; CHECK-NEXT: [[YJ2:%.*]] = phi i32 [ [[Y]], %[[LPAD]] ], [ 4, %[[RESUME2]] ] +; CHECK-NEXT: [[EXNJ2:%.*]] = phi { i8*, i32 } [ [[LPADVAL]], %[[LPAD]] ], [ [[LPADVAL2]], %[[RESUME2]] ] +; CHECK-NEXT: br label %[[LPAD_JOIN1]] + +; CHECK: [[LPAD_JOIN1]]: +; CHECK-NEXT: [[XJ1:%.*]] = phi i32 [ [[XJ2]], %[[LPAD_JOIN2]] ], [ 0, %[[RESUME1]] ] +; CHECK-NEXT: [[YJ1:%.*]] = phi i32 [ [[YJ2]], %[[LPAD_JOIN2]] ], [ 1, 
%[[RESUME1]] ] +; CHECK-NEXT: [[EXNJ1:%.*]] = phi { i8*, i32 } [ [[EXNJ2]], %[[LPAD_JOIN2]] ], [ [[LPADVAL1]], %[[RESUME1]] ] +; CHECK-NEXT: extractvalue { i8*, i32 } [[EXNJ1]], 0 +; CHECK-NEXT: [[SELJ1:%.*]] = extractvalue { i8*, i32 } [[EXNJ1]], 1 +; CHECK-NEXT: [[T:%.*]] = call i32 @llvm.eh.typeid.for( +; CHECK-NEXT: icmp eq i32 [[SELJ1]], [[T]] + +; CHECK: call void @use(i32 [[XJ1]]) +; CHECK: call void @use(i32 [[YJ1]]) + +; CHECK: resume { i8*, i32 } + + +;; Test 2 - Don't make invalid IR for inlines into landing pads without eh.exception calls +define void @test2_out() uwtable ssp personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @test0_in() + to label %ret unwind label %lpad + +ret: + ret void + +lpad: + %exn = landingpad {i8*, i32} + cleanup + call void @_ZSt9terminatev() + unreachable +} + +; CHECK: define void @test2_out() +; CHECK: [[A:%.*]] = alloca %struct.A, +; CHECK: [[B:%.*]] = alloca %struct.A, +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[A]]) +; CHECK-NEXT: unwind label %[[LPAD:[^\s]+]] +; CHECK: invoke void @_ZN1AC1Ev(%struct.A* [[B]]) +; CHECK-NEXT: unwind label %[[LPAD2:[^\s]+]] +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[B]]) +; CHECK-NEXT: unwind label %[[LPAD2]] +; CHECK: invoke void @_ZN1AD1Ev(%struct.A* [[A]]) +; CHECK-NEXT: unwind label %[[LPAD]] + + +;; Test 3 - Deal correctly with split unwind edges. 
+define void @test3_out() uwtable ssp personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @test0_in() + to label %ret unwind label %lpad + +ret: + ret void + +lpad: + %exn = landingpad {i8*, i32} + catch i8* bitcast (i8** @_ZTIi to i8*) + br label %lpad.cont + +lpad.cont: + call void @_ZSt9terminatev() + unreachable +} + +; CHECK: define void @test3_out() +; CHECK: landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: invoke void @_ZN1AD1Ev( +; CHECK-NEXT: to label %[[L:[^\s]+]] unwind +; CHECK: [[L]]: +; CHECK-NEXT: br label %[[JOIN:[^\s]+]] +; CHECK: [[JOIN]]: +; CHECK-NEXT: phi { i8*, i32 } +; CHECK-NEXT: br label %lpad.cont +; CHECK: lpad.cont: +; CHECK-NEXT: call void @_ZSt9terminatev() + + +;; Test 4 - Split unwind edges with a dominance problem +define void @test4_out() uwtable ssp personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @test0_in() + to label %cont unwind label %lpad.crit + +cont: + invoke void @opaque() + to label %ret unwind label %lpad + +ret: + ret void + +lpad.crit: + %exn = landingpad {i8*, i32} + catch i8* bitcast (i8** @_ZTIi to i8*) + call void @opaque() nounwind + br label %terminate + +lpad: + %exn2 = landingpad {i8*, i32} + catch i8* bitcast (i8** @_ZTIi to i8*) + br label %terminate + +terminate: + %phi = phi i32 [ 0, %lpad.crit ], [ 1, %lpad ] + call void @use(i32 %phi) + call void @_ZSt9terminatev() + unreachable +} + +; CHECK: define void @test4_out() +; CHECK: landingpad { i8*, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: invoke void @_ZN1AD1Ev( +; CHECK-NEXT: to label %[[L:[^\s]+]] unwind +; CHECK: [[L]]: +; CHECK-NEXT: br label %[[JOIN:[^\s]+]] +; CHECK: invoke void @opaque() +; CHECK-NEXT: unwind label %lpad +; CHECK: lpad.crit: +; CHECK-NEXT: landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: br label %[[JOIN]] +; CHECK: [[JOIN]]: +; 
CHECK-NEXT: phi { i8*, i32 } +; CHECK-NEXT: call void @opaque() [[NUW:#[0-9]+]] +; CHECK-NEXT: br label %[[FIX:[^\s]+]] +; CHECK: lpad: +; CHECK-NEXT: landingpad { i8*, i32 } +; CHECK-NEXT: catch i8* bitcast (i8** @_ZTIi to i8*) +; CHECK-NEXT: br label %[[FIX]] +; CHECK: [[FIX]]: +; CHECK-NEXT: [[T1:%.*]] = phi i32 [ 0, %[[JOIN]] ], [ 1, %lpad ] +; CHECK-NEXT: call void @use(i32 [[T1]]) +; CHECK-NEXT: call void @_ZSt9terminatev() + +; CHECK: attributes [[NUW]] = { nounwind } +; CHECK: attributes #1 = { nounwind readnone } +; CHECK: attributes #2 = { ssp uwtable } +; CHECK: attributes #3 = { argmemonly nounwind } +; CHECK: attributes #4 = { noreturn nounwind } diff --git a/llvm/test/Transforms/Inline/inline_minisize.ll b/llvm/test/Transforms/Inline/inline_minisize.ll new file mode 100644 index 00000000000..0bf75d72bd1 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_minisize.ll @@ -0,0 +1,232 @@ +; RUN: opt -O2 -S < %s | FileCheck %s + +@data = common global i32* null, align 8 + +define i32 @fct1(i32 %a) nounwind uwtable ssp { +entry: + %a.addr = alloca i32, align 4 + %res = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %tmp = load i32, i32* %a.addr, align 4 + %idxprom = sext i32 %tmp to i64 + %tmp1 = load i32*, i32** @data, align 8 + %arrayidx = getelementptr inbounds i32, i32* %tmp1, i64 %idxprom + %tmp2 = load i32, i32* %arrayidx, align 4 + %tmp3 = load i32, i32* %a.addr, align 4 + %add = add nsw i32 %tmp3, 1 + %idxprom1 = sext i32 %add to i64 + %tmp4 = load i32*, i32** @data, align 8 + %arrayidx2 = getelementptr inbounds i32, i32* %tmp4, i64 %idxprom1 + %tmp5 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %tmp2, %tmp5 + store i32 %mul, i32* %res, align 4 + store i32 0, i32* %i, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %tmp6 = load i32, i32* %i, align 4 + %tmp7 = load i32, i32* %res, align 4 + %cmp = icmp slt i32 %tmp6, %tmp7 + br i1 %cmp, label 
%for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp8 = load i32, i32* %i, align 4 + %idxprom3 = sext i32 %tmp8 to i64 + %tmp9 = load i32*, i32** @data, align 8 + %arrayidx4 = getelementptr inbounds i32, i32* %tmp9, i64 %idxprom3 + call void @fct0(i32* %arrayidx4) + br label %for.inc + +for.inc: ; preds = %for.body + %tmp10 = load i32, i32* %i, align 4 + %inc = add nsw i32 %tmp10, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + store i32 0, i32* %i, align 4 + br label %for.cond5 + +for.cond5: ; preds = %for.inc10, %for.end + %tmp11 = load i32, i32* %i, align 4 + %tmp12 = load i32, i32* %res, align 4 + %cmp6 = icmp slt i32 %tmp11, %tmp12 + br i1 %cmp6, label %for.body7, label %for.end12 + +for.body7: ; preds = %for.cond5 + %tmp13 = load i32, i32* %i, align 4 + %idxprom8 = sext i32 %tmp13 to i64 + %tmp14 = load i32*, i32** @data, align 8 + %arrayidx9 = getelementptr inbounds i32, i32* %tmp14, i64 %idxprom8 + call void @fct0(i32* %arrayidx9) + br label %for.inc10 + +for.inc10: ; preds = %for.body7 + %tmp15 = load i32, i32* %i, align 4 + %inc11 = add nsw i32 %tmp15, 1 + store i32 %inc11, i32* %i, align 4 + br label %for.cond5 + +for.end12: ; preds = %for.cond5 + store i32 0, i32* %i, align 4 + br label %for.cond13 + +for.cond13: ; preds = %for.inc18, %for.end12 + %tmp16 = load i32, i32* %i, align 4 + %tmp17 = load i32, i32* %res, align 4 + %cmp14 = icmp slt i32 %tmp16, %tmp17 + br i1 %cmp14, label %for.body15, label %for.end20 + +for.body15: ; preds = %for.cond13 + %tmp18 = load i32, i32* %i, align 4 + %idxprom16 = sext i32 %tmp18 to i64 + %tmp19 = load i32*, i32** @data, align 8 + %arrayidx17 = getelementptr inbounds i32, i32* %tmp19, i64 %idxprom16 + call void @fct0(i32* %arrayidx17) + br label %for.inc18 + +for.inc18: ; preds = %for.body15 + %tmp20 = load i32, i32* %i, align 4 + %inc19 = add nsw i32 %tmp20, 1 + store i32 %inc19, i32* %i, align 4 + br label %for.cond13 + +for.end20: ; preds = %for.cond13 + 
%tmp21 = load i32, i32* %res, align 4 + ret i32 %tmp21 +} + +declare void @fct0(i32*) + +define i32 @fct2(i32 %a) nounwind uwtable inlinehint ssp { +entry: + %a.addr = alloca i32, align 4 + %res = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 %a, i32* %a.addr, align 4 + %tmp = load i32, i32* %a.addr, align 4 + %shl = shl i32 %tmp, 1 + %idxprom = sext i32 %shl to i64 + %tmp1 = load i32*, i32** @data, align 8 + %arrayidx = getelementptr inbounds i32, i32* %tmp1, i64 %idxprom + %tmp2 = load i32, i32* %arrayidx, align 4 + %tmp3 = load i32, i32* %a.addr, align 4 + %shl1 = shl i32 %tmp3, 1 + %add = add nsw i32 %shl1, 13 + %idxprom2 = sext i32 %add to i64 + %tmp4 = load i32*, i32** @data, align 8 + %arrayidx3 = getelementptr inbounds i32, i32* %tmp4, i64 %idxprom2 + %tmp5 = load i32, i32* %arrayidx3, align 4 + %mul = mul nsw i32 %tmp2, %tmp5 + store i32 %mul, i32* %res, align 4 + store i32 0, i32* %i, align 4 + store i32 0, i32* %i, align 4 + br label %for.cond + +for.cond: ; preds = %for.inc, %entry + %tmp6 = load i32, i32* %i, align 4 + %tmp7 = load i32, i32* %res, align 4 + %cmp = icmp slt i32 %tmp6, %tmp7 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %tmp8 = load i32, i32* %i, align 4 + %idxprom4 = sext i32 %tmp8 to i64 + %tmp9 = load i32*, i32** @data, align 8 + %arrayidx5 = getelementptr inbounds i32, i32* %tmp9, i64 %idxprom4 + call void @fct0(i32* %arrayidx5) + br label %for.inc + +for.inc: ; preds = %for.body + %tmp10 = load i32, i32* %i, align 4 + %inc = add nsw i32 %tmp10, 1 + store i32 %inc, i32* %i, align 4 + br label %for.cond + +for.end: ; preds = %for.cond + store i32 0, i32* %i, align 4 + br label %for.cond6 + +for.cond6: ; preds = %for.inc11, %for.end + %tmp11 = load i32, i32* %i, align 4 + %tmp12 = load i32, i32* %res, align 4 + %cmp7 = icmp slt i32 %tmp11, %tmp12 + br i1 %cmp7, label %for.body8, label %for.end13 + +for.body8: ; preds = %for.cond6 + %tmp13 = load i32, i32* %i, align 4 + %idxprom9 = sext 
i32 %tmp13 to i64 + %tmp14 = load i32*, i32** @data, align 8 + %arrayidx10 = getelementptr inbounds i32, i32* %tmp14, i64 %idxprom9 + call void @fct0(i32* %arrayidx10) + br label %for.inc11 + +for.inc11: ; preds = %for.body8 + %tmp15 = load i32, i32* %i, align 4 + %inc12 = add nsw i32 %tmp15, 1 + store i32 %inc12, i32* %i, align 4 + br label %for.cond6 + +for.end13: ; preds = %for.cond6 + store i32 0, i32* %i, align 4 + br label %for.cond14 + +for.cond14: ; preds = %for.inc19, %for.end13 + %tmp16 = load i32, i32* %i, align 4 + %tmp17 = load i32, i32* %res, align 4 + %cmp15 = icmp slt i32 %tmp16, %tmp17 + br i1 %cmp15, label %for.body16, label %for.end21 + +for.body16: ; preds = %for.cond14 + %tmp18 = load i32, i32* %i, align 4 + %idxprom17 = sext i32 %tmp18 to i64 + %tmp19 = load i32*, i32** @data, align 8 + %arrayidx18 = getelementptr inbounds i32, i32* %tmp19, i64 %idxprom17 + call void @fct0(i32* %arrayidx18) + br label %for.inc19 + +for.inc19: ; preds = %for.body16 + %tmp20 = load i32, i32* %i, align 4 + %inc20 = add nsw i32 %tmp20, 1 + store i32 %inc20, i32* %i, align 4 + br label %for.cond14 + +for.end21: ; preds = %for.cond14 + %tmp21 = load i32, i32* %res, align 4 + ret i32 %tmp21 +} + +define i32 @fct3(i32 %c) nounwind uwtable ssp { +entry: + ;CHECK-LABEL: @fct3( + ;CHECK: call i32 @fct1 + ; The inline keyword gives a sufficient benefits to inline fct2 + ;CHECK-NOT: call i32 @fct2 + %c.addr = alloca i32, align 4 + store i32 %c, i32* %c.addr, align 4 + %tmp = load i32, i32* %c.addr, align 4 + %call = call i32 @fct1(i32 %tmp) + %tmp1 = load i32, i32* %c.addr, align 4 + %call1 = call i32 @fct2(i32 %tmp1) + %add = add nsw i32 %call, %call1 + ret i32 %add +} + +define i32 @fct4(i32 %c) minsize nounwind uwtable ssp { +entry: + ;CHECK-LABEL: @fct4( + ;CHECK: call i32 @fct1 + ; With Oz (minsize attribute), the benefit of inlining fct2 + ; is the same as fct1, thus no inlining for fct2 + ;CHECK: call i32 @fct2 + %c.addr = alloca i32, align 4 + store i32 %c, i32* 
%c.addr, align 4 + %tmp = load i32, i32* %c.addr, align 4 + %call = call i32 @fct1(i32 %tmp) + %tmp1 = load i32, i32* %c.addr, align 4 + %call1 = call i32 @fct2(i32 %tmp1) + %add = add nsw i32 %call, %call1 + ret i32 %add +} diff --git a/llvm/test/Transforms/Inline/inline_prune.ll b/llvm/test/Transforms/Inline/inline_prune.ll new file mode 100644 index 00000000000..c4c5c0cf252 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_prune.ll @@ -0,0 +1,54 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +define internal i32 @callee1(i32 %A, i32 %B) { +; CHECK-NOT: @callee1 +entry: + %cond = icmp eq i32 %A, 123 + br i1 %cond, label %T, label %F + +T: + %C = mul i32 %B, %B + ret i32 %C + +F: + ret i32 0 +} + +define internal i32 @callee2(i32 %A, i32 %B) { +; CHECK-NOT: @callee2 +entry: + switch i32 %A, label %T [ + i32 10, label %F + i32 1234, label %G + ] + +dead: + %cond = icmp eq i32 %A, 123 + br i1 %cond, label %T, label %F + +T: + %C = mul i32 %B, %B + ret i32 %C + +F: + ret i32 0 + +G: + %D = mul i32 %B, %B + %E = mul i32 %D, %B + ret i32 %E +} + +define i32 @test(i32 %A) { +; CHECK-LABEL: define i32 @test(i32 %A) +entry: + %X = call i32 @callee1( i32 10, i32 %A ) + %Y = call i32 @callee2( i32 10, i32 %A ) +; CHECK-NOT: call +; CHECK-NOT: mul + + %Z = add i32 %X, %Y + ret i32 %Z +} + diff --git a/llvm/test/Transforms/Inline/inline_returns_twice.ll b/llvm/test/Transforms/Inline/inline_returns_twice.ll new file mode 100644 index 00000000000..c1f31d6719e --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_returns_twice.ll @@ -0,0 +1,85 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +; Check that functions with "returns_twice" calls are only inlined, +; if they are themselves marked as such. 
+ +declare i32 @a() returns_twice + +define i32 @inner1() { +entry: + %call = call i32 @a() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @outer1() { +entry: +; CHECK-LABEL: define i32 @outer1( +; CHECK: call i32 @inner1() + %call = call i32 @inner1() + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @inner2() returns_twice { +entry: + %call = call i32 @a() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @outer2() { +entry: +; CHECK-LABEL: define i32 @outer2( +; CHECK: call i32 @a() + %call = call i32 @inner2() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @inner3() personality i8* null { +entry: + %invoke = invoke i32 @a() returns_twice + to label %cont unwind label %lpad + +cont: + %add = add nsw i32 1, %invoke + ret i32 %add + +lpad: + %lp = landingpad i32 cleanup + resume i32 %lp +} + +define i32 @outer3() { +entry: +; CHECK-LABEL: define i32 @outer3( +; CHECK: call i32 @inner3() + %call = call i32 @inner3() + %add = add nsw i32 1, %call + ret i32 %add +} + +define i32 @inner4() returns_twice personality i8* null { +entry: + %invoke = invoke i32 @a() returns_twice + to label %cont unwind label %lpad + +cont: + %add = add nsw i32 1, %invoke + ret i32 %add + +lpad: + %lp = landingpad i32 cleanup + resume i32 %lp +} + +define i32 @outer4() { +entry: +; CHECK-LABEL: define i32 @outer4( +; CHECK: invoke i32 @a() + %call = call i32 @inner4() returns_twice + %add = add nsw i32 1, %call + ret i32 %add +} diff --git a/llvm/test/Transforms/Inline/inline_ssp.ll b/llvm/test/Transforms/Inline/inline_ssp.ll new file mode 100644 index 00000000000..bad332dbff0 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_ssp.ll @@ -0,0 +1,161 @@ +; RUN: opt -inline %s -S | FileCheck %s +; RUN: opt -passes='cgscc(inline)' %s -S | FileCheck %s +; Ensure SSP attributes are propagated correctly when inlining. 
+ +@.str = private unnamed_addr constant [11 x i8] c"fun_nossp\0A\00", align 1 +@.str1 = private unnamed_addr constant [9 x i8] c"fun_ssp\0A\00", align 1 +@.str2 = private unnamed_addr constant [15 x i8] c"fun_sspstrong\0A\00", align 1 +@.str3 = private unnamed_addr constant [12 x i8] c"fun_sspreq\0A\00", align 1 + +; These first four functions (@fun_sspreq, @fun_sspstrong, @fun_ssp, @fun_nossp) +; are used by the remaining functions to ensure that the SSP attributes are +; propagated correctly. The caller should have its SSP attribute set as: +; strictest(caller-ssp-attr, callee-ssp-attr), where strictness is ordered as: +; sspreq > sspstrong > ssp > [no ssp] +define internal void @fun_sspreq() nounwind sspreq uwtable { +entry: + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([12 x i8], [12 x i8]* @.str3, i32 0, i32 0)) + ret void +} + +define internal void @fun_sspstrong() nounwind sspstrong uwtable { +entry: + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str2, i32 0, i32 0)) + ret void +} + +define internal void @fun_ssp() nounwind ssp uwtable { +entry: + %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([9 x i8], [9 x i8]* @.str1, i32 0, i32 0)) + ret void +} + +define internal void @fun_nossp() nounwind uwtable { +entry: + %call = call i32 (i8*, ...) 
@printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0)) + ret void +} + +; Tests start below + +define void @inline_req_req() nounwind sspreq uwtable { +entry: +; CHECK: @inline_req_req() #0 + call void @fun_sspreq() + ret void +} + +define void @inline_req_strong() nounwind sspstrong uwtable { +entry: +; CHECK: @inline_req_strong() #0 + call void @fun_sspreq() + ret void +} + +define void @inline_req_ssp() nounwind ssp uwtable { +entry: +; CHECK: @inline_req_ssp() #0 + call void @fun_sspreq() + ret void +} + +define void @inline_req_nossp() nounwind uwtable { +entry: +; CHECK: @inline_req_nossp() #0 + call void @fun_sspreq() + ret void +} + +define void @inline_strong_req() nounwind sspreq uwtable { +entry: +; CHECK: @inline_strong_req() #0 + call void @fun_sspstrong() + ret void +} + + +define void @inline_strong_strong() nounwind sspstrong uwtable { +entry: +; CHECK: @inline_strong_strong() #1 + call void @fun_sspstrong() + ret void +} + +define void @inline_strong_ssp() nounwind ssp uwtable { +entry: +; CHECK: @inline_strong_ssp() #1 + call void @fun_sspstrong() + ret void +} + +define void @inline_strong_nossp() nounwind uwtable { +entry: +; CHECK: @inline_strong_nossp() #1 + call void @fun_sspstrong() + ret void +} + +define void @inline_ssp_req() nounwind sspreq uwtable { +entry: +; CHECK: @inline_ssp_req() #0 + call void @fun_ssp() + ret void +} + + +define void @inline_ssp_strong() nounwind sspstrong uwtable { +entry: +; CHECK: @inline_ssp_strong() #1 + call void @fun_ssp() + ret void +} + +define void @inline_ssp_ssp() nounwind ssp uwtable { +entry: +; CHECK: @inline_ssp_ssp() #2 + call void @fun_ssp() + ret void +} + +define void @inline_ssp_nossp() nounwind uwtable { +entry: +; CHECK: @inline_ssp_nossp() #2 + call void @fun_ssp() + ret void +} + +define void @inline_nossp_req() nounwind uwtable sspreq { +entry: +; CHECK: @inline_nossp_req() #0 + call void @fun_nossp() + ret void +} + + +define void @inline_nossp_strong() nounwind 
sspstrong uwtable { +entry: +; CHECK: @inline_nossp_strong() #1 + call void @fun_nossp() + ret void +} + +define void @inline_nossp_ssp() nounwind ssp uwtable { +entry: +; CHECK: @inline_nossp_ssp() #2 + call void @fun_nossp() + ret void +} + +define void @inline_nossp_nossp() nounwind uwtable { +entry: +; CHECK: @inline_nossp_nossp() #3 + call void @fun_nossp() + ret void +} + +declare i32 @printf(i8*, ...) + +; CHECK: attributes #0 = { nounwind sspreq uwtable } +; CHECK: attributes #1 = { nounwind sspstrong uwtable } +; CHECK: attributes #2 = { nounwind ssp uwtable } +; CHECK: attributes #3 = { nounwind uwtable } diff --git a/llvm/test/Transforms/Inline/inline_stats.ll b/llvm/test/Transforms/Inline/inline_stats.ll new file mode 100644 index 00000000000..40d6cb30b69 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_stats.ll @@ -0,0 +1,95 @@ +; First with legacy PM +; RUN: opt -S -inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK +; RUN: opt -S -inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK + +; Do again with new PM +; RUN: opt -S -passes=inline -inliner-function-import-stats=basic < %s 2>&1 | FileCheck %s -check-prefix=CHECK-BASIC -check-prefix=CHECK +; RUN: opt -S -passes=inline -inliner-function-import-stats=verbose < %s 2>&1 | FileCheck %s -check-prefix="CHECK-VERBOSE" -check-prefix=CHECK + +; CHECK: ------- Dumping inliner stats for [<stdin>] ------- +; CHECK-BASIC-NOT: -- List of inlined functions: +; CHECK-BASIC-NOT: -- Inlined not imported function +; CHECK-VERBOSE: -- List of inlined functions: +; CHECK-VERBOSE: Inlined not imported function [internal2]: #inlines = 6, #inlines_to_importing_module = 2 +; CHECK-VERBOSE: Inlined imported function [external2]: #inlines = 4, #inlines_to_importing_module = 1 +; CHECK-VERBOSE: Inlined imported function [external1]: #inlines = 3, #inlines_to_importing_module = 2 +; 
CHECK-VERBOSE: Inlined imported function [external5]: #inlines = 1, #inlines_to_importing_module = 1 +; CHECK-VERBOSE: Inlined imported function [external3]: #inlines = 1, #inlines_to_importing_module = 0 + +; CHECK: -- Summary: +; CHECK: All functions: 10, imported functions: 7 +; CHECK: inlined functions: 5 [50% of all functions] +; CHECK: imported functions inlined anywhere: 4 [57.14% of imported functions] +; CHECK: imported functions inlined into importing module: 3 [42.86% of imported functions], remaining: 4 [57.14% of imported functions] +; CHECK: non-imported functions inlined anywhere: 1 [33.33% of non-imported functions] +; CHECK: non-imported functions inlined into importing module: 1 [33.33% of non-imported functions] + +define void @internal() { + call fastcc void @external1() + call fastcc void @internal2() + call coldcc void @external_big() + ret void +} + +define void @internal2() alwaysinline { + ret void +} + +define void @internal3() { + call fastcc void @external1() + call fastcc void @external5() + ret void +} + +declare void @external_decl() + +define void @external1() alwaysinline !thinlto_src_module !0 { + call fastcc void @internal2() + call fastcc void @external2(); + call void @external_decl(); + ret void +} + +define void @external2() alwaysinline !thinlto_src_module !1 { + ret void +} + +define void @external3() alwaysinline !thinlto_src_module !1 { + ret void +} + +define void @external4() !thinlto_src_module !1 { + call fastcc void @external1() + call fastcc void @external2() + ret void +} + +define void @external5() !thinlto_src_module !1 { + ret void +} + +; Assume big piece of code here. This function won't be inlined, so all the +; inlined function it will have won't affect real inlines. 
+define void @external_big() noinline !thinlto_src_module !1 { +; CHECK-NOT: call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + call fastcc void @internal2() + +; CHECK-NOT: call fastcc void @external2() + call fastcc void @external2() + call fastcc void @external2() +; CHECK-NOT: call fastcc void @external3() + call fastcc void @external3() + ret void +} + +; It should not be imported, but it should not break anything. +define void @external_notcalled() !thinlto_src_module !0 { + call void @external_notcalled() + ret void +} + +!0 = !{!"file.cc"} +!1 = !{!"other.cc"} diff --git a/llvm/test/Transforms/Inline/inline_unreachable-2.ll b/llvm/test/Transforms/Inline/inline_unreachable-2.ll new file mode 100644 index 00000000000..825999593ac --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_unreachable-2.ll @@ -0,0 +1,23 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +; CHECK-LABEL: caller +; CHECK: call void @callee +define void @caller(i32 %a, i1 %b) #0 { + call void @callee(i32 %a, i1 %b) + unreachable +} + +define void @callee(i32 %a, i1 %b) { + call void @extern() + call void asm sideeffect "", ""() + br i1 %b, label %bb1, label %bb2 +bb1: + call void asm sideeffect "", ""() + ret void +bb2: + call void asm sideeffect "", ""() + ret void +} + +declare void @extern() diff --git a/llvm/test/Transforms/Inline/inline_unreachable.ll b/llvm/test/Transforms/Inline/inline_unreachable.ll new file mode 100644 index 00000000000..b23ddc85275 --- /dev/null +++ b/llvm/test/Transforms/Inline/inline_unreachable.ll @@ -0,0 +1,131 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +@a = global i32 4 +@_ZTIi = external global i8* + +; CHECK-LABEL: callSimpleFunction +; CHECK: call i32 @simpleFunction +define i32 @callSimpleFunction(i32 %idx, i32 %limit) { +entry: + %cmp = icmp sge i32 %idx, 
%limit + br i1 %cmp, label %if.then, label %if.end + +if.then: + %s = call i32 @simpleFunction(i32 %idx) + store i32 %s, i32* @a + unreachable + +if.end: + ret i32 %idx +} + +; CHECK-LABEL: callSmallFunction +; CHECK-NOT: call i32 @smallFunction +define i32 @callSmallFunction(i32 %idx, i32 %limit) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: + %s = call i32 @smallFunction(i32 %idx) + store i32 %s, i32* @a + unreachable + +if.end: + ret i32 %idx +} + +; CHECK-LABEL: throwSimpleException +; CHECK: invoke i32 @simpleFunction +define i32 @throwSimpleException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %exception = call i8* @__cxa_allocate_exception(i64 1) #0 + invoke i32 @simpleFunction(i32 %idx) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %if.then + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 + unreachable + +lpad: ; preds = %if.then + %ll = landingpad { i8*, i32 } + cleanup + ret i32 %idx + +if.end: ; preds = %entry + ret i32 %idx +} + +; CHECK-LABEL: throwSmallException +; CHECK-NOT: invoke i32 @smallFunction +define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) { +entry: + %cmp = icmp sge i32 %idx, %limit + br i1 %cmp, label %if.then, label %if.end + +if.then: ; preds = %entry + %exception = call i8* @__cxa_allocate_exception(i64 1) #0 + invoke i32 @smallFunction(i32 %idx) + to label %invoke.cont unwind label %lpad + +invoke.cont: ; preds = %if.then + call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1 + unreachable + +lpad: ; preds = %if.then + %ll = landingpad { i8*, i32 } + cleanup + ret i32 %idx + +if.end: ; preds = %entry + ret i32 %idx +} + +define i32 @simpleFunction(i32 
%a) #0 { +entry: + %a1 = load volatile i32, i32* @a + %x1 = add i32 %a1, %a1 + %a2 = load volatile i32, i32* @a + %x2 = add i32 %x1, %a2 + %a3 = load volatile i32, i32* @a + %x3 = add i32 %x2, %a3 + %a4 = load volatile i32, i32* @a + %x4 = add i32 %x3, %a4 + %a5 = load volatile i32, i32* @a + %x5 = add i32 %x4, %a5 + %a6 = load volatile i32, i32* @a + %x6 = add i32 %x5, %a6 + %a7 = load volatile i32, i32* @a + %x7 = add i32 %x6, %a6 + %a8 = load volatile i32, i32* @a + %x8 = add i32 %x7, %a8 + %a9 = load volatile i32, i32* @a + %x9 = add i32 %x8, %a9 + %a10 = load volatile i32, i32* @a + %x10 = add i32 %x9, %a10 + %a11 = load volatile i32, i32* @a + %x11 = add i32 %x10, %a11 + %a12 = load volatile i32, i32* @a + %x12 = add i32 %x11, %a12 + %add = add i32 %x12, %a + ret i32 %add +} + +define i32 @smallFunction(i32 %a) { +entry: + %r = load volatile i32, i32* @a + ret i32 %r +} + +attributes #0 = { nounwind } +attributes #1 = { noreturn } + +declare i8* @__cxa_allocate_exception(i64) +declare i32 @__gxx_personality_v0(...) +declare void @__cxa_throw(i8*, i8*, i8*) + diff --git a/llvm/test/Transforms/Inline/internal-scc-members.ll b/llvm/test/Transforms/Inline/internal-scc-members.ll new file mode 100644 index 00000000000..258ce00744c --- /dev/null +++ b/llvm/test/Transforms/Inline/internal-scc-members.ll @@ -0,0 +1,31 @@ +; Test that the inliner can handle deleting functions within an SCC while still +; processing the calls in that SCC. 
+; +; RUN: opt < %s -S -inline | FileCheck %s +; RUN: opt < %s -S -passes=inline | FileCheck %s + +; CHECK-LABEL: define internal void @test1_scc0() +; CHECK-NOT: call +; CHECK: call void @test1_scc0() +; CHECK-NOT: call +; CHECK: ret +define internal void @test1_scc0() { +entry: + call void @test1_scc1() + ret void +} + +; CHECK-NOT: @test1_scc1 +define internal void @test1_scc1() { +entry: + call void @test1_scc0() + ret void +} + +; CHECK-LABEL: define void @test1() +; CHECK: call void @test1_scc0() +define void @test1() { +entry: + call void @test1_scc0() noinline + ret void +} diff --git a/llvm/test/Transforms/Inline/invoke-cleanup.ll b/llvm/test/Transforms/Inline/invoke-cleanup.ll new file mode 100644 index 00000000000..e04f4fe12f5 --- /dev/null +++ b/llvm/test/Transforms/Inline/invoke-cleanup.ll @@ -0,0 +1,40 @@ +; RUN: opt %s -inline -S | FileCheck %s +; RUN: opt %s -passes='cgscc(inline)' -S | FileCheck %s + +declare void @external_func() + +@exception_type1 = external global i8 +@exception_type2 = external global i8 + + +define internal void @inner() personality i8* null { + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + catch i8* @exception_type1 + resume i32 %lp +} + +; Test that the "cleanup" clause is kept when inlining @inner() into +; this call site (PR17872), otherwise C++ destructors will not be +; called when they should be. 
+ +define void @outer() personality i8* null { + invoke void @inner() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + cleanup + catch i8* @exception_type2 + resume i32 %lp +} +; CHECK: define void @outer +; CHECK: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: catch i8* @exception_type1 +; CHECK-NEXT: catch i8* @exception_type2 diff --git a/llvm/test/Transforms/Inline/invoke-combine-clauses.ll b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll new file mode 100644 index 00000000000..09a437a5d30 --- /dev/null +++ b/llvm/test/Transforms/Inline/invoke-combine-clauses.ll @@ -0,0 +1,117 @@ +; RUN: opt %s -passes='cgscc(inline)' -S | FileCheck %s + +declare void @external_func() +declare void @abort() + +@exception_inner = external global i8 +@exception_outer = external global i8 +@condition = external global i1 + + +; Check for a bug in which multiple "resume" instructions in the +; inlined function caused "catch i8* @exception_outer" to appear +; multiple times in the resulting landingpad. 
+ +define internal void @inner_multiple_resume() personality i8* null { + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + catch i8* @exception_inner + %cond = load i1, i1* @condition + br i1 %cond, label %resume1, label %resume2 +resume1: + resume i32 1 +resume2: + resume i32 2 +} + +define void @outer_multiple_resume() personality i8* null { + invoke void @inner_multiple_resume() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + catch i8* @exception_outer + resume i32 %lp +} +; CHECK: define void @outer_multiple_resume() +; CHECK: %lp.i = landingpad +; CHECK-NEXT: catch i8* @exception_inner +; CHECK-NEXT: catch i8* @exception_outer +; Check that there isn't another "catch" clause: +; CHECK-NEXT: load + + +; Check for a bug in which having a "resume" and a "call" in the +; inlined function caused "catch i8* @exception_outer" to appear +; multiple times in the resulting landingpad. + +define internal void @inner_resume_and_call() personality i8* null { + call void @external_func() + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + catch i8* @exception_inner + resume i32 %lp +} + +define void @outer_resume_and_call() personality i8* null { + invoke void @inner_resume_and_call() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + catch i8* @exception_outer + resume i32 %lp +} +; CHECK: define void @outer_resume_and_call() +; CHECK: %lp.i = landingpad +; CHECK-NEXT: catch i8* @exception_inner +; CHECK-NEXT: catch i8* @exception_outer +; Check that there isn't another "catch" clause: +; CHECK-NEXT: br + + +; Check what happens if the inlined function contains an "invoke" but +; no "resume". 
In this case, the inlined landingpad does not need to +; include the "catch i8* @exception_outer" clause from the outer +; function (since the outer function's landingpad will not be +; reachable), but it's OK to include this clause. + +define internal void @inner_no_resume_or_call() personality i8* null { + invoke void @external_func() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + catch i8* @exception_inner + ; A landingpad might have no "resume" if a C++ destructor aborts. + call void @abort() noreturn nounwind + unreachable +} + +define void @outer_no_resume_or_call() personality i8* null { + invoke void @inner_no_resume_or_call() + to label %cont unwind label %lpad +cont: + ret void +lpad: + %lp = landingpad i32 + catch i8* @exception_outer + resume i32 %lp +} +; CHECK: define void @outer_no_resume_or_call() +; CHECK: %lp.i = landingpad +; CHECK-NEXT: catch i8* @exception_inner +; CHECK-NEXT: catch i8* @exception_outer +; Check that there isn't another "catch" clause: +; CHECK-NEXT: call void @abort() diff --git a/llvm/test/Transforms/Inline/invoke-cost.ll b/llvm/test/Transforms/Inline/invoke-cost.ll new file mode 100644 index 00000000000..fb60d42ecde --- /dev/null +++ b/llvm/test/Transforms/Inline/invoke-cost.ll @@ -0,0 +1,46 @@ +; RUN: opt -inline < %s -S -o - -inline-threshold=100 | FileCheck %s +; RUN: opt -passes='cgscc(inline)' < %s -S -o - -inline-threshold=100 | FileCheck %s + +target datalayout = "p:32:32" + +@glbl = external global i32 + +declare void @f() +declare i32 @__gxx_personality_v0(...) 
+declare i8* @__cxa_begin_catch(i8*) +declare void @__cxa_end_catch() +declare void @_ZSt9terminatev() + +define void @inner1() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @f() to label %cont1 unwind label %terminate.lpad + +cont1: + invoke void @f() to label %cont2 unwind label %terminate.lpad + +cont2: + invoke void @f() to label %cont3 unwind label %terminate.lpad + +cont3: + invoke void @f() to label %cont4 unwind label %terminate.lpad + +cont4: + ret void + +terminate.lpad: + landingpad {i8*, i32} + catch i8* null + call void @_ZSt9terminatev() noreturn nounwind + unreachable +} + +define void @outer1() { +; CHECK-LABEL: @outer1( +; +; This call should not get inlined because inner1 actually calls a function +; many times, but it only does so through invoke as opposed to call. +; +; CHECK: call void @inner1 + call void @inner1() + ret void +} diff --git a/llvm/test/Transforms/Inline/invoke_test-1.ll b/llvm/test/Transforms/Inline/invoke_test-1.ll new file mode 100644 index 00000000000..a5964121866 --- /dev/null +++ b/llvm/test/Transforms/Inline/invoke_test-1.ll @@ -0,0 +1,33 @@ +; Test that we can inline a simple function, turning the calls in it into invoke +; instructions + +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +declare void @might_throw() + +define internal void @callee() { +entry: + call void @might_throw() + ret void +} + +; caller returns true if might_throw throws an exception... +define i32 @caller() personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: define i32 @caller() personality i32 (...)* @__gxx_personality_v0 +entry: + invoke void @callee() + to label %cont unwind label %exc +; CHECK-NOT: @callee +; CHECK: invoke void @might_throw() + +cont: + ret i32 0 + +exc: + %exn = landingpad {i8*, i32} + cleanup + ret i32 1 +} + +declare i32 @__gxx_personality_v0(...) 
diff --git a/llvm/test/Transforms/Inline/invoke_test-2.ll b/llvm/test/Transforms/Inline/invoke_test-2.ll new file mode 100644 index 00000000000..6dfd24822e4 --- /dev/null +++ b/llvm/test/Transforms/Inline/invoke_test-2.ll @@ -0,0 +1,52 @@ +; Test that if an invoked function is inlined, and if that function cannot +; throw, that the dead handler is now unreachable. + +; RUN: opt < %s -inline -simplifycfg -S | FileCheck %s + +declare void @might_throw() + +define internal i32 @callee() personality i32 (...)* @__gxx_personality_v0 { +enrty: + invoke void @might_throw() + to label %cont unwind label %exc + +cont: + ret i32 0 + +exc: + %exn = landingpad {i8*, i32} + cleanup + ret i32 1 +} + +; caller returns true if might_throw throws an exception... callee cannot throw. +define i32 @caller() personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: define i32 @caller() personality i32 (...)* @__gxx_personality_v0 +enrty: + %X = invoke i32 @callee() + to label %cont unwind label %UnreachableExceptionHandler +; CHECK-NOT: @callee +; CHECK: invoke void @might_throw() +; CHECK: to label %[[C:.*]] unwind label %[[E:.*]] + +; CHECK: [[E]]: +; CHECK: landingpad +; CHECK: cleanup +; CHECK: br label %[[C]] + +cont: +; CHECK: [[C]]: + ret i32 %X +; CHECK: %[[PHI:.*]] = phi i32 +; CHECK: ret i32 %[[PHI]] + +UnreachableExceptionHandler: +; CHECK-NOT: UnreachableExceptionHandler: + %exn = landingpad {i8*, i32} + cleanup + ret i32 -1 +; CHECK-NOT: ret i32 -1 +} +; CHECK: } + +declare i32 @__gxx_personality_v0(...) diff --git a/llvm/test/Transforms/Inline/invoke_test-3.ll b/llvm/test/Transforms/Inline/invoke_test-3.ll new file mode 100644 index 00000000000..149afac4c4a --- /dev/null +++ b/llvm/test/Transforms/Inline/invoke_test-3.ll @@ -0,0 +1,48 @@ +; Test that any rethrown exceptions in an inlined function are automatically +; turned into branches to the invoke destination. 
+ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +declare void @might_throw() + +define internal i32 @callee() personality i32 (...)* @__gxx_personality_v0 { +entry: + invoke void @might_throw() + to label %cont unwind label %exc + +cont: + ret i32 0 + +exc: + ; This just rethrows the exception! + %exn = landingpad {i8*, i32} + cleanup + resume { i8*, i32 } %exn +} + +; caller returns true if might_throw throws an exception... which gets +; propagated by callee. +define i32 @caller() personality i32 (...)* @__gxx_personality_v0 { +; CHECK-LABEL: define i32 @caller() +entry: + %X = invoke i32 @callee() + to label %cont unwind label %Handler +; CHECK-NOT: @callee +; CHECK: invoke void @might_throw() +; At this point we just check that the rest of the function does not 'resume' +; at any point and instead the inlined resume is threaded into normal control +; flow. +; CHECK-NOT: resume + +cont: + ret i32 %X + +Handler: +; This consumes an exception thrown by might_throw + %exn = landingpad {i8*, i32} + cleanup + ret i32 1 +} + +declare i32 @__gxx_personality_v0(...) diff --git a/llvm/test/Transforms/Inline/label-annotation.ll b/llvm/test/Transforms/Inline/label-annotation.ll new file mode 100644 index 00000000000..9d471f63620 --- /dev/null +++ b/llvm/test/Transforms/Inline/label-annotation.ll @@ -0,0 +1,35 @@ +; Inlining should not clone label annotations. +; Currently we block all duplication for simplicity. 
+ +; RUN: opt < %s -S -inline | FileCheck %s + +@the_global = global i32 0 + +declare void @llvm.codeview.annotation(metadata) + +define void @inlinee() { +entry: + store i32 42, i32* @the_global + call void @llvm.codeview.annotation(metadata !0) + ret void +} + +define void @caller() { +entry: + call void @inlinee() + ret void +} + +!0 = !{!"annotation"} + +; CHECK-LABEL: define void @inlinee() +; CHECK: store i32 42, i32* @the_global +; CHECK: call void @llvm.codeview.annotation(metadata !0) +; CHECK: ret void + +; CHECK-LABEL: define void @caller() +; MSVC can inline this. If we ever do, check for the store but make sure +; there is no annotation. +; CHECK: call void @inlinee() +; CHECK-NOT: call void @llvm.codeview.annotation +; CHECK: ret void diff --git a/llvm/test/Transforms/Inline/last-call-bonus.ll b/llvm/test/Transforms/Inline/last-call-bonus.ll new file mode 100644 index 00000000000..7de67861cf8 --- /dev/null +++ b/llvm/test/Transforms/Inline/last-call-bonus.ll @@ -0,0 +1,53 @@ +; The goal of this test is checking if LastCallToStaticBonus is applied +; correctly while deciding inline deferral. For the test code below, when +; inliner evaluates the callsite of bar->baz, it checks if inlining of bar->baz +; prevents inlining of foo->bar, even when foo->bar inlining is more beneficial +; than bar->baz inlining. As LastCallToStaticBonus has a massive value, and +; both baz and bar have only one caller, the cost of foo->bar inlining and +; bar->baz inlining should be non-trivial for inliner to compute that bar->baz +; inlining can actually prevent foo->bar inlining. To make the cost of these +; callsites big enough, loop unrolling pass with very high threshold is used to +; preprocess the test. 
+ +; RUN: opt < %s -loop-unroll -inline -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s +; RUN: opt < %s -passes='function(require<opt-remark-emit>,unroll),require<profile-summary>,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s +; CHECK-LABEL: define internal i32 @bar() + +define internal i32 @baz() { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %ret, label %bb1 + +ret: + ret i32 0 +} + +define internal i32 @bar() { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %ret, label %bb1 + +ret: + call i32 @baz() + ret i32 0 +} + +define i32 @foo() { +entry: + call i32 @bar() + ret i32 0 +} + +declare void @extern() diff --git a/llvm/test/Transforms/Inline/last-call-no-bonus.ll b/llvm/test/Transforms/Inline/last-call-no-bonus.ll new file mode 100644 index 00000000000..14fe2373422 --- /dev/null +++ b/llvm/test/Transforms/Inline/last-call-no-bonus.ll @@ -0,0 +1,58 @@ +; This code is virtually identical to last-call-bonus.ll, but the callsites +; to the internal functions are cold, thereby preventing the last call to +; static bonus from being applied. 
+ +; RUN: opt < %s -passes='function(require<opt-remark-emit>,unroll),require<profile-summary>,cgscc(inline)' -unroll-threshold=15000 -inline-threshold=250 -S | FileCheck %s + +; CHECK-LABEL: define internal i32 @baz +define internal i32 @baz() { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %ret, label %bb1 + +ret: + ret i32 0 +} + +; CHECK-LABEL: define internal i32 @bar +define internal i32 @bar(i1 %b) { +entry: + br label %bb1 + +bb1: + %ind = phi i32 [ 0, %entry ], [ %inc, %bb1 ] + call void @extern() + %inc = add nsw i32 %ind, 1 + %cmp = icmp sgt i32 %inc, 510 + br i1 %cmp, label %for.exit, label %bb1 + +for.exit: + br i1 %b, label %bb2, label %ret, !prof !0 +bb2: +; CHECK: call i32 @baz + call i32 @baz() + br label %ret +ret: + ret i32 0 +} +; CHECK-LABEL: define i32 @foo +define i32 @foo(i1 %b) { +entry: + br i1 %b, label %bb1, label %ret, !prof !0 +bb1: +; CHECK: call i32 @bar + call i32 @bar(i1 %b) + br label %ret +ret: + ret i32 0 +} + +declare void @extern() + +!0 = !{!"branch_weights", i32 1, i32 2500} diff --git a/llvm/test/Transforms/Inline/last-callsite.ll b/llvm/test/Transforms/Inline/last-callsite.ll new file mode 100644 index 00000000000..8ec53d0b6ff --- /dev/null +++ b/llvm/test/Transforms/Inline/last-callsite.ll @@ -0,0 +1,269 @@ +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=0 -S | FileCheck %s + +; The 'test1_' prefixed functions test the basic 'last callsite' inline +; threshold adjustment where we specifically inline the last call site of an +; internal function regardless of cost. 
+ +define internal void @test1_f() { +entry: + %p = alloca i32 + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + ret void +} + +; Identical to @test1_f but doesn't get inlined because there is more than one +; call. If this *does* get inlined, the body used both here and in @test1_f +; isn't a good test for different threshold based on the last call. +define internal void @test1_g() { +entry: + %p = alloca i32 + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + ret void +} + +define void @test1() { +; CHECK-LABEL: define void @test1() +entry: + call void @test1_f() +; CHECK-NOT: @test1_f + + call void @test1_g() + call void @test1_g() +; CHECK: call void @test1_g() +; CHECK: call void @test1_g() + + ret void +} + + +; The 'test2_' prefixed functions test that we can discover the last callsite +; bonus after having inlined the prior call site. For this to work, we need +; a callsite dependent cost so we have a trivial predicate guarding all the +; cost, and set that in a particular direction. + +define internal void @test2_f(i1 %b) { +entry: + %p = alloca i32 + br i1 %b, label %then, label %exit + +then: + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + br label %exit + +exit: + ret void +} + +; Identical to @test2_f but doesn't get inlined because there is more than one +; call. 
If this *does* get inlined, the body used both here and in @test2_f +; isn't a good test for different threshold based on the last call. +define internal void @test2_g(i1 %b) { +entry: + %p = alloca i32 + br i1 %b, label %then, label %exit + +then: + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + br label %exit + +exit: + ret void +} + +define void @test2() { +; CHECK-LABEL: define void @test2() +entry: + ; The first call is trivial to inline due to the argument. + call void @test2_f(i1 false) +; CHECK-NOT: @test2_f + + ; The second call is too expensive to inline unless we update the number of + ; calls after inlining the first. + call void @test2_f(i1 true) +; CHECK-NOT: @test2_f + + ; Sanity check that two calls with the hard predicate remain uninlined. + call void @test2_g(i1 true) + call void @test2_g(i1 true) +; CHECK: call void @test2_g(i1 true) +; CHECK: call void @test2_g(i1 true) + + ret void +} + + +; The 'test3_' prefixed functions are similar to the 'test2_' functions but the +; relative order of the trivial and hard to inline callsites is reversed. This +; checks that the order of calls isn't significant to whether we observe the +; "last callsite" threshold difference because the next-to-last gets inlined. +; FIXME: We don't currently catch this case. + +define internal void @test3_f(i1 %b) { +entry: + %p = alloca i32 + br i1 %b, label %then, label %exit + +then: + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + br label %exit + +exit: + ret void +} + +; Identical to @test3_f but doesn't get inlined because there is more than one +; call. 
If this *does* get inlined, the body used both here and in @test3_f +; isn't a good test for different threshold based on the last call. +define internal void @test3_g(i1 %b) { +entry: + %p = alloca i32 + br i1 %b, label %then, label %exit + +then: + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + br label %exit + +exit: + ret void +} + +define void @test3() { +; CHECK-LABEL: define void @test3() +entry: + ; The first call is too expensive to inline unless we update the number of + ; calls after inlining the second. + call void @test3_f(i1 true) +; FIXME: We should inline this call without iteration. +; CHECK: call void @test3_f(i1 true) + + ; But the second call is trivial to inline due to the argument. + call void @test3_f(i1 false) +; CHECK-NOT: @test3_f + + ; Sanity check that two calls with the hard predicate remain uninlined. + call void @test3_g(i1 true) + call void @test3_g(i1 true) +; CHECK: call void @test3_g(i1 true) +; CHECK: call void @test3_g(i1 true) + + ret void +} + + +; The 'test4_' prefixed functions are similar to the 'test2_' prefixed +; functions but include unusual constant expressions that make discovering that +; a function is dead harder. + +define internal void @test4_f(i1 %b) { +entry: + %p = alloca i32 + br i1 %b, label %then, label %exit + +then: + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + br label %exit + +exit: + ret void +} + +; Identical to @test4_f but doesn't get inlined because there is more than one +; call. 
If this *does* get inlined, the body used both here and in @test4_f +; isn't a good test for different threshold based on the last call. +define internal void @test4_g(i1 %b) { +entry: + %p = alloca i32 + br i1 %b, label %then, label %exit + +then: + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + store volatile i32 0, i32* %p + br label %exit + +exit: + ret void +} + +define void @test4() { +; CHECK-LABEL: define void @test4() +entry: + ; The first call is trivial to inline due to the argument. However this + ; argument also uses the function being called as part of a complex + ; constant expression. Merely inlining and deleting the call isn't enough to + ; drop the use count here, we need to GC the dead constant expression as + ; well. + call void @test4_f(i1 icmp ne (i64 ptrtoint (void (i1)* @test4_f to i64), i64 ptrtoint(void (i1)* @test4_f to i64))) +; CHECK-NOT: @test4_f + + ; The second call is too expensive to inline unless we update the number of + ; calls after inlining the first. + call void @test4_f(i1 true) +; CHECK-NOT: @test4_f + + ; And check that a single call to a function which is used by a complex + ; constant expression cannot be inlined because the constant expression forms + ; a second use. If this part starts failing we need to use more complex + ; constant expressions to reference a particular function with them. 
+ %sink = alloca i1 + store volatile i1 icmp ne (i64 ptrtoint (void (i1)* @test4_g to i64), i64 ptrtoint(void (i1)* @test4_g to i64)), i1* %sink + call void @test4_g(i1 true) +; CHECK: store volatile i1 false +; CHECK: call void @test4_g(i1 true) + + ret void +} diff --git a/llvm/test/Transforms/Inline/launder.invariant.group.ll b/llvm/test/Transforms/Inline/launder.invariant.group.ll new file mode 100644 index 00000000000..5ada6202f8f --- /dev/null +++ b/llvm/test/Transforms/Inline/launder.invariant.group.ll @@ -0,0 +1,59 @@ +; RUN: opt -S -inline < %s | FileCheck %s +; RUN: opt -S -O3 < %s | FileCheck %s +; RUN: opt -S -inline -inline-threshold=1 < %s | FileCheck %s + +%struct.A = type <{ i32 (...)**, i32, [4 x i8] }> + +; This test checks if value returned from the launder is considered aliasing +; with its argument. Due to bug caused by handling launder in capture tracking +; sometimes it would be considered noalias. +; CHECK-LABEL: define i32 @bar(%struct.A* noalias +define i32 @bar(%struct.A* noalias) { +; CHECK-NOT: noalias + %2 = bitcast %struct.A* %0 to i8* + %3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %2) + %4 = getelementptr inbounds i8, i8* %3, i64 8 + %5 = bitcast i8* %4 to i32* + store i32 42, i32* %5, align 8 + %6 = getelementptr inbounds %struct.A, %struct.A* %0, i64 0, i32 1 + %7 = load i32, i32* %6, align 8 + ret i32 %7 +} + +; CHECK-LABEL: define i32 @foo(%struct.A* noalias +define i32 @foo(%struct.A* noalias) { + ; CHECK-NOT: call i32 @bar( + ; CHECK-NOT: noalias + %2 = tail call i32 @bar(%struct.A* %0) + ret i32 %2 +} + + +; This test checks if invariant group intrinsics have zero cost for inlining. 
+; CHECK-LABEL: define i8* @caller(i8* +define i8* @caller(i8* %p) { +; CHECK-NOT: call i8* @lot_of_launders_and_strips + %a1 = call i8* @lot_of_launders_and_strips(i8* %p) + %a2 = call i8* @lot_of_launders_and_strips(i8* %a1) + %a3 = call i8* @lot_of_launders_and_strips(i8* %a2) + %a4 = call i8* @lot_of_launders_and_strips(i8* %a3) + ret i8* %a4 +} + +define i8* @lot_of_launders_and_strips(i8* %p) { + %a1 = call i8* @llvm.launder.invariant.group.p0i8(i8* %p) + %a2 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a1) + %a3 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a2) + %a4 = call i8* @llvm.launder.invariant.group.p0i8(i8* %a3) + + %s1 = call i8* @llvm.strip.invariant.group.p0i8(i8* %a4) + %s2 = call i8* @llvm.strip.invariant.group.p0i8(i8* %s1) + %s3 = call i8* @llvm.strip.invariant.group.p0i8(i8* %s2) + %s4 = call i8* @llvm.strip.invariant.group.p0i8(i8* %s3) + + ret i8* %s4 +} + + +declare i8* @llvm.launder.invariant.group.p0i8(i8*) +declare i8* @llvm.strip.invariant.group.p0i8(i8*) diff --git a/llvm/test/Transforms/Inline/lifetime-no-datalayout.ll b/llvm/test/Transforms/Inline/lifetime-no-datalayout.ll new file mode 100644 index 00000000000..5d1872c6a24 --- /dev/null +++ b/llvm/test/Transforms/Inline/lifetime-no-datalayout.ll @@ -0,0 +1,24 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +declare void @use(i8* %a) + +define void @helper() { + %a = alloca i8 + call void @use(i8* %a) + ret void +} + +; Size in llvm.lifetime.X should be 1 (default for i8). 
+define void @test() { +; CHECK-LABEL: @test( +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.start.p0i8(i64 1 +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.end.p0i8(i64 1 + call void @helper() +; CHECK-NOT: lifetime +; CHECK: ret void + ret void +} + diff --git a/llvm/test/Transforms/Inline/lifetime.ll b/llvm/test/Transforms/Inline/lifetime.ll new file mode 100644 index 00000000000..c47091395fc --- /dev/null +++ b/llvm/test/Transforms/Inline/lifetime.ll @@ -0,0 +1,118 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +declare void @llvm.lifetime.start.p0i8(i64, i8*) +declare void @llvm.lifetime.end.p0i8(i64, i8*) + +define void @helper_both_markers() { + %a = alloca i8 + ; Size in llvm.lifetime.start / llvm.lifetime.end differs from + ; allocation size. We should use the former. + call void @llvm.lifetime.start.p0i8(i64 2, i8* %a) + call void @llvm.lifetime.end.p0i8(i64 2, i8* %a) + ret void +} + +define void @test_both_markers() { +; CHECK-LABEL: @test_both_markers( +; CHECK: llvm.lifetime.start.p0i8(i64 2 +; CHECK-NEXT: llvm.lifetime.end.p0i8(i64 2 + call void @helper_both_markers() +; CHECK-NEXT: llvm.lifetime.start.p0i8(i64 2 +; CHECK-NEXT: llvm.lifetime.end.p0i8(i64 2 + call void @helper_both_markers() +; CHECK-NEXT: ret void + ret void +} + +;; Without this, the inliner will simplify out @test_no_marker before adding +;; any lifetime markers. +declare void @use(i8* %a) + +define void @helper_no_markers() { + %a = alloca i8 ; Allocation size is 1 byte. + call void @use(i8* %a) + ret void +} + +;; We can't use CHECK-NEXT because there's an extra call void @use in between. +;; Instead, we use CHECK-NOT to verify that there are no other lifetime calls. 
+define void @test_no_marker() { +; CHECK-LABEL: @test_no_marker( +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.start.p0i8(i64 1 +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.end.p0i8(i64 1 + call void @helper_no_markers() +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.start.p0i8(i64 1 +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.end.p0i8(i64 1 + call void @helper_no_markers() +; CHECK-NOT: lifetime +; CHECK: ret void + ret void +} + +define void @helper_two_casts() { + %a = alloca i32 + %b = bitcast i32* %a to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* %b) + %c = bitcast i32* %a to i8* + call void @llvm.lifetime.end.p0i8(i64 4, i8* %c) + ret void +} + +define void @test_two_casts() { +; CHECK-LABEL: @test_two_casts( +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.start.p0i8(i64 4 +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.end.p0i8(i64 4 + call void @helper_two_casts() +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.start.p0i8(i64 4 +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.end.p0i8(i64 4 + call void @helper_two_casts() +; CHECK-NOT: lifetime +; CHECK: ret void + ret void +} + +define void @helper_arrays_alloca() { + %a = alloca [10 x i32], align 16 + %1 = bitcast [10 x i32]* %a to i8* + call void @use(i8* %1) + ret void +} + +define void @test_arrays_alloca() { +; CHECK-LABEL: @test_arrays_alloca( +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.start.p0i8(i64 40, +; CHECK-NOT: lifetime +; CHECK: llvm.lifetime.end.p0i8(i64 40, + call void @helper_arrays_alloca() +; CHECK-NOT: lifetime +; CHECK: ret void + ret void +} + +%swift.error = type opaque + +define void @helper_swifterror_alloca() { +entry: + %swifterror = alloca swifterror %swift.error*, align 8 + store %swift.error* null, %swift.error** %swifterror, align 8 + ret void +} + +define void @test_swifterror_alloca() { +; CHECK-LABEL: @test_swifterror_alloca( +; CHECK-NOT: lifetime + call void @helper_swifterror_alloca() +; CHECK: ret void + ret void +} diff --git 
a/llvm/test/Transforms/Inline/local-as-metadata-undominated-use.ll b/llvm/test/Transforms/Inline/local-as-metadata-undominated-use.ll new file mode 100644 index 00000000000..a933d2d13f6 --- /dev/null +++ b/llvm/test/Transforms/Inline/local-as-metadata-undominated-use.ll @@ -0,0 +1,49 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +; Make sure the inliner doesn't crash when a metadata-bridged SSA operand is an +; undominated use. +; +; If we ever add a verifier check to prevent the scenario in this file, it's +; fine to delete this testcase. However, we would need a bitcode upgrade since +; such historical IR exists in practice. + +define i32 @foo(i32 %i) !dbg !4 { +entry: + tail call void @llvm.dbg.value(metadata i32 %add, metadata !8, metadata !10), !dbg !11 + %add = add nsw i32 1, %i, !dbg !12 + ret i32 %add, !dbg !13 +} + +; CHECK-LABEL: define i32 @caller( +define i32 @caller(i32 %i) { +; CHECK-NEXT: entry: +entry: +; Although the inliner shouldn't crash, it can't be expected to get the +; "correct" SSA value since its assumptions have been violated. 
+; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ![[EMPTY:[0-9]+]], +; CHECK-NEXT: %{{.*}} = add nsw + %call = tail call i32 @foo(i32 %i) + ret i32 %call +} + +declare void @llvm.dbg.value(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!9} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 265634) (llvm/trunk 265637)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2) +!1 = !DIFile(filename: "t.c", directory: "/path/to/tests") + +; CHECK: ![[EMPTY]] = !{} +!2 = !{} +!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0) +!5 = !DISubroutineType(types: !6) +!6 = !{!7, !7} +!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed) +!8 = !DILocalVariable(name: "add", arg: 1, scope: !4, file: !1, line: 2, type: !7) +!9 = !{i32 2, !"Debug Info Version", i32 3} +!10 = !DIExpression() +!11 = !DILocation(line: 2, column: 13, scope: !4) +!12 = !DILocation(line: 2, column: 27, scope: !4) +!13 = !DILocation(line: 2, column: 18, scope: !4) diff --git a/llvm/test/Transforms/Inline/monster_scc.ll b/llvm/test/Transforms/Inline/monster_scc.ll new file mode 100644 index 00000000000..b32a2aed331 --- /dev/null +++ b/llvm/test/Transforms/Inline/monster_scc.ll @@ -0,0 +1,432 @@ +; This test creates a monster SCC with a very pernicious call graph. It builds +; a cycle of cross-connected pairs of functions with interesting inlining +; decisions throughout, but ultimately trivial code complexity. +; +; Typically, a greedy approach to inlining works well for bottom-up inliners +; such as LLVM's. However, there is no way to be bottom-up over an SCC: it's +; a cycle! 
Greedily inlining as much as possible into each function of this +; *SCC* will have the disastrous effect of inlining all N-1 functions into the +; first one visited, N-2 functions into the second one visited, N-3 into the +; third, and so on. This is because until inlining occurs, each function in +; isolation appears to be an excellent inline candidate. +; +; Note that the exact number of calls in each function doesn't really matter. +; It is mostly a function of cost thresholds and visit order. Because this is an +; SCC there is no "right" or "wrong" answer here as long as no function blows up +; to be *huge*. The specific concerning pattern is if one or more functions get +; more than 16 calls in them. +; +; This test is extracted from the following C++ program compiled with Clang. +; The IR is simplified with SROA, instcombine, and simplify-cfg. Then C++ +; linkage stuff, attributes, target specific things, metadata and comments were +; removed. The order of the functions is also made more predictable than Clang's +; output order. 
+; +; void g(int); +; +; template <bool K, int N> void f(bool *B, bool *E) { +; if (K) +; g(N); +; if (B == E) +; return; +; if (*B) +; f<true, N + 1>(B + 1, E); +; else +; f<false, N + 1>(B + 1, E); +; } +; template <> void f<false, MAX>(bool *B, bool *E) { return f<false, 0>(B, E); } +; template <> void f<true, MAX>(bool *B, bool *E) { return f<true, 0>(B, E); } +; +; void test(bool *B, bool *E) { f<false, 0>(B, E); } +; +; RUN: opt -S < %s -inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,OLD +; RUN: opt -S < %s -passes=inline -inline-threshold=150 | FileCheck %s --check-prefixes=CHECK,NEW + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +declare void @_Z1gi(i32) + +; CHECK-LABEL: define void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi1EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi2EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi2EEvPbS0_( 
+; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi1EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi2EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi0EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 0) + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi1EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi1EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi2EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi3EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi3EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi1EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi2EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi2EEvPbS0_(i8* 
%add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi1EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi3EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi3EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi3EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi3EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi1EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 1) + %cmp = icmp eq i8* %B, %E +; CHECK-NOT: call + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi2EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi2EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi4EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi4EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi2EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 
1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi2EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1gi( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi4EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi4EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi4EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi4EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi2EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 2) + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi3EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi3EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb0ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void 
@_Z1fILb0ELi0EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb0ELi3EEvPbS0_(i8* %B, i8* %E) { +entry: + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi3EEvPbS0_( +; CHECK-NOT: call +; CHECK: call void @_Z1gi( +; CHECK-NOT: call +; CHECK: call void @_Z1fILb1ELi0EEvPbS0_( +; CHECK-NOT: call +; CHECK: call void @_Z1fILb0ELi0EEvPbS0_( +; CHECK-NOT: call +define void @_Z1fILb1ELi3EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1gi(i32 3) + %cmp = icmp eq i8* %B, %E + br i1 %cmp, label %if.end3, label %if.end + +if.end: + %0 = load i8, i8* %B, align 1 + %tobool = icmp eq i8 %0, 0 + %add.ptr2 = getelementptr inbounds i8, i8* %B, i64 1 + br i1 %tobool, label %if.else, label %if.then1 + +if.then1: + call void @_Z1fILb1ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.else: + call void @_Z1fILb0ELi4EEvPbS0_(i8* %add.ptr2, i8* %E) + br label %if.end3 + +if.end3: + ret void +} + +; CHECK-LABEL: define void @_Z1fILb0ELi4EEvPbS0_( +; CHECK-NOT: call +; CHECK: call void @_Z1fILb0ELi0EEvPbS0_( +; CHECK-NOT: call +define void @_Z1fILb0ELi4EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) + ret void +} + +; CHECK-LABEL: define void @_Z1fILb1ELi4EEvPbS0_( +; OLD-NOT: call +; OLD: call void @_Z1fILb1ELi0EEvPbS0_( +; OLD-NOT: call +; NEW-NOT: call +; NEW: call void @_Z1gi( +; NEW-NOT: call +; NEW: call void @_Z1fILb1ELi1EEvPbS0_( +; NEW-NOT: call +; NEW: call void @_Z1fILb0ELi1EEvPbS0_( +; NEW-NOT: call +define void @_Z1fILb1ELi4EEvPbS0_(i8* %B, i8* %E) { +entry: + call void @_Z1fILb1ELi0EEvPbS0_(i8* 
%B, i8* %E) + ret void +} + +; CHECK-LABEL: define void @_Z4testPbS_( +; CHECK: call +; CHECK-NOT: call +define void @_Z4testPbS_(i8* %B, i8* %E) { +entry: + call void @_Z1fILb0ELi0EEvPbS0_(i8* %B, i8* %E) + ret void +} + diff --git a/llvm/test/Transforms/Inline/nested-inline.ll b/llvm/test/Transforms/Inline/nested-inline.ll new file mode 100644 index 00000000000..7a207f6a3f0 --- /dev/null +++ b/llvm/test/Transforms/Inline/nested-inline.ll @@ -0,0 +1,112 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s +; Test that bar and bar2 are both inlined throughout and removed. +@A = weak global i32 0 ; <i32*> [#uses=1] +@B = weak global i32 0 ; <i32*> [#uses=1] +@C = weak global i32 0 ; <i32*> [#uses=1] + +define fastcc void @foo(i32 %X) { +entry: +; CHECK-LABEL: @foo( + %ALL = alloca i32, align 4 ; <i32*> [#uses=1] + %tmp1 = and i32 %X, 1 ; <i32> [#uses=1] + %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true + +cond_true: ; preds = %entry + store i32 1, i32* @A + br label %cond_next + +cond_next: ; preds = %cond_true, %entry + %tmp4 = and i32 %X, 2 ; <i32> [#uses=1] + %tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1] + br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5 + +cond_true5: ; preds = %cond_next + store i32 1, i32* @B + br label %cond_next7 + +cond_next7: ; preds = %cond_true5, %cond_next + %tmp10 = and i32 %X, 4 ; <i32> [#uses=1] + %tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1] + br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11 + +cond_true11: ; preds = %cond_next7 + store i32 1, i32* @C + br label %cond_next13 + +cond_next13: ; preds = %cond_true11, %cond_next7 + %tmp16 = and i32 %X, 8 ; <i32> [#uses=1] + %tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1] + br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17 + +cond_true17: ; preds = %cond_next13 + call void @ext( i32* %ALL ) + ret void + 
+UnifiedReturnBlock: ; preds = %cond_next13 + ret void +} + +; CHECK-NOT: @bar( +define internal fastcc void @bar(i32 %X) { +entry: + %ALL = alloca i32, align 4 ; <i32*> [#uses=1] + %tmp1 = and i32 %X, 1 ; <i32> [#uses=1] + %tmp1.upgrd.1 = icmp eq i32 %tmp1, 0 ; <i1> [#uses=1] + br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true + +cond_true: ; preds = %entry + store i32 1, i32* @A + br label %cond_next + +cond_next: ; preds = %cond_true, %entry + %tmp4 = and i32 %X, 2 ; <i32> [#uses=1] + %tmp4.upgrd.2 = icmp eq i32 %tmp4, 0 ; <i1> [#uses=1] + br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5 + +cond_true5: ; preds = %cond_next + store i32 1, i32* @B + br label %cond_next7 + +cond_next7: ; preds = %cond_true5, %cond_next + %tmp10 = and i32 %X, 4 ; <i32> [#uses=1] + %tmp10.upgrd.3 = icmp eq i32 %tmp10, 0 ; <i1> [#uses=1] + br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11 + +cond_true11: ; preds = %cond_next7 + store i32 1, i32* @C + br label %cond_next13 + +cond_next13: ; preds = %cond_true11, %cond_next7 + %tmp16 = and i32 %X, 8 ; <i32> [#uses=1] + %tmp16.upgrd.4 = icmp eq i32 %tmp16, 0 ; <i1> [#uses=1] + br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17 + +cond_true17: ; preds = %cond_next13 + call void @foo( i32 %X ) + ret void + +UnifiedReturnBlock: ; preds = %cond_next13 + ret void +} + +define internal fastcc void @bar2(i32 %X) { +entry: + call void @foo( i32 %X ) + ret void +} + +declare void @ext(i32*) + +define void @test(i32 %X) { +entry: +; CHECK: test +; CHECK-NOT: @bar( + tail call fastcc void @bar( i32 %X ) + tail call fastcc void @bar( i32 %X ) + tail call fastcc void @bar2( i32 %X ) + tail call fastcc void @bar2( i32 %X ) + ret void +; CHECK: ret +} diff --git a/llvm/test/Transforms/Inline/noalias-calls.ll b/llvm/test/Transforms/Inline/noalias-calls.ll new file mode 100644 index 00000000000..f4fe6fe9ff8 --- /dev/null +++ b/llvm/test/Transforms/Inline/noalias-calls.ll @@ -0,0 +1,45 @@ +; RUN: opt -basicaa 
-inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i1) #0 +declare void @hey() #0 + +define void @hello(i8* noalias nocapture %a, i8* noalias nocapture readonly %c, i8* nocapture %b) #1 { +entry: + %l = alloca i8, i32 512, align 1 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 0) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 0) + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 0) + call void @hey() + call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %l, i8* align 16 %c, i64 16, i1 0) + ret void +} + +define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +entry: + tail call void @hello(i8* %a, i8* %c, i8* %b) + ret void +} + +; CHECK: define void @foo(i8* nocapture %a, i8* nocapture readonly %c, i8* nocapture %b) #2 { +; CHECK: entry: +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %b, i64 16, i1 false) #1, !noalias !0 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %b, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %a, i8* align 16 %c, i64 16, i1 false) #1, !alias.scope !5 +; CHECK: call void @hey() #1, !noalias !5 +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %{{.*}}, i8* align 16 %c, i64 16, i1 false) #1, !noalias !3 +; CHECK: ret void +; CHECK: } + +attributes #0 = { nounwind argmemonly } +attributes #1 = { nounwind } +attributes #2 = { nounwind uwtable } + +; CHECK: !0 = !{!1} +; CHECK: !1 = distinct !{!1, !2, !"hello: %c"} +; CHECK: !2 = distinct !{!2, !"hello"} +; CHECK: !3 = !{!4} +; CHECK: !4 = 
distinct !{!4, !2, !"hello: %a"} +; CHECK: !5 = !{!4, !1} + diff --git a/llvm/test/Transforms/Inline/noalias-cs.ll b/llvm/test/Transforms/Inline/noalias-cs.ll new file mode 100644 index 00000000000..8528a391cf9 --- /dev/null +++ b/llvm/test/Transforms/Inline/noalias-cs.ll @@ -0,0 +1,84 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: nounwind uwtable +define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +entry: + %0 = load float, float* %c, align 4, !noalias !3 + %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 + store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 + %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 + store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 + %1 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %1, float* %arrayidx, align 4 + ret void +} + +define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +entry: + call void @foo2(float* %a, float* %b, float* %c), !noalias !0 + call void @foo2(float* %b, float* %b, float* %a), !alias.scope !0 + ret void +} + +; CHECK: define void @foo(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %0 = load float, float* %c, align 4, !noalias !6 +; CHECK: %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i.i, align 4, !alias.scope !12, !noalias !13 +; CHECK: %arrayidx1.i.i = getelementptr inbounds float, float* %b, i64 8 +; CHECK: store float %0, float* %arrayidx1.i.i, align 4, !alias.scope !14, !noalias !15 +; CHECK: %1 = load float, float* %c, align 4, !noalias !16 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7 +; CHECK: 
store float %1, float* %arrayidx.i, align 4, !noalias !16 +; CHECK: %2 = load float, float* %a, align 4, !alias.scope !16, !noalias !17 +; CHECK: %arrayidx.i.i1 = getelementptr inbounds float, float* %b, i64 5 +; CHECK: store float %2, float* %arrayidx.i.i1, align 4, !alias.scope !21, !noalias !22 +; CHECK: %arrayidx1.i.i2 = getelementptr inbounds float, float* %b, i64 8 +; CHECK: store float %2, float* %arrayidx1.i.i2, align 4, !alias.scope !23, !noalias !24 +; CHECK: %3 = load float, float* %a, align 4, !alias.scope !16 +; CHECK: %arrayidx.i3 = getelementptr inbounds float, float* %b, i64 7 +; CHECK: store float %3, float* %arrayidx.i3, align 4, !alias.scope !16 +; CHECK: ret void +; CHECK: } + +attributes #0 = { nounwind uwtable } + +!0 = !{!1} +!1 = distinct !{!1, !2, !"hello: %a"} +!2 = distinct !{!2, !"hello"} +!3 = !{!4, !6} +!4 = distinct !{!4, !5, !"hello2: %a"} +!5 = distinct !{!5, !"hello2"} +!6 = distinct !{!6, !5, !"hello2: %b"} +!7 = !{!4} +!8 = !{!6} + +; CHECK: !0 = !{!1, !3} +; CHECK: !1 = distinct !{!1, !2, !"hello2: %a"} +; CHECK: !2 = distinct !{!2, !"hello2"} +; CHECK: !3 = distinct !{!3, !2, !"hello2: %b"} +; CHECK: !4 = !{!1} +; CHECK: !5 = !{!3} +; CHECK: !6 = !{!7, !9, !10} +; CHECK: !7 = distinct !{!7, !8, !"hello2: %a"} +; CHECK: !8 = distinct !{!8, !"hello2"} +; CHECK: !9 = distinct !{!9, !8, !"hello2: %b"} +; CHECK: !10 = distinct !{!10, !11, !"hello: %a"} +; CHECK: !11 = distinct !{!11, !"hello"} +; CHECK: !12 = !{!7} +; CHECK: !13 = !{!9, !10} +; CHECK: !14 = !{!9} +; CHECK: !15 = !{!7, !10} +; CHECK: !16 = !{!10} +; CHECK: !17 = !{!18, !20} +; CHECK: !18 = distinct !{!18, !19, !"hello2: %a"} +; CHECK: !19 = distinct !{!19, !"hello2"} +; CHECK: !20 = distinct !{!20, !19, !"hello2: %b"} +; CHECK: !21 = !{!18, !10} +; CHECK: !22 = !{!20} +; CHECK: !23 = !{!20, !10} +; CHECK: !24 = !{!18} + diff --git a/llvm/test/Transforms/Inline/noalias.ll b/llvm/test/Transforms/Inline/noalias.ll new file mode 100644 index 00000000000..27e53afc2a7 --- 
/dev/null +++ b/llvm/test/Transforms/Inline/noalias.ll @@ -0,0 +1,76 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @hello(float* noalias nocapture %a, float* nocapture readonly %c) #0 { +entry: + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 5 + store float %0, float* %arrayidx, align 4 + ret void +} + +define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +entry: + tail call void @hello(float* %a, float* %c) + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %0, float* %arrayidx, align 4 + ret void +} + +; CHECK: define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %0 = load float, float* %c, align 4, !noalias !0 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !0 +; CHECK: %1 = load float, float* %c, align 4 +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: ret void +; CHECK: } + +define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 { +entry: + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 5 + store float %0, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %b, i64 8 + store float %0, float* %arrayidx1, align 4 + ret void +} + +define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +entry: + tail call void @hello2(float* %a, float* %b, float* %c) + %0 = load float, float* %c, align 4 + 
%arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %0, float* %arrayidx, align 4 + ret void +} + +; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %0 = load float, float* %c, align 4, !noalias !3 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !7, !noalias !8 +; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 +; CHECK: store float %0, float* %arrayidx1.i, align 4, !alias.scope !8, !noalias !7 +; CHECK: %1 = load float, float* %c, align 4 +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: ret void +; CHECK: } + +attributes #0 = { nounwind uwtable } + +; CHECK: !0 = !{!1} +; CHECK: !1 = distinct !{!1, !2, !"hello: %a"} +; CHECK: !2 = distinct !{!2, !"hello"} +; CHECK: !3 = !{!4, !6} +; CHECK: !4 = distinct !{!4, !5, !"hello2: %a"} +; CHECK: !5 = distinct !{!5, !"hello2"} +; CHECK: !6 = distinct !{!6, !5, !"hello2: %b"} +; CHECK: !7 = !{!4} +; CHECK: !8 = !{!6} + diff --git a/llvm/test/Transforms/Inline/noalias2.ll b/llvm/test/Transforms/Inline/noalias2.ll new file mode 100644 index 00000000000..432fccf431c --- /dev/null +++ b/llvm/test/Transforms/Inline/noalias2.ll @@ -0,0 +1,97 @@ +; RUN: opt -inline -enable-noalias-to-md-conversion -S < %s | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @hello(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { +entry: + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 5 + store float %0, float* %arrayidx, align 4 + ret void +} + +define void @foo(float* noalias nocapture %a, float* 
noalias nocapture readonly %c) #0 { +entry: + tail call void @hello(float* %a, float* %c) + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %0, float* %arrayidx, align 4 + ret void +} + +; CHECK: define void @foo(float* noalias nocapture %a, float* noalias nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %0 = load float, float* %c, align 4, !alias.scope !0, !noalias !3 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i, align 4, !alias.scope !3, !noalias !0 +; CHECK: %1 = load float, float* %c, align 4 +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %1, float* %arrayidx, align 4 +; CHECK: ret void +; CHECK: } + +define void @hello2(float* noalias nocapture %a, float* noalias nocapture %b, float* nocapture readonly %c) #0 { +entry: + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 6 + store float %0, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %b, i64 8 + store float %0, float* %arrayidx1, align 4 + ret void +} + +; Check that when hello() is inlined into foo(), and then foo() is inlined into +; foo2(), the noalias scopes are properly concatenated. 
+define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +entry: + tail call void @foo(float* %a, float* %c) + tail call void @hello2(float* %a, float* %b, float* %c) + %0 = load float, float* %c, align 4 + %arrayidx = getelementptr inbounds float, float* %a, i64 7 + store float %0, float* %arrayidx, align 4 + ret void +} + +; CHECK: define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture readonly %c) #0 { +; CHECK: entry: +; CHECK: %0 = load float, float* %c, align 4, !alias.scope !5, !noalias !10 +; CHECK: %arrayidx.i.i = getelementptr inbounds float, float* %a, i64 5 +; CHECK: store float %0, float* %arrayidx.i.i, align 4, !alias.scope !10, !noalias !5 +; CHECK: %1 = load float, float* %c, align 4, !alias.scope !13, !noalias !14 +; CHECK: %arrayidx.i = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %1, float* %arrayidx.i, align 4, !alias.scope !14, !noalias !13 +; CHECK: %2 = load float, float* %c, align 4, !noalias !15 +; CHECK: %arrayidx.i1 = getelementptr inbounds float, float* %a, i64 6 +; CHECK: store float %2, float* %arrayidx.i1, align 4, !alias.scope !19, !noalias !20 +; CHECK: %arrayidx1.i = getelementptr inbounds float, float* %b, i64 8 +; CHECK: store float %2, float* %arrayidx1.i, align 4, !alias.scope !20, !noalias !19 +; CHECK: %3 = load float, float* %c, align 4 +; CHECK: %arrayidx = getelementptr inbounds float, float* %a, i64 7 +; CHECK: store float %3, float* %arrayidx, align 4 +; CHECK: ret void +; CHECK: } + +; CHECK: !0 = !{!1} +; CHECK: !1 = distinct !{!1, !2, !"hello: %c"} +; CHECK: !2 = distinct !{!2, !"hello"} +; CHECK: !3 = !{!4} +; CHECK: !4 = distinct !{!4, !2, !"hello: %a"} +; CHECK: !5 = !{!6, !8} +; CHECK: !6 = distinct !{!6, !7, !"hello: %c"} +; CHECK: !7 = distinct !{!7, !"hello"} +; CHECK: !8 = distinct !{!8, !9, !"foo: %c"} +; CHECK: !9 = distinct !{!9, !"foo"} +; CHECK: !10 = !{!11, !12} +; CHECK: !11 = distinct !{!11, !7, !"hello: %a"} +; CHECK: 
!12 = distinct !{!12, !9, !"foo: %a"} +; CHECK: !13 = !{!8} +; CHECK: !14 = !{!12} +; CHECK: !15 = !{!16, !18} +; CHECK: !16 = distinct !{!16, !17, !"hello2: %a"} +; CHECK: !17 = distinct !{!17, !"hello2"} +; CHECK: !18 = distinct !{!18, !17, !"hello2: %b"} +; CHECK: !19 = !{!16} +; CHECK: !20 = !{!18} + +attributes #0 = { nounwind uwtable } + diff --git a/llvm/test/Transforms/Inline/noinline-recursive-fn.ll b/llvm/test/Transforms/Inline/noinline-recursive-fn.ll new file mode 100644 index 00000000000..2b1851bd46b --- /dev/null +++ b/llvm/test/Transforms/Inline/noinline-recursive-fn.ll @@ -0,0 +1,111 @@ +; The inliner should never inline recursive functions into other functions. +; This effectively is just peeling off the first iteration of a loop, and the +; inliner heuristics are not set up for this. + +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-apple-darwin10.3" + +@g = common global i32 0 ; <i32*> [#uses=1] + +define internal void @foo(i32 %x) nounwind ssp { +entry: + %0 = icmp slt i32 %x, 0 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %entry + %1 = sub nsw i32 %x, 1 ; <i32> [#uses=1] + call void @foo(i32 %1) nounwind ssp + store volatile i32 1, i32* @g, align 4 + ret void + +return: ; preds = %entry + ret void +} + + +;; CHECK-LABEL: @bonk( +;; CHECK: call void @foo(i32 42) +define void @bonk() nounwind ssp { +entry: + call void @foo(i32 42) nounwind ssp + ret void +} + + + +;; Here is an indirect case that should not be infinitely inlined. 
+ +define internal void @f1(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp { +entry: + %0 = bitcast i8* %Bar to void (i32, i8*, i8*)* + %1 = sub nsw i32 %x, 1 + call void %0(i32 %1, i8* %Foo, i8* %Bar) nounwind + store volatile i32 42, i32* @g, align 4 + ret void +} + +define internal void @f2(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp { +entry: + %0 = icmp slt i32 %x, 0 ; <i1> [#uses=1] + br i1 %0, label %return, label %bb + +bb: ; preds = %entry + %1 = bitcast i8* %Foo to void (i32, i8*, i8*)* ; <void (i32, i8*, i8*)*> [#uses=1] + call void %1(i32 %x, i8* %Foo, i8* %Bar) nounwind + store volatile i32 13, i32* @g, align 4 + ret void + +return: ; preds = %entry + ret void +} + + +; CHECK-LABEL: @top_level( +; CHECK: call void @f2(i32 122 +; Here we inline one instance of the cycle, but we don't want to completely +; unroll it. +define void @top_level() nounwind ssp { +entry: + call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp + ret void +} + + +; Check that a recursive function, when called with a constant that makes the +; recursive path dead code can actually be inlined. 
+define i32 @fib(i32 %i) { +entry: + %is.zero = icmp eq i32 %i, 0 + br i1 %is.zero, label %zero.then, label %zero.else + +zero.then: + ret i32 0 + +zero.else: + %is.one = icmp eq i32 %i, 1 + br i1 %is.one, label %one.then, label %one.else + +one.then: + ret i32 1 + +one.else: + %i1 = sub i32 %i, 1 + %f1 = call i32 @fib(i32 %i1) + %i2 = sub i32 %i, 2 + %f2 = call i32 @fib(i32 %i2) + %f = add i32 %f1, %f2 + ret i32 %f +} + +define i32 @fib_caller() { +; CHECK-LABEL: @fib_caller( +; CHECK-NOT: call +; CHECK: ret + %f1 = call i32 @fib(i32 0) + %f2 = call i32 @fib(i32 1) + %result = add i32 %f1, %f2 + ret i32 %result +} diff --git a/llvm/test/Transforms/Inline/noinline.ll b/llvm/test/Transforms/Inline/noinline.ll new file mode 100644 index 00000000000..7667114b68e --- /dev/null +++ b/llvm/test/Transforms/Inline/noinline.ll @@ -0,0 +1,18 @@ +; RUN: opt -inline -S < %s | FileCheck %s +; PR6682 +declare void @foo() nounwind + +define void @bar() nounwind { +entry: + tail call void @foo() nounwind + ret void +} + +define void @bazz() nounwind { +entry: + tail call void @bar() nounwind noinline + ret void +} + +; CHECK: define void @bazz() +; CHECK: call void @bar() diff --git a/llvm/test/Transforms/Inline/nonnull.ll b/llvm/test/Transforms/Inline/nonnull.ll new file mode 100644 index 00000000000..46665466924 --- /dev/null +++ b/llvm/test/Transforms/Inline/nonnull.ll @@ -0,0 +1,46 @@ +; RUN: opt -S -inline %s | FileCheck %s +; RUN: opt -S -passes='cgscc(inline)' %s | FileCheck %s + +declare void @foo() +declare void @bar() + +define void @callee(i8* %arg) { + %cmp = icmp eq i8* %arg, null + br i1 %cmp, label %expensive, label %done + +; This block is designed to be too expensive to inline. We can only inline +; callee if this block is known to be dead. 
+expensive: + call void @foo() + call void @foo() + call void @foo() + call void @foo() + call void @foo() + call void @foo() + call void @foo() + call void @foo() + call void @foo() + call void @foo() + ret void + +done: + call void @bar() + ret void +} + +; Positive test - arg is known non null +define void @caller(i8* nonnull %arg) { +; CHECK-LABEL: @caller +; CHECK: call void @bar() + call void @callee(i8* nonnull %arg) + ret void +} + +; Negative test - arg is not known to be non null +define void @caller2(i8* %arg) { +; CHECK-LABEL: @caller2 +; CHECK: call void @callee( + call void @callee(i8* %arg) + ret void +} + diff --git a/llvm/test/Transforms/Inline/null-function.ll b/llvm/test/Transforms/Inline/null-function.ll new file mode 100644 index 00000000000..2aecfa85cd8 --- /dev/null +++ b/llvm/test/Transforms/Inline/null-function.ll @@ -0,0 +1,9 @@ +; RUN: opt -print-before=always-inline -always-inline < %s -o /dev/null 2>&1 | FileCheck %s + +define i32 @main() #0 { +entry: + ret i32 0 +} + +; CHECK: *** IR Dump Before Inliner for always_inline functions *** +; CHECK: Printing <null> Function diff --git a/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll b/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll new file mode 100644 index 00000000000..16d7db33412 --- /dev/null +++ b/llvm/test/Transforms/Inline/optimization-remarks-hotness-threshold.ll @@ -0,0 +1,56 @@ +; RUN: opt < %s -S -inline -pass-remarks=inline \ +; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s + +; RUN: opt < %s -S -passes=inline -pass-remarks-output=%t -pass-remarks=inline \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold=1 2>&1 | \ +; RUN: FileCheck -allow-empty -check-prefix=THRESHOLD %s + +; Check that when any threshold is specified we ignore remarks with no +; hotness -- these are blocks that have not been executed during training. 
+ +; 1 int foo() { return 1; } +; 2 +; 3 int bar() { +; 4 return foo(); +; 5 } + +; CHECK: remark: /tmp/s.c:4:10: foo inlined into bar with (cost={{[0-9\-]+}}, threshold={{[0-9]+}}) +; THRESHOLD-NOT: remark + +; ModuleID = '/tmp/s.c' +source_filename = "/tmp/s.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +; Function Attrs: nounwind ssp uwtable +define i32 @foo() #0 !dbg !7 { +entry: + ret i32 1, !dbg !9 +} + +; Function Attrs: nounwind ssp uwtable +define i32 @bar() #0 !dbg !10 { +entry: + %call = call i32 @foo(), !dbg !11 + ret i32 %call, !dbg !12 +} + +attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) +!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = !{!"clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 1, column: 13, scope: !7) +!10 = distinct !DISubprogram(name: 
"bar", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, unit: !0, retainedNodes: !2) +!11 = !DILocation(line: 4, column: 10, scope: !10) +!12 = !DILocation(line: 4, column: 3, scope: !10) diff --git a/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll new file mode 100644 index 00000000000..8692abfaf19 --- /dev/null +++ b/llvm/test/Transforms/Inline/optimization-remarks-passed-yaml.ll @@ -0,0 +1,79 @@ +; RUN: opt < %s -S -inline -pass-remarks-output=%t -pass-remarks=inline \ +; RUN: -pass-remarks-missed=inline -pass-remarks-analysis=inline \ +; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s +; RUN: cat %t | FileCheck -check-prefix=YAML %s + +; RUN: opt < %s -S -passes=inline -pass-remarks-output=%t -pass-remarks=inline \ +; RUN: -pass-remarks-missed=inline -pass-remarks-analysis=inline \ +; RUN: -pass-remarks-with-hotness 2>&1 | FileCheck %s +; RUN: cat %t | FileCheck -check-prefix=YAML %s + +; Check the YAML file for inliner-generated passed and analysis remarks. 
This +; is the input: + +; 1 int foo() { return 1; } +; 2 +; 3 int bar() { +; 4 return foo(); +; 5 } + +; CHECK: remark: /tmp/s.c:4:10: foo inlined into bar with (cost={{[0-9\-]+}}, threshold={{[0-9]+}}) (hotness: 30) + +; YAML: --- !Passed +; YAML-NEXT: Pass: inline +; YAML-NEXT: Name: Inlined +; YAML-NEXT: DebugLoc: { File: '/tmp/s.c', Line: 4, Column: 10 } +; YAML-NEXT: Function: bar +; YAML-NEXT: Hotness: 30 +; YAML-NEXT: Args: +; YAML-NEXT: - Callee: foo +; YAML-NEXT: DebugLoc: { File: '/tmp/s.c', Line: 1, Column: 0 } +; YAML-NEXT: - String: ' inlined into ' +; YAML-NEXT: - Caller: bar +; YAML-NEXT: DebugLoc: { File: '/tmp/s.c', Line: 3, Column: 0 } +; YAML-NEXT: - String: ' with ' +; YAML-NEXT: - String: '(cost=' +; YAML-NEXT: - Cost: '{{[0-9\-]+}}' +; YAML-NEXT: - String: ', threshold=' +; YAML-NEXT: - Threshold: '{{[0-9]+}}' +; YAML-NEXT: - String: ')' +; YAML-NEXT: ... + +; ModuleID = '/tmp/s.c' +source_filename = "/tmp/s.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +; Function Attrs: nounwind ssp uwtable +define i32 @foo() #0 !dbg !7 { +entry: + ret i32 1, !dbg !9 +} + +; Function Attrs: nounwind ssp uwtable +define i32 @bar() #0 !dbg !10 !prof !13 { +entry: + %call = call i32 @foo(), !dbg !11 + ret i32 %call, !dbg !12 +} + +attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="core2" "target-features"="+cx16,+fxsr,+mmx,+sse,+sse2,+sse3,+ssse3,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: 
"clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) +!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = !{!"clang version 4.0.0 (trunk 282540) (llvm/trunk 282542)"} +!7 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 1, column: 13, scope: !7) +!10 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 3, type: !8, isLocal: false, isDefinition: true, scopeLine: 3, isOptimized: true, unit: !0, retainedNodes: !2) +!11 = !DILocation(line: 4, column: 10, scope: !10) +!12 = !DILocation(line: 4, column: 3, scope: !10) +!13 = !{!"function_entry_count", i64 30} diff --git a/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll new file mode 100644 index 00000000000..1a1c0f4bac1 --- /dev/null +++ b/llvm/test/Transforms/Inline/optimization-remarks-with-hotness.ll @@ -0,0 +1,40 @@ +; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \ +; RUN: | FileCheck %s +; RUN: opt < %s -passes=inline -pass-remarks=inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 \ +; RUN: | FileCheck %s + +; CHECK: foo inlined into bar with (cost=always): always inline attribute (hotness: 30) +; CHECK: foz not inlined into bar because it should never be inlined (cost=never): noinline function attribute (hotness: 30) + +; Function Attrs: alwaysinline nounwind uwtable +define i32 @foo() #0 !prof !1 { +entry: + ret i32 4 +} + +; Function Attrs: noinline nounwind 
uwtable +define i32 @foz() #1 !prof !2 { +entry: + ret i32 2 +} + +; Function Attrs: nounwind uwtable +define i32 @bar() !prof !3 { +entry: + %call = call i32 @foo() + %call2 = call i32 @foz() + %mul = mul i32 %call, %call2 + ret i32 %mul +} + +attributes #0 = { alwaysinline } +attributes #1 = { noinline } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.5.0 "} +!1 = !{!"function_entry_count", i64 10} +!2 = !{!"function_entry_count", i64 20} +!3 = !{!"function_entry_count", i64 30} diff --git a/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll b/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll new file mode 100644 index 00000000000..10a93f5cd79 --- /dev/null +++ b/llvm/test/Transforms/Inline/optimization-remarks-yaml.ll @@ -0,0 +1,118 @@ +; RUN: opt < %s -S -inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 15 \ +; RUN: -pass-remarks-output=%t 2>&1 | FileCheck %s +; RUN: cat %t | FileCheck -check-prefix=YAML %s +; RUN: opt < %s -S -inline -pass-remarks-with-hotness -pass-remarks-output=%t +; RUN: cat %t | FileCheck -check-prefix=YAML %s +; +; Verify that remarks that don't meet the hotness threshold are not output. +; RUN: opt < %s -S -inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \ +; RUN: -pass-remarks-output=%t.threshold 2>&1 | \ +; RUN: FileCheck -check-prefix=THRESHOLD %s +; RUN: test ! -s %t.threshold +; RUN: opt < %s -S -inline \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \ +; RUN: -pass-remarks-output=%t.threshold +; The remarks output file should be empty. +; RUN: test ! 
-s %t.threshold + +; NewPM: +; RUN: opt < %s -S -passes=inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 15 \ +; RUN: -pass-remarks-output=%t 2>&1 | FileCheck %s +; RUN: cat %t | FileCheck -check-prefix=YAML %s +; RUN: opt < %s -S -passes=inline -pass-remarks-with-hotness -pass-remarks-output=%t +; RUN: cat %t | FileCheck -check-prefix=YAML %s +; +; Verify that remarks that don't meet the hotness threshold are not output. +; RUN: opt < %s -S -passes=inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \ +; RUN: -pass-remarks-output=%t.threshold 2>&1 | \ +; RUN: FileCheck -check-prefix=THRESHOLD %s +; RUN: test ! -s %t.threshold +; RUN: opt < %s -S -passes=inline \ +; RUN: -pass-remarks-with-hotness -pass-remarks-hotness-threshold 100 \ +; RUN: -pass-remarks-output=%t.threshold +; The remarks output file should be empty. +; RUN: test ! -s %t.threshold + +; Check the YAML file generated for inliner remarks for this program: +; +; 1 int foo(); +; 2 int bar(); +; 3 +; 4 int baz() { +; 5 return foo() + bar(); +; 6 } + +; CHECK: remark: /tmp/s.c:5:10: foo will not be inlined into baz because its definition is unavailable (hotness: 30) +; CHECK-NEXT: remark: /tmp/s.c:5:18: bar will not be inlined into baz because its definition is unavailable (hotness: 30) + +; YAML: --- !Missed +; YAML-NEXT: Pass: inline +; YAML-NEXT: Name: NoDefinition +; YAML-NEXT: DebugLoc: { File: '/tmp/s.c', Line: 5, Column: 10 } +; YAML-NEXT: Function: baz +; YAML-NEXT: Hotness: 30 +; YAML-NEXT: Args: +; YAML-NEXT: - Callee: foo +; YAML-NEXT: - String: ' will not be inlined into ' +; YAML-NEXT: - Caller: baz +; YAML-NEXT: DebugLoc: { File: '/tmp/s.c', Line: 4, Column: 0 } +; YAML-NEXT: - String: ' because its definition is unavailable' +; YAML-NEXT: ... 
+; YAML-NEXT: --- !Missed +; YAML-NEXT: Pass: inline +; YAML-NEXT: Name: NoDefinition +; YAML-NEXT: DebugLoc: { File: '/tmp/s.c', Line: 5, Column: 18 } +; YAML-NEXT: Function: baz +; YAML-NEXT: Hotness: 30 +; YAML-NEXT: Args: +; YAML-NEXT: - Callee: bar +; YAML-NEXT: - String: ' will not be inlined into ' +; YAML-NEXT: - Caller: baz +; YAML-NEXT: DebugLoc: { File: '/tmp/s.c', Line: 4, Column: 0 } +; YAML-NEXT: - String: ' because its definition is unavailable' +; YAML-NEXT: ... + +; No remarks should be output, since none meet the threshold. +; THRESHOLD-NOT: remark + +; ModuleID = '/tmp/s.c' +source_filename = "/tmp/s.c" +target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-apple-macosx10.11.0" + +; Function Attrs: nounwind ssp uwtable +define i32 @"\01baz"() !dbg !7 !prof !14 { +entry: + %call = call i32 (...) @foo(), !dbg !9 + %call1 = call i32 (...) @"\01bar"(), !dbg !10 + %add = add nsw i32 %call, %call1, !dbg !12 + ret i32 %add, !dbg !13 +} + +declare i32 @foo(...) + +declare i32 @"\01bar"(...) 
+ +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4, !5} +!llvm.ident = !{!6} + +!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 4.0.0 (trunk 281293) (llvm/trunk 281290)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2) +!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 2, !"Debug Info Version", i32 3} +!5 = !{i32 1, !"PIC Level", i32 2} +!6 = !{!"clang version 4.0.0 (trunk 281293) (llvm/trunk 281290)"} +!7 = distinct !DISubprogram(name: "baz", scope: !1, file: !1, line: 4, type: !8, isLocal: false, isDefinition: true, scopeLine: 4, isOptimized: true, unit: !0, retainedNodes: !2) +!8 = !DISubroutineType(types: !2) +!9 = !DILocation(line: 5, column: 10, scope: !7) +!10 = !DILocation(line: 5, column: 18, scope: !11) +!11 = !DILexicalBlockFile(scope: !7, file: !1, discriminator: 1) +!12 = !DILocation(line: 5, column: 16, scope: !7) +!13 = !DILocation(line: 5, column: 3, scope: !7) +!14 = !{!"function_entry_count", i64 30} diff --git a/llvm/test/Transforms/Inline/optimization-remarks.ll b/llvm/test/Transforms/Inline/optimization-remarks.ll new file mode 100644 index 00000000000..72e90aefa61 --- /dev/null +++ b/llvm/test/Transforms/Inline/optimization-remarks.ll @@ -0,0 +1,77 @@ +; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-analysis=inline -S 2>&1 | \ +; RUN: FileCheck -check-prefix=CHECK -check-prefix=NO_HOTNESS %s +; RUN: opt < %s -inline -pass-remarks=inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | \ +; RUN: FileCheck -check-prefix=CHECK -check-prefix=HOTNESS %s + +; RUN: opt < %s -passes=inline -pass-remarks=inline -pass-remarks-missed=inline \ +; RUN: -pass-remarks-analysis=inline -S 2>&1 | \ +; RUN: FileCheck -check-prefix=CHECK -check-prefix=NO_HOTNESS %s +; RUN: opt < %s -passes=inline -pass-remarks=inline 
-pass-remarks-missed=inline \ +; RUN: -pass-remarks-analysis=inline -pass-remarks-with-hotness -S 2>&1 | \ +; RUN: FileCheck -check-prefix=CHECK -check-prefix=HOTNESS %s + +; HOTNESS: fox will not be inlined into bar because its definition is unavailable +; NO_HOTNESS-NOT: fox will not be inlined into bar because its definition is unavailable +; CHECK: foo inlined into bar with (cost=always): always inline attribute +; CHECK: foz not inlined into bar because it should never be inlined (cost=never): noinline function attribute + +; Function Attrs: alwaysinline nounwind uwtable +define i32 @foo(i32 %x, i32 %y) #0 !prof !1 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %1 = load i32, i32* %y.addr, align 4 + %add = add nsw i32 %0, %1 + ret i32 %add +} + +; Function Attrs: noinline nounwind uwtable +define float @foz(i32 %x, i32 %y) #1 !prof !1 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32, i32* %x.addr, align 4 + %1 = load i32, i32* %y.addr, align 4 + %mul = mul nsw i32 %0, %1 + %conv = sitofp i32 %mul to float + ret float %conv +} + +declare i32 @fox() + +; Function Attrs: nounwind uwtable +define i32 @bar(i32 %j) #2 !prof !1 { +entry: + %j.addr = alloca i32, align 4 + store i32 %j, i32* %j.addr, align 4 + %0 = load i32, i32* %j.addr, align 4 + %1 = load i32, i32* %j.addr, align 4 + %sub = sub nsw i32 %1, 2 + %call = call i32 @foo(i32 %0, i32 %sub) + %conv = sitofp i32 %call to float + %2 = load i32, i32* %j.addr, align 4 + %sub1 = sub nsw i32 %2, 2 + %3 = load i32, i32* %j.addr, align 4 + %call2 = call float @foz(i32 %sub1, i32 %3) + %mul = fmul float %conv, %call2 + %conv3 = fptosi float %mul to i32 + %call3 = call i32 @fox() + %add = add i32 %conv3, %call + ret i32 %add +} + +attributes #0 = { alwaysinline 
nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #1 = { noinline nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } +attributes #2 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.ident = !{!0} + +!0 = !{!"clang version 3.5.0 "} +!1 = !{!"function_entry_count", i64 10} diff --git a/llvm/test/Transforms/Inline/parallel-loop-md-callee.ll b/llvm/test/Transforms/Inline/parallel-loop-md-callee.ll new file mode 100644 index 00000000000..4a87c00a440 --- /dev/null +++ b/llvm/test/Transforms/Inline/parallel-loop-md-callee.ll @@ -0,0 +1,56 @@ +; RUN: opt -S -inline < %s | FileCheck %s +; +; Check that the !llvm.access.group is still present after inlining. 
+; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define void @Body(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p, i32 %i) { +entry: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4, !llvm.access.group !0 + %cmp = icmp eq i32 %0, 0 + %arrayidx2 = getelementptr inbounds i32, i32* %res, i64 %idxprom + %1 = load i32, i32* %arrayidx2, align 4, !llvm.access.group !0 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: + %arrayidx6 = getelementptr inbounds i32, i32* %d, i64 %idxprom + %2 = load i32, i32* %arrayidx6, align 4, !llvm.access.group !0 + %add = add nsw i32 %2, %1 + br label %cond.end + +cond.end: + %cond = phi i32 [ %add, %cond.false ], [ %1, %entry ] + store i32 %cond, i32* %arrayidx2, align 4 + ret void +} + +define void @Test(i32* %res, i32* %c, i32* %d, i32* %p, i32 %n) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 1600 + br i1 %cmp, label %for.body, label %for.end + +for.body: + call void @Body(i32* %res, i32* undef, i32* %d, i32* %p, i32 %i.0), !llvm.access.group !0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond, !llvm.loop !1 + +for.end: + ret void +} + +!0 = distinct !{} ; access group +!1 = distinct !{!1, !{!"llvm.loop.parallel_accesses", !0}} ; LoopID + + +; CHECK-LABEL: @Test +; CHECK: load i32,{{.*}}, !llvm.access.group !0 +; CHECK: load i32,{{.*}}, !llvm.access.group !0 +; CHECK: load i32,{{.*}}, !llvm.access.group !0 +; CHECK: store i32 {{.*}}, !llvm.access.group !0 +; CHECK: br label %for.cond, !llvm.loop !1 diff --git a/llvm/test/Transforms/Inline/parallel-loop-md-merge.ll b/llvm/test/Transforms/Inline/parallel-loop-md-merge.ll new file mode 100644 index 00000000000..a53efb75302 --- /dev/null +++ b/llvm/test/Transforms/Inline/parallel-loop-md-merge.ll @@ -0,0 +1,78 @@ +; RUN: opt -always-inline 
-globalopt -S < %s | FileCheck %s +; +; static void __attribute__((always_inline)) callee(long n, double A[static const restrict n], long i) { +; for (long j = 0; j < n; j += 1) +; A[i * n + j] = 42; +; } +; +; void caller(long n, double A[static const restrict n]) { +; for (long i = 0; i < n; i += 1) +; callee(n, A, i); +; } +; +; Check that the access groups (llvm.access.group) are correctly merged. +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +define internal void @callee(i64 %n, double* noalias nonnull %A, i64 %i) #0 { +entry: + br label %for.cond + +for.cond: + %j.0 = phi i64 [ 0, %entry ], [ %add1, %for.body ] + %cmp = icmp slt i64 %j.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: + %mul = mul nsw i64 %i, %n + %add = add nsw i64 %mul, %j.0 + %arrayidx = getelementptr inbounds double, double* %A, i64 %add + store double 4.200000e+01, double* %arrayidx, align 8, !llvm.access.group !6 + %add1 = add nuw nsw i64 %j.0, 1 + br label %for.cond, !llvm.loop !7 + +for.end: + ret void +} + +attributes #0 = { alwaysinline } + +!6 = distinct !{} ; access group +!7 = distinct !{!7, !9} ; LoopID +!9 = !{!"llvm.loop.parallel_accesses", !6} + + +define void @caller(i64 %n, double* noalias nonnull %A) { +entry: + br label %for.cond + +for.cond: + %i.0 = phi i64 [ 0, %entry ], [ %add, %for.body ] + %cmp = icmp slt i64 %i.0, %n + br i1 %cmp, label %for.body, label %for.end + +for.body: + call void @callee(i64 %n, double* %A, i64 %i.0), !llvm.access.group !10 + %add = add nuw nsw i64 %i.0, 1 + br label %for.cond, !llvm.loop !11 + +for.end: + ret void +} + +!10 = distinct !{} ; access group +!11 = distinct !{!11, !12} ; LoopID +!12 = !{!"llvm.loop.parallel_accesses", !10} + + +; CHECK: store double 4.200000e+01, {{.*}} !llvm.access.group ![[ACCESS_GROUP_LIST_3:[0-9]+]] +; CHECK: br label %for.cond.i, !llvm.loop ![[LOOP_INNER:[0-9]+]] +; CHECK: br label %for.cond, !llvm.loop ![[LOOP_OUTER:[0-9]+]] + +; CHECK: ![[ACCESS_GROUP_LIST_3]] = 
!{![[ACCESS_GROUP_INNER:[0-9]+]], ![[ACCESS_GROUP_OUTER:[0-9]+]]} +; CHECK: ![[ACCESS_GROUP_INNER]] = distinct !{} +; CHECK: ![[ACCESS_GROUP_OUTER]] = distinct !{} +; CHECK: ![[LOOP_INNER]] = distinct !{![[LOOP_INNER]], ![[ACCESSES_INNER:[0-9]+]]} +; CHECK: ![[ACCESSES_INNER]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_INNER]]} +; CHECK: ![[LOOP_OUTER]] = distinct !{![[LOOP_OUTER]], ![[ACCESSES_OUTER:[0-9]+]]} +; CHECK: ![[ACCESSES_OUTER]] = !{!"llvm.loop.parallel_accesses", ![[ACCESS_GROUP_OUTER]]} diff --git a/llvm/test/Transforms/Inline/parallel-loop-md.ll b/llvm/test/Transforms/Inline/parallel-loop-md.ll new file mode 100644 index 00000000000..a55392d6a73 --- /dev/null +++ b/llvm/test/Transforms/Inline/parallel-loop-md.ll @@ -0,0 +1,58 @@ +; RUN: opt -S -inline < %s | FileCheck %s +; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; Function Attrs: norecurse nounwind uwtable +define void @Body(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p, i32 %i) #0 { +entry: + %idxprom = sext i32 %i to i64 + %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom + %0 = load i32, i32* %arrayidx, align 4 + %cmp = icmp eq i32 %0, 0 + %arrayidx2 = getelementptr inbounds i32, i32* %res, i64 %idxprom + %1 = load i32, i32* %arrayidx2, align 4 + br i1 %cmp, label %cond.end, label %cond.false + +cond.false: ; preds = %entry + %arrayidx6 = getelementptr inbounds i32, i32* %d, i64 %idxprom + %2 = load i32, i32* %arrayidx6, align 4 + %add = add nsw i32 %2, %1 + br label %cond.end + +cond.end: ; preds = %entry, %cond.false + %cond = phi i32 [ %add, %cond.false ], [ %1, %entry ] + store i32 %cond, i32* %arrayidx2, align 4 + ret void +} + +; Function Attrs: nounwind uwtable +define void @Test(i32* %res, i32* %c, i32* %d, i32* %p, i32 %n) #1 { +entry: + br label %for.cond + +for.cond: ; preds = %for.body, 
%entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 1600 + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + call void @Body(i32* %res, i32* undef, i32* %d, i32* %p, i32 %i.0), !llvm.access.group !0 + %inc = add nsw i32 %i.0, 1 + br label %for.cond, !llvm.loop !1 + +for.end: ; preds = %for.cond + ret void +} + +; CHECK-LABEL: @Test +; CHECK: load i32,{{.*}}, !llvm.access.group !0 +; CHECK: load i32,{{.*}}, !llvm.access.group !0 +; CHECK: load i32,{{.*}}, !llvm.access.group !0 +; CHECK: store i32{{.*}}, !llvm.access.group !0 +; CHECK: br label %for.cond, !llvm.loop !1 + +attributes #0 = { norecurse nounwind uwtable } + +!0 = distinct !{} +!1 = distinct !{!0, !{!"llvm.loop.parallel_accesses", !0}} diff --git a/llvm/test/Transforms/Inline/partial-inline-act.ll b/llvm/test/Transforms/Inline/partial-inline-act.ll new file mode 100644 index 00000000000..27e71915387 --- /dev/null +++ b/llvm/test/Transforms/Inline/partial-inline-act.ll @@ -0,0 +1,20 @@ +; RUN: opt < %s -partial-inliner -skip-partial-inlining-cost-analysis -disable-output +; This testcase tests the assumption cache + +define internal i32 @inlinedFunc(i1 %cond, i32* align 4 %align.val) { +entry: + br i1 %cond, label %if.then, label %return +if.then: + ; Dummy store to have more than 0 uses + store i32 10, i32* %align.val, align 4 + br label %return +return: ; preds = %entry + ret i32 0 +} + +define internal i32 @dummyCaller(i1 %cond, i32* align 2 %align.val) { +entry: + %val = call i32 @inlinedFunc(i1 %cond, i32* %align.val) + ret i32 %val +} + diff --git a/llvm/test/Transforms/Inline/pr21206.ll b/llvm/test/Transforms/Inline/pr21206.ll new file mode 100644 index 00000000000..fa8f183c904 --- /dev/null +++ b/llvm/test/Transforms/Inline/pr21206.ll @@ -0,0 +1,19 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -S | FileCheck %s + +$c = comdat any +; CHECK: $c = comdat any + +define linkonce_odr void @foo() 
comdat($c) { + ret void +} +; CHECK: define linkonce_odr void @foo() comdat($c) + +define linkonce_odr void @bar() comdat($c) { + ret void +} +; CHECK: define linkonce_odr void @bar() comdat($c) + +define void()* @zed() { + ret void()* @foo +} diff --git a/llvm/test/Transforms/Inline/pr22285.ll b/llvm/test/Transforms/Inline/pr22285.ll new file mode 100644 index 00000000000..d7631554c2c --- /dev/null +++ b/llvm/test/Transforms/Inline/pr22285.ll @@ -0,0 +1,16 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline),globaldce' -S | FileCheck %s + +$f1 = comdat any +; CHECK-NOT: $f1 = comdat any + +define void @f2() { + call void @f1() + ret void +} +; CHECK-LABEL: define void @f2 + +define linkonce_odr void @f1() comdat { + ret void +} +; CHECK-NOT: define linkonce_odr void @f1() comdat diff --git a/llvm/test/Transforms/Inline/pr26698.ll b/llvm/test/Transforms/Inline/pr26698.ll new file mode 100644 index 00000000000..6d5873ff1bc --- /dev/null +++ b/llvm/test/Transforms/Inline/pr26698.ll @@ -0,0 +1,66 @@ +; RUN: opt -S -inline -inline-threshold=100 -inline-cold-callsite-threshold=100 < %s | FileCheck %s +; RUN: opt -S -passes='cgscc(inline)' -inline-threshold=100 -inline-cold-callsite-threshold=100 < %s | FileCheck %s +target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" +target triple = "i686-pc-windows-msvc18.0.0" + +declare void @g(i32) + +define void @f() personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @g(i32 0) + to label %invoke.cont unwind label %cs.bb + +invoke.cont: + ret void + +cs.bb: + %cs = catchswitch within none [label %cp.bb] unwind label %cleanup.bb + +cp.bb: + %cpouter1 = catchpad within %cs [i8* null, i32 0, i8* null] + call void @dtor() #1 [ "funclet"(token %cpouter1) ] + catchret from %cpouter1 to label %invoke.cont + +cleanup.bb: + %cpouter2 = cleanuppad within none [] + call void @g(i32 1) [ "funclet"(token %cpouter2) ] + cleanupret from %cpouter2 unwind to caller +} + +declare i32 
@__CxxFrameHandler3(...) + +; Function Attrs: nounwind +define internal void @dtor() #1 personality i32 (...)* @__CxxFrameHandler3 { +entry: + invoke void @g(i32 2) + to label %invoke.cont unwind label %ehcleanup1 + +invoke.cont: + ret void + +ehcleanup1: + %cpinner1 = cleanuppad within none [] + invoke void @g(i32 3) [ "funclet" (token %cpinner1) ] + to label %done unwind label %ehcleanup2 +done: + unreachable + +ehcleanup2: + %cpinner2 = cleanuppad within %cpinner1 [] + call void @g(i32 4) [ "funclet" (token %cpinner2) ] + cleanupret from %cpinner2 unwind to caller +} + +; CHECK-LABEL: define void @f( + +; CHECK: %[[cs:.*]] = catchswitch within none + +; CHECK: %[[cpouter1:.*]] = catchpad within %[[cs]] + +; CHECK: %[[cpinner1:.*]] = cleanuppad within %[[cpouter1]] + +; CHECK: %[[cpinner2:.*]] = cleanuppad within %[[cpinner1]] +; CHECK-NEXT: call void @g(i32 4) #0 [ "funclet"(token %[[cpinner2]]) ] +; CHECK-NEXT: unreachable + +attributes #1 = { nounwind } diff --git a/llvm/test/Transforms/Inline/pr28298.ll b/llvm/test/Transforms/Inline/pr28298.ll new file mode 100644 index 00000000000..83224106c46 --- /dev/null +++ b/llvm/test/Transforms/Inline/pr28298.ll @@ -0,0 +1,19 @@ +; RUN: opt -S -passes='cgscc(inline)' < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test1() { +entry: + call void @test2() + ret void +} + +define internal void @test2() { +entry: + call void undef() + ret void +} + +; CHECK-LABEL: define void @test1( +; CHECK: call void undef( +; CHECK: ret void diff --git a/llvm/test/Transforms/Inline/pr33637.ll b/llvm/test/Transforms/Inline/pr33637.ll new file mode 100644 index 00000000000..315feca27bd --- /dev/null +++ b/llvm/test/Transforms/Inline/pr33637.ll @@ -0,0 +1,25 @@ +; RUN: opt -inline < %s + +define void @patatino() { +for.cond: + br label %for.body + +for.body: + %tobool = icmp eq i32 5, 0 + %sel = select i1 %tobool, i32 0, i32 2 + br i1 undef, label 
%cleanup1.thread, label %cleanup1 + +cleanup1.thread: + ret void + +cleanup1: + %cleanup.dest2 = phi i32 [ %sel, %for.body ] + %switch = icmp ult i32 %cleanup.dest2, 1 + ret void +} + +define void @main() { +entry: + call void @patatino() + ret void +} diff --git a/llvm/test/Transforms/Inline/prof-update-instr.ll b/llvm/test/Transforms/Inline/prof-update-instr.ll new file mode 100644 index 00000000000..6650165cb90 --- /dev/null +++ b/llvm/test/Transforms/Inline/prof-update-instr.ll @@ -0,0 +1,57 @@ +; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S | FileCheck %s +; Checks if inliner updates VP metadata for indrect call instructions +; with instrumentation based profile. + +@func = global void ()* null +@func2 = global void ()* null + +; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]] +define void @callee(i32 %n) !prof !15 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false, !prof !20 +cond_true: +; f2 is optimized away, thus not updated. 
+ %f2 = load void ()*, void ()** @func2 +; CHECK: call void %f2(), !prof ![[COUNT_IND_CALLEE1:[0-9]*]] + call void %f2(), !prof !19 + ret void +cond_false: + %f = load void ()*, void ()** @func +; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]] + call void %f(), !prof !18 + ret void +} + +; CHECK: define void @caller() +define void @caller() !prof !21 { +; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]] + call void @callee(i32 15) + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"InstrProf"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 2000} +!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} +!19 = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40} +!20 = !{!"branch_weights", i32 1000, i32 1000} +!21 = !{!"function_entry_count", i64 400} +attributes #0 = { alwaysinline } +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} +; CHECK: ![[COUNT_IND_CALLEE1]] = !{!"VP", i32 0, i64 200, i64 111, i64 100, i64 222, i64 60, i64 333, i64 40} +; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12} +; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8} diff --git a/llvm/test/Transforms/Inline/prof-update-sample.ll b/llvm/test/Transforms/Inline/prof-update-sample.ll new file mode 100644 index 00000000000..4a4471e8e17 --- /dev/null +++ b/llvm/test/Transforms/Inline/prof-update-sample.ll @@ -0,0 
+1,60 @@ +; RUN: opt < %s -inline -S | FileCheck %s +; Checks if inliner updates branch_weights annotation for call instructions. + +declare void @ext(); +declare void @ext1(); +@func = global void ()* null + +; CHECK: define void @callee(i32 %n) !prof ![[ENTRY_COUNT:[0-9]*]] +define void @callee(i32 %n) !prof !15 { + %cond = icmp sle i32 %n, 10 + br i1 %cond, label %cond_true, label %cond_false +cond_true: +; ext1 is optimized away, thus not updated. +; CHECK: call void @ext1(), !prof ![[COUNT_CALLEE1:[0-9]*]] + call void @ext1(), !prof !16 + ret void +cond_false: +; ext is cloned and updated. +; CHECK: call void @ext(), !prof ![[COUNT_CALLEE:[0-9]*]] + call void @ext(), !prof !16 + %f = load void ()*, void ()** @func +; CHECK: call void %f(), !prof ![[COUNT_IND_CALLEE:[0-9]*]] + call void %f(), !prof !18 + ret void +} + +; CHECK: define void @caller() +define void @caller() { +; CHECK: call void @ext(), !prof ![[COUNT_CALLER:[0-9]*]] +; CHECK: call void %f.i(), !prof ![[COUNT_IND_CALLER:[0-9]*]] + call void @callee(i32 15), !prof !17 + ret void +} + +!llvm.module.flags = !{!1} +!1 = !{i32 1, !"ProfileSummary", !2} +!2 = !{!3, !4, !5, !6, !7, !8, !9, !10} +!3 = !{!"ProfileFormat", !"SampleProfile"} +!4 = !{!"TotalCount", i64 10000} +!5 = !{!"MaxCount", i64 10} +!6 = !{!"MaxInternalCount", i64 1} +!7 = !{!"MaxFunctionCount", i64 2000} +!8 = !{!"NumCounts", i64 2} +!9 = !{!"NumFunctions", i64 2} +!10 = !{!"DetailedSummary", !11} +!11 = !{!12, !13, !14} +!12 = !{i32 10000, i64 100, i32 1} +!13 = !{i32 999000, i64 100, i32 1} +!14 = !{i32 999999, i64 1, i32 2} +!15 = !{!"function_entry_count", i64 1000} +!16 = !{!"branch_weights", i64 2000} +!17 = !{!"branch_weights", i64 400} +!18 = !{!"VP", i32 0, i64 140, i64 111, i64 80, i64 222, i64 40, i64 333, i64 20} +attributes #0 = { alwaysinline } +; CHECK: ![[ENTRY_COUNT]] = !{!"function_entry_count", i64 600} +; CHECK: ![[COUNT_CALLEE1]] = !{!"branch_weights", i64 2000} +; CHECK: ![[COUNT_CALLEE]] = !{!"branch_weights", 
i64 1200} +; CHECK: ![[COUNT_IND_CALLEE]] = !{!"VP", i32 0, i64 84, i64 111, i64 48, i64 222, i64 24, i64 333, i64 12} +; CHECK: ![[COUNT_CALLER]] = !{!"branch_weights", i64 800} +; CHECK: ![[COUNT_IND_CALLER]] = !{!"VP", i32 0, i64 56, i64 111, i64 32, i64 222, i64 16, i64 333, i64 8} diff --git a/llvm/test/Transforms/Inline/profile-meta.ll b/llvm/test/Transforms/Inline/profile-meta.ll new file mode 100644 index 00000000000..3c967d6618f --- /dev/null +++ b/llvm/test/Transforms/Inline/profile-meta.ll @@ -0,0 +1,45 @@ +; RUN: opt < %s -S -inline | FileCheck %s +; RUN: opt < %s -S -passes='cgscc(inline)' | FileCheck %s + +; Make sure that profile and unpredictable metadata is preserved when cloning a select. + +define i32 @callee_with_select(i1 %c, i32 %a, i32 %b) { + %sel = select i1 %c, i32 %a, i32 %b, !prof !0, !unpredictable !1 + ret i32 %sel +} + +define i32 @caller_of_select(i1 %C, i32 %A, i32 %B) { + %ret = call i32 @callee_with_select(i1 %C, i32 %A, i32 %B) + ret i32 %ret + +; CHECK-LABEL: @caller_of_select( +; CHECK-NEXT: [[SEL:%.*]] = select i1 %C, i32 %A, i32 %B, !prof !0, !unpredictable !1 +; CHECK-NEXT: ret i32 [[SEL]] +} + +; Make sure that profile and unpredictable metadata is preserved when cloning a branch. 
+ +define i32 @callee_with_branch(i1 %c) { + br i1 %c, label %if, label %else, !unpredictable !1, !prof !2 +if: + ret i32 1 +else: + ret i32 2 +} + +define i32 @caller_of_branch(i1 %C) { + %ret = call i32 @callee_with_branch(i1 %C) + ret i32 %ret + +; CHECK-LABEL: @caller_of_branch( +; CHECK-NEXT: br i1 %C, label %{{.*}}, label %{{.*}}, !prof !2, !unpredictable !1 +} + +!0 = !{!"branch_weights", i32 1, i32 2} +!1 = !{} +!2 = !{!"branch_weights", i32 3, i32 4} + +; CHECK: !0 = !{!"branch_weights", i32 1, i32 2} +; CHECK: !1 = !{} +; CHECK: !2 = !{!"branch_weights", i32 3, i32 4} + diff --git a/llvm/test/Transforms/Inline/ptr-diff.ll b/llvm/test/Transforms/Inline/ptr-diff.ll new file mode 100644 index 00000000000..5ad3994a503 --- /dev/null +++ b/llvm/test/Transforms/Inline/ptr-diff.ll @@ -0,0 +1,157 @@ +; RUN: opt -inline < %s -S -o - -inline-threshold=10 | FileCheck %s + +target datalayout = "p:32:32-p1:64:64-p2:16:16-n16:32:64" + +define i32 @outer1() { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call i32 +; CHECK: ret i32 + + %ptr = alloca i32 + %ptr1 = getelementptr inbounds i32, i32* %ptr, i32 0 + %ptr2 = getelementptr inbounds i32, i32* %ptr, i32 42 + %result = call i32 @inner1(i32* %ptr1, i32* %ptr2) + ret i32 %result +} + +define i32 @inner1(i32* %begin, i32* %end) { + call void @extern() + %begin.i = ptrtoint i32* %begin to i32 + %end.i = ptrtoint i32* %end to i32 + %distance = sub i32 %end.i, %begin.i + %icmp = icmp sle i32 %distance, 42 + br i1 %icmp, label %then, label %else + +then: + ret i32 3 + +else: + %t = load i32, i32* %begin + ret i32 %t +} + +define i32 @outer1_as1(i32 addrspace(1)* %ptr) { +; CHECK-LABEL: @outer1_as1( +; CHECK-NOT: call +; CHECK: ret i32 + %ptr1 = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i32 0 + %ptr2 = getelementptr inbounds i32, i32 addrspace(1)* %ptr, i32 42 + %result = call i32 @inner1_as1(i32 addrspace(1)* %ptr1, i32 addrspace(1)* %ptr2) + ret i32 %result +} + +; Make sure that the address space's larger size makes 
the ptrtoints +; not no-ops preventing inlining +define i32 @inner1_as1(i32 addrspace(1)* %begin, i32 addrspace(1)* %end) { + %begin.i = ptrtoint i32 addrspace(1)* %begin to i32 + %end.i = ptrtoint i32 addrspace(1)* %end to i32 + %distance = sub i32 %end.i, %begin.i + %icmp = icmp sle i32 %distance, 42 + br i1 %icmp, label %then, label %else + +then: + ret i32 3 + +else: + %t = load i32, i32 addrspace(1)* %begin + ret i32 %t +} + +define i32 @outer2(i32* %ptr) { +; Test that an inbounds GEP disables this -- it isn't safe in general as +; wrapping changes the behavior of lessthan and greaterthan comparisons. +; CHECK-LABEL: @outer2( +; CHECK: call i32 @inner2 +; CHECK: ret i32 + + %ptr1 = getelementptr i32, i32* %ptr, i32 0 + %ptr2 = getelementptr i32, i32* %ptr, i32 42 + %result = call i32 @inner2(i32* %ptr1, i32* %ptr2) + ret i32 %result +} + +define i32 @inner2(i32* %begin, i32* %end) { + call void @extern() + %begin.i = ptrtoint i32* %begin to i32 + %end.i = ptrtoint i32* %end to i32 + %distance = sub i32 %end.i, %begin.i + %icmp = icmp sle i32 %distance, 42 + br i1 %icmp, label %then, label %else + +then: + ret i32 3 + +else: + %t = load i32, i32* %begin + ret i32 %t +} + +define i32 @outer3(i16* addrspace(1)* %ptr) { +; CHECK-LABEL: @outer3( +; CHECK-NOT: call i32 +; CHECK: ret i32 3 +; CHECK-LABEL: @inner3( + %result = call i32 @inner3(i16* addrspace(1)* %ptr) + ret i32 %result +} + +define i32 @inner3(i16* addrspace(1)* %ptr) { + call void @extern() + %ptr.i = ptrtoint i16* addrspace(1)* %ptr to i64 + %distance = sub i64 %ptr.i, %ptr.i + %icmp = icmp eq i64 %distance, 0 + br i1 %icmp, label %then, label %else + +then: + ret i32 3 + +else: + ret i32 5 +} + + +; The inttoptrs are free since it is a smaller integer to a larger +; pointer size +define i32 @inttoptr_free_cost(i32 %a, i32 %b, i32 %c) { + call void @extern() + %p1 = inttoptr i32 %a to i32 addrspace(1)* + %p2 = inttoptr i32 %b to i32 addrspace(1)* + %p3 = inttoptr i32 %c to i32 addrspace(1)* + %t1 = 
load i32, i32 addrspace(1)* %p1 + %t2 = load i32, i32 addrspace(1)* %p2 + %t3 = load i32, i32 addrspace(1)* %p3 + %s = add i32 %t1, %t2 + %s1 = add i32 %s, %t3 + ret i32 %s1 +} + +define i32 @inttoptr_free_cost_user(i32 %begin, i32 %end) { +; CHECK-LABEL: @inttoptr_free_cost_user( +; CHECK-NOT: call i32 + %x = call i32 @inttoptr_free_cost(i32 %begin, i32 %end, i32 9) + ret i32 %x +} + +; The inttoptrs have a cost since it is a larger integer to a smaller +; pointer size +define i32 @inttoptr_cost_smaller_ptr(i32 %a, i32 %b, i32 %c) { + call void @extern() + %p1 = inttoptr i32 %a to i32 addrspace(2)* + %p2 = inttoptr i32 %b to i32 addrspace(2)* + %p3 = inttoptr i32 %c to i32 addrspace(2)* + %t1 = load i32, i32 addrspace(2)* %p1 + %t2 = load i32, i32 addrspace(2)* %p2 + %t3 = load i32, i32 addrspace(2)* %p3 + %s = add i32 %t1, %t2 + %s1 = add i32 %s, %t3 + ret i32 %s1 +} + +define i32 @inttoptr_cost_smaller_ptr_user(i32 %begin, i32 %end) { +; CHECK-LABEL: @inttoptr_cost_smaller_ptr_user( +; CHECK: call i32 + %x = call i32 @inttoptr_cost_smaller_ptr(i32 %begin, i32 %end, i32 9) + ret i32 %x +} + +declare void @extern()
\ No newline at end of file diff --git a/llvm/test/Transforms/Inline/recursive.ll b/llvm/test/Transforms/Inline/recursive.ll new file mode 100644 index 00000000000..cbdf86b093a --- /dev/null +++ b/llvm/test/Transforms/Inline/recursive.ll @@ -0,0 +1,73 @@ +; Inlining in the presence of recursion presents special challenges that we +; test here. +; +; RUN: opt -inline -S < %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S < %s | FileCheck %s + +define i32 @large_stack_callee(i32 %param) { +; CHECK-LABEL: define i32 @large_stack_callee( +entry: + %yyy = alloca [100000 x i8] + %r = bitcast [100000 x i8]* %yyy to i8* + call void @bar(i8* %r) + ret i32 4 +} + +; Test a recursive function which calls another function with a large stack. In +; addition to not inlining the recursive call, we should also not inline the +; large stack allocation into a potentially recursive frame. +define i32 @large_stack_recursive_caller(i32 %param) { +; CHECK-LABEL: define i32 @large_stack_recursive_caller( +entry: +; CHECK-NEXT: entry: +; CHECK-NOT: alloca + %t = call i32 @foo(i32 %param) + %cmp = icmp eq i32 %t, -1 + br i1 %cmp, label %exit, label %cont + +cont: + %r = call i32 @large_stack_recursive_caller(i32 %t) +; CHECK: call i32 @large_stack_recursive_caller + %f = call i32 @large_stack_callee(i32 %r) +; CHECK: call i32 @large_stack_callee + br label %exit + +exit: + ret i32 4 +} + +declare void @bar(i8* %in) + +declare i32 @foo(i32 %param) + +; Check that when inlining a non-recursive path into a function's own body that +; we get the re-mapping of instructions correct. 
+define i32 @test_recursive_inlining_remapping(i1 %init, i8* %addr) { +; CHECK-LABEL: define i32 @test_recursive_inlining_remapping( +bb: + %n = alloca i32 + br i1 %init, label %store, label %load +; CHECK-NOT: alloca +; +; CHECK: %[[N:.*]] = alloca i32 +; CHECK-NEXT: br i1 %init, + +store: + store i32 0, i32* %n + %cast = bitcast i32* %n to i8* + %v = call i32 @test_recursive_inlining_remapping(i1 false, i8* %cast) + ret i32 %v +; CHECK-NOT: call +; +; CHECK: store i32 0, i32* %[[N]] +; CHECK-NEXT: %[[CAST:.*]] = bitcast i32* %[[N]] to i8* +; CHECK-NEXT: %[[INLINED_LOAD:.*]] = load i32, i32* %[[N]] +; CHECK-NEXT: ret i32 %[[INLINED_LOAD]] +; +; CHECK-NOT: call + +load: + %castback = bitcast i8* %addr to i32* + %n.load = load i32, i32* %castback + ret i32 %n.load +} diff --git a/llvm/test/Transforms/Inline/redundant-loads.ll b/llvm/test/Transforms/Inline/redundant-loads.ll new file mode 100644 index 00000000000..176f605fc73 --- /dev/null +++ b/llvm/test/Transforms/Inline/redundant-loads.ll @@ -0,0 +1,204 @@ +; RUN: opt -inline < %s -S -o - -inline-threshold=3 | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @pad() readnone + +define void @outer1(i32* %a) { +; CHECK-LABEL: @outer1( +; CHECK-NOT: call void @inner1 + %b = alloca i32 + call void @inner1(i32* %a, i32* %b) + ret void +} + +define void @inner1(i32* %a, i32* %b) { + %1 = load i32, i32* %a + store i32 %1, i32 * %b ; This store does not clobber the first load. + %2 = load i32, i32* %a + call void @pad() + %3 = load i32, i32* %a + ret void +} + + +define void @outer2(i32* %a, i32* %b) { +; CHECK-LABEL: @outer2( +; CHECK: call void @inner2 + call void @inner2(i32* %a, i32* %b) + ret void +} + +define void @inner2(i32* %a, i32* %b) { + %1 = load i32, i32* %a + store i32 %1, i32 * %b ; This store clobbers the first load. 
+ %2 = load i32, i32* %a + call void @pad() + ret void +} + + +define void @outer3(i32* %a) { +; CHECK-LABEL: @outer3( +; CHECK: call void @inner3 + call void @inner3(i32* %a) + ret void +} + +declare void @ext() + +define void @inner3(i32* %a) { + %1 = load i32, i32* %a + call void @ext() ; This call clobbers the first load. + %2 = load i32, i32* %a + ret void +} + + +define void @outer4(i32* %a, i32* %b, i32* %c) { +; CHECK-LABEL: @outer4( +; CHECK-NOT: call void @inner4 + call void @inner4(i32* %a, i32* %b, i1 false) + ret void +} + +define void @inner4(i32* %a, i32* %b, i1 %pred) { + %1 = load i32, i32* %a + br i1 %pred, label %cond_true, label %cond_false + +cond_true: + store i32 %1, i32 * %b ; This store does not clobber the first load. + br label %cond_false + +cond_false: + %2 = load i32, i32* %a + call void @pad() + %3 = load i32, i32* %a + %4 = load i32, i32* %a + ret void +} + + +define void @outer5(i32* %a, double %b) { +; CHECK-LABEL: @outer5( +; CHECK-NOT: call void @inner5 + call void @inner5(i32* %a, double %b) + ret void +} + +declare double @llvm.fabs.f64(double) nounwind readnone + +define void @inner5(i32* %a, double %b) { + %1 = load i32, i32* %a + %2 = call double @llvm.fabs.f64(double %b) ; This intrinsic does not clobber the first load. + %3 = load i32, i32* %a + call void @pad() + ret void +} + +define void @outer6(i32* %a, i8* %ptr) { +; CHECK-LABEL: @outer6( +; CHECK-NOT: call void @inner6 + call void @inner6(i32* %a, i8* %ptr) + ret void +} + +declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) argmemonly nounwind + +define void @inner6(i32* %a, i8* %ptr) { + %1 = load i32, i32* %a + call void @llvm.lifetime.start.p0i8(i64 32, i8* %ptr) ; This intrinsic does not clobber the first load. 
+ %2 = load i32, i32* %a + call void @pad() + %3 = load i32, i32* %a + ret void +} + +define void @outer7(i32* %a) { +; CHECK-LABEL: @outer7( +; CHECK-NOT: call void @inner7 + call void @inner7(i32* %a) + ret void +} + +declare void @ext2() readnone + +define void @inner7(i32* %a) { + %1 = load i32, i32* %a + call void @ext2() ; This call does not clobber the first load. + %2 = load i32, i32* %a + ret void +} + + +define void @outer8(i32* %a) { +; CHECK-LABEL: @outer8( +; CHECK-NOT: call void @inner8 + call void @inner8(i32* %a, void ()* @ext2) + ret void +} + +define void @inner8(i32* %a, void ()* %f) { + %1 = load i32, i32* %a + call void %f() ; This indirect call does not clobber the first load. + %2 = load i32, i32* %a + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + ret void +} + + +define void @outer9(i32* %a) { +; CHECK-LABEL: @outer9( +; CHECK: call void @inner9 + call void @inner9(i32* %a, void ()* @ext) + ret void +} + +define void @inner9(i32* %a, void ()* %f) { + %1 = load i32, i32* %a + call void %f() ; This indirect call clobbers the first load. + %2 = load i32, i32* %a + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + call void @pad() + ret void +} + + +define void @outer10(i32* %a) { +; CHECK-LABEL: @outer10( +; CHECK: call void @inner10 + %b = alloca i32 + call void @inner10(i32* %a, i32* %b) + ret void +} + +define void @inner10(i32* %a, i32* %b) { + %1 = load i32, i32* %a + store i32 %1, i32 * %b + %2 = load volatile i32, i32* %a ; volatile load should be kept. + call void @pad() + %3 = load volatile i32, i32* %a ; Same as the above. 
+ ret void +} diff --git a/llvm/test/Transforms/Inline/store-sroa.ll b/llvm/test/Transforms/Inline/store-sroa.ll new file mode 100644 index 00000000000..6b1ca964d33 --- /dev/null +++ b/llvm/test/Transforms/Inline/store-sroa.ll @@ -0,0 +1,22 @@ +; RUN: opt -S -O2 -inline-threshold=1 < %s | FileCheck %s + +%class.A = type { i32 } + +define void @_Z3barP1A(%class.A* %a) #0 { +entry: + %a1 = getelementptr inbounds %class.A, %class.A* %a, i64 0, i32 0 + %0 = load i32, i32* %a1, align 4 + %add = add nsw i32 %0, 10 + store i32 %add, i32* %a1, align 4 + ret void +} + +define void @_Z3foov() #0 { +; CHECK-LABEL: @_Z3foov( +; CHECK-NOT: call void @_Z3barP1A +; CHECK: ret +entry: + %a = alloca %class.A, align 4 + call void @_Z3barP1A(%class.A* %a) + ret void +} diff --git a/llvm/test/Transforms/Inline/switch.ll b/llvm/test/Transforms/Inline/switch.ll new file mode 100644 index 00000000000..83f01926982 --- /dev/null +++ b/llvm/test/Transforms/Inline/switch.ll @@ -0,0 +1,61 @@ +; RUN: opt < %s -inline -inline-threshold=20 -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=20 -S | FileCheck %s + +define i32 @callee(i32 %a) { + switch i32 %a, label %sw.default [ + i32 0, label %sw.bb0 + i32 1, label %sw.bb1 + i32 2, label %sw.bb2 + i32 3, label %sw.bb3 + i32 4, label %sw.bb4 + i32 5, label %sw.bb5 + i32 6, label %sw.bb6 + i32 7, label %sw.bb7 + i32 8, label %sw.bb8 + i32 9, label %sw.bb9 + ] + +sw.default: + br label %return + +sw.bb0: + br label %return + +sw.bb1: + br label %return + +sw.bb2: + br label %return + +sw.bb3: + br label %return + +sw.bb4: + br label %return + +sw.bb5: + br label %return + +sw.bb6: + br label %return + +sw.bb7: + br label %return + +sw.bb8: + br label %return + +sw.bb9: + br label %return + +return: + ret i32 42 +} + +define i32 @caller(i32 %a) { +; CHECK-LABEL: @caller( +; CHECK: call i32 @callee( + + %result = call i32 @callee(i32 %a) + ret i32 %result +} diff --git a/llvm/test/Transforms/Inline/vector-bonus.ll 
b/llvm/test/Transforms/Inline/vector-bonus.ll new file mode 100644 index 00000000000..567ff02e136 --- /dev/null +++ b/llvm/test/Transforms/Inline/vector-bonus.ll @@ -0,0 +1,38 @@ +; RUN: opt < %s -inline -inline-threshold=35 -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=35 -S | FileCheck %s + +define i32 @bar(<4 x i32> %v, i32 %i) #0 { +entry: + %cmp = icmp sgt i32 %i, 4 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %mul1 = mul nsw i32 %i, %i + br label %return + +if.else: ; preds = %entry + %add1 = add nsw i32 %i, %i + %add2 = add nsw i32 %i, %i + %add3 = add nsw i32 %i, %i + %add4 = add nsw i32 %i, %i + %add5 = add nsw i32 %i, %i + %add6 = add nsw i32 %i, %i + %vecext = extractelement <4 x i32> %v, i32 0 + %vecext7 = extractelement <4 x i32> %v, i32 1 + %add7 = add nsw i32 %vecext, %vecext7 + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %mul1, %if.then ], [ %add7, %if.else ] + ret i32 %retval.0 +} + +define i32 @foo(<4 x i32> %v, i32 %a) #1 { +; CHECK-LABEL: @foo( +; CHECK-NOT: call i32 @bar +; CHECK: ret +entry: + %call = call i32 @bar(<4 x i32> %v, i32 %a) + ret i32 %call +} + diff --git a/llvm/test/Transforms/Inline/vector-no-bonus.ll b/llvm/test/Transforms/Inline/vector-no-bonus.ll new file mode 100644 index 00000000000..d20e1ae7fa2 --- /dev/null +++ b/llvm/test/Transforms/Inline/vector-no-bonus.ll @@ -0,0 +1,47 @@ +; The code in this test is very similar to vector-bonus.ll except for +; the fact that the call to bar is cold thereby preventing the application of +; the vector bonus. 
+; RUN: opt < %s -inline -inline-threshold=35 -S | FileCheck %s +; RUN: opt < %s -passes='cgscc(inline)' -inline-threshold=35 -S | FileCheck %s + +define i32 @bar(<4 x i32> %v, i32 %i) #0 { +entry: + %cmp = icmp sgt i32 %i, 4 + br i1 %cmp, label %if.then, label %if.else + +if.then: ; preds = %entry + %mul1 = mul nsw i32 %i, %i + br label %return + +if.else: ; preds = %entry + %add1 = add nsw i32 %i, %i + %add2 = add nsw i32 %i, %i + %add3 = add nsw i32 %i, %i + %add4 = add nsw i32 %i, %i + %add5 = add nsw i32 %i, %i + %add6 = add nsw i32 %i, %i + %vecext = extractelement <4 x i32> %v, i32 0 + %vecext7 = extractelement <4 x i32> %v, i32 1 + %add7 = add nsw i32 %vecext, %vecext7 + br label %return + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %mul1, %if.then ], [ %add7, %if.else ] + ret i32 %retval.0 +} + +define i32 @foo(<4 x i32> %v, i32 %a) #1 { +; CHECK-LABEL: @foo( +; CHECK-NOT: call i32 @bar +; CHECK: ret +entry: + %cmp = icmp eq i32 %a, 0 + br i1 %cmp, label %callbb, label %ret +callbb: + %call = call i32 @bar(<4 x i32> %v, i32 %a) + br label %ret +ret: + %call1 = phi i32 [%call, %callbb], [0, %entry] + ret i32 %call1 +} + diff --git a/llvm/test/Transforms/Inline/zero-cost.ll b/llvm/test/Transforms/Inline/zero-cost.ll new file mode 100644 index 00000000000..6f5348ff395 --- /dev/null +++ b/llvm/test/Transforms/Inline/zero-cost.ll @@ -0,0 +1,18 @@ +; RUN: opt -inline -S %s | FileCheck %s +; RUN: opt -passes='cgscc(inline)' -S %s | FileCheck %s + +define void @f() { +entry: + tail call void @g() + unreachable + +; CHECK-LABEL: @f +; CHECK-NOT: call +; CHECK: unreachable +} + +define void @g() { +entry: + unreachable +} + |

