From 53c19c986606bf484095e57daddcf2e33da79cad Mon Sep 17 00:00:00 2001 From: timshen Date: Sun, 20 Oct 2013 10:07:29 +0000 Subject: 2013-10-20 Tim Shen * include/bits/regex.h: Remove virtual class _Automaton. * include/bits/regex_automaton.h: Likewise. * include/bits/regex.tcc: Adjust comment for policy changing. * include/bits/regex_executor.h: Update comments of complexity. * include/bits/regex_executor.tcc: Adjust executor choosing policy. Now DFS executor is the default one. * testsuite/util/testsuite_regex.h (regex_match_debug, regex_search_debug): Adjust for policy changing. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@203875 138bc75d-0d04-0410-961f-82ee72b054a4 --- libstdc++-v3/include/bits/regex_executor.tcc | 42 ++++++++++++++++++++-------- 1 file changed, 31 insertions(+), 11 deletions(-) (limited to 'libstdc++-v3/include/bits/regex_executor.tcc') diff --git a/libstdc++-v3/include/bits/regex_executor.tcc b/libstdc++-v3/include/bits/regex_executor.tcc index b8755736e60..7c084add031 100644 --- a/libstdc++-v3/include/bits/regex_executor.tcc +++ b/libstdc++-v3/include/bits/regex_executor.tcc @@ -28,6 +28,13 @@ * Do not attempt to use it directly. @headername{regex} */ +// See below __get_executor to get what this is talking about. The default +// value 1 indicated a conservative optimization without giving up worst case +// performance. +#ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT +#define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1 +#endif + namespace std _GLIBCXX_VISIBILITY(default) { namespace __detail @@ -60,7 +67,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_dfs(_StateIdT __i) { auto& __current = this->_M_current; - const auto& __state = _M_nfa[__i]; + const auto& __state = (*_M_nfa)[__i]; bool __ret = false; switch (__state._M_opcode) { @@ -216,7 +223,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { auto __u = _M_stack._M_pop(); _GLIBCXX_DEBUG_ASSERT(_M_covered.count(__u)); - const auto& __state = _M_nfa[__u]; + const auto& __state = (*_M_nfa)[__u]; // Can be implemented using method, but there will be too many // arguments. I would use macro function before C++11, but lambda is @@ -314,7 +321,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION while (!_M_match_stack._M_empty()) { auto __u = _M_match_stack._M_pop(); - const auto& __state = _M_nfa[__u]; + const auto& __state = (*_M_nfa)[__u]; auto& __cu = _M_covered[__u]; if (__state._M_matches(*this->_M_current) && (__next.count(__state._M_next) == 0 @@ -333,7 +340,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_includes_some() { bool __succ = false; - for (auto __u : _M_nfa._M_final_states()) + for (auto __u : _M_nfa->_M_final_states()) if (_M_covered.count(__u)) { __succ = true; @@ -380,8 +387,21 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } enum class _RegexExecutorPolicy : int - { _S_auto, _S_force_dfs }; + { _S_auto, _S_alternate }; + // This function decide which executor to use under given circumstances. + // The _S_auto policy now is the following: if a NFA has no back-references + // and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT quantifiers + // (*, +, ?), the _BFSExecutor will be used, other wise _DFSExecutor. This is + // because _DFSExecutor has a exponential upper bound, but better best-case + // performace. Meanwhile, _BFSExecutor can effectively prevent from + // exponential-long time matching (which must contains many quantifiers), but + // it's slower in average. + // + // For simple regex, _BFSExecutor could be 2 or more times slower than + // _DFSExecutor. + // + // Of course, _BFSExecutor cannot handle back-references. template @@ -396,12 +416,12 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _ExecutorPtr; typedef _DFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _DFSExecutorT; typedef _BFSExecutor<_BiIter, _Alloc, _CharT, _TraitsT> _BFSExecutorT; - auto __p = std::static_pointer_cast<_NFA<_CharT, _TraitsT>> - (__re._M_automaton); - if (__policy == _RegexExecutorPolicy::_S_force_dfs - || (__policy == _RegexExecutorPolicy::_S_auto && __p->_M_has_backref)) - return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags)); - return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags)); + if (!__re._M_automaton->_M_has_backref + && (__policy == _RegexExecutorPolicy::_S_alternate + || __re._M_automaton->_M_quant_count + > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT)) + return _ExecutorPtr(new _BFSExecutorT(__b, __e, __m, __re, __flags)); + return _ExecutorPtr(new _DFSExecutorT(__b, __e, __m, __re, __flags)); } _GLIBCXX_END_NAMESPACE_VERSION -- cgit v1.2.1