diff options
author | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-08-27 02:49:22 +0000 |
---|---|---|
committer | timshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4> | 2013-08-27 02:49:22 +0000 |
commit | 24ef3585c8da774f02d9163d1603c5901ed9455a (patch) | |
tree | 2152998d3b362b24cc7f901c04a1249d8fe40c68 /libstdc++-v3/include/bits/regex_compiler.h | |
parent | 2517803207400a68ebb8e234e2a80075d24fa654 (diff) | |
download | ppe42-gcc-24ef3585c8da774f02d9163d1603c5901ed9455a.tar.gz ppe42-gcc-24ef3585c8da774f02d9163d1603c5901ed9455a.zip |
2013-08-26 Tim Shen <timshen91@gmail.com>
* include/Makefile.am: Add regex_scanner.{h,tcc}.
* include/Makefile.in: Regenerate.
* include/bits/regex.h (match_search): Handle the `__first == __last`
situation correctly.
* include/bits/regex_compiler.h: Move _Scanner...
* include/bits/regex_scanner.h: ...to here. New.
* include/bits/regex_compiler.tcc: Move _Scanner...
* include/bits/regex_scanner.tcc: ...to here, too. New.
* include/bits/regex_executor.tcc: Use value instead of reference for
submatch.
* include/std/regex: Add regex_scanner.h
* testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc: New.
* testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc: New.
* testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc: New.
* testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc: New.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@202015 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libstdc++-v3/include/bits/regex_compiler.h')
-rw-r--r-- | libstdc++-v3/include/bits/regex_compiler.h | 295 |
1 files changed, 103 insertions, 192 deletions
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index 4ab36d28d2b..1d588b91df8 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -39,197 +39,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION * @{ */ - /// Matches a character range (bracket expression) template<typename _CharT, typename _TraitsT> - struct _BracketMatcher - { - typedef typename _TraitsT::char_class_type _CharClassT; - typedef typename _TraitsT::string_type _StringT; - typedef regex_constants::syntax_option_type _FlagT; - - explicit - _BracketMatcher(bool __is_non_matching, - const _TraitsT& __t, - _FlagT __flags) - : _M_is_non_matching(__is_non_matching), _M_traits(__t), - _M_flags(__flags), _M_class_set(0) - { } - - bool - operator()(_CharT) const; - - void - _M_add_char(_CharT __c) - { - if (_M_flags & regex_constants::collate) - if (_M_is_icase()) - _M_char_set.push_back(_M_traits.translate_nocase(__c)); - else - _M_char_set.push_back(_M_traits.translate(__c)); - else - _M_char_set.push_back(__c); - } - - void - _M_add_collating_element(const _StringT& __s) - { - auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end()); - if (__st.empty()) - __throw_regex_error(regex_constants::error_collate); - // TODO: digraph - _M_char_set.push_back(__st[0]); - } - - void - _M_add_equivalence_class(const _StringT& __s) - { - _M_add_character_class( - _M_traits.transform_primary(&*__s.begin(), &*__s.end())); - } - - void - _M_add_character_class(const _StringT& __s) - { - auto __st = _M_traits. - lookup_classname(&*__s.begin(), &*__s.end(), _M_is_icase()); - if (__st == 0) - __throw_regex_error(regex_constants::error_ctype); - _M_class_set |= __st; - } - - void - _M_make_range(_CharT __l, _CharT __r) - { _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); } - - bool - _M_is_icase() const - { return _M_flags & regex_constants::icase; } - - _StringT - _M_get_str(_CharT __c) const - { - auto __s = _StringT(1, - _M_is_icase() - ? _M_traits.translate_nocase(__c) - : _M_traits.translate(__c)); - return _M_traits.transform(__s.begin(), __s.end()); - } - - _TraitsT _M_traits; - _FlagT _M_flags; - bool _M_is_non_matching; - std::vector<_CharT> _M_char_set; - std::vector<pair<_StringT, _StringT>> _M_range_set; - _CharClassT _M_class_set; - }; - - /** - * @brief struct _Scanner. Scans an input range for regex tokens. - * - * The %_Scanner class interprets the regular expression pattern in - * the input range passed to its constructor as a sequence of parse - * tokens passed to the regular expression compiler. The sequence - * of tokens provided depends on the flag settings passed to the - * constructor: different regular expression grammars will interpret - * the same input pattern in syntactically different ways. - */ - template<typename _InputIter> - class _Scanner - { - public: - typedef unsigned int _StateT; - typedef typename std::iterator_traits<_InputIter>::value_type _CharT; - typedef std::basic_string<_CharT> _StringT; - typedef regex_constants::syntax_option_type _FlagT; - typedef const std::ctype<_CharT> _CtypeT; - - /// Token types returned from the scanner. - enum _TokenT - { - _S_token_anychar, - _S_token_backref, - _S_token_bracket_begin, - _S_token_bracket_inverse_begin, - _S_token_bracket_end, - _S_token_char_class_name, - _S_token_closure0, - _S_token_closure1, - _S_token_collelem_multi, - _S_token_collelem_single, - _S_token_collsymbol, - _S_token_comma, - _S_token_dash, - _S_token_dup_count, - _S_token_eof, - _S_token_equiv_class_name, - _S_token_interval_begin, - _S_token_interval_end, - _S_token_line_begin, - _S_token_line_end, - _S_token_opt, - _S_token_or, - _S_token_ord_char, - _S_token_subexpr_begin, - _S_token_subexpr_end, - _S_token_word_begin, - _S_token_word_end, - _S_token_unknown - }; - - _Scanner(_InputIter __begin, _InputIter __end, - _FlagT __flags, std::locale __loc) - : _M_current(__begin) , _M_end(__end) , _M_flags(__flags), - _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0) - { _M_advance(); } - - void - _M_advance(); - - _TokenT - _M_token() const - { return _M_curToken; } - - const _StringT& - _M_value() const - { return _M_curValue; } - -#ifdef _GLIBCXX_DEBUG - std::ostream& - _M_print(std::ostream&); -#endif - - private: - void - _M_eat_escape(); - - void - _M_scan_in_brace(); - - void - _M_scan_in_bracket(); - - void - _M_eat_charclass(); - - void - _M_eat_equivclass(); - - void - _M_eat_collsymbol(); - - static constexpr _StateT _S_state_in_brace = 1 << 0; - static constexpr _StateT _S_state_in_bracket = 1 << 1; - _InputIter _M_current; - _InputIter _M_end; - _FlagT _M_flags; - _CtypeT& _M_ctype; - _TokenT _M_curToken; - _StringT _M_curValue; - _StateT _M_state; - }; + struct _BracketMatcher; /// Builds an NFA from an input iterator interval. - template<typename _InputIter, typename _CharT, typename _TraitsT> + template<typename _FwdIter, typename _CharT, typename _TraitsT> class _Compiler { public: @@ -237,7 +51,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION typedef _NFA<_CharT, _TraitsT> _RegexT; typedef regex_constants::syntax_option_type _FlagT; - _Compiler(_InputIter __b, _InputIter __e, + _Compiler(_FwdIter __b, _FwdIter __e, const _TraitsT& __traits, _FlagT __flags); std::shared_ptr<_RegexT> @@ -245,7 +59,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION { return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); } private: - typedef _Scanner<_InputIter> _ScannerT; + typedef _Scanner<_FwdIter> _ScannerT; typedef typename _ScannerT::_TokenT _TokenT; typedef _StateSeq<_CharT, _TraitsT> _StateSeqT; typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT; @@ -276,7 +90,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION bool _M_bracket_expression(); - bool + void _M_bracket_list(_BMatcherT& __matcher); bool @@ -303,14 +117,111 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION int _M_cur_int_value(int __radix); + bool + _M_try_char(); + + _CharT + _M_get_char(); + const _TraitsT& _M_traits; _ScannerT _M_scanner; - _StringT _M_cur_value; + _StringT _M_value; _RegexT _M_state_store; _StackT _M_stack; _FlagT _M_flags; }; + /// Matches a character range (bracket expression) + template<typename _CharT, typename _TraitsT> + struct _BracketMatcher + { + typedef typename _TraitsT::char_class_type _CharClassT; + typedef typename _TraitsT::string_type _StringT; + typedef regex_constants::syntax_option_type _FlagT; + + explicit + _BracketMatcher(bool __is_non_matching, + const _TraitsT& __t, + _FlagT __flags) + : _M_is_non_matching(__is_non_matching), _M_traits(__t), + _M_flags(__flags), _M_class_set(0) + { } + + bool + operator()(_CharT) const; + + void + _M_add_char(_CharT __c) + { _M_char_set.push_back(_M_translate(__c)); } + + void + _M_add_collating_element(const _StringT& __s) + { + auto __st = _M_traits.lookup_collatename(__s.data(), + __s.data() + __s.size()); + if (__st.empty()) + __throw_regex_error(regex_constants::error_collate); + // TODO: digraph + _M_char_set.push_back(__st[0]); + } + + void + _M_add_equivalence_class(const _StringT& __s) + { + _M_add_character_class( + _M_traits.transform_primary(__s.data(), + __s.data() + __s.size())); + } + + void + _M_add_character_class(const _StringT& __s) + { + auto __st = _M_traits. + lookup_classname(__s.data(), __s.data() + __s.size(), _M_is_icase()); + if (__st == 0) + __throw_regex_error(regex_constants::error_ctype); + _M_class_set |= __st; + } + + void + _M_make_range(_CharT __l, _CharT __r) + { + _M_range_set.push_back( + make_pair(_M_get_str(_M_translate(__l)), + _M_get_str(_M_translate(__r)))); + } + + _CharT + _M_translate(_CharT __c) const + { + if (_M_flags & regex_constants::collate) + if (_M_is_icase()) + return _M_traits.translate_nocase(__c); + else + return _M_traits.translate(__c); + else + return __c; + } + + bool + _M_is_icase() const + { return _M_flags & regex_constants::icase; } + + _StringT + _M_get_str(_CharT __c) const + { + _StringT __s(1, __c); + return _M_traits.transform(__s.begin(), __s.end()); + } + + _TraitsT _M_traits; + _FlagT _M_flags; + bool _M_is_non_matching; + std::vector<_CharT> _M_char_set; + std::vector<pair<_StringT, _StringT>> _M_range_set; + _CharClassT _M_class_set; + }; + //@} regex-detail _GLIBCXX_END_NAMESPACE_VERSION } // namespace __detail |