summaryrefslogtreecommitdiffstats
path: root/libstdc++-v3/include/bits/regex_compiler.h
diff options
context:
space:
mode:
authortimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2013-08-27 02:49:22 +0000
committertimshen <timshen@138bc75d-0d04-0410-961f-82ee72b054a4>2013-08-27 02:49:22 +0000
commit24ef3585c8da774f02d9163d1603c5901ed9455a (patch)
tree2152998d3b362b24cc7f901c04a1249d8fe40c68 /libstdc++-v3/include/bits/regex_compiler.h
parent2517803207400a68ebb8e234e2a80075d24fa654 (diff)
downloadppe42-gcc-24ef3585c8da774f02d9163d1603c5901ed9455a.tar.gz
ppe42-gcc-24ef3585c8da774f02d9163d1603c5901ed9455a.zip
2013-08-26 Tim Shen <timshen91@gmail.com>
* include/Makefile.am: Add regex_scanner.{h,tcc}. * include/Makefile.in: Regenerate. * include/bits/regex.h (match_search): Handle the `__first == __last` situation correctly. * include/bits/regex_compiler.h: Move _Scanner... * include/bits/regex_scanner.h: ...to here. New. * include/bits/regex_compiler.tcc: Move _Scanner... * include/bits/regex_scanner.tcc: ...to here, too. New. * include/bits/regex_executor.tcc: Use value instead of reference for submatch. * include/std/regex: Add regex_scanner.h * testsuite/28_regex/algorithms/regex_match/awk/cstring_01.cc: New. * testsuite/28_regex/algorithms/regex_match/basic/empty_range.cc: New. * testsuite/28_regex/algorithms/regex_match/ecma/cstring_hex.cc: New. * testsuite/28_regex/algorithms/regex_match/ecma/empty_range.cc: New. * testsuite/28_regex/algorithms/regex_search/ecma/string_01.cc: New. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@202015 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libstdc++-v3/include/bits/regex_compiler.h')
-rw-r--r--libstdc++-v3/include/bits/regex_compiler.h295
1 files changed, 103 insertions, 192 deletions
diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 4ab36d28d2b..1d588b91df8 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -39,197 +39,11 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
* @{
*/
- /// Matches a character range (bracket expression)
template<typename _CharT, typename _TraitsT>
- struct _BracketMatcher
- {
- typedef typename _TraitsT::char_class_type _CharClassT;
- typedef typename _TraitsT::string_type _StringT;
- typedef regex_constants::syntax_option_type _FlagT;
-
- explicit
- _BracketMatcher(bool __is_non_matching,
- const _TraitsT& __t,
- _FlagT __flags)
- : _M_is_non_matching(__is_non_matching), _M_traits(__t),
- _M_flags(__flags), _M_class_set(0)
- { }
-
- bool
- operator()(_CharT) const;
-
- void
- _M_add_char(_CharT __c)
- {
- if (_M_flags & regex_constants::collate)
- if (_M_is_icase())
- _M_char_set.push_back(_M_traits.translate_nocase(__c));
- else
- _M_char_set.push_back(_M_traits.translate(__c));
- else
- _M_char_set.push_back(__c);
- }
-
- void
- _M_add_collating_element(const _StringT& __s)
- {
- auto __st = _M_traits.lookup_collatename(&*__s.begin(), &*__s.end());
- if (__st.empty())
- __throw_regex_error(regex_constants::error_collate);
- // TODO: digraph
- _M_char_set.push_back(__st[0]);
- }
-
- void
- _M_add_equivalence_class(const _StringT& __s)
- {
- _M_add_character_class(
- _M_traits.transform_primary(&*__s.begin(), &*__s.end()));
- }
-
- void
- _M_add_character_class(const _StringT& __s)
- {
- auto __st = _M_traits.
- lookup_classname(&*__s.begin(), &*__s.end(), _M_is_icase());
- if (__st == 0)
- __throw_regex_error(regex_constants::error_ctype);
- _M_class_set |= __st;
- }
-
- void
- _M_make_range(_CharT __l, _CharT __r)
- { _M_range_set.push_back(make_pair(_M_get_str(__l), _M_get_str(__r))); }
-
- bool
- _M_is_icase() const
- { return _M_flags & regex_constants::icase; }
-
- _StringT
- _M_get_str(_CharT __c) const
- {
- auto __s = _StringT(1,
- _M_is_icase()
- ? _M_traits.translate_nocase(__c)
- : _M_traits.translate(__c));
- return _M_traits.transform(__s.begin(), __s.end());
- }
-
- _TraitsT _M_traits;
- _FlagT _M_flags;
- bool _M_is_non_matching;
- std::vector<_CharT> _M_char_set;
- std::vector<pair<_StringT, _StringT>> _M_range_set;
- _CharClassT _M_class_set;
- };
-
- /**
- * @brief struct _Scanner. Scans an input range for regex tokens.
- *
- * The %_Scanner class interprets the regular expression pattern in
- * the input range passed to its constructor as a sequence of parse
- * tokens passed to the regular expression compiler. The sequence
- * of tokens provided depends on the flag settings passed to the
- * constructor: different regular expression grammars will interpret
- * the same input pattern in syntactically different ways.
- */
- template<typename _InputIter>
- class _Scanner
- {
- public:
- typedef unsigned int _StateT;
- typedef typename std::iterator_traits<_InputIter>::value_type _CharT;
- typedef std::basic_string<_CharT> _StringT;
- typedef regex_constants::syntax_option_type _FlagT;
- typedef const std::ctype<_CharT> _CtypeT;
-
- /// Token types returned from the scanner.
- enum _TokenT
- {
- _S_token_anychar,
- _S_token_backref,
- _S_token_bracket_begin,
- _S_token_bracket_inverse_begin,
- _S_token_bracket_end,
- _S_token_char_class_name,
- _S_token_closure0,
- _S_token_closure1,
- _S_token_collelem_multi,
- _S_token_collelem_single,
- _S_token_collsymbol,
- _S_token_comma,
- _S_token_dash,
- _S_token_dup_count,
- _S_token_eof,
- _S_token_equiv_class_name,
- _S_token_interval_begin,
- _S_token_interval_end,
- _S_token_line_begin,
- _S_token_line_end,
- _S_token_opt,
- _S_token_or,
- _S_token_ord_char,
- _S_token_subexpr_begin,
- _S_token_subexpr_end,
- _S_token_word_begin,
- _S_token_word_end,
- _S_token_unknown
- };
-
- _Scanner(_InputIter __begin, _InputIter __end,
- _FlagT __flags, std::locale __loc)
- : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
- _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(0)
- { _M_advance(); }
-
- void
- _M_advance();
-
- _TokenT
- _M_token() const
- { return _M_curToken; }
-
- const _StringT&
- _M_value() const
- { return _M_curValue; }
-
-#ifdef _GLIBCXX_DEBUG
- std::ostream&
- _M_print(std::ostream&);
-#endif
-
- private:
- void
- _M_eat_escape();
-
- void
- _M_scan_in_brace();
-
- void
- _M_scan_in_bracket();
-
- void
- _M_eat_charclass();
-
- void
- _M_eat_equivclass();
-
- void
- _M_eat_collsymbol();
-
- static constexpr _StateT _S_state_in_brace = 1 << 0;
- static constexpr _StateT _S_state_in_bracket = 1 << 1;
- _InputIter _M_current;
- _InputIter _M_end;
- _FlagT _M_flags;
- _CtypeT& _M_ctype;
- _TokenT _M_curToken;
- _StringT _M_curValue;
- _StateT _M_state;
- };
+ struct _BracketMatcher;
/// Builds an NFA from an input iterator interval.
- template<typename _InputIter, typename _CharT, typename _TraitsT>
+ template<typename _FwdIter, typename _CharT, typename _TraitsT>
class _Compiler
{
public:
@@ -237,7 +51,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
typedef _NFA<_CharT, _TraitsT> _RegexT;
typedef regex_constants::syntax_option_type _FlagT;
- _Compiler(_InputIter __b, _InputIter __e,
+ _Compiler(_FwdIter __b, _FwdIter __e,
const _TraitsT& __traits, _FlagT __flags);
std::shared_ptr<_RegexT>
@@ -245,7 +59,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{ return std::shared_ptr<_RegexT>(new _RegexT(_M_state_store)); }
private:
- typedef _Scanner<_InputIter> _ScannerT;
+ typedef _Scanner<_FwdIter> _ScannerT;
typedef typename _ScannerT::_TokenT _TokenT;
typedef _StateSeq<_CharT, _TraitsT> _StateSeqT;
typedef std::stack<_StateSeqT, std::vector<_StateSeqT>> _StackT;
@@ -276,7 +90,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
bool
_M_bracket_expression();
- bool
+ void
_M_bracket_list(_BMatcherT& __matcher);
bool
@@ -303,14 +117,111 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
int
_M_cur_int_value(int __radix);
+ bool
+ _M_try_char();
+
+ _CharT
+ _M_get_char();
+
const _TraitsT& _M_traits;
_ScannerT _M_scanner;
- _StringT _M_cur_value;
+ _StringT _M_value;
_RegexT _M_state_store;
_StackT _M_stack;
_FlagT _M_flags;
};
+ /// Matches a character range (bracket expression)
+ template<typename _CharT, typename _TraitsT>
+ struct _BracketMatcher
+ {
+ typedef typename _TraitsT::char_class_type _CharClassT;
+ typedef typename _TraitsT::string_type _StringT;
+ typedef regex_constants::syntax_option_type _FlagT;
+
+ explicit
+ _BracketMatcher(bool __is_non_matching,
+ const _TraitsT& __t,
+ _FlagT __flags)
+ : _M_is_non_matching(__is_non_matching), _M_traits(__t),
+ _M_flags(__flags), _M_class_set(0)
+ { }
+
+ bool
+ operator()(_CharT) const;
+
+ void
+ _M_add_char(_CharT __c)
+ { _M_char_set.push_back(_M_translate(__c)); }
+
+ void
+ _M_add_collating_element(const _StringT& __s)
+ {
+ auto __st = _M_traits.lookup_collatename(__s.data(),
+ __s.data() + __s.size());
+ if (__st.empty())
+ __throw_regex_error(regex_constants::error_collate);
+ // TODO: digraph
+ _M_char_set.push_back(__st[0]);
+ }
+
+ void
+ _M_add_equivalence_class(const _StringT& __s)
+ {
+ _M_add_character_class(
+ _M_traits.transform_primary(__s.data(),
+ __s.data() + __s.size()));
+ }
+
+ void
+ _M_add_character_class(const _StringT& __s)
+ {
+ auto __st = _M_traits.
+ lookup_classname(__s.data(), __s.data() + __s.size(), _M_is_icase());
+ if (__st == 0)
+ __throw_regex_error(regex_constants::error_ctype);
+ _M_class_set |= __st;
+ }
+
+ void
+ _M_make_range(_CharT __l, _CharT __r)
+ {
+ _M_range_set.push_back(
+ make_pair(_M_get_str(_M_translate(__l)),
+ _M_get_str(_M_translate(__r))));
+ }
+
+ _CharT
+ _M_translate(_CharT __c) const
+ {
+ if (_M_flags & regex_constants::collate)
+ if (_M_is_icase())
+ return _M_traits.translate_nocase(__c);
+ else
+ return _M_traits.translate(__c);
+ else
+ return __c;
+ }
+
+ bool
+ _M_is_icase() const
+ { return _M_flags & regex_constants::icase; }
+
+ _StringT
+ _M_get_str(_CharT __c) const
+ {
+ _StringT __s(1, __c);
+ return _M_traits.transform(__s.begin(), __s.end());
+ }
+
+ _TraitsT _M_traits;
+ _FlagT _M_flags;
+ bool _M_is_non_matching;
+ std::vector<_CharT> _M_char_set;
+ std::vector<pair<_StringT, _StringT>> _M_range_set;
+ _CharClassT _M_class_set;
+ };
+
//@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
OpenPOWER on IntegriCloud