From 5e470c4344f67e0e54274c992ef2cfffa5ba7c42 Mon Sep 17 00:00:00 2001 From: redi Date: Tue, 3 Jun 2014 17:26:24 +0000 Subject: Backport from mainline 2014-05-20 Tim Shen PR libstdc++/61227 * include/bits/regex_compiler.h (_BracketMatcher<>::_M_add_character_class): Add negative character class support. * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply): Likewise. * testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc: Add more testcases. git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-4_9-branch@211192 138bc75d-0d04-0410-961f-82ee72b054a4 --- libstdc++-v3/ChangeLog | 12 ++++++++++++ libstdc++-v3/include/bits/regex_compiler.h | 11 ++++++++--- libstdc++-v3/include/bits/regex_compiler.tcc | 17 +++++++++++++++-- .../algorithms/regex_match/ecma/char/quoted_char.cc | 10 ++++++++++ 4 files changed, 45 insertions(+), 5 deletions(-) (limited to 'libstdc++-v3') diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index ff1e3daec35..eb23bed260f 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -13,6 +13,18 @@ Do _M_alt before _M_next. * testsuite/28_regex/basic_regex/multiple_quantifiers.cc: Add testcases. + Backport from mainline + 2014-05-20 Tim Shen + + PR libstdc++/61227 + * include/bits/regex_compiler.h + (_BracketMatcher<>::_M_add_character_class): Add negative character + class support. + * include/bits/regex_compiler.tcc (_BracketMatcher<>::_M_apply): + Likewise. + * testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc: + Add more testcases. + 2014-05-29 Jonathan Wakely * include/tr2/bool_set: Use UTF-8 for accented characters. diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h index f5a198f65e9..af76f55054a 100644 --- a/libstdc++-v3/include/bits/regex_compiler.h +++ b/libstdc++-v3/include/bits/regex_compiler.h @@ -369,15 +369,19 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION #endif } + // __neg should be true for \D, \S and \W only. 
void - _M_add_character_class(const _StringT& __s) + _M_add_character_class(const _StringT& __s, bool __neg) { auto __mask = _M_traits.lookup_classname(__s.data(), __s.data() + __s.size(), __icase); if (__mask == 0) __throw_regex_error(regex_constants::error_ctype); - _M_class_set |= __mask; + if (!__neg) + _M_class_set |= __mask; + else + _M_neg_class_set.push_back(__mask); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -387,7 +391,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_make_range(_CharT __l, _CharT __r) { _M_range_set.push_back(make_pair(_M_translator._M_transform(__l), - _M_translator._M_transform(__r))); + _M_translator._M_transform(__r))); #ifdef _GLIBCXX_DEBUG _M_is_ready = false; #endif @@ -435,6 +439,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION std::vector<_CharT> _M_char_set; std::vector<_StringT> _M_equiv_set; std::vector<pair<_StrTransT, _StrTransT>> _M_range_set; + std::vector<_CharClassT> _M_neg_class_set; _CharClassT _M_class_set; _TransT _M_translator; const _TraitsT& _M_traits; diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc b/libstdc++-v3/include/bits/regex_compiler.tcc index 128dac12bd7..14e40c0cd19 100644 --- a/libstdc++-v3/include/bits/regex_compiler.tcc +++ b/libstdc++-v3/include/bits/regex_compiler.tcc @@ -397,7 +397,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _GLIBCXX_DEBUG_ASSERT(_M_value.size() == 1); _BracketMatcher<_TraitsT, __icase, __collate> __matcher (_M_ctype.is(_CtypeT::upper, _M_value[0]), _M_traits); - __matcher._M_add_character_class(_M_value); + __matcher._M_add_character_class(_M_value, false); __matcher._M_ready(); _M_stack.push(_StateSeqT(_M_nfa, _M_nfa._M_insert_matcher(std::move(__matcher)))); @@ -428,7 +428,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION else if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) __matcher._M_add_equivalence_class(_M_value); else if (_M_match_token(_ScannerT::_S_token_char_class_name)) - __matcher._M_add_character_class(_M_value); + __matcher._M_add_character_class(_M_value, false); else if (_M_try_char()) // [a { auto 
__ch = _M_value[0]; @@ -451,6 +451,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION } __matcher._M_add_char(__ch); } + else if (_M_match_token(_ScannerT::_S_token_quoted_class)) + __matcher._M_add_character_class(_M_value, + _M_ctype.is(_CtypeT::upper, + _M_value[0])); else __throw_regex_error(regex_constants::error_brack); } @@ -527,6 +531,15 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION _M_traits.transform_primary(&__ch, &__ch+1)) != _M_equiv_set.end()) __ret = true; + else + { + for (auto& __it : _M_neg_class_set) + if (!_M_traits.isctype(__ch, __it)) + { + __ret = true; + break; + } + } } if (_M_is_non_matching) return !__ret; diff --git a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc index e7280acbdbd..86417323516 100644 --- a/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc +++ b/libstdc++-v3/testsuite/28_regex/algorithms/regex_match/ecma/char/quoted_char.cc @@ -44,6 +44,16 @@ test01() VERIFY(regex_match_debug("_az", regex("\\w*"))); VERIFY(regex_match_debug("!@#$%", regex("\\W*"))); VERIFY(!regex_match_debug("_01234", regex("\\W*"))); + + VERIFY(regex_match_debug("01", regex("[\\d]*"))); + VERIFY(regex_match_debug("asdfjkl", regex("[\\D]*"))); + VERIFY(!regex_match_debug("asdfjkl0", regex("[\\D]*"))); + VERIFY(regex_match_debug("\r\t\v\f ", regex("[\\s]*"))); + VERIFY(regex_match_debug("asdfjkl", regex("[\\S]*"))); + VERIFY(!regex_match_debug("asdfjkl\r", regex("[\\S]*"))); + VERIFY(regex_match_debug("_az", regex("[\\w]*"))); + VERIFY(regex_match_debug("!@#$%", regex("[\\W]*"))); + VERIFY(!regex_match_debug("_01234", regex("[\\W]*"))); } int -- cgit v1.2.1