Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
102 changes: 66 additions & 36 deletions stl/inc/regex
Original file line number Diff line number Diff line change
Expand Up @@ -1586,7 +1586,7 @@ public:
_Loop_number(_Number) {}

const int _Min;
const int _Max;
const int _Max; // non-negative if bounded, -1 if unbounded
_Node_end_rep* _End_rep;
unsigned int _Loop_number;
int _Simple_loop = -1; // -1 undetermined, 0 contains if/do, 1 simple
Expand Down Expand Up @@ -1680,6 +1680,7 @@ enum class _Rx_unwind_ops {
_Disjunction_eval_alt_on_failure,
_Disjunction_eval_alt_always,
_Do_nothing,
_Loop_simple_nongreedy,
};

template <class _BidIt>
Expand Down Expand Up @@ -1814,7 +1815,7 @@ private:
void _Decrease_stack_usage_count();
void _Increase_complexity_count();

bool _Do_rep0(_Node_rep*, bool);
bool _Do_rep0(_Node_rep*);
bool _Do_rep(_Node_rep*, bool, int);
void _Prepare_rep(_Node_rep*);
bool _Find_first_inner_capture_group(_Node_base*, _Loop_vals_v2_t*);
Expand Down Expand Up @@ -3413,22 +3414,18 @@ void _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Increase_complexity_coun
}

template <class _BidIt, class _Elem, class _RxTraits, class _It, class _Alloc>
bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node, bool _Greedy) {
bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node) {
// apply repetition to loop with no nested if/do
int _Ix = _Node->_Min;
const size_t _Frame_idx = _Loop_vals[_Node->_Loop_number]._Loop_frame_idx;
_Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 1;
_Loop_vals[_Node->_Loop_number]._Loop_idx = _Ix + 2;

_Tgt_state_t<_It> _Final;
bool _Matched0 = false;
_It _Saved_pos = _Tgt_state._Cur;
bool _Done = false;

if (_Match_pat(_Node->_End_rep->_Next)) {
if (!_Greedy) {
return true; // go with current match
}

// record an acceptable match and continue
_Final = _Tgt_state;
_Matched0 = true;
Expand All @@ -3449,10 +3446,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
} else {
_Saved_pos = _Tgt_state._Cur;
if (_Match_pat(_Node->_End_rep->_Next)) {
if (!_Greedy) {
return true; // go with current match
}

// record match and continue
_Final = _Tgt_state;
_Matched0 = true;
Expand All @@ -3472,10 +3465,6 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Do_rep0(_Node_rep* _Node
// since loop is branchless, empty rep match is not possible at this point
_Saved_pos = _Tgt_state._Cur;
if (_Match_pat(_Node->_End_rep->_Next)) {
if (!_Greedy) {
return true; // go with current match
}

// record match and continue
_Final = _Tgt_state;
_Matched0 = true;
Expand Down Expand Up @@ -4135,13 +4124,25 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N

if (_Node->_Simple_loop == 1) {
auto& _Sav = _Loop_vals[_Node->_Loop_number];
_Sav._Loop_idx = 1;
_Sav._Loop_frame_idx = _Push_frame(_Rx_unwind_ops::_Do_nothing);
if (_Node->_Min == 0) {
_Failed = !_Do_rep0(_Node, _Greedy);
_Next = nullptr;
} else {
if (_Node->_Min > 0) { // try to match a rep
_Increase_complexity_count();
_Sav._Loop_idx = 1;
// _Next is already assigned correctly for matching a rep
} else if (!_Greedy || _Longest) { // non-greedy matching
_Increase_complexity_count();

// try tail first
_Sav._Loop_idx = 0;
_Next = _Node->_End_rep->_Next;

// set up stack unwinding for non-greedy matching if at least one rep is allowed
if (_Node->_Max != 0) {
_Push_frame(_Rx_unwind_ops::_Loop_simple_nongreedy, _Node);
}
} else {
_Failed = !_Do_rep0(_Node);
_Next = nullptr;
}
} else {
_Failed = !_Do_rep(_Node, _Greedy, 0);
Expand All @@ -4155,29 +4156,41 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
{
_Node_rep* _Nr = static_cast<_Node_end_rep*>(_Nx)->_Begin_rep;
auto& _Sav = _Loop_vals[_Nr->_Loop_number];
bool _Greedy = (_Nr->_Flags & _Fl_greedy) != 0;
if (_Nr->_Simple_loop != 0) {
if (_Sav._Loop_idx <= _Nr->_Min) {
if (_Sav._Loop_idx == 1
&& _Tgt_state._Cur == _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // match empty
// loop is branchless, so it will only ever match empty strings
// -> skip all other matches as they don't change state and immediately try tail
if (_Sav._Loop_idx == 1
&& _Tgt_state._Cur
== _Frames[_Sav._Loop_frame_idx]._Match_state._Cur) { // initial match empty
// loop is branchless, so it will only ever match empty strings
// -> we only try tail for POSIX or if minimum number of reps is non-zero
if ((_Sflags & regex_constants::_Any_posix) || _Nr->_Min > 0) {
_Increase_complexity_count();
// _Next is already assigned correctly for matching tail
} else if (_Sav._Loop_idx < _Nr->_Min) { // needs at least one more rep to reach minimum
_Increase_complexity_count();
// GH-5365: We have to reset the capture groups from the second iteration on.
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
_Next = _Nr->_Next;
++_Sav._Loop_idx;
} else { // minimum number of reps reached
_Failed = !_Do_rep0(_Nr, (_Nr->_Flags & _Fl_greedy) != 0);
_Next = nullptr;
} else {
_Failed = true;
}
} else if (_Sav._Loop_idx < _Nr->_Min) { // at least one more rep to reach minimum
_Increase_complexity_count();

_Next = _Nr->_Next;
// GH-5365: We have to reset the capture groups from the second iteration on.
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
++_Sav._Loop_idx;
} else if (_Longest || !_Greedy) {
_Increase_complexity_count();
// set up stack unwinding for non-greedy matching if one more rep is allowed
if (_Sav._Loop_idx != _Nr->_Max) {
_Push_frame(_Rx_unwind_ops::_Loop_simple_nongreedy, _Nr);
}
// _Next is already assigned correctly for matching tail
} else if (_Sav._Loop_idx == _Nr->_Min) { // greedy and minimum number of reps reached
_Failed = !_Do_rep0(_Nr);
_Next = nullptr;
} else { // internal _Match_pat(_Node->_Next) call in _Do_rep0()
_Next = nullptr;
}
} else {
_Failed = !_Do_rep(_Nr, (_Nr->_Flags & _Fl_greedy) != 0, _Sav._Loop_idx);
_Failed = !_Do_rep(_Nr, _Greedy, _Sav._Loop_idx);
_Next = nullptr;
}
break;
Expand Down Expand Up @@ -4267,6 +4280,23 @@ bool _Matcher3<_BidIt, _Elem, _RxTraits, _It, _Alloc>::_Match_pat(_Node_base* _N
case _Rx_unwind_ops::_Do_nothing:
break;

case _Rx_unwind_ops::_Loop_simple_nongreedy:
// try one more rep after matching tail if necessary
if (_Longest || _Failed) {
auto _Node = static_cast<_Node_rep*>(_Frame._Node);
auto& _Sav = _Loop_vals[_Node->_Loop_number];

_Increase_complexity_count();
_Nx = _Node->_Next;
_Tgt_state._Cur = _Frame._Match_state._Cur;
_Tgt_state._Grp_valid = _Frames[_Sav._Loop_frame_idx]._Match_state._Grp_valid;
_Failed = false;
if (_Sav._Loop_idx < INT_MAX) { // avoid overflowing _Loop_idx
++_Sav._Loop_idx;
}
}
break;

default:
#if _ITERATOR_DEBUG_LEVEL != 0
_STL_REPORT_ERROR("internal stack of regex matcher corrupted");
Expand Down
15 changes: 15 additions & 0 deletions tests/std/tests/VSO_0000000_regex_use/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2129,6 +2129,20 @@ void test_gh_5672() {
}
}

void test_gh_5774() {
    // GH-5774: non-greedy and longest-mode simple loops are processed non-recursively.
    // The cases below widen coverage of non-greedy simple loops with a bounded repetition count.
    struct bounded_loop_case {
        const char* subject; // input text handed to the tester
        const char* pattern; // regular expression under test
        bool expect_match; // true -> should_match, false -> should_not_match
    };

    const bounded_loop_case cases[] = {
        {"", "a+?", false},
        {"ab", "a{0}?b", false},
        {"ab", "a{0,1}?b", true},
        {"aab", "a{0,1}?b", false},
        {"aab", "a{0,2}?b", true},
        {"aab", "a{1,2}?b", true},
        {"aab", "a{1}?b", false},
        {"aaab", "a{1,2}?b", false},
        {"aaab", "a{1,3}?b", true},
    };

    // Run the cases in table order so the sequence of tester calls is unchanged.
    for (const auto& entry : cases) {
        if (entry.expect_match) {
            g_regexTester.should_match(entry.subject, entry.pattern);
        } else {
            g_regexTester.should_not_match(entry.subject, entry.pattern);
        }
    }
}

int main() {
test_dev10_449367_case_insensitivity_should_work();
test_dev11_462743_regex_collate_should_not_disable_regex_icase();
Expand Down Expand Up @@ -2180,6 +2194,7 @@ int main() {
test_gh_5509();
test_gh_5576();
test_gh_5672();
test_gh_5774();

return g_regexTester.result();
}