summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jonathan Wakely <jwakely@redhat.com> 2021-09-29 13:48:11 +0100
committer Jonathan Wakely <jwakely@redhat.com> 2023-06-23 13:19:24 +0100
commit d877bf3bdf46b5c996505fc247d170e79fbfa4bf (patch)
tree 9e371de82220ce10f07b50e66be72ccae6a96c37
parent 61dc491950863d0b441ddd3f9d609b83a01deaf7 (diff)
libstdc++: std::basic_regex should treat '\0' as an ordinary char [PR84110]
When the input sequence contains a _CharT(0) character, the strchr call in _Scanner<_CharT>::_M_scan_normal() will search for '\0' and so return a pointer to the terminating null at the end of the string. This makes the scanner think it's found a special character. Because it doesn't match any of the actual special characters, we fall off the end of the function (or assert in debug mode).

We should check for a null character explicitly and either treat it as an ordinary character (for the ECMAScript grammar) or an error (for all others). I'm not 100% sure that's right, but it seems consistent with the POSIX RE rules where a '\0' means the end of the regex pattern or the end of the sequence being matched.

Signed-off-by: Jonathan Wakely <jwakely@redhat.com>

libstdc++-v3/ChangeLog:

PR libstdc++/84110
* include/bits/regex_error.h (regex_constants::_S_null): New error code for internal use.
* include/bits/regex_scanner.tcc (_Scanner::_M_scan_normal()): Check for null character.
* testsuite/28_regex/basic_regex/84110.cc: New test.

(cherry picked from commit b701e1f8f6870c0f8cb4050674da489101dd05a5)
-rw-r--r-- libstdc++-v3/include/bits/regex_error.h 1
-rw-r--r-- libstdc++-v3/include/bits/regex_scanner.tcc 10
-rw-r--r-- libstdc++-v3/testsuite/28_regex/basic_regex/84110.cc 39
3 files changed, 50 insertions, 0 deletions
diff --git a/libstdc++-v3/include/bits/regex_error.h b/libstdc++-v3/include/bits/regex_error.h
index b40351a39cc..f2899174352 100644
--- a/libstdc++-v3/include/bits/regex_error.h
+++ b/libstdc++-v3/include/bits/regex_error.h
@@ -61,6 +61,7 @@ namespace regex_constants
_S_error_badrepeat,
_S_error_complexity,
_S_error_stack,
+ _S_null
};
/** The expression contained an invalid collating element name. */
diff --git a/libstdc++-v3/include/bits/regex_scanner.tcc b/libstdc++-v3/include/bits/regex_scanner.tcc
index cb8a526ea1a..a5a16af6cec 100644
--- a/libstdc++-v3/include/bits/regex_scanner.tcc
+++ b/libstdc++-v3/include/bits/regex_scanner.tcc
@@ -176,6 +176,16 @@ namespace __detail
_M_state = _S_state_in_brace;
_M_token = _S_token_interval_begin;
}
+ else if (__builtin_expect(__c == _CharT(0), false))
+ {
+ if (!_M_is_ecma())
+ {
+ __throw_regex_error(regex_constants::_S_null,
+ "Unexpected null character in regular expression");
+ }
+ _M_token = _S_token_ord_char;
+ _M_value.assign(1, __c);
+ }
else if (__c != ']' && __c != '}')
{
auto __it = _M_token_tbl;
diff --git a/libstdc++-v3/testsuite/28_regex/basic_regex/84110.cc b/libstdc++-v3/testsuite/28_regex/basic_regex/84110.cc
new file mode 100644
index 00000000000..b9971dcaac5
--- /dev/null
+++ b/libstdc++-v3/testsuite/28_regex/basic_regex/84110.cc
@@ -0,0 +1,39 @@
+// { dg-do run { target c++11 } }
+#include <regex>
+#include <string>
+#include <testsuite_hooks.h>
+
+void test01() // A pattern that is a single null character, tried under each grammar.
+{
+ const std::string s(1ul, '\0'); // one-character string whose only element is '\0'
+ std::regex re(s); // no flags given, so the default (ECMAScript) grammar is used
+ VERIFY( std::regex_match(s, re) ); // PR libstdc++/84110
+
+#if __cpp_exceptions
+ using namespace std::regex_constants;
+ for (auto syn : {basic, extended, awk, grep, egrep}) // the grammar options other than ECMAScript
+ {
+ try
+ {
+ std::regex{s, syn}; // '\0' is not valid for other grammars
+ VERIFY( false ); // reached only if the construction above did not throw
+ }
+ catch (const std::regex_error&) // expected outcome; nothing further to check
+ {
+ }
+ }
+#endif
+}
+
+void test02() // A null character embedded between ordinary pattern characters.
+{
+ const std::string s("uh-\0h", 5); // explicit length 5 keeps the '\0' and the trailing 'h'
+ std::regex re(s); // no flags given, so the default (ECMAScript) grammar is used
+ VERIFY( std::regex_match(s, re) ); // the pattern must match the identical 5-character string
+}
+
+int main() // Test driver: runs both cases in order.
+{
+ test01();
+ test02();
+}