diff --git a/Doc/library/re.rst b/Doc/library/re.rst
index 5b304f717b07fa..de69990fd9d1ab 100644
--- a/Doc/library/re.rst
+++ b/Doc/library/re.rst
@@ -29,9 +29,9 @@ a literal backslash, one might have to write ``'\\\\'`` as the pattern
string, because the regular expression must be ``\\``, and each
backslash must be expressed as ``\\`` inside a regular Python string
literal. Also, please note that any invalid escape sequences in Python's
-usage of the backslash in string literals now generate a :exc:`DeprecationWarning`
-and in the future this will become a :exc:`SyntaxError`. This behaviour
-will happen even if it is a valid escape sequence for a regular expression.
+usage of the backslash in string literals now raise a :exc:`SyntaxError`.
+This happens even if the sequence is a valid escape sequence for a regular
+expression.
The solution is to use Python's raw string notation for regular expression
patterns; backslashes are not handled in any special way in a string literal
diff --git a/Doc/reference/lexical_analysis.rst b/Doc/reference/lexical_analysis.rst
index 4ab6e90a623449..c82dee8304e4dc 100644
--- a/Doc/reference/lexical_analysis.rst
+++ b/Doc/reference/lexical_analysis.rst
@@ -646,9 +646,10 @@ escape sequences only recognized in string literals fall into the category of
unrecognized escapes for bytes literals.
.. versionchanged:: 3.6
- Unrecognized escape sequences produce a :exc:`DeprecationWarning`. In
- a future Python version they will be a :exc:`SyntaxWarning` and
- eventually a :exc:`SyntaxError`.
+ Unrecognized escape sequences produce a :exc:`DeprecationWarning`.
+
+ .. versionchanged:: 3.12
+ Unrecognized escape sequences produce a :exc:`SyntaxError`.
Even in a raw literal, quotes can be escaped with a backslash, but the
backslash remains in the result; for example, ``r"\""`` is a valid string
diff --git a/Doc/whatsnew/3.12.rst b/Doc/whatsnew/3.12.rst
index 525efc405c8520..9907c75cd0e6be 100644
--- a/Doc/whatsnew/3.12.rst
+++ b/Doc/whatsnew/3.12.rst
@@ -79,6 +79,10 @@ New Features
Other Language Changes
======================
+* A backslash-character pair that is not a valid escape sequence now raises
+ a :exc:`SyntaxError` instead of emitting a :exc:`DeprecationWarning`.
+ (Contributed by Victor Stinner in :gh:`98401`.)
+
* :class:`types.MappingProxyType` instances are now hashable if the underlying
mapping is hashable.
(Contributed by Serhiy Storchaka in :gh:`87995`.)
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index 57f3648eb7017c..b6ae7155d8c678 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -1197,15 +1197,15 @@ def test_escape(self):
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtvx':
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(b"\\" + b, b"\\" + b)
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(b"\\" + b.upper(), b"\\" + b.upper())
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(br"\8", b"\\8")
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(br"\9", b"\\9")
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(b"\\\xfa", b"\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
@@ -2425,16 +2425,16 @@ def test_escape_decode(self):
for i in range(97, 123):
b = bytes([i])
if b not in b'abfnrtuvx':
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(b"\\" + b, "\\" + chr(i))
if b.upper() not in b'UN':
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(b"\\" + b.upper(), "\\" + chr(i-32))
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(br"\8", "\\8")
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(br"\9", "\\9")
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
check(b"\\\xfa", "\\\xfa")
for i in range(0o400, 0o1000):
with self.assertWarns(DeprecationWarning):
diff --git a/Lib/test/test_codeop.py b/Lib/test/test_codeop.py
index 133096d25a44bc..0b4ac87bc2cd77 100644
--- a/Lib/test/test_codeop.py
+++ b/Lib/test/test_codeop.py
@@ -313,7 +313,7 @@ def test_warning(self):
(".*literal", SyntaxWarning),
(".*invalid", DeprecationWarning),
) as w:
- compile_command(r"'\e' is 0")
+ compile_command(r"'\777' is 0")
self.assertEqual(len(w.warnings), 2)
# bpo-41520: check SyntaxWarning treated as an SyntaxError
@@ -324,21 +324,21 @@ def test_warning(self):
# Check DeprecationWarning treated as an SyntaxError
with warnings.catch_warnings(), self.assertRaises(SyntaxError):
warnings.simplefilter('error', DeprecationWarning)
- compile_command(r"'\e'", symbol='exec')
+ compile_command(r"'\777'", symbol='exec')
def test_incomplete_warning(self):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
- self.assertIncomplete("'\\e' + (")
+ self.assertIncomplete("'\\777' + (")
self.assertEqual(w, [])
def test_invalid_warning(self):
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('always')
- self.assertInvalid("'\\e' 1")
+ self.assertInvalid("'\\777' 1")
self.assertEqual(len(w), 1)
self.assertEqual(w[0].category, DeprecationWarning)
- self.assertRegex(str(w[0].message), 'invalid escape sequence')
+ self.assertRegex(str(w[0].message), 'invalid octal escape sequence')
self.assertEqual(w[0].filename, '')
diff --git a/Lib/test/test_fstring.py b/Lib/test/test_fstring.py
index bf3a5b0bbccdfb..f60761106b2e08 100644
--- a/Lib/test/test_fstring.py
+++ b/Lib/test/test_fstring.py
@@ -776,9 +776,9 @@ def test_backslashes_in_string_part(self):
self.assertEqual(f'2\x203', '2 3')
self.assertEqual(f'\x203', ' 3')
- with self.assertWarns(DeprecationWarning): # invalid escape sequence
- value = eval(r"f'\{6*7}'")
- self.assertEqual(value, '\\42')
+ with self.assertRaisesRegex(SyntaxError, 'invalid escape sequence'):
+ eval(r"f'\{6*7}'")
+
self.assertEqual(f'\\{6*7}', '\\42')
self.assertEqual(fr'\{6*7}', '\\42')
diff --git a/Lib/test/test_string_literals.py b/Lib/test/test_string_literals.py
index 7247b7e48bc2b6..0ed4e6703f81b5 100644
--- a/Lib/test/test_string_literals.py
+++ b/Lib/test/test_string_literals.py
@@ -109,23 +109,12 @@ def test_eval_str_invalid_escape(self):
for b in range(1, 128):
if b in b"""\n\r"'01234567NU\\abfnrtuvx""":
continue
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
self.assertEqual(eval(r"'\%c'" % b), '\\' + chr(b))
- with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter('always', category=DeprecationWarning)
+ with self.assertRaises(SyntaxError) as cm:
eval("'''\n\\z'''")
- self.assertEqual(len(w), 1)
- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
- self.assertEqual(w[0].filename, '')
- self.assertEqual(w[0].lineno, 1)
-
- with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter('error', category=DeprecationWarning)
- with self.assertRaises(SyntaxError) as cm:
- eval("'''\n\\z'''")
- exc = cm.exception
- self.assertEqual(w, [])
+ exc = cm.exception
self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
self.assertEqual(exc.filename, '')
self.assertEqual(exc.lineno, 1)
@@ -186,16 +175,15 @@ def test_eval_bytes_invalid_escape(self):
for b in range(1, 128):
if b in b"""\n\r"'01234567\\abfnrtvx""":
continue
- with self.assertWarns(DeprecationWarning):
+ with self.assertRaises(SyntaxError):
self.assertEqual(eval(r"b'\%c'" % b), b'\\' + bytes([b]))
- with warnings.catch_warnings(record=True) as w:
- warnings.simplefilter('always', category=DeprecationWarning)
+ with self.assertRaises(SyntaxError) as cm:
eval("b'''\n\\z'''")
- self.assertEqual(len(w), 1)
- self.assertEqual(str(w[0].message), r"invalid escape sequence '\z'")
- self.assertEqual(w[0].filename, '')
- self.assertEqual(w[0].lineno, 1)
+ exc = cm.exception
+ self.assertEqual(exc.msg, r"invalid escape sequence '\z'")
+ self.assertEqual(exc.filename, '')
+ self.assertEqual(exc.lineno, 1)
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter('error', category=DeprecationWarning)
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst b/Misc/NEWS.d/next/Core and Builtins/2022-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst
new file mode 100644
index 00000000000000..5b113ae50da62c
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-10-18-17-38-22.gh-issue-98401.3kHNtJ.rst
@@ -0,0 +1,2 @@
+A backslash-character pair that is not a valid escape sequence now raises
+:exc:`SyntaxError`, not :exc:`DeprecationWarning`. Patch by Victor Stinner.
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
index 80660881920fb7..f783a684b5eb39 100644
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -1192,13 +1192,10 @@ PyObject *PyBytes_DecodeEscape(const char *s,
}
}
else {
- if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid escape sequence '\\%c'",
- c) < 0)
- {
- Py_DECREF(result);
- return NULL;
- }
+ PyErr_Format(PyExc_SyntaxError,
+ "invalid escape sequence '\\%c'", c);
+ Py_DECREF(result);
+ return NULL;
}
}
return result;
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index d090915146f804..022f674a41e2b7 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -5967,13 +5967,10 @@ _PyUnicode_DecodeUnicodeEscapeStateful(const char *s,
}
}
else {
- if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
- "invalid escape sequence '\\%c'",
- c) < 0)
- {
- Py_DECREF(result);
- return NULL;
- }
+ PyErr_Format(PyExc_SyntaxError,
+ "invalid escape sequence '\\%c'", c);
+ Py_DECREF(result);
+ return NULL;
}
}
return result;
diff --git a/Parser/string_parser.c b/Parser/string_parser.c
index 9bc3b082136be5..942bf823eea9b3 100644
--- a/Parser/string_parser.c
+++ b/Parser/string_parser.c
@@ -13,38 +13,42 @@ warn_invalid_escape_sequence(Parser *p, const char *first_invalid_escape, Token
{
unsigned char c = *first_invalid_escape;
int octal = ('4' <= c && c <= '7');
- PyObject *msg =
- octal
- ? PyUnicode_FromFormat("invalid octal escape sequence '\\%.3s'",
- first_invalid_escape)
- : PyUnicode_FromFormat("invalid escape sequence '\\%c'", c);
- if (msg == NULL) {
- return -1;
- }
- if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
- t->lineno, NULL, NULL) < 0) {
- if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
- /* Replace the DeprecationWarning exception with a SyntaxError
- to get a more accurate error report */
- PyErr_Clear();
-
- /* This is needed, in order for the SyntaxError to point to the token t,
- since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
- error location, if p->known_err_token is not set. */
- p->known_err_token = t;
- if (octal) {
+
+ if (octal) {
+ PyObject *msg = PyUnicode_FromFormat(
+ "invalid octal escape sequence '\\%.3s'",
+ first_invalid_escape);
+ if (msg == NULL) {
+ return -1;
+ }
+ if (PyErr_WarnExplicitObject(PyExc_DeprecationWarning, msg, p->tok->filename,
+ t->lineno, NULL, NULL) < 0) {
+ if (PyErr_ExceptionMatches(PyExc_DeprecationWarning)) {
+ /* Replace the DeprecationWarning exception with a SyntaxError
+ to get a more accurate error report */
+ PyErr_Clear();
+
+ /* This is needed, in order for the SyntaxError to point to the token t,
+ since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
+ error location, if p->known_err_token is not set. */
+ p->known_err_token = t;
RAISE_SYNTAX_ERROR("invalid octal escape sequence '\\%.3s'",
first_invalid_escape);
}
- else {
- RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
- }
+ Py_DECREF(msg);
+ return -1;
}
Py_DECREF(msg);
+ return 0;
+ }
+ else {
+ /* This is needed, in order for the SyntaxError to point to the token t,
+ since _PyPegen_raise_error uses p->tokens[p->fill - 1] for the
+ error location, if p->known_err_token is not set. */
+ p->known_err_token = t;
+ RAISE_SYNTAX_ERROR("invalid escape sequence '\\%c'", c);
return -1;
}
- Py_DECREF(msg);
- return 0;
}
static PyObject *