Skip to content
1 change: 1 addition & 0 deletions Lib/_pyrepl/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from .types import CharBuffer, CharWidths
from .trace import trace


# Matches an ANSI CSI escape sequence: ESC "[", then any number of characters
# in the range " " through "@", then one final character in the range "A"-"~".
ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")
# Matches the shortest run (not crossing a newline) that starts with \x01 and
# ends with \x02.
ZERO_WIDTH_BRACKET = re.compile(r"\x01.*?\x02")
# Translation table for str.translate() that deletes the \x01 and \x02 markers.
ZERO_WIDTH_TRANS = str.maketrans({"\x01": "", "\x02": ""})
Expand Down
87 changes: 87 additions & 0 deletions Lib/test/test_traceback.py
Original file line number Diff line number Diff line change
Expand Up @@ -1790,6 +1790,7 @@ def f():
]
self.assertEqual(result_lines, expected)


class TestKeywordTypoSuggestions(unittest.TestCase):
TYPO_CASES = [
("with block ad something:\n pass", "and"),
Expand Down Expand Up @@ -5414,6 +5415,92 @@ def expected(t, m, fn, l, f, E, e, z, n):
]
self.assertEqual(actual, expected(**colors))

def test_colorized_traceback_unicode(self):
    """Check the colorized source and caret lines for non-ASCII source text."""
    # The text of the raising statement (including the trailing ####)
    # reappears in the expected output below, so it must not be reformatted.
    try:
        啊哈=1; 啊哈/0####
    except Exception as e:
        exc = traceback.TracebackException.from_exception(e)

    actual = "".join(exc.format(colorize=True)).splitlines()
    # The parameter list mirrors what ``expected(**colors)`` passes in;
    # only e, E and z are used by this helper.
    def expected(t, m, fn, l, f, E, e, z, n):
        return [
            # Source line: both operands wrapped in e...z, the operator in E...z.
            f" 啊哈=1; {e}啊哈{z}{E}/{z}{e}0{z}####",
            # Caret line: four "~" under the two-character name 啊哈
            # (presumably two display columns per character), "^" under "/".
            f" {e}~~~~{z}{E}^{z}{e}~{z}",
        ]
    # Only the third and fourth output lines are compared.
    self.assertEqual(actual[2:4], expected(**colors))

    # Second case: the whole five-character name is highlighted with E and
    # gets five carets, one per character.
    try:
        ééééé/0
    except Exception as e:
        exc = traceback.TracebackException.from_exception(e)

    actual = "".join(exc.format(colorize=True)).splitlines()
    def expected(t, m, fn, l, f, E, e, z, n):
        return [
            f" {E}ééééé{z}/0",
            f" {E}^^^^^{z}",
        ]
    self.assertEqual(actual[2:4], expected(**colors))

def test_colorized_syntax_error_ascii_display_width(self):
    """Verify caret placement for ASCII text whose width is not its length.

    CTRL-Z is a single ASCII character shown in two columns and an ANSI
    escape sequence is several ASCII characters shown in none, so the
    caret line must be computed from display width rather than from the
    raw character offset.
    """
    E, z, t, m, fn, l = (
        colors[key] for key in ("E", "z", "t", "m", "fn", "l")
    )

    def render(text, offset, end_offset):
        # Build a one-line SyntaxError by hand and return its colorized
        # traceback as a single string.
        error = SyntaxError("invalid syntax")
        error.filename = "<string>"
        error.lineno = error.end_lineno = 1
        error.text = text
        error.offset = offset
        error.end_offset = end_offset
        formatted = traceback.TracebackException.from_exception(error)
        return "".join(formatted.format(colorize=True))

    # CTRL-Z before the error: 'a' (1 col) + '\x1a' (2 cols) puts the
    # caret three columns in.
    ctrl_z_before = (
        f' File {fn}"<string>"{z}, line {l}1{z}\n'
        f' a\x1a{E}${z}\n'
        f' {" " * 3}{E}^{z}\n'
        f'{t}SyntaxError{z}: {m}invalid syntax{z}\n'
    )
    self.assertIn(
        ctrl_z_before,
        render("a\x1a$\n", offset=3, end_offset=4),
    )

    # CTRL-Z inside the highlighted span: '$' (1 col) + '\x1a' (2 cols)
    # needs three carets.
    ctrl_z_inside = (
        f' {E}$\x1a{z}\n'
        f' {E}{"^" * 3}{z}\n'
    )
    self.assertIn(
        ctrl_z_inside,
        render("$\x1a\n", offset=1, end_offset=3),
    )

    # ANSI escape before the error: 'a' (1 col) + '\x1b[1m' (0 cols) +
    # 'b' (1 col) puts the caret two columns in.
    ansi_before = (
        f' a\x1b[1mb{E}${z}\n'
        f' {" " * 2}{E}^{z}\n'
    )
    self.assertIn(
        ansi_before,
        render("a\x1b[1mb$\n", offset=7, end_offset=8),
    )

class TestLazyImportSuggestions(unittest.TestCase):
"""Test that lazy imports are not reified when computing AttributeError suggestions."""
Expand Down
70 changes: 55 additions & 15 deletions Lib/traceback.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
"""Extract, format and print information about Python stack traces."""

import collections.abc
import functools
import itertools
import linecache
import os
import re
import sys
import textwrap
import types
Expand Down Expand Up @@ -684,12 +686,12 @@ def output_line(lineno):
colorized_line_parts = []
colorized_carets_parts = []

for color, group in itertools.groupby(itertools.zip_longest(line, carets, fillvalue=""), key=lambda x: x[1]):
for color, group in itertools.groupby(_zip_display_width(line, carets), key=lambda x: x[1]):
caret_group = list(group)
if color == "^":
if "^" in color:
colorized_line_parts.append(theme.error_highlight + "".join(char for char, _ in caret_group) + theme.reset)
colorized_carets_parts.append(theme.error_highlight + "".join(caret for _, caret in caret_group) + theme.reset)
elif color == "~":
elif "~" in color:
colorized_line_parts.append(theme.error_range + "".join(char for char, _ in caret_group) + theme.reset)
colorized_carets_parts.append(theme.error_range + "".join(caret for _, caret in caret_group) + theme.reset)
else:
Expand Down Expand Up @@ -971,26 +973,64 @@ def setup_positions(expr, force_valid=True):

return None

# East Asian Width codes, as returned by unicodedata.east_asian_width(), for
# wide ("W") and fullwidth ("F") characters.
_WIDE_CHAR_SPECIFIERS = "WF"

def _zip_display_width(line, carets):
carets = iter(carets)
if line.isascii() and '\x1a' not in line:
for char in line:
yield char, next(carets, "")
return

import unicodedata
for char in unicodedata.iter_graphemes(line):
char = str(char)
char_width = _display_width(char)
yield char, "".join(itertools.islice(carets, char_width))


@functools.cache
def _str_width(c: str) -> int:
# copied from _pyrepl.utils to fix gh-130273

if ord(c) < 128:
return 1
import unicodedata
# gh-139246 for zero-width joiner and combining characters
if unicodedata.combining(c):
return 0
category = unicodedata.category(c)
if category == "Cf" and c != "\u00ad":
return 0
w = unicodedata.east_asian_width(c)
if w in ("N", "Na", "H", "A"):
return 1
return 2


# Matches an ANSI CSI escape sequence: ESC "[", then any number of characters
# in the range " " through "@", then one final character in the range "A"-"~".
# _wlen() uses it to discount such sequences from the measured width.
_ANSI_ESCAPE_SEQUENCE = re.compile(r"\x1b\[[ -@]*[A-~]")


def _wlen(s: str) -> int:
    """Return the display width of *s* in columns.

    Per-character widths from _str_width() are summed, the characters
    that belong to ANSI escape sequences are subtracted again, and one
    extra column is added for every CTRL-Z.
    """
    # copied from _pyrepl.utils to fix gh-130273
    if len(s) == 1 and s != "\x1a":
        return _str_width(s)

    summed_width = sum(map(_str_width, s))
    # remove lengths of any escape sequences
    escape_length = sum(map(len, _ANSI_ESCAPE_SEQUENCE.findall(s)))
    # each CTRL-Z takes one column more than the one already counted
    return summed_width - escape_length + s.count("\x1a")


def _display_width(line, offset=None):
    """Return the display width, in columns, of a source code segment.

    Measures how much horizontal space *line* takes on a fixed-width
    output device. If *offset* is given, only the first *offset*
    characters of *line* are measured; otherwise the whole line is.

    The measurement is always delegated to _wlen(). There is deliberately
    no ASCII shortcut that returns the raw character offset: CTRL-Z is
    ASCII but is displayed in two columns, and ANSI escape sequences are
    ASCII but take no columns, so the character count is not a reliable
    width even for ASCII-only text.
    """
    if offset is not None:
        line = line[:offset]
    return _wlen(line)


def _format_note(note, indent, theme):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fix traceback color output with Unicode characters.
Loading