1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
|
from __future__ import annotations
from functools import lru_cache
from typing import Callable
from ._cell_widths import CELL_WIDTHS
# Ranges of unicode ordinals that produce a 1-cell wide character
# This is non-exhaustive, but covers most common Western characters
_SINGLE_CELL_UNICODE_RANGES: list[tuple[int, int]] = [
(0x20, 0x7E), # Latin (excluding non-printable)
(0xA0, 0xAC),
(0xAE, 0x002FF),
(0x00370, 0x00482), # Greek / Cyrillic
(0x02500, 0x025FC), # Box drawing, box elements, geometric shapes
(0x02800, 0x028FF), # Braille
]
# A set of characters that are a single cell wide
_SINGLE_CELLS = frozenset(
[
character
for _start, _end in _SINGLE_CELL_UNICODE_RANGES
for character in map(chr, range(_start, _end + 1))
]
)
# When called with a string this will return True if all
# characters are single-cell, otherwise False
_is_single_cell_widths: Callable[[str], bool] = _SINGLE_CELLS.issuperset
@lru_cache(4096)
def cached_cell_len(text: str) -> int:
"""Get the number of cells required to display text.
This method always caches, which may use up a lot of memory. It is recommended to use
`cell_len` over this method.
Args:
text (str): Text to display.
Returns:
int: Get the number of cells required to display text.
"""
if _is_single_cell_widths(text):
return len(text)
return sum(map(get_character_cell_size, text))
def cell_len(text: str, _cell_len: Callable[[str], int] = cached_cell_len) -> int:
"""Get the number of cells required to display text.
Args:
text (str): Text to display.
Returns:
int: Get the number of cells required to display text.
"""
if len(text) < 512:
return _cell_len(text)
if _is_single_cell_widths(text):
return len(text)
return sum(map(get_character_cell_size, text))
@lru_cache(maxsize=4096)
def get_character_cell_size(character: str) -> int:
"""Get the cell size of a character.
Args:
character (str): A single character.
Returns:
int: Number of cells (0, 1 or 2) occupied by that character.
"""
codepoint = ord(character)
_table = CELL_WIDTHS
lower_bound = 0
upper_bound = len(_table) - 1
index = (lower_bound + upper_bound) // 2
while True:
start, end, width = _table[index]
if codepoint < start:
upper_bound = index - 1
elif codepoint > end:
lower_bound = index + 1
else:
return 0 if width == -1 else width
if upper_bound < lower_bound:
break
index = (lower_bound + upper_bound) // 2
return 1
def set_cell_size(text: str, total: int) -> str:
"""Set the length of a string to fit within given number of cells."""
if _is_single_cell_widths(text):
size = len(text)
if size < total:
return text + " " * (total - size)
return text[:total]
if total <= 0:
return ""
cell_size = cell_len(text)
if cell_size == total:
return text
if cell_size < total:
return text + " " * (total - cell_size)
start = 0
end = len(text)
# Binary search until we find the right size
while True:
pos = (start + end) // 2
before = text[: pos + 1]
before_len = cell_len(before)
if before_len == total + 1 and cell_len(before[-1]) == 2:
return before[:-1] + " "
if before_len == total:
return before
if before_len > total:
end = pos
else:
start = pos
def chop_cells(
text: str,
width: int,
) -> list[str]:
"""Split text into lines such that each line fits within the available (cell) width.
Args:
text: The text to fold such that it fits in the given width.
width: The width available (number of cells).
Returns:
A list of strings such that each string in the list has cell width
less than or equal to the available width.
"""
_get_character_cell_size = get_character_cell_size
lines: list[list[str]] = [[]]
append_new_line = lines.append
append_to_last_line = lines[-1].append
total_width = 0
for character in text:
cell_width = _get_character_cell_size(character)
char_doesnt_fit = total_width + cell_width > width
if char_doesnt_fit:
append_new_line([character])
append_to_last_line = lines[-1].append
total_width = cell_width
else:
append_to_last_line(character)
total_width += cell_width
return ["".join(line) for line in lines]
if __name__ == "__main__": # pragma: no cover
print(get_character_cell_size("😽"))
for line in chop_cells("""这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。""", 8):
print(line)
for n in range(80, 1, -1):
print(set_cell_size("""这是对亚洲语言支持的测试。面对模棱两可的想法,拒绝猜测的诱惑。""", n) + "|")
print("x" * n)
|