wcwidth 0.2.11__tar.gz → 0.2.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of wcwidth might be problematic. Click here for more details.
- {wcwidth-0.2.11/wcwidth.egg-info → wcwidth-0.2.13}/PKG-INFO +10 -2
- {wcwidth-0.2.11 → wcwidth-0.2.13}/README.rst +9 -1
- {wcwidth-0.2.11 → wcwidth-0.2.13}/bin/update-tables.py +32 -17
- {wcwidth-0.2.11 → wcwidth-0.2.13}/bin/verify-table-integrity.py +38 -10
- {wcwidth-0.2.11 → wcwidth-0.2.13}/bin/wcwidth-browser.py +1 -1
- {wcwidth-0.2.11 → wcwidth-0.2.13}/docs/intro.rst +9 -1
- wcwidth-0.2.13/docs/specs.rst +79 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/setup.py +1 -1
- {wcwidth-0.2.11 → wcwidth-0.2.13}/tests/test_core.py +51 -6
- wcwidth-0.2.13/tests/test_table_integrity.py +15 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth/__init__.py +1 -1
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth/table_wide.py +80 -68
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth/table_zero.py +39 -1
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth/wcwidth.py +5 -2
- {wcwidth-0.2.11 → wcwidth-0.2.13/wcwidth.egg-info}/PKG-INFO +10 -2
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth.egg-info/SOURCES.txt +1 -1
- wcwidth-0.2.11/code_templates/python_table_width.py.j2 +0 -0
- wcwidth-0.2.11/docs/specs.rst +0 -58
- {wcwidth-0.2.11 → wcwidth-0.2.13}/LICENSE +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/MANIFEST.in +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/bin/new-wide-by-version.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/bin/wcwidth-libc-comparator.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/code_templates/python_table.py.j2 +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/code_templates/unicode_version.rst.j2 +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/code_templates/unicode_versions.py.j2 +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/docs/api.rst +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/docs/conf.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/docs/index.rst +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/docs/requirements.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/docs/unicode_version.rst +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-develop.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-docs.in +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-tests36.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-tests37.in +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-tests37.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-tests39.in +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-tests39.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-update.in +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/requirements-update.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/setup.cfg +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/tests/__init__.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/tests/emoji-variation-sequences.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/tests/emoji-zwj-sequences.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/tests/test_emojis.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/tests/test_ucslevel.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/tox.ini +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth/table_vs16.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth/unicode_versions.py +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth.egg-info/dependency_links.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth.egg-info/requires.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth.egg-info/top_level.txt +0 -0
- {wcwidth-0.2.11 → wcwidth-0.2.13}/wcwidth.egg-info/zip-safe +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: wcwidth
|
|
3
|
-
Version: 0.2.11
|
|
3
|
+
Version: 0.2.13
|
|
4
4
|
Summary: Measures the displayed width of unicode strings in a terminal
|
|
5
5
|
Home-page: https://github.com/jquast/wcwidth
|
|
6
6
|
Author: Jeff Quast
|
|
@@ -63,7 +63,7 @@ Example
|
|
|
63
63
|
>>> text = u'コンニチハ'
|
|
64
64
|
|
|
65
65
|
Python **incorrectly** uses the *string length* of 5 codepoints rather than the
|
|
66
|
-
*
|
|
66
|
+
*printable length* of 10 cells, so that when using the `rjust` function, the
|
|
67
67
|
output length is wrong::
|
|
68
68
|
|
|
69
69
|
>>> print(len('コンニチハ'))
|
|
@@ -247,6 +247,13 @@ Other Languages
|
|
|
247
247
|
=======
|
|
248
248
|
History
|
|
249
249
|
=======
|
|
250
|
+
|
|
251
|
+
0.2.13 *2024-01-06*
|
|
252
|
+
* **Bugfix** zero-width support for Hangul Jamo (Korean)
|
|
253
|
+
|
|
254
|
+
0.2.12 *2023-11-21*
|
|
255
|
+
* re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
|
|
256
|
+
|
|
250
257
|
0.2.11 *2023-11-20*
|
|
251
258
|
* Include tests files in the source distribution (`PR #98`_, `PR #100`_).
|
|
252
259
|
|
|
@@ -361,6 +368,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
|
|
|
361
368
|
.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97
|
|
362
369
|
.. _`PR #98`: https://github.com/jquast/wcwidth/pull/98
|
|
363
370
|
.. _`PR #100`: https://github.com/jquast/wcwidth/pull/100
|
|
371
|
+
.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101
|
|
364
372
|
.. _`jquast/blessed`: https://github.com/jquast/blessed
|
|
365
373
|
.. _`selectel/pyte`: https://github.com/selectel/pyte
|
|
366
374
|
.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies
|
|
@@ -32,7 +32,7 @@ Example
|
|
|
32
32
|
>>> text = u'コンニチハ'
|
|
33
33
|
|
|
34
34
|
Python **incorrectly** uses the *string length* of 5 codepoints rather than the
|
|
35
|
-
*
|
|
35
|
+
*printable length* of 10 cells, so that when using the `rjust` function, the
|
|
36
36
|
output length is wrong::
|
|
37
37
|
|
|
38
38
|
>>> print(len('コンニチハ'))
|
|
@@ -216,6 +216,13 @@ Other Languages
|
|
|
216
216
|
=======
|
|
217
217
|
History
|
|
218
218
|
=======
|
|
219
|
+
|
|
220
|
+
0.2.13 *2024-01-06*
|
|
221
|
+
* **Bugfix** zero-width support for Hangul Jamo (Korean)
|
|
222
|
+
|
|
223
|
+
0.2.12 *2023-11-21*
|
|
224
|
+
* re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
|
|
225
|
+
|
|
219
226
|
0.2.11 *2023-11-20*
|
|
220
227
|
* Include tests files in the source distribution (`PR #98`_, `PR #100`_).
|
|
221
228
|
|
|
@@ -330,6 +337,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
|
|
|
330
337
|
.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97
|
|
331
338
|
.. _`PR #98`: https://github.com/jquast/wcwidth/pull/98
|
|
332
339
|
.. _`PR #100`: https://github.com/jquast/wcwidth/pull/100
|
|
340
|
+
.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101
|
|
333
341
|
.. _`jquast/blessed`: https://github.com/jquast/blessed
|
|
334
342
|
.. _`selectel/pyte`: https://github.com/selectel/pyte
|
|
335
343
|
.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies
|
|
@@ -54,6 +54,19 @@ FETCH_BLOCKSIZE = int(os.environ.get('FETCH_BLOCKSIZE', '4096'))
|
|
|
54
54
|
MAX_RETRIES = int(os.environ.get('MAX_RETRIES', '6'))
|
|
55
55
|
BACKOFF_FACTOR = float(os.environ.get('BACKOFF_FACTOR', '0.1'))
|
|
56
56
|
|
|
57
|
+
# Hangul Jamo is a decomposed form of Hangul Syllables, see
|
|
58
|
+
# https://www.unicode.org/faq/korean.html#3
|
|
59
|
+
# https://github.com/ridiculousfish/widecharwidth/pull/17
|
|
60
|
+
# https://github.com/jquast/ucs-detect/issues/9
|
|
61
|
+
# https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
|
|
62
|
+
# "Conjoining Jamo are divided into three classes: L, V, T (Leading
|
|
63
|
+
# consonant, Vowel, Trailing consonant). A Hangul Syllable consists of
|
|
64
|
+
# <LV> or <LVT> sequences."
|
|
65
|
+
HANGUL_JAMO_ZEROWIDTH = (
|
|
66
|
+
*range(0x1160, 0x1200), # Hangul Jungseong Filler .. Hangul Jongseong Ssangnieun
|
|
67
|
+
*range(0xD7B0, 0xD800), # Hangul Jungseong O-Yeo .. Undefined Character of Hangul Jamo Extended-B
|
|
68
|
+
)
|
|
69
|
+
|
|
57
70
|
|
|
58
71
|
def _bisearch(ucs, table):
|
|
59
72
|
"""A copy of wcwwidth._bisearch, to prevent having issues when depending on code that imports
|
|
@@ -112,11 +125,11 @@ class TableEntry:
|
|
|
112
125
|
properties: tuple[str, ...]
|
|
113
126
|
comment: str
|
|
114
127
|
|
|
115
|
-
def
|
|
128
|
+
def filter_by_category_width(self, wide: int) -> bool:
|
|
116
129
|
"""
|
|
117
|
-
Return whether entry matches
|
|
130
|
+
Return whether entry matches displayed width.
|
|
118
131
|
|
|
119
|
-
|
|
132
|
+
Parses both DerivedGeneralCategory.txt and EastAsianWidth.txt
|
|
120
133
|
"""
|
|
121
134
|
if self.code_range is None:
|
|
122
135
|
return False
|
|
@@ -146,13 +159,12 @@ class TableEntry:
|
|
|
146
159
|
return wide == 1
|
|
147
160
|
|
|
148
161
|
@staticmethod
|
|
149
|
-
def
|
|
150
|
-
|
|
151
|
-
wide: int) -> set[tuple[int, int]]:
|
|
162
|
+
def parse_width_category_values(table_iter: Iterator[TableEntry],
|
|
163
|
+
wide: int) -> set[tuple[int, int]]:
|
|
152
164
|
"""Parse value ranges of unicode data files, by given category and width."""
|
|
153
165
|
return {n
|
|
154
166
|
for entry in table_iter
|
|
155
|
-
if entry.
|
|
167
|
+
if entry.filter_by_category_width(wide)
|
|
156
168
|
for n in list(range(entry.code_range[0], entry.code_range[1]))}
|
|
157
169
|
|
|
158
170
|
|
|
@@ -326,18 +338,19 @@ def fetch_table_wide_data() -> UnicodeTableRenderCtx:
|
|
|
326
338
|
for version in fetch_unicode_versions():
|
|
327
339
|
# parse typical 'wide' characters by categories 'W' and 'F',
|
|
328
340
|
table[version] = parse_category(fname=UnicodeDataFile.EastAsianWidth(version),
|
|
329
|
-
category_codes=('W', 'F'),
|
|
330
341
|
wide=2)
|
|
331
342
|
|
|
332
343
|
# subtract(!) wide characters that were defined above as 'W' category in EastAsianWidth,
|
|
333
344
|
# but also zero-width category 'Mn' or 'Mc' in DerivedGeneralCategory!
|
|
334
|
-
table[version].values.
|
|
335
|
-
|
|
336
|
-
|
|
345
|
+
table[version].values = table[version].values.difference(parse_category(
|
|
346
|
+
fname=UnicodeDataFile.DerivedGeneralCategory(version),
|
|
347
|
+
wide=0).values)
|
|
348
|
+
|
|
349
|
+
# Also subtract Hangul Jamo Vowels and Hangul Trailing Consonants
|
|
350
|
+
table[version].values = table[version].values.difference(HANGUL_JAMO_ZEROWIDTH)
|
|
337
351
|
|
|
338
352
|
# finally, join with atypical 'wide' characters defined by category 'Sk',
|
|
339
353
|
table[version].values.update(parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version),
|
|
340
|
-
category_codes=('Sk',),
|
|
341
354
|
wide=2).values)
|
|
342
355
|
return UnicodeTableRenderCtx('WIDE_EASTASIAN', table)
|
|
343
356
|
|
|
@@ -352,11 +365,13 @@ def fetch_table_zero_data() -> UnicodeTableRenderCtx:
|
|
|
352
365
|
for version in fetch_unicode_versions():
|
|
353
366
|
# Determine values of zero-width character lookup table by the following category codes
|
|
354
367
|
table[version] = parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version),
|
|
355
|
-
category_codes=('Me', 'Mn', 'Mc', 'Cf', 'Zl', 'Zp', 'Sk'),
|
|
356
368
|
wide=0)
|
|
357
369
|
|
|
358
|
-
#
|
|
370
|
+
# Include NULL
|
|
359
371
|
table[version].values.add(0)
|
|
372
|
+
|
|
373
|
+
# Add Hangul Jamo Vowels and Hangul Trailing Consonants
|
|
374
|
+
table[version].values.update(HANGUL_JAMO_ZEROWIDTH)
|
|
360
375
|
return UnicodeTableRenderCtx('ZERO_WIDTH', table)
|
|
361
376
|
|
|
362
377
|
|
|
@@ -501,9 +516,9 @@ def parse_vs16_table(fp: Iterable[str]) -> Iterator[TableEntry]:
|
|
|
501
516
|
|
|
502
517
|
|
|
503
518
|
@functools.cache
|
|
504
|
-
def parse_category(fname: str,
|
|
519
|
+
def parse_category(fname: str, wide: int) -> TableDef:
|
|
505
520
|
"""Parse value ranges of unicode data files, by given categories into string tables."""
|
|
506
|
-
print(f'parsing {fname}
|
|
521
|
+
print(f'parsing {fname}, wide={wide}: ', end='', flush=True)
|
|
507
522
|
|
|
508
523
|
with open(fname, encoding='utf-8') as f:
|
|
509
524
|
table_iter = parse_unicode_table(f)
|
|
@@ -512,7 +527,7 @@ def parse_category(fname: str, category_codes: Container[str], wide: int) -> Tab
|
|
|
512
527
|
version = next(table_iter).comment.strip()
|
|
513
528
|
# and "date string" from second line
|
|
514
529
|
date = next(table_iter).comment.split(':', 1)[1].strip()
|
|
515
|
-
values = TableEntry.
|
|
530
|
+
values = TableEntry.parse_width_category_values(table_iter, wide)
|
|
516
531
|
print('ok')
|
|
517
532
|
return TableDef(version, date, values)
|
|
518
533
|
|
|
@@ -63,9 +63,30 @@ Category code was changed from 'Mc' to 'Lo':
|
|
|
63
63
|
import logging
|
|
64
64
|
|
|
65
65
|
|
|
66
|
+
def bisearch_pair(ucs, table):
|
|
67
|
+
"""
|
|
68
|
+
A copy of wcwidth._bisearch() but also returns the range of matched values.
|
|
69
|
+
"""
|
|
70
|
+
lbound = 0
|
|
71
|
+
ubound = len(table) - 1
|
|
72
|
+
|
|
73
|
+
if ucs < table[0][0] or ucs > table[ubound][1]:
|
|
74
|
+
return (0, None, None)
|
|
75
|
+
while ubound >= lbound:
|
|
76
|
+
mid = (lbound + ubound) // 2
|
|
77
|
+
if ucs > table[mid][1]:
|
|
78
|
+
lbound = mid + 1
|
|
79
|
+
elif ucs < table[mid][0]:
|
|
80
|
+
ubound = mid - 1
|
|
81
|
+
else:
|
|
82
|
+
return (1, table[mid][0], table[mid][1])
|
|
83
|
+
|
|
84
|
+
return (0, None, None)
|
|
85
|
+
|
|
86
|
+
|
|
66
87
|
def main(log: logging.Logger):
|
|
67
|
-
|
|
68
|
-
|
|
88
|
+
from wcwidth import ZERO_WIDTH, WIDE_EASTASIAN, list_versions
|
|
89
|
+
|
|
69
90
|
reversed_uni_versions = list(reversed(list_versions()))
|
|
70
91
|
tables = {'ZERO_WIDTH': ZERO_WIDTH,
|
|
71
92
|
'WIDE_EASTASIAN': WIDE_EASTASIAN}
|
|
@@ -81,14 +102,21 @@ def main(log: logging.Logger):
|
|
|
81
102
|
other_table = tables[other_table_name][version]
|
|
82
103
|
for start_range, stop_range in curr_table:
|
|
83
104
|
for unichar_n in range(start_range, stop_range):
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
105
|
+
result, _, _ = bisearch_pair(unichar_n, next_table)
|
|
106
|
+
if not result:
|
|
107
|
+
log.info(
|
|
108
|
+
f'value 0x{unichar_n:05x} in table_name={table_name}'
|
|
109
|
+
f' version={version} is not defined in next_version={next_version}'
|
|
110
|
+
f' from inclusive range {hex(start_range)}-{hex(stop_range)}'
|
|
111
|
+
)
|
|
112
|
+
result, lbound, ubound = bisearch_pair(unichar_n, other_table)
|
|
113
|
+
if result:
|
|
114
|
+
log.error(
|
|
115
|
+
f'value 0x{unichar_n:05x} in table_name={table_name}'
|
|
116
|
+
f' version={version} is duplicated in other_table_name={other_table_name}'
|
|
117
|
+
f' from inclusive range 0x{start_range:05x}-0x{stop_range:05x} of'
|
|
118
|
+
f' {table_name} against 0x{lbound:05x}-0x{ubound:05x} in {other_table_name}'
|
|
119
|
+
)
|
|
92
120
|
errors += 1
|
|
93
121
|
if errors:
|
|
94
122
|
log.error(f'{errors} errors, exit 1')
|
|
@@ -116,7 +116,7 @@ class WcCombinedCharacterGenerator(object):
|
|
|
116
116
|
"""
|
|
117
117
|
self.characters = []
|
|
118
118
|
letters_o = ('o' * width)
|
|
119
|
-
for (begin, end) in ZERO_WIDTH[unicode_version]:
|
|
119
|
+
for (begin, end) in ZERO_WIDTH[_wcmatch_version(unicode_version)]:
|
|
120
120
|
for val in [_val for _val in
|
|
121
121
|
range(begin, end + 1)
|
|
122
122
|
if _val <= LIMIT_UCS]:
|
|
@@ -32,7 +32,7 @@ Example
|
|
|
32
32
|
>>> text = u'コンニチハ'
|
|
33
33
|
|
|
34
34
|
Python **incorrectly** uses the *string length* of 5 codepoints rather than the
|
|
35
|
-
*
|
|
35
|
+
*printable length* of 10 cells, so that when using the `rjust` function, the
|
|
36
36
|
output length is wrong::
|
|
37
37
|
|
|
38
38
|
>>> print(len('コンニチハ'))
|
|
@@ -216,6 +216,13 @@ Other Languages
|
|
|
216
216
|
=======
|
|
217
217
|
History
|
|
218
218
|
=======
|
|
219
|
+
|
|
220
|
+
0.2.13 *2024-01-06*
|
|
221
|
+
* **Bugfix** zero-width support for Hangul Jamo (Korean)
|
|
222
|
+
|
|
223
|
+
0.2.12 *2023-11-21*
|
|
224
|
+
* re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
|
|
225
|
+
|
|
219
226
|
0.2.11 *2023-11-20*
|
|
220
227
|
* Include tests files in the source distribution (`PR #98`_, `PR #100`_).
|
|
221
228
|
|
|
@@ -330,6 +337,7 @@ https://www.cl.cam.ac.uk/~mgk25/ucs/wcwidth.c::
|
|
|
330
337
|
.. _`PR #97`: https://github.com/jquast/wcwidth/pull/97
|
|
331
338
|
.. _`PR #98`: https://github.com/jquast/wcwidth/pull/98
|
|
332
339
|
.. _`PR #100`: https://github.com/jquast/wcwidth/pull/100
|
|
340
|
+
.. _`Issue #101`: https://github.com/jquast/wcwidth/issues/101
|
|
333
341
|
.. _`jquast/blessed`: https://github.com/jquast/blessed
|
|
334
342
|
.. _`selectel/pyte`: https://github.com/selectel/pyte
|
|
335
343
|
.. _`thomasballinger/curtsies`: https://github.com/thomasballinger/curtsies
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
.. _Specification:
|
|
2
|
+
|
|
3
|
+
=============
|
|
4
|
+
Specification
|
|
5
|
+
=============
|
|
6
|
+
|
|
7
|
+
This document defines how the wcwidth library measures the printable width
|
|
8
|
+
of characters of a string.
|
|
9
|
+
|
|
10
|
+
Width of -1
|
|
11
|
+
-----------
|
|
12
|
+
|
|
13
|
+
The following have a column width of -1 for function :func:`wcwidth.wcwidth`
|
|
14
|
+
|
|
15
|
+
- ``C0`` control characters (`U+0001`_ through `U+001F`_).
|
|
16
|
+
- ``C1`` control characters and ``DEL`` (`U+007F`_ up to, but not including, `U+00A0`_).
|
|
17
|
+
|
|
18
|
+
If any character in sequence contains ``C0`` or ``C1`` control characters, the final
|
|
19
|
+
return value of :func:`wcwidth.wcswidth` is -1.
|
|
20
|
+
|
|
21
|
+
Width of 0
|
|
22
|
+
----------
|
|
23
|
+
|
|
24
|
+
Any characters defined by category codes in `DerivedGeneralCategory.txt`_ files:
|
|
25
|
+
|
|
26
|
+
- 'Me': Enclosing Combining Mark, approx. 13 characters.
|
|
27
|
+
- 'Mn': Nonspacing Combining Mark, approx. 1,839 characters.
|
|
28
|
+
- 'Mc': Spacing Mark, approx. 443 characters.
|
|
29
|
+
- 'Cf': Format control character, approx. 161 characters.
|
|
30
|
+
- 'Zl': `U+2028`_ LINE SEPARATOR only
|
|
31
|
+
- 'Zp': `U+2029`_ PARAGRAPH SEPARATOR only
|
|
32
|
+
- 'Sk': Modifier Symbol, approx. 4 characters of only those where phrase
|
|
33
|
+
``'EMOJI MODIFIER'`` is present in comment of unicode data file.
|
|
34
|
+
|
|
35
|
+
The NULL character (`U+0000`_).
|
|
36
|
+
|
|
37
|
+
Any character following ZWJ (`U+200D`_) when in sequence by
|
|
38
|
+
function :func:`wcwidth.wcswidth`.
|
|
39
|
+
|
|
40
|
+
Hangul Jamo Jungseong and "Extended-B" code blocks, `U+1160`_ through
|
|
41
|
+
`U+11FF`_ and `U+D7B0`_ through `U+D7FF`_.
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
Width of 1
|
|
45
|
+
----------
|
|
46
|
+
|
|
47
|
+
String characters are measured as width of 1 when they are not
|
|
48
|
+
measured as `Width of 0`_ or `Width of 2`_.
|
|
49
|
+
|
|
50
|
+
Width of 2
|
|
51
|
+
----------
|
|
52
|
+
|
|
53
|
+
Any character defined by East Asian Fullwidth (``F``) or Wide (``W``)
|
|
54
|
+
properties in `EastAsianWidth.txt`_ files, except those that are defined by the
|
|
55
|
+
Category codes of Nonspacing Mark (``Mn``) and Spacing Mark (``Mc``).
|
|
56
|
+
|
|
57
|
+
Any characters of Modifier Symbol category, ``'Sk'`` where ``'FULLWIDTH'`` is
|
|
58
|
+
present in comment of unicode data file, approx. 3 characters.
|
|
59
|
+
|
|
60
|
+
Any character in sequence with `U+FE0F`_ (Variation Selector 16) defined by
|
|
61
|
+
`emoji-variation-sequences.txt`_ as ``emoji style``.
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
.. _`U+0000`: https://codepoints.net/U+0000
|
|
65
|
+
.. _`U+0001`: https://codepoints.net/U+0001
|
|
66
|
+
.. _`U+001F`: https://codepoints.net/U+001F
|
|
67
|
+
.. _`U+007F`: https://codepoints.net/U+007F
|
|
68
|
+
.. _`U+00A0`: https://codepoints.net/U+00A0
|
|
69
|
+
.. _`U+1160`: https://codepoints.net/U+1160
|
|
70
|
+
.. _`U+11FF`: https://codepoints.net/U+11FF
|
|
71
|
+
.. _`U+200D`: https://codepoints.net/U+200D
|
|
72
|
+
.. _`U+2028`: https://codepoints.net/U+2028
|
|
73
|
+
.. _`U+2029`: https://codepoints.net/U+2029
|
|
74
|
+
.. _`U+D7B0`: https://codepoints.net/U+D7B0
|
|
75
|
+
.. _`U+D7FF`: https://codepoints.net/U+D7FF
|
|
76
|
+
.. _`U+FE0F`: https://codepoints.net/U+FE0F
|
|
77
|
+
.. _`DerivedGeneralCategory.txt`: https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt
|
|
78
|
+
.. _`EastAsianWidth.txt`: https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
|
|
79
|
+
.. _`emoji-variation-sequences.txt`: https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-variation-sequences.txt
|
|
@@ -44,7 +44,7 @@ def main():
|
|
|
44
44
|
setuptools.setup(
|
|
45
45
|
name='wcwidth',
|
|
46
46
|
# NOTE: manually manage __version__ in wcwidth/__init__.py !
|
|
47
|
-
version='0.2.11',
|
|
47
|
+
version='0.2.13',
|
|
48
48
|
description=(
|
|
49
49
|
"Measures the displayed width of unicode strings in a terminal"),
|
|
50
50
|
long_description=codecs.open(
|
|
@@ -222,17 +222,48 @@ def test_balinese_script():
|
|
|
222
222
|
assert length_phrase == expect_length_phrase
|
|
223
223
|
|
|
224
224
|
|
|
225
|
+
def test_kr_jamo():
|
|
226
|
+
"""
|
|
227
|
+
Test basic combining of HANGUL CHOSEONG and JUNGSEONG
|
|
228
|
+
|
|
229
|
+
Example from Raymond Chen's blog post,
|
|
230
|
+
https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
|
|
231
|
+
"""
|
|
232
|
+
# This is an example where both characters are "wide" when displayed alone.
|
|
233
|
+
#
|
|
234
|
+
# But JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
|
|
235
|
+
#
|
|
236
|
+
# This wcwidth library understands their width only when in combination,
|
|
237
|
+
# and not by independent display, like other zero-width characters that may
|
|
238
|
+
# only combine with an appropriate preceding character.
|
|
239
|
+
phrase = (
|
|
240
|
+
u"\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
|
|
241
|
+
u"\u1161" # ᅡ HANGUL JUNGSEONG A (vowel)
|
|
242
|
+
)
|
|
243
|
+
expect_length_each = (2, 0)
|
|
244
|
+
expect_length_phrase = 2
|
|
245
|
+
|
|
246
|
+
# exercise,
|
|
247
|
+
length_each = tuple(map(wcwidth.wcwidth, phrase))
|
|
248
|
+
length_phrase = wcwidth.wcswidth(phrase)
|
|
249
|
+
|
|
250
|
+
# verify.
|
|
251
|
+
assert length_each == expect_length_each
|
|
252
|
+
assert length_phrase == expect_length_phrase
|
|
253
|
+
|
|
254
|
+
|
|
225
255
|
def test_kr_jamo_filler():
|
|
226
256
|
u"""
|
|
227
257
|
Jamo filler is 0 width.
|
|
228
258
|
|
|
229
|
-
|
|
230
|
-
like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal,
|
|
231
|
-
seems it doesn't matter whether it is implemented or not, they are not typically used !
|
|
259
|
+
Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
|
|
232
260
|
"""
|
|
233
|
-
phrase =
|
|
234
|
-
|
|
235
|
-
|
|
261
|
+
phrase = (
|
|
262
|
+
u"\u1100" # HANGUL CHOSEONG KIYEOK (consonant)
|
|
263
|
+
u"\u1160" # HANGUL JUNGSEONG FILLER (vowel)
|
|
264
|
+
)
|
|
265
|
+
expect_length_each = (2, 0)
|
|
266
|
+
expect_length_phrase = 2
|
|
236
267
|
|
|
237
268
|
# exercise,
|
|
238
269
|
length_each = tuple(map(wcwidth.wcwidth, phrase))
|
|
@@ -355,3 +386,17 @@ def test_kannada_script_2():
|
|
|
355
386
|
# verify.
|
|
356
387
|
assert length_each == expect_length_each
|
|
357
388
|
assert length_phrase == expect_length_phrase
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def test_zero_wide_conflict():
|
|
392
|
+
# Test characters considered both "wide" and "zero" width
|
|
393
|
+
# - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In
|
|
394
|
+
# + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine
|
|
395
|
+
assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2
|
|
396
|
+
assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0
|
|
397
|
+
|
|
398
|
+
# - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto
|
|
399
|
+
# + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
|
|
400
|
+
assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0
|
|
401
|
+
assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0
|
|
402
|
+
assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Executes verify-table-integrity.py as a unit test.
|
|
3
|
+
"""
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
import subprocess
|
|
7
|
+
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
@pytest.mark.skipif(sys.version_info[:2] != (3, 12), reason='Test only with a single version of python')
|
|
11
|
+
def test_verify_table_integrity():
|
|
12
|
+
subprocess.check_output([sys.executable, os.path.join(os.path.dirname(__file__),
|
|
13
|
+
os.path.pardir,
|
|
14
|
+
'bin',
|
|
15
|
+
'verify-table-integrity.py')])
|
|
@@ -26,4 +26,4 @@ __all__ = ('wcwidth', 'wcswidth', 'list_versions')
|
|
|
26
26
|
# We also used pkg_resources to load unicode version tables from version.json,
|
|
27
27
|
# generated by bin/update-tables.py, but some environments are unable to
|
|
28
28
|
# import pkg_resources for one reason or another, yikes!
|
|
29
|
-
__version__ = '0.2.11'
|
|
29
|
+
__version__ = '0.2.13'
|