wcwidth 0.2.12__tar.gz → 0.2.13__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of wcwidth might be problematic. Click here for more details.

Files changed (52) hide show
  1. {wcwidth-0.2.12/wcwidth.egg-info → wcwidth-0.2.13}/PKG-INFO +7 -3
  2. {wcwidth-0.2.12 → wcwidth-0.2.13}/README.rst +6 -2
  3. {wcwidth-0.2.12 → wcwidth-0.2.13}/bin/update-tables.py +32 -17
  4. {wcwidth-0.2.12 → wcwidth-0.2.13}/bin/verify-table-integrity.py +38 -10
  5. {wcwidth-0.2.12 → wcwidth-0.2.13}/bin/wcwidth-browser.py +1 -1
  6. {wcwidth-0.2.12 → wcwidth-0.2.13}/docs/intro.rst +6 -2
  7. wcwidth-0.2.13/docs/specs.rst +79 -0
  8. {wcwidth-0.2.12 → wcwidth-0.2.13}/setup.py +1 -1
  9. {wcwidth-0.2.12 → wcwidth-0.2.13}/tests/test_core.py +51 -6
  10. wcwidth-0.2.13/tests/test_table_integrity.py +15 -0
  11. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth/__init__.py +1 -1
  12. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth/table_wide.py +80 -68
  13. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth/table_zero.py +39 -1
  14. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth/wcwidth.py +5 -2
  15. {wcwidth-0.2.12 → wcwidth-0.2.13/wcwidth.egg-info}/PKG-INFO +7 -3
  16. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth.egg-info/SOURCES.txt +1 -1
  17. wcwidth-0.2.12/code_templates/python_table_width.py.j2 +0 -0
  18. wcwidth-0.2.12/docs/specs.rst +0 -58
  19. {wcwidth-0.2.12 → wcwidth-0.2.13}/LICENSE +0 -0
  20. {wcwidth-0.2.12 → wcwidth-0.2.13}/MANIFEST.in +0 -0
  21. {wcwidth-0.2.12 → wcwidth-0.2.13}/bin/new-wide-by-version.py +0 -0
  22. {wcwidth-0.2.12 → wcwidth-0.2.13}/bin/wcwidth-libc-comparator.py +0 -0
  23. {wcwidth-0.2.12 → wcwidth-0.2.13}/code_templates/python_table.py.j2 +0 -0
  24. {wcwidth-0.2.12 → wcwidth-0.2.13}/code_templates/unicode_version.rst.j2 +0 -0
  25. {wcwidth-0.2.12 → wcwidth-0.2.13}/code_templates/unicode_versions.py.j2 +0 -0
  26. {wcwidth-0.2.12 → wcwidth-0.2.13}/docs/api.rst +0 -0
  27. {wcwidth-0.2.12 → wcwidth-0.2.13}/docs/conf.py +0 -0
  28. {wcwidth-0.2.12 → wcwidth-0.2.13}/docs/index.rst +0 -0
  29. {wcwidth-0.2.12 → wcwidth-0.2.13}/docs/requirements.txt +0 -0
  30. {wcwidth-0.2.12 → wcwidth-0.2.13}/docs/unicode_version.rst +0 -0
  31. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-develop.txt +0 -0
  32. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-docs.in +0 -0
  33. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-tests36.txt +0 -0
  34. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-tests37.in +0 -0
  35. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-tests37.txt +0 -0
  36. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-tests39.in +0 -0
  37. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-tests39.txt +0 -0
  38. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-update.in +0 -0
  39. {wcwidth-0.2.12 → wcwidth-0.2.13}/requirements-update.txt +0 -0
  40. {wcwidth-0.2.12 → wcwidth-0.2.13}/setup.cfg +0 -0
  41. {wcwidth-0.2.12 → wcwidth-0.2.13}/tests/__init__.py +0 -0
  42. {wcwidth-0.2.12 → wcwidth-0.2.13}/tests/emoji-variation-sequences.txt +0 -0
  43. {wcwidth-0.2.12 → wcwidth-0.2.13}/tests/emoji-zwj-sequences.txt +0 -0
  44. {wcwidth-0.2.12 → wcwidth-0.2.13}/tests/test_emojis.py +0 -0
  45. {wcwidth-0.2.12 → wcwidth-0.2.13}/tests/test_ucslevel.py +0 -0
  46. {wcwidth-0.2.12 → wcwidth-0.2.13}/tox.ini +0 -0
  47. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth/table_vs16.py +0 -0
  48. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth/unicode_versions.py +0 -0
  49. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth.egg-info/dependency_links.txt +0 -0
  50. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth.egg-info/requires.txt +0 -0
  51. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth.egg-info/top_level.txt +0 -0
  52. {wcwidth-0.2.12 → wcwidth-0.2.13}/wcwidth.egg-info/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: wcwidth
3
- Version: 0.2.12
3
+ Version: 0.2.13
4
4
  Summary: Measures the displayed width of unicode strings in a terminal
5
5
  Home-page: https://github.com/jquast/wcwidth
6
6
  Author: Jeff Quast
@@ -63,7 +63,7 @@ Example
63
63
  >>> text = u'コンニチハ'
64
64
 
65
65
  Python **incorrectly** uses the *string length* of 5 codepoints rather than the
66
- *printible length* of 10 cells, so that when using the `rjust` function, the
66
+ *printable length* of 10 cells, so that when using the `rjust` function, the
67
67
  output length is wrong::
68
68
 
69
69
  >>> print(len('コンニチハ'))
@@ -247,8 +247,12 @@ Other Languages
247
247
  =======
248
248
  History
249
249
  =======
250
+
251
+ 0.2.13 *2024-01-06*
252
+ * **Bugfix** zero-width support for Hangul Jamo (Korean)
253
+
250
254
  0.2.12 *2023-11-21*
251
- * re-release to remove .pyi file misplaced in wheel files `Issue #101`.
255
+ * re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
252
256
 
253
257
  0.2.11 *2023-11-20*
254
258
  * Include tests files in the source distribution (`PR #98`_, `PR #100`_).
@@ -32,7 +32,7 @@ Example
32
32
  >>> text = u'コンニチハ'
33
33
 
34
34
  Python **incorrectly** uses the *string length* of 5 codepoints rather than the
35
- *printible length* of 10 cells, so that when using the `rjust` function, the
35
+ *printable length* of 10 cells, so that when using the `rjust` function, the
36
36
  output length is wrong::
37
37
 
38
38
  >>> print(len('コンニチハ'))
@@ -216,8 +216,12 @@ Other Languages
216
216
  =======
217
217
  History
218
218
  =======
219
+
220
+ 0.2.13 *2024-01-06*
221
+ * **Bugfix** zero-width support for Hangul Jamo (Korean)
222
+
219
223
  0.2.12 *2023-11-21*
220
- * re-release to remove .pyi file misplaced in wheel files `Issue #101`.
224
+ * re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
221
225
 
222
226
  0.2.11 *2023-11-20*
223
227
  * Include tests files in the source distribution (`PR #98`_, `PR #100`_).
@@ -54,6 +54,19 @@ FETCH_BLOCKSIZE = int(os.environ.get('FETCH_BLOCKSIZE', '4096'))
54
54
  MAX_RETRIES = int(os.environ.get('MAX_RETRIES', '6'))
55
55
  BACKOFF_FACTOR = float(os.environ.get('BACKOFF_FACTOR', '0.1'))
56
56
 
57
+ # Hangul Jamo is a decomposed form of Hangul Syllables, see
58
+ # https://www.unicode.org/faq/korean.html#3
59
+ # https://github.com/ridiculousfish/widecharwidth/pull/17
60
+ # https://github.com/jquast/ucs-detect/issues/9
61
+ # https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
62
+ # "Conjoining Jamo are divided into three classes: L, V, T (Leading
63
+ # consonant, Vowel, Trailing consonant). A Hangul Syllable consists of
64
+ # <LV> or <LVT> sequences."
65
+ HANGUL_JAMO_ZEROWIDTH = (
66
+ *range(0x1160, 0x1200), # Hangul Jungseong Filler .. Hangul Jongseong Ssangnieun
67
+ *range(0xD7B0, 0xD800), # Hangul Jungseong O-Yeo .. Undefined Character of Hangul Jamo Extended-B
68
+ )
69
+
57
70
 
58
71
  def _bisearch(ucs, table):
59
72
  """A copy of wcwwidth._bisearch, to prevent having issues when depending on code that imports
@@ -112,11 +125,11 @@ class TableEntry:
112
125
  properties: tuple[str, ...]
113
126
  comment: str
114
127
 
115
- def filter_by_category(self, category_codes: str, wide: int) -> bool:
128
+ def filter_by_category_width(self, wide: int) -> bool:
116
129
  """
117
- Return whether entry matches given category code and displayed width.
130
+ Return whether entry matches displayed width.
118
131
 
119
- Categories are described here, https://www.unicode.org/reports/tr44/#GC_Values_Table
132
+ Parses both DerivedGeneralCategory.txt and EastAsianWidth.txt
120
133
  """
121
134
  if self.code_range is None:
122
135
  return False
@@ -146,13 +159,12 @@ class TableEntry:
146
159
  return wide == 1
147
160
 
148
161
  @staticmethod
149
- def parse_category_values(category_codes: str,
150
- table_iter: Iterator[TableEntry],
151
- wide: int) -> set[tuple[int, int]]:
162
+ def parse_width_category_values(table_iter: Iterator[TableEntry],
163
+ wide: int) -> set[tuple[int, int]]:
152
164
  """Parse value ranges of unicode data files, by given category and width."""
153
165
  return {n
154
166
  for entry in table_iter
155
- if entry.filter_by_category(category_codes, wide)
167
+ if entry.filter_by_category_width(wide)
156
168
  for n in list(range(entry.code_range[0], entry.code_range[1]))}
157
169
 
158
170
 
@@ -326,18 +338,19 @@ def fetch_table_wide_data() -> UnicodeTableRenderCtx:
326
338
  for version in fetch_unicode_versions():
327
339
  # parse typical 'wide' characters by categories 'W' and 'F',
328
340
  table[version] = parse_category(fname=UnicodeDataFile.EastAsianWidth(version),
329
- category_codes=('W', 'F'),
330
341
  wide=2)
331
342
 
332
343
  # subtract(!) wide characters that were defined above as 'W' category in EastAsianWidth,
333
344
  # but also zero-width category 'Mn' or 'Mc' in DerivedGeneralCategory!
334
- table[version].values.discard(parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version),
335
- category_codes=('Mn', 'Mc'),
336
- wide=0).values)
345
+ table[version].values = table[version].values.difference(parse_category(
346
+ fname=UnicodeDataFile.DerivedGeneralCategory(version),
347
+ wide=0).values)
348
+
349
+ # Also subtract Hangul Jamo Vowels and Hangul Trailing Consonants
350
+ table[version].values = table[version].values.difference(HANGUL_JAMO_ZEROWIDTH)
337
351
 
338
352
  # finally, join with atypical 'wide' characters defined by category 'Sk',
339
353
  table[version].values.update(parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version),
340
- category_codes=('Sk',),
341
354
  wide=2).values)
342
355
  return UnicodeTableRenderCtx('WIDE_EASTASIAN', table)
343
356
 
@@ -352,11 +365,13 @@ def fetch_table_zero_data() -> UnicodeTableRenderCtx:
352
365
  for version in fetch_unicode_versions():
353
366
  # Determine values of zero-width character lookup table by the following category codes
354
367
  table[version] = parse_category(fname=UnicodeDataFile.DerivedGeneralCategory(version),
355
- category_codes=('Me', 'Mn', 'Mc', 'Cf', 'Zl', 'Zp', 'Sk'),
356
368
  wide=0)
357
369
 
358
- # And, include NULL
370
+ # Include NULL
359
371
  table[version].values.add(0)
372
+
373
+ # Add Hangul Jamo Vowels and Hangul Trailing Consonants
374
+ table[version].values.update(HANGUL_JAMO_ZEROWIDTH)
360
375
  return UnicodeTableRenderCtx('ZERO_WIDTH', table)
361
376
 
362
377
 
@@ -501,9 +516,9 @@ def parse_vs16_table(fp: Iterable[str]) -> Iterator[TableEntry]:
501
516
 
502
517
 
503
518
  @functools.cache
504
- def parse_category(fname: str, category_codes: Container[str], wide: int) -> TableDef:
519
+ def parse_category(fname: str, wide: int) -> TableDef:
505
520
  """Parse value ranges of unicode data files, by given categories into string tables."""
506
- print(f'parsing {fname} category_codes={",".join(category_codes)}: ', end='', flush=True)
521
+ print(f'parsing {fname}, wide={wide}: ', end='', flush=True)
507
522
 
508
523
  with open(fname, encoding='utf-8') as f:
509
524
  table_iter = parse_unicode_table(f)
@@ -512,7 +527,7 @@ def parse_category(fname: str, category_codes: Container[str], wide: int) -> Tab
512
527
  version = next(table_iter).comment.strip()
513
528
  # and "date string" from second line
514
529
  date = next(table_iter).comment.split(':', 1)[1].strip()
515
- values = TableEntry.parse_category_values(category_codes, table_iter, wide)
530
+ values = TableEntry.parse_width_category_values(table_iter, wide)
516
531
  print('ok')
517
532
  return TableDef(version, date, values)
518
533
 
@@ -63,9 +63,30 @@ Category code was changed from 'Mc' to 'Lo':
63
63
  import logging
64
64
 
65
65
 
66
+ def bisearch_pair(ucs, table):
67
+ """
68
+ A copy of wcwidth._bisearch() but also returns the range of matched values.
69
+ """
70
+ lbound = 0
71
+ ubound = len(table) - 1
72
+
73
+ if ucs < table[0][0] or ucs > table[ubound][1]:
74
+ return (0, None, None)
75
+ while ubound >= lbound:
76
+ mid = (lbound + ubound) // 2
77
+ if ucs > table[mid][1]:
78
+ lbound = mid + 1
79
+ elif ucs < table[mid][0]:
80
+ ubound = mid - 1
81
+ else:
82
+ return (1, table[mid][0], table[mid][1])
83
+
84
+ return (0, None, None)
85
+
86
+
66
87
  def main(log: logging.Logger):
67
- # local
68
- from wcwidth import ZERO_WIDTH, WIDE_EASTASIAN, _bisearch, list_versions
88
+ from wcwidth import ZERO_WIDTH, WIDE_EASTASIAN, list_versions
89
+
69
90
  reversed_uni_versions = list(reversed(list_versions()))
70
91
  tables = {'ZERO_WIDTH': ZERO_WIDTH,
71
92
  'WIDE_EASTASIAN': WIDE_EASTASIAN}
@@ -81,14 +102,21 @@ def main(log: logging.Logger):
81
102
  other_table = tables[other_table_name][version]
82
103
  for start_range, stop_range in curr_table:
83
104
  for unichar_n in range(start_range, stop_range):
84
- if not _bisearch(unichar_n, next_table):
85
- log.info(f'value {hex(unichar_n)} in table_name={table_name}'
86
- f' version={version} is not defined in next_version={next_version}'
87
- f' from inclusive range {hex(start_range)}-{hex(stop_range)}')
88
- if _bisearch(unichar_n, other_table):
89
- log.error(f'value {hex(unichar_n)} in table_name={table_name}'
90
- f' version={version} is duplicated in other_table_name={other_table_name}'
91
- f' from inclusive range {hex(start_range)}-{hex(stop_range)}')
105
+ result, _, _ = bisearch_pair(unichar_n, next_table)
106
+ if not result:
107
+ log.info(
108
+ f'value 0x{unichar_n:05x} in table_name={table_name}'
109
+ f' version={version} is not defined in next_version={next_version}'
110
+ f' from inclusive range {hex(start_range)}-{hex(stop_range)}'
111
+ )
112
+ result, lbound, ubound = bisearch_pair(unichar_n, other_table)
113
+ if result:
114
+ log.error(
115
+ f'value 0x{unichar_n:05x} in table_name={table_name}'
116
+ f' version={version} is duplicated in other_table_name={other_table_name}'
117
+ f' from inclusive range 0x{start_range:05x}-0x{stop_range:05x} of'
118
+ f' {table_name} against 0x{lbound:05x}-0x{ubound:05x} in {other_table_name}'
119
+ )
92
120
  errors += 1
93
121
  if errors:
94
122
  log.error(f'{errors} errors, exit 1')
@@ -116,7 +116,7 @@ class WcCombinedCharacterGenerator(object):
116
116
  """
117
117
  self.characters = []
118
118
  letters_o = ('o' * width)
119
- for (begin, end) in ZERO_WIDTH[unicode_version]:
119
+ for (begin, end) in ZERO_WIDTH[_wcmatch_version(unicode_version)]:
120
120
  for val in [_val for _val in
121
121
  range(begin, end + 1)
122
122
  if _val <= LIMIT_UCS]:
@@ -32,7 +32,7 @@ Example
32
32
  >>> text = u'コンニチハ'
33
33
 
34
34
  Python **incorrectly** uses the *string length* of 5 codepoints rather than the
35
- *printible length* of 10 cells, so that when using the `rjust` function, the
35
+ *printable length* of 10 cells, so that when using the `rjust` function, the
36
36
  output length is wrong::
37
37
 
38
38
  >>> print(len('コンニチハ'))
@@ -216,8 +216,12 @@ Other Languages
216
216
  =======
217
217
  History
218
218
  =======
219
+
220
+ 0.2.13 *2024-01-06*
221
+ * **Bugfix** zero-width support for Hangul Jamo (Korean)
222
+
219
223
  0.2.12 *2023-11-21*
220
- * re-release to remove .pyi file misplaced in wheel files `Issue #101`.
224
+ * re-release to remove .pyi file misplaced in wheel files `Issue #101`_.
221
225
 
222
226
  0.2.11 *2023-11-20*
223
227
  * Include tests files in the source distribution (`PR #98`_, `PR #100`_).
@@ -0,0 +1,79 @@
1
+ .. _Specification:
2
+
3
+ =============
4
+ Specification
5
+ =============
6
+
7
+ This document defines how the wcwidth library measures the printable width
8
+ of characters of a string.
9
+
10
+ Width of -1
11
+ -----------
12
+
13
+ The following have a column width of -1 for function :func:`wcwidth.wcwidth`
14
+
15
+ - ``C0`` control characters (`U+0001`_ through `U+001F`_).
16
+ - ``C1`` control characters and ``DEL`` (`U+007F`_ up to, but not including, `U+00A0`_).
17
+
18
+ If any character in sequence contains ``C0`` or ``C1`` control characters, the final
19
+ return value of :func:`wcwidth.wcswidth` is -1.
20
+
21
+ Width of 0
22
+ ----------
23
+
24
+ Any characters defined by category codes in `DerivedGeneralCategory.txt`_ files:
25
+
26
+ - 'Me': Enclosing Combining Mark, approx. 13 characters.
27
+ - 'Mn': Nonspacing Combining Mark, approx. 1,839 characters.
28
+ - 'Mc': Spacing Mark, approx. 443 characters.
29
+ - 'Cf': Format control character, approx. 161 characters.
30
+ - 'Zl': `U+2028`_ LINE SEPARATOR only
31
+ - 'Zp': `U+2029`_ PARAGRAPH SEPARATOR only
32
+ - 'Sk': Modifier Symbol, approx. 4 characters of only those where phrase
33
+ ``'EMOJI MODIFIER'`` is present in comment of unicode data file.
34
+
35
+ The NULL character (`U+0000`_).
36
+
37
+ Any character following ZWJ (`U+200D`_) when in sequence by
38
+ function :func:`wcwidth.wcswidth`.
39
+
40
+ Hangul Jamo Jungseong and "Extended-B" code blocks, `U+1160`_ through
41
+ `U+11FF`_ and `U+D7B0`_ through `U+D7FF`_.
42
+
43
+
44
+ Width of 1
45
+ ----------
46
+
47
+ String characters are measured width of 1 when they are not
48
+ measured as `Width of 0`_ or `Width of 2`_.
49
+
50
+ Width of 2
51
+ ----------
52
+
53
+ Any character defined by East Asian Fullwidth (``F``) or Wide (``W``)
54
+ properties in `EastAsianWidth.txt`_ files, except those that are defined by the
55
+ Category codes of Nonspacing Mark (``Mn``) and Spacing Mark (``Mc``).
56
+
57
+ Any characters of Modifier Symbol category, ``'Sk'`` where ``'FULLWIDTH'`` is
58
+ present in comment of unicode data file, approx. 3 characters.
59
+
60
+ Any character in sequence with `U+FE0F`_ (Variation Selector 16) defined by
61
+ `emoji-variation-sequences.txt`_ as ``emoji style``.
62
+
63
+
64
+ .. _`U+0000`: https://codepoints.net/U+0000
65
+ .. _`U+0001`: https://codepoints.net/U+0001
66
+ .. _`U+001F`: https://codepoints.net/U+001F
67
+ .. _`U+007F`: https://codepoints.net/U+007F
68
+ .. _`U+00A0`: https://codepoints.net/U+00A0
69
+ .. _`U+1160`: https://codepoints.net/U+1160
70
+ .. _`U+11FF`: https://codepoints.net/U+11FF
71
+ .. _`U+200D`: https://codepoints.net/U+200D
72
+ .. _`U+2028`: https://codepoints.net/U+2028
73
+ .. _`U+2029`: https://codepoints.net/U+2029
74
+ .. _`U+D7B0`: https://codepoints.net/U+D7B0
75
+ .. _`U+D7FF`: https://codepoints.net/U+D7FF
76
+ .. _`U+FE0F`: https://codepoints.net/U+FE0F
77
+ .. _`DerivedGeneralCategory.txt`: https://www.unicode.org/Public/UCD/latest/ucd/extracted/DerivedGeneralCategory.txt
78
+ .. _`EastAsianWidth.txt`: https://www.unicode.org/Public/UCD/latest/ucd/EastAsianWidth.txt
79
+ .. _`emoji-variation-sequences.txt`: https://www.unicode.org/Public/UCD/latest/ucd/emoji/emoji-variation-sequences.txt
@@ -44,7 +44,7 @@ def main():
44
44
  setuptools.setup(
45
45
  name='wcwidth',
46
46
  # NOTE: manually manage __version__ in wcwidth/__init__.py !
47
- version='0.2.12',
47
+ version='0.2.13',
48
48
  description=(
49
49
  "Measures the displayed width of unicode strings in a terminal"),
50
50
  long_description=codecs.open(
@@ -222,17 +222,48 @@ def test_balinese_script():
222
222
  assert length_phrase == expect_length_phrase
223
223
 
224
224
 
225
+ def test_kr_jamo():
226
+ """
227
+ Test basic combining of HANGUL CHOSEONG and JUNGSEONG
228
+
229
+ Example is from Raymond Chen's blog post,
230
+ https://devblogs.microsoft.com/oldnewthing/20201009-00/?p=104351
231
+ """
232
+ # This is an example where both characters are "wide" when displayed alone.
233
+ #
234
+ # But JUNGSEONG (vowel) is designed for combination with a CHOSEONG (consonant).
235
+ #
236
+ # This wcwidth library understands their width only when in combination,
237
+ # and not by independent display, like other zero-width characters that may
238
+ # only combine with an appropriate preceding character.
239
+ phrase = (
240
+ u"\u1100" # ᄀ HANGUL CHOSEONG KIYEOK (consonant)
241
+ u"\u1161" # ᅡ HANGUL JUNGSEONG A (vowel)
242
+ )
243
+ expect_length_each = (2, 0)
244
+ expect_length_phrase = 2
245
+
246
+ # exercise,
247
+ length_each = tuple(map(wcwidth.wcwidth, phrase))
248
+ length_phrase = wcwidth.wcswidth(phrase)
249
+
250
+ # verify.
251
+ assert length_each == expect_length_each
252
+ assert length_phrase == expect_length_phrase
253
+
254
+
225
255
  def test_kr_jamo_filler():
226
256
  u"""
227
257
  Jamo filler is 0 width.
228
258
 
229
- According to https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf this character and others
230
- like it, ``\uffa0``, ``\u1160``, ``\u115f``, ``\u1160``, are not commonly viewed with a terminal,
231
- seems it doesn't matter whether it is implemented or not, they are not typically used !
259
+ Example from https://www.unicode.org/L2/L2006/06310-hangul-decompose9.pdf
232
260
  """
233
- phrase = u"\u1100\u1160"
234
- expect_length_each = (2, 1)
235
- expect_length_phrase = 3
261
+ phrase = (
262
+ u"\u1100" # HANGUL CHOSEONG KIYEOK (consonant)
263
+ u"\u1160" # HANGUL JUNGSEONG FILLER (vowel)
264
+ )
265
+ expect_length_each = (2, 0)
266
+ expect_length_phrase = 2
236
267
 
237
268
  # exercise,
238
269
  length_each = tuple(map(wcwidth.wcwidth, phrase))
@@ -355,3 +386,17 @@ def test_kannada_script_2():
355
386
  # verify.
356
387
  assert length_each == expect_length_each
357
388
  assert length_phrase == expect_length_phrase
389
+
390
+
391
+ def test_zero_wide_conflict():
392
+ # Test characters considered both "wide" and "zero" width
393
+ # - (0x03000, 0x0303e,), # Ideographic Space ..Ideographic Variation In
394
+ # + (0x03000, 0x03029,), # Ideographic Space ..Hangzhou Numeral Nine
395
+ assert wcwidth.wcwidth(unichr(0x03029), unicode_version='4.1.0') == 2
396
+ assert wcwidth.wcwidth(unichr(0x0302a), unicode_version='4.1.0') == 0
397
+
398
+ # - (0x03099, 0x030ff,), # Combining Katakana-hirag..Katakana Digraph Koto
399
+ # + (0x0309b, 0x030ff,), # Katakana-hiragana Voiced..Katakana Digraph Koto
400
+ assert wcwidth.wcwidth(unichr(0x03099), unicode_version='4.1.0') == 0
401
+ assert wcwidth.wcwidth(unichr(0x0309a), unicode_version='4.1.0') == 0
402
+ assert wcwidth.wcwidth(unichr(0x0309b), unicode_version='4.1.0') == 2
@@ -0,0 +1,15 @@
1
+ """
2
+ Executes verify-table-integrity.py as a unit test.
3
+ """
4
+ import os
5
+ import sys
6
+ import subprocess
7
+
8
+ import pytest
9
+
10
+ @pytest.mark.skipif(sys.version_info[:2] != (3, 12), reason='Test only with a single version of python')
11
+ def test_verify_table_integrity():
12
+ subprocess.check_output([sys.executable, os.path.join(os.path.dirname(__file__),
13
+ os.path.pardir,
14
+ 'bin',
15
+ 'verify-table-integrity.py')])
@@ -26,4 +26,4 @@ __all__ = ('wcwidth', 'wcswidth', 'list_versions')
26
26
  # We also used pkg_resources to load unicode version tables from version.json,
27
27
  # generated by bin/update-tables.py, but some environments are unable to
28
28
  # import pkg_resources for one reason or another, yikes!
29
- __version__ = '0.2.12'
29
+ __version__ = '0.2.13'