unicodedata-reader 0.1.7__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (18) hide show
  1. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/PKG-INFO +5 -3
  2. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/pyproject.toml +4 -4
  3. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/__init__.py +1 -0
  4. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/__main__.py +2 -0
  5. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/cli.py +1 -0
  6. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/compressor.py +1 -0
  7. unicodedata_reader-0.2.0/unicodedata_reader/east_asian_width.py +30 -0
  8. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/emoji.py +1 -0
  9. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/entry.py +10 -0
  10. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/general_category.py +1 -0
  11. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/line_break.py +1 -0
  12. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/reader.py +5 -0
  13. unicodedata_reader-0.2.0/unicodedata_reader/set.py +72 -0
  14. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/vertical_orientation.py +1 -0
  15. unicodedata-reader-0.1.7/setup.py +0 -34
  16. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/LICENSE +0 -0
  17. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/README.md +0 -0
  18. {unicodedata-reader-0.1.7 → unicodedata_reader-0.2.0}/unicodedata_reader/bidi_brackets.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unicodedata-reader
3
- Version: 0.1.7
3
+ Version: 0.2.0
4
4
  Summary:
5
5
  Home-page: https://github.com/kojiishi/unicodedata-reader
6
6
  License: Apache-2.0
@@ -9,10 +9,12 @@ Author-email: kojii@chromium.org
9
9
  Requires-Python: >=3.8
10
10
  Classifier: License :: OSI Approved :: Apache Software License
11
11
  Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.10
13
12
  Classifier: Programming Language :: Python :: 3.8
14
13
  Classifier: Programming Language :: Python :: 3.9
15
- Requires-Dist: platformdirs (>=2.2.0,<3.0.0)
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Requires-Dist: platformdirs (>=2.2,<5.0)
16
18
  Project-URL: Repository, https://github.com/kojiishi/unicodedata-reader
17
19
  Description-Content-Type: text/markdown
18
20
 
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "unicodedata-reader"
7
- version = "0.1.7"
7
+ version = "0.2.0"
8
8
  description = ""
9
9
  authors = ["Koji Ishii <kojii@chromium.org>"]
10
10
  readme = "README.md"
@@ -13,13 +13,13 @@ license = "Apache-2.0"
13
13
 
14
14
  [tool.poetry.dependencies]
15
15
  python = ">=3.8"
16
- platformdirs = "^2.2.0"
16
+ platformdirs = ">=2.2,<5.0"
17
17
 
18
18
  [tool.poetry.dev-dependencies]
19
19
  pytest = "*"
20
20
  pytype = {version = "*", python = "<3.10"}
21
- tox = "^3.24.4"
22
- yapf = "^0.31.0"
21
+ tox = "^4.14.2"
22
+ yapf = "^0.40.2"
23
23
 
24
24
  [tool.poetry.scripts]
25
25
  unicodedata-reader = 'unicodedata_reader.__main__:main'
@@ -2,3 +2,4 @@ from .entry import *
2
2
  from .reader import *
3
3
  from .compressor import *
4
4
  from .cli import *
5
+ from .set import *
@@ -2,6 +2,7 @@ import pathlib
2
2
  import sys
3
3
 
4
4
  import unicodedata_reader.bidi_brackets as bidi_brackets
5
+ import unicodedata_reader.east_asian_width as ea
5
6
  import unicodedata_reader.emoji as emoji
6
7
  import unicodedata_reader.general_category as gc
7
8
  import unicodedata_reader.line_break as lb
@@ -12,6 +13,7 @@ def main():
12
13
  args = sys.argv
13
14
  sub_commands = {
14
15
  'bidi': lambda: bidi_brackets.dump_bidi_brackets(),
16
+ 'ea': lambda: ea.UnicodeEastAsianWidthDataCli().main(),
15
17
  'emoji': lambda: emoji.UnicodeEmojiDataCli().main(),
16
18
  'gc': lambda: gc.UnicodeGeneralCategoryDataCli().main(),
17
19
  'lb': lambda: lb.UnicodeLineBreakDataCli().main(),
@@ -73,6 +73,7 @@ def _init_logging(verbose):
73
73
 
74
74
 
75
75
  class UnicodeDataCli(object):
76
+
76
77
  def __init__(self):
77
78
  self._parse_args()
78
79
 
@@ -22,6 +22,7 @@ def _init_logging(verbose: int):
22
22
 
23
23
 
24
24
  class UnicodeDataCompressor(object):
25
+
25
26
  def __init__(self, entries: UnicodeDataEntries):
26
27
  self._entries = entries
27
28
 
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ import unicodedata
3
+ from typing import Any
4
+ from typing import Callable
5
+ from typing import Dict
6
+
7
+ from unicodedata_reader import *
8
+
9
+
10
+ class UnicodeEastAsianWidthDataCli(UnicodeDataCli):
11
+
12
+ def __init__(self):
13
+ super().__init__()
14
+ self._entries = UnicodeDataReader.default.east_asian_width()
15
+
16
+ def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
17
+ return {
18
+ 'EA': lambda code, ch: self._entries.value(code),
19
+ 'GC': lambda code, ch: unicodedata.category(ch),
20
+ 'EAW': lambda code, ch: unicodedata.east_asian_width(ch),
21
+ 'cp932': lambda code, ch: u_enc(ch, 'cp932'),
22
+ 'sjis04': lambda code, ch: u_enc(ch, 'sjis_2004'),
23
+ 'cp936': lambda code, ch: u_enc(ch, 'cp936'),
24
+ 'cp949': lambda code, ch: u_enc(ch, 'cp949'),
25
+ 'cp950': lambda code, ch: u_enc(ch, 'cp950'),
26
+ }
27
+
28
+
29
+ if __name__ == '__main__':
30
+ UnicodeEastAsianWidthDataCli().main()
@@ -7,6 +7,7 @@ from unicodedata_reader import *
7
7
 
8
8
 
9
9
  class UnicodeEmojiDataCli(UnicodeDataCli):
10
+
10
11
  def __init__(self):
11
12
  super().__init__()
12
13
  self._entries = UnicodeDataReader.default.emoji()
@@ -27,6 +27,7 @@ def u_enc(c, encoding):
27
27
 
28
28
 
29
29
  class BidiBrackets(object):
30
+
30
31
  def __init__(self, pair: int, type: str):
31
32
  self.pair = pair
32
33
  self.type = type
@@ -55,6 +56,9 @@ class UnicodeDataEntry(object):
55
56
 
56
57
  [Unicode character database]: https://unicode.org/reports/tr44/
57
58
  """
59
+
60
+ max_code_point = 0x10FFFF
61
+
58
62
  def __init__(self, min: int, max: int, value):
59
63
  self.min = min
60
64
  self.max = max
@@ -166,6 +170,7 @@ class UnicodeDataEntries(object):
166
170
  or a list of `UnicodeDataEntry`.
167
171
  [Unicode character database]: https://unicode.org/reports/tr44/
168
172
  """
173
+
169
174
  def __init__(self,
170
175
  entries: Optional[Union[Iterable[UnicodeDataEntry],
171
176
  Sequence[UnicodeDataEntry]]] = None,
@@ -310,12 +315,14 @@ class UnicodeDataEntries(object):
310
315
 
311
316
 
312
317
  class UnicodeBidiBracketsDataEntries(UnicodeDataEntries):
318
+
313
319
  def _load_lines(self, lines: Iterable[str], converter=None):
314
320
  converter = converter or BidiBrackets.from_values
315
321
  super()._load_lines(lines, converter=converter)
316
322
 
317
323
 
318
324
  class UnicodeEmojiDataEntries(UnicodeDataEntries):
325
+
319
326
  def _load_lines(self, lines: Iterable[str], converter=None):
320
327
  converter = converter or (lambda v: EmojiType[v])
321
328
  super()._load_lines(lines, converter=converter)
@@ -340,6 +347,7 @@ class UnicodeEmojiDataEntries(UnicodeDataEntries):
340
347
 
341
348
 
342
349
  class UnicodeLineBreakDataEntries(UnicodeDataEntries):
350
+
343
351
  def _load_comment(self, comment: str, start_index: int):
344
352
  # Load missing value entries. See the comments in:
345
353
  # https://www.unicode.org/Public/UNIDATA/LineBreak.txt
@@ -362,12 +370,14 @@ class UnicodeLineBreakDataEntries(UnicodeDataEntries):
362
370
 
363
371
 
364
372
  class UnicodeScriptExtensionsDataEntries(UnicodeDataEntries):
373
+
365
374
  def _load_lines(self, lines: Iterable[str], converter=None):
366
375
  converter = converter or (lambda v: v.split())
367
376
  super()._load_lines(lines, converter=converter)
368
377
 
369
378
 
370
379
  class UnicodeVerticalOrientationDataEntries(UnicodeDataEntries):
380
+
371
381
  def _load_comment(self, comment: str, start_index: int):
372
382
  # Load missing value entries. See the comments in:
373
383
  # https://www.unicode.org/Public/UNIDATA/VerticalOrientation.txt
@@ -8,6 +8,7 @@ from unicodedata_reader import *
8
8
 
9
9
 
10
10
  class UnicodeGeneralCategoryDataCli(UnicodeDataCli):
11
+
11
12
  def __init__(self):
12
13
  super().__init__()
13
14
  self._entries = UnicodeDataReader.default.general_category()
@@ -8,6 +8,7 @@ from unicodedata_reader import *
8
8
 
9
9
 
10
10
  class UnicodeLineBreakDataCli(UnicodeDataCli):
11
+
11
12
  def __init__(self):
12
13
  super().__init__()
13
14
  self._entries = UnicodeDataReader.default.line_break()
@@ -33,6 +33,11 @@ class UnicodeDataReader(object):
33
33
  lines = self.read_lines(name)
34
34
  return UnicodeDataEntries(name=name, lines=lines)
35
35
 
36
+ def east_asian_width(self) -> UnicodeDataEntries:
37
+ name = 'EastAsianWidth'
38
+ lines = self.read_lines(name)
39
+ return UnicodeDataEntries(name=name, lines=lines)
40
+
36
41
  def emoji(self) -> UnicodeDataEntries:
37
42
  lines = self.read_lines('emoji/emoji-data')
38
43
  return UnicodeEmojiDataEntries(name='Emoji', lines=lines)
@@ -0,0 +1,72 @@
1
+ from typing import Callable
2
+
3
+ from unicodedata_reader.entry import *
4
+ from unicodedata_reader.reader import *
5
+
6
+
7
+ class Set(object):
8
+ """A simple set of Unicode code points."""
9
+
10
+ def __init__(self) -> None:
11
+ self.set = set() # type: set[int]
12
+
13
+ def __contains__(self, code_point: int) -> bool:
14
+ return code_point in self.set
15
+
16
+ def __iter__(self) -> Iterable[int]:
17
+ return self.set.__iter__()
18
+
19
+ def __isub__(self, other: 'Set') -> None:
20
+ self.set -= other.set
21
+
22
+ def __iand__(self, other: 'Set') -> None:
23
+ self.set &= other.set
24
+
25
+ def __ior__(self, other: 'Set') -> None:
26
+ self.set |= other.set
27
+
28
+ def add(self, code: int) -> None:
29
+ self.set.add(code)
30
+
31
+ def remove(self, code: int) -> None:
32
+ self.set.discard(code)
33
+
34
+ def add_entries(self, entries: UnicodeDataEntries, pred: Callable[[Any],
35
+ bool]):
36
+ for entry in entries:
37
+ if pred(entry.value):
38
+ for code in entry.range():
39
+ self.set.add(code)
40
+
41
+ @staticmethod
42
+ def east_asian_width(
43
+ value: str,
44
+ reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
45
+ set = Set()
46
+ set.add_entries(reader.east_asian_width(), lambda v: v == value)
47
+ return set
48
+
49
+ @staticmethod
50
+ def general_category(
51
+ value: str,
52
+ reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
53
+ set = Set()
54
+ set.add_entries(reader.general_category(),
55
+ lambda v: v.startswith(value))
56
+ return set
57
+
58
+ @staticmethod
59
+ def scripts(
60
+ value: str,
61
+ reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
62
+ set = Set()
63
+ set.add_entries(reader.scripts(), lambda v: v == value)
64
+ return set
65
+
66
+ @staticmethod
67
+ def script_extensions(
68
+ value: str,
69
+ reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
70
+ set = Set()
71
+ set.add_entries(reader.script_extensions(), lambda v: value in v)
72
+ return set
@@ -8,6 +8,7 @@ from unicodedata_reader import *
8
8
 
9
9
 
10
10
  class UnicodeVerticalOrientationDataCli(UnicodeDataCli):
11
+
11
12
  def __init__(self):
12
13
  super().__init__()
13
14
  self._entries = UnicodeDataReader.default.vertical_orientation()
@@ -1,34 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- from setuptools import setup
3
-
4
- packages = \
5
- ['unicodedata_reader']
6
-
7
- package_data = \
8
- {'': ['*']}
9
-
10
- install_requires = \
11
- ['platformdirs>=2.2.0,<3.0.0']
12
-
13
- entry_points = \
14
- {'console_scripts': ['unicodedata-reader = unicodedata_reader.__main__:main']}
15
-
16
- setup_kwargs = {
17
- 'name': 'unicodedata-reader',
18
- 'version': '0.1.7',
19
- 'description': '',
20
- 'long_description': '[![CI](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml/badge.svg)](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)\n[![PyPI](https://img.shields.io/pypi/v/unicodedata-reader.svg)](https://pypi.org/project/unicodedata-reader/)\n[![Dependencies](https://badgen.net/github/dependabot/kojiishi/unicodedata-reader)](https://github.com/kojiishi/unicodedata-reader/network/updates)\n\n\n# unicodedata-reader\n\nThis package reads and parses the [Unicode Character Database] files.\n\nMany of them are available in the [unicodedata] module,\nor in other 3rd party modules.\nWhen the desired data is not in any existing modules,\nsuch as the [Line_Break property] or the [Vertical_Orientation property],\nthis package can read the data files\nat <https://www.unicode.org/Public/UNIDATA/>.\n\nThis package can also generate JavaScript functions\nthat can read the property values of the [Unicode Character Database]\nin browsers.\nPlease see the [JavaScript] section below.\n\n[General_Category property]: http://unicode.org/reports/tr44/#General_Category\n[Line_Break property]: http://unicode.org/reports/tr44/#Line_Break\n[Unicode Character Database]: https://unicode.org/reports/tr44/\n[unicodedata]: https://docs.python.org/3/library/unicodedata.html\n[Vertical_Orientation property]: http://unicode.org/reports/tr44/#Vertical_Orientation\n\n## Install\n\n```sh\npip install unicodedata-reader\n```\nIf you want to clone and install using [poetry]:\n```sh\ngit clone https://github.com/kojiishi/unicodedata-reader\ncd unicodedata-reader\npoetry install\npoetry shell\n```\n\n[poetry]: https://github.com/python-poetry/poetry\n\n\n## Python\n\n```python\nimport unicodedata_reader\n\nreader = unicodedata_reader.UnicodeDataReader.default\nlb = reader.line_break()\nprint(lb.value(0x41))\n```\nThe example above prints `AL`,\nthe [Line_Break property] value for U+0041.\nPlease also see [line_break_test.py] for more usages.\n\n[line_break_test.py]: https://github.com/kojiishi/unicodedata-reader/blob/main/tests/line_break_test.py\n\n## JavaScript\n[JavaScript]: #javascript\n\nThe [`UnicodeDataCompressor` class] in this package\ncan generate JavaScript functions that can read the property values\nof the [Unicode Character Database] in browsers.\n\nFollowing examples are available in the "`js`" directory:\n* [GeneralCategory.js] is a generated JavaScript file\n for the Unicode [General_Category property].\n* [LineBreak.js] is a generated JavaScript file\n for the Unicode [Line_Break property].\n* [LineBreak.html] for an example usage of [LineBreak.js].\n\nThe following command generates a JavaScript file for the [Line_Break property]\nusing `js/template.js` as the template file:\n```sh\nunicodedata-reader lb -t js/template.js\n```\n\n[`UnicodeDataCompressor` class]: https://github.com/kojiishi/unicodedata-reader/blob/main/unicodedata_reader/compressor.py\n[GeneralCategory.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/GeneralCategory.js\n[LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html\n[LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js\n',
21
- 'author': 'Koji Ishii',
22
- 'author_email': 'kojii@chromium.org',
23
- 'maintainer': None,
24
- 'maintainer_email': None,
25
- 'url': 'https://github.com/kojiishi/unicodedata-reader',
26
- 'packages': packages,
27
- 'package_data': package_data,
28
- 'install_requires': install_requires,
29
- 'entry_points': entry_points,
30
- 'python_requires': '>=3.8',
31
- }
32
-
33
-
34
- setup(**setup_kwargs)