PyPI - unicodedata-reader - Versions diffs - 0.1.7__tar.gz → 1.0.0__tar.gz - Mend

unicodedata-reader 0.1.7__tar.gz → 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: unicodedata-reader
-Version: 0.1.7
+Version: 1.0.0
 Summary:
 Home-page: https://github.com/kojiishi/unicodedata-reader
 License: Apache-2.0
@@ -9,10 +9,12 @@ Author-email: kojii@chromium.org
 Requires-Python: >=3.8
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3.9
-Requires-Dist: platformdirs (>=2.2.0,<3.0.0)
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Dist: platformdirs (>=2.2,<5.0)
 Project-URL: Repository, https://github.com/kojiishi/unicodedata-reader
 Description-Content-Type: text/markdown

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "unicodedata-reader"
-version = "0.1.7"
+version = "1.0.0"
 description = ""
 authors = ["Koji Ishii <kojii@chromium.org>"]
 readme = "README.md"
@@ -13,13 +13,13 @@ license = "Apache-2.0"
 [tool.poetry.dependencies]
 python = ">=3.8"
-platformdirs = "^2.2.0"
+platformdirs = ">=2.2,<5.0"
 [tool.poetry.dev-dependencies]
 pytest = "*"
 pytype = {version = "*", python = "<3.10"}
-tox = "^3.24.4"
-yapf = "^0.31.0"
+tox = "^4.14.2"
+yapf = "^0.40.2"
 [tool.poetry.scripts]
 unicodedata-reader = 'unicodedata_reader.__main__:main'

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/__init__.py RENAMED Viewed

@@ -2,3 +2,4 @@ from .entry import *
 from .reader import *
 from .compressor import *
 from .cli import *
+from .set import *

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/__main__.py RENAMED Viewed

@@ -2,6 +2,7 @@ import pathlib
 import sys
 import unicodedata_reader.bidi_brackets as bidi_brackets
+import unicodedata_reader.east_asian_width as ea
 import unicodedata_reader.emoji as emoji
 import unicodedata_reader.general_category as gc
 import unicodedata_reader.line_break as lb
@@ -12,6 +13,7 @@ def main():
     args = sys.argv
     sub_commands = {
         'bidi': lambda: bidi_brackets.dump_bidi_brackets(),
+        'ea': lambda: ea.UnicodeEastAsianWidthDataCli().main(),
         'emoji': lambda: emoji.UnicodeEmojiDataCli().main(),
         'gc': lambda: gc.UnicodeGeneralCategoryDataCli().main(),
         'lb': lambda: lb.UnicodeLineBreakDataCli().main(),

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/cli.py RENAMED Viewed

@@ -73,6 +73,7 @@ def _init_logging(verbose):
 class UnicodeDataCli(object):
     def __init__(self):
         self._parse_args()

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/compressor.py RENAMED Viewed

@@ -22,6 +22,7 @@ def _init_logging(verbose: int):
 class UnicodeDataCompressor(object):
     def __init__(self, entries: UnicodeDataEntries):
         self._entries = entries

unicodedata_reader-1.0.0/unicodedata_reader/east_asian_width.py ADDED Viewed

@@ -0,0 +1,30 @@
+#!/usr/bin/env python3
+import unicodedata
+from typing import Any
+from typing import Callable
+from typing import Dict
+from unicodedata_reader import *
+class UnicodeEastAsianWidthDataCli(UnicodeDataCli):
+    def __init__(self):
+        super().__init__()
+        self._entries = UnicodeDataReader.default.east_asian_width()
+    def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
+        return {
+            'EA': lambda code, ch: self._entries.value(code),
+            'GC': lambda code, ch: unicodedata.category(ch),
+            'EAW': lambda code, ch: unicodedata.east_asian_width(ch),
+            'cp932': lambda code, ch: u_enc(ch, 'cp932'),
+            'sjis04': lambda code, ch: u_enc(ch, 'sjis_2004'),
+            'cp936': lambda code, ch: u_enc(ch, 'cp936'),
+            'cp949': lambda code, ch: u_enc(ch, 'cp949'),
+            'cp950': lambda code, ch: u_enc(ch, 'cp950'),
+        }
+if __name__ == '__main__':
+    UnicodeEastAsianWidthDataCli().main()

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/emoji.py RENAMED Viewed

@@ -7,6 +7,7 @@ from unicodedata_reader import *
 class UnicodeEmojiDataCli(UnicodeDataCli):
     def __init__(self):
         super().__init__()
         self._entries = UnicodeDataReader.default.emoji()

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/entry.py RENAMED Viewed

@@ -27,6 +27,7 @@ def u_enc(c, encoding):
 class BidiBrackets(object):
     def __init__(self, pair: int, type: str):
         self.pair = pair
         self.type = type
@@ -55,6 +56,9 @@ class UnicodeDataEntry(object):
     [Unicode character database]: https://unicode.org/reports/tr44/
     """
+    max_code_point = 0x10FFFF
     def __init__(self, min: int, max: int, value):
         self.min = min
         self.max = max
@@ -166,6 +170,7 @@ class UnicodeDataEntries(object):
     or a list of `UnicodeDataEntry`.
     [Unicode character database]: https://unicode.org/reports/tr44/
     """
     def __init__(self,
                  entries: Optional[Union[Iterable[UnicodeDataEntry],
                                          Sequence[UnicodeDataEntry]]] = None,
@@ -310,12 +315,14 @@ class UnicodeDataEntries(object):
 class UnicodeBidiBracketsDataEntries(UnicodeDataEntries):
     def _load_lines(self, lines: Iterable[str], converter=None):
         converter = converter or BidiBrackets.from_values
         super()._load_lines(lines, converter=converter)
 class UnicodeEmojiDataEntries(UnicodeDataEntries):
     def _load_lines(self, lines: Iterable[str], converter=None):
         converter = converter or (lambda v: EmojiType[v])
         super()._load_lines(lines, converter=converter)
@@ -340,6 +347,7 @@ class UnicodeEmojiDataEntries(UnicodeDataEntries):
 class UnicodeLineBreakDataEntries(UnicodeDataEntries):
     def _load_comment(self, comment: str, start_index: int):
         # Load missing value entries. See the comments in:
         # https://www.unicode.org/Public/UNIDATA/LineBreak.txt
@@ -362,12 +370,14 @@ class UnicodeLineBreakDataEntries(UnicodeDataEntries):
 class UnicodeScriptExtensionsDataEntries(UnicodeDataEntries):
     def _load_lines(self, lines: Iterable[str], converter=None):
         converter = converter or (lambda v: v.split())
         super()._load_lines(lines, converter=converter)
 class UnicodeVerticalOrientationDataEntries(UnicodeDataEntries):
     def _load_comment(self, comment: str, start_index: int):
         # Load missing value entries. See the comments in:
         # https://www.unicode.org/Public/UNIDATA/VerticalOrientation.txt

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/general_category.py RENAMED Viewed

@@ -8,6 +8,7 @@ from unicodedata_reader import *
 class UnicodeGeneralCategoryDataCli(UnicodeDataCli):
     def __init__(self):
         super().__init__()
         self._entries = UnicodeDataReader.default.general_category()

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/line_break.py RENAMED Viewed

@@ -8,6 +8,7 @@ from unicodedata_reader import *
 class UnicodeLineBreakDataCli(UnicodeDataCli):
     def __init__(self):
         super().__init__()
         self._entries = UnicodeDataReader.default.line_break()

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/reader.py RENAMED Viewed

@@ -33,6 +33,11 @@ class UnicodeDataReader(object):
         lines = self.read_lines(name)
         return UnicodeDataEntries(name=name, lines=lines)
+    def east_asian_width(self) -> UnicodeDataEntries:
+        name = 'EastAsianWidth'
+        lines = self.read_lines(name)
+        return UnicodeDataEntries(name=name, lines=lines)
     def emoji(self) -> UnicodeDataEntries:
         lines = self.read_lines('emoji/emoji-data')
         return UnicodeEmojiDataEntries(name='Emoji', lines=lines)

unicodedata_reader-1.0.0/unicodedata_reader/set.py ADDED Viewed

@@ -0,0 +1,72 @@
+from typing import Callable
+from unicodedata_reader.entry import *
+from unicodedata_reader.reader import *
+class Set(object):
+    """A simple set of Unicode code points."""
+    def __init__(self) -> None:
+        self.set = set()  # type: set[int]
+    def __contains__(self, code_point: int) -> bool:
+        return code_point in self.set
+    def __iter__(self) -> Iterable[int]:
+        return self.set.__iter__()
+    def __isub__(self, other: 'Set') -> None:
+        self.set -= other.set
+    def __iand__(self, other: 'Set') -> None:
+        self.set &= other.set
+    def __ior__(self, other: 'Set') -> None:
+        self.set |= other.set
+    def add(self, code: int) -> None:
+        self.set.add(code)
+    def remove(self, code: int) -> None:
+        self.set.discard(code)
+    def add_entries(self, entries: UnicodeDataEntries, pred: Callable[[Any],
+                                                                      bool]):
+        for entry in entries:
+            if pred(entry.value):
+                for code in entry.range():
+                    self.set.add(code)
+    @staticmethod
+    def east_asian_width(
+            value: str,
+            reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
+        set = Set()
+        set.add_entries(reader.east_asian_width(), lambda v: v == value)
+        return set
+    @staticmethod
+    def general_category(
+            value: str,
+            reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
+        set = Set()
+        set.add_entries(reader.general_category(),
+                        lambda v: v.startswith(value))
+        return set
+    @staticmethod
+    def scripts(
+            value: str,
+            reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
+        set = Set()
+        set.add_entries(reader.scripts(), lambda v: v == value)
+        return set
+    @staticmethod
+    def script_extensions(
+            value: str,
+            reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
+        set = Set()
+        set.add_entries(reader.script_extensions(), lambda v: value in v)
+        return set

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/vertical_orientation.py RENAMED Viewed

@@ -8,6 +8,7 @@ from unicodedata_reader import *
 class UnicodeVerticalOrientationDataCli(UnicodeDataCli):
     def __init__(self):
         super().__init__()
         self._entries = UnicodeDataReader.default.vertical_orientation()

unicodedata-reader-0.1.7/setup.py DELETED Viewed

@@ -1,34 +0,0 @@
-# -*- coding: utf-8 -*-
-from setuptools import setup
-packages = \
-['unicodedata_reader']
-package_data = \
-{'': ['*']}
-install_requires = \
-['platformdirs>=2.2.0,<3.0.0']
-entry_points = \
-{'console_scripts': ['unicodedata-reader = unicodedata_reader.__main__:main']}
-setup_kwargs = {
-    'name': 'unicodedata-reader',
-    'version': '0.1.7',
-    'description': '',
-    'long_description': '[![CI](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml/badge.svg)](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)\n[![PyPI](https://img.shields.io/pypi/v/unicodedata-reader.svg)](https://pypi.org/project/unicodedata-reader/)\n[![Dependencies](https://badgen.net/github/dependabot/kojiishi/unicodedata-reader)](https://github.com/kojiishi/unicodedata-reader/network/updates)\n\n\n# unicodedata-reader\n\nThis package reads and parses the [Unicode Character Database] files.\n\nMany of them are available in the [unicodedata] module,\nor in other 3rd party modules.\nWhen the desired data is not in any existing modules,\nsuch as the [Line_Break property] or the [Vertical_Orientation property],\nthis package can read the data files\nat <https://www.unicode.org/Public/UNIDATA/>.\n\nThis package can also generate JavaScript functions\nthat can read the property values of the [Unicode Character Database]\nin browsers.\nPlease see the [JavaScript] section below.\n\n[General_Category property]: http://unicode.org/reports/tr44/#General_Category\n[Line_Break property]: http://unicode.org/reports/tr44/#Line_Break\n[Unicode Character Database]: https://unicode.org/reports/tr44/\n[unicodedata]: https://docs.python.org/3/library/unicodedata.html\n[Vertical_Orientation property]: http://unicode.org/reports/tr44/#Vertical_Orientation\n\n## Install\n\n```sh\npip install unicodedata-reader\n```\nIf you want to clone and install using [poetry]:\n```sh\ngit clone https://github.com/kojiishi/unicodedata-reader\ncd unicodedata-reader\npoetry install\npoetry shell\n```\n\n[poetry]: https://github.com/python-poetry/poetry\n\n\n## Python\n\n```python\nimport unicodedata_reader\n\nreader = unicodedata_reader.UnicodeDataReader.default\nlb = reader.line_break()\nprint(lb.value(0x41))\n```\nThe example above prints `AL`,\nthe [Line_Break property] value for U+0041.\nPlease also see [line_break_test.py] for more 
usages.\n\n[line_break_test.py]: https://github.com/kojiishi/unicodedata-reader/blob/main/tests/line_break_test.py\n\n## JavaScript\n[JavaScript]: #javascript\n\nThe [`UnicodeDataCompressor` class] in this package\ncan generate JavaScript functions that can read the property values\nof the [Unicode Character Database] in browsers.\n\nFollowing examples are available in the "`js`" directory:\n* [GeneralCategory.js] is a generated JavaScript file\n  for the Unicode [General_Category property].\n* [LineBreak.js] is a generated JavaScript file\n  for the Unicode [Line_Break property].\n* [LineBreak.html] for an example usage of [LineBreak.js].\n\nThe following command generates a JavaScript file for the [Line_Break property]\nusing `js/template.js` as the template file:\n```sh\nunicodedata-reader lb -t js/template.js\n```\n\n[`UnicodeDataCompressor` class]: https://github.com/kojiishi/unicodedata-reader/blob/main/unicodedata_reader/compressor.py\n[GeneralCategory.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/GeneralCategory.js\n[LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html\n[LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js\n',
-    'author': 'Koji Ishii',
-    'author_email': 'kojii@chromium.org',
-    'maintainer': None,
-    'maintainer_email': None,
-    'url': 'https://github.com/kojiishi/unicodedata-reader',
-    'packages': packages,
-    'package_data': package_data,
-    'install_requires': install_requires,
-    'entry_points': entry_points,
-    'python_requires': '>=3.8',
-}
-setup(**setup_kwargs)

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/LICENSE RENAMED Viewed

File without changes

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/README.md RENAMED Viewed

File without changes

{unicodedata-reader-0.1.7 → unicodedata_reader-1.0.0}/unicodedata_reader/bidi_brackets.py RENAMED Viewed

File without changes

unicodedata-reader 0.1.7__tar.gz → 1.0.0__tar.gz

unicodedata-reader 0.1.7__tar.gz → 1.0.0__tar.gz