PyPI - unicodedata-reader - Versions diffs - 1.3.8__tar.gz → 1.3.9__tar.gz - Mend

unicodedata-reader 1.3.8 (tar.gz) → 1.3.9 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (50)

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/.github/workflows/ci.yml RENAMED Viewed

@@ -19,7 +19,7 @@ jobs:
         python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
     steps:
-    - uses: actions/checkout@v6
+    - uses: actions/checkout@v7
     # https://docs.astral.sh/uv/guides/integration/github/
     - name: Install uv and set up Python ${{ matrix.python-version }}
@@ -29,20 +29,19 @@ jobs:
     # https://taskfile.dev/docs/installation#github-actions
     - name: Install Task
-      uses: go-task/setup-task@v1
+      uses: go-task/setup-task@v2
     - name: Install dependencies
-      run: |
-        uv sync --all-extras --dev
+      run: uv sync --all-extras --dev
     - name: Test
-      run: |
-        task test
+      run: task test
+    - name: Type
+      run: task type
     - name: Lint
-      run: |
-        task lint
+      run: task lint
     - name: Format check
-      run: |
-        task fmtchk
+      run: task fmtchk

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/.github/workflows/publish.yml RENAMED Viewed

@@ -12,7 +12,7 @@ jobs:
   publish:
     runs-on: ubuntu-latest
     steps:
-      - uses: actions/checkout@v6
+      - uses: actions/checkout@v7
       - name: Set up Python
         uses: actions/setup-python@v6

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: unicodedata-reader
-Version: 1.3.8
+Version: 1.3.9
 Project-URL: repository, https://github.com/kojiishi/unicodedata-reader
 Author-email: Koji Ishii <kojii@chromium.org>
 License-Expression: Apache-2.0

unicodedata_reader-1.3.9/Taskfile.yml ADDED Viewed

@@ -0,0 +1,41 @@
+# yaml-language-server: $schema=https://taskfile.dev/schema.json
+version: '3'
+tasks:
+  default:
+    deps: [check]
+  check:
+    - task: tests
+    - task: type
+    - task: lint
+    - task: fmtchk
+    - git diff --exit-code
+  fix: "{{.RUN}} ruff check --fix {{.CLI_ARGS}}"
+  fmt: "{{.RUN}} ruff format {{.CLI_ARGS}}"
+  fmtchk: "{{.RUN}} ruff format --check {{.CLI_ARGS}}"
+  lint: "{{.RUN}} ruff check {{.CLI_ARGS}}"
+  test: "{{.RUN}} pytest tests {{.PYTEST}} {{.CLI_ARGS}}"
+  tests: "{{.RUN}} tox {{.TOX}} {{.CLI_ARGS}}"
+  type: "{{.RUN}} ty check {{.TY}} {{.CLI_ARGS}}"
+  gen:
+    - "{{.RUN}} unicodedata-reader lb -t js/template.js {{.GEN}} {{.CLI_ARGS}}"
+    - "{{.RUN}} unicodedata-reader gc -t js/template.js {{.GEN}} {{.CLI_ARGS}}"
+  install-git-hooks:
+    desc: Create git hooks
+    cmds:
+      - echo '#!/bin/sh' > .git/hooks/pre-push
+      - echo 'task check' >> .git/hooks/pre-push
+      - cmd: chmod +x .git/hooks/pre-push
+        platforms: [linux, darwin]
+vars:
+  GEN: -fv
+  PYTEST:
+  RUN: uv run
+  TOX: -p
+  TY:

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "unicodedata-reader"
-version = "1.3.8"
+version = "1.3.9"
 description = ""
 authors = [{name = "Koji Ishii", email="kojii@chromium.org"}]
 readme = "README.md"
@@ -15,12 +15,11 @@ repository = "https://github.com/kojiishi/unicodedata-reader"
 [dependency-groups]
 dev = [
-    "pytest>=9.1.0",
-    "pytype>=2024.9.13",
-    "ruff>=0.15.17",
+    "pytest>=9.1.1",
+    "ruff>=0.15.18",
     "tox>=4.55.1",
     "tox-uv>=1.35.2",
-    "yapf>=0.43.0",
+    "ty>=0.0.51",
 ]
 [project.scripts]

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/src/unicodedata_reader/__main__.py RENAMED Viewed

@@ -12,12 +12,12 @@ import unicodedata_reader.vertical_orientation as vo
 def main():
     args = sys.argv
     sub_commands = {
-        'bidi': lambda: bidi_brackets.dump_bidi_brackets(),
-        'ea': lambda: ea.UnicodeEastAsianWidthDataCli().main(),
-        'emoji': lambda: emoji.UnicodeEmojiDataCli().main(),
-        'gc': lambda: gc.UnicodeGeneralCategoryDataCli().main(),
-        'lb': lambda: lb.UnicodeLineBreakDataCli().main(),
-        'vo': lambda: vo.UnicodeVerticalOrientationDataCli().main(),
+        "bidi": lambda: bidi_brackets.dump_bidi_brackets(),
+        "ea": lambda: ea.UnicodeEastAsianWidthDataCli().main(),
+        "emoji": lambda: emoji.UnicodeEmojiDataCli().main(),
+        "gc": lambda: gc.UnicodeGeneralCategoryDataCli().main(),
+        "lb": lambda: lb.UnicodeLineBreakDataCli().main(),
+        "vo": lambda: vo.UnicodeVerticalOrientationDataCli().main(),
     }
     if len(args) > 1:
         func = sub_commands.get(args[1])
@@ -27,9 +27,9 @@ def main():
             return
     name = pathlib.Path(args[0]).name
-    sub_commands = '|'.join(sub_commands.keys())
-    print(f'usage: {name} {sub_commands} [options...]', file=sys.stderr)
+    sub_commands = "|".join(sub_commands.keys())
+    print(f"usage: {name} {sub_commands} [options...]", file=sys.stderr)
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/src/unicodedata_reader/bidi_brackets.py RENAMED Viewed

@@ -13,27 +13,28 @@ def dump_bidi_brackets():
     def bidi_brackets_type(code):
         bracket = bidi_brackets.get(code)
-        return bracket.type if bracket else 'x'
+        return bracket.type if bracket else "x"
     columns = {
-        'Code': lambda code, ch: u_hex(code),
-        'Char': lambda code, ch: chr(code),
-        'Bidi_Paired_Bracket_Type': lambda code, ch: bidi_brackets_type(code),
-        'EAW': lambda code, ch: unicodedata.east_asian_width(ch),
-        'Script': lambda code, ch: scripts.get(code),
-        'ScriptExt': lambda code, ch: str(script_extensions.get(code, [])),
+        "Code": lambda code, ch: u_hex(code),
+        "Char": lambda code, ch: chr(code),
+        "Bidi_Paired_Bracket_Type": lambda code, ch: bidi_brackets_type(code),
+        "EAW": lambda code, ch: unicodedata.east_asian_width(ch),
+        "Script": lambda code, ch: scripts.get(code),
+        "ScriptExt": lambda code, ch: str(script_extensions.get(code, [])),
     }
-    print(f'# {" ".join(columns.keys())}')
+    print(f"# {' '.join(columns.keys())}")
     last_block = None
     for code in get_unicodes_from_args(bidi_brackets.keys()):
         block = blocks[code]
         if block != last_block:
-            print(f'# {block}')
+            print(f"# {block}")
             last_block = block
         ch = chr(code)
         values = (func(code, ch) for func in columns.values())
-        print(f'{" ".join(values)} # {unicodedata.name(chr(code))}')
+        values = ("" if v is None else str(v) for v in values)
+        print(f"{' '.join(values)} # {unicodedata.name(chr(code))}")
-if __name__ == '__main__':
+if __name__ == "__main__":
     dump_bidi_brackets()

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/src/unicodedata_reader/cli.py RENAMED Viewed

@@ -8,6 +8,7 @@ from typing import Callable
 from typing import Dict
 from typing import Iterable
 from typing import Optional
+from typing import Sequence
 import unicodedata
 from unicodedata_reader import *
@@ -16,7 +17,8 @@ from unicodedata_reader import *
 def _to_unicodes_from_str(text):
     while text:
         match = re.match(
-            r'([uU]\+?)?([0-9a-fA-F]{4,5})(-([0-9a-fA-F]{4,5}))?,?\s*', text)
+            r"([uU]\+?)?([0-9a-fA-F]{4,5})(-([0-9a-fA-F]{4,5}))?,?\s*", text
+        )
         if match:
             prefix = match.group(1)
             hex = match.group(2)
@@ -27,7 +29,7 @@ def _to_unicodes_from_str(text):
                     yield from range(code, int(hex_end, 16) + 1)
                 else:
                     yield code
-                text = text[match.end():]
+                text = text[match.end() :]
                 continue
         code = ord(text[0])
         yield code
@@ -42,7 +44,7 @@ def to_unicodes(text):
 def get_unicodes_from_args(default=None):
     parser = argparse.ArgumentParser()
-    parser.add_argument('text', nargs='+' if default is None else '*')
+    parser.add_argument("text", nargs="+" if default is None else "*")
     args = parser.parse_args()
     if args.text:
         return to_unicodes(args.text)
@@ -51,8 +53,8 @@ def get_unicodes_from_args(default=None):
 def u_printable_chr(ch):
     gc = unicodedata.category(ch)
-    if gc == 'Cc':
-        return ''
+    if gc == "Cc":
+        return ""
     return ch
@@ -60,7 +62,7 @@ def u_name_or_empty(ch):
     try:
         return unicodedata.name(ch)
     except ValueError:
-        return ''
+        return ""
 def _init_logging(verbose):
@@ -73,6 +75,14 @@ def _init_logging(verbose):
 class UnicodeDataCli(object):
+    text: Optional[Sequence[str]]
+    clear_cache: bool
+    no_cache: bool
+    name: Optional[str]
+    template: Optional[pathlib.Path]
+    output: Optional[pathlib.Path]
+    verbose: int
+    _entries: UnicodeDataEntries
     def __init__(self):
         self._parse_args()
@@ -80,39 +90,43 @@ class UnicodeDataCli(object):
     def _columns(self) -> Dict[str, Callable[[int, str], Any]]:
         columns = self._core_columns()
         columns = dict(
-            itertools.chain({
-                'Code': lambda code, ch: 'U' + u_hex(code),
-                'Char': lambda code, ch: u_printable_chr(ch),
-            }.items(), columns.items(), {
-                'Name': lambda code, ch: u_name_or_empty(ch),
-            }.items()))
+            itertools.chain(
+                {
+                    "Code": lambda code, ch: "U" + u_hex(code),
+                    "Char": lambda code, ch: u_printable_chr(ch),
+                }.items(),
+                columns.items(),
+                {
+                    "Name": lambda code, ch: u_name_or_empty(ch),
+                }.items(),
+            )
+        )
         return columns
     def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
         raise NotImplementedError()
-    def _unicodes(self) -> Optional[Iterable[int]]:
+    def _unicodes(self) -> Iterable[int]:
         if self.text:
             return to_unicodes(self.text)
         return self._default_unicodes()
-    def _default_unicodes(self) -> Optional[Iterable[int]]:
+    def _default_unicodes(self) -> Iterable[int]:
         return self._entries.unicodes()
     def print(self):
         columns = self._columns()
-        print('\t'.join(key for key in columns.keys()))
+        print("\t".join(key for key in columns.keys()))
         for code in self._unicodes():
             try:
                 ch = chr(code)
                 values = (func(code, ch) for func in columns.values())
-                values = ('' if v is None else str(v) for v in values)
-                print('\t'.join(values))
+                values = ("" if v is None else str(v) for v in values)
+                print("\t".join(values))
             except UnicodeEncodeError:
                 continue
-    def substitute_template(self, template: pathlib.Path,
-                            output: pathlib.Path):
+    def substitute_template(self, template: pathlib.Path, output: pathlib.Path):
         entries = self._entries
         entries.fill_missing_values()
         entries.map_values_to_int()
@@ -122,22 +136,24 @@ class UnicodeDataCli(object):
     def _parse_args(self):
         parser = argparse.ArgumentParser()
-        parser.add_argument('text',
-                            nargs='*',
-                            help='show properties for the text')
-        parser.add_argument('-f', '--clear-cache', action='store_true')
-        parser.add_argument('-F', '--no-cache', action='store_true')
-        parser.add_argument('--name', help='$NAME in the template')
-        parser.add_argument('-t',
-                            '--template',
-                            type=pathlib.Path,
-                            help='generate a file from the template')
-        parser.add_argument('-o', '--output', type=pathlib.Path)
-        parser.add_argument('-v',
-                            '--verbose',
-                            help='increase output verbosity',
-                            action='count',
-                            default=0)
+        parser.add_argument("text", nargs="*", help="show properties for the text")
+        parser.add_argument("-f", "--clear-cache", action="store_true")
+        parser.add_argument("-F", "--no-cache", action="store_true")
+        parser.add_argument("--name", help="$NAME in the template")
+        parser.add_argument(
+            "-t",
+            "--template",
+            type=pathlib.Path,
+            help="generate a file from the template",
+        )
+        parser.add_argument("-o", "--output", type=pathlib.Path)
+        parser.add_argument(
+            "-v",
+            "--verbose",
+            help="increase output verbosity",
+            action="count",
+            default=0,
+        )
         parser.parse_args(namespace=self)
         _init_logging(self.verbose)  # pytype: disable=attribute-error
         if self.clear_cache:

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/src/unicodedata_reader/compressor.py RENAMED Viewed

@@ -9,7 +9,7 @@ from typing import Optional
 from unicodedata_reader import *
-_logger = logging.getLogger('UnicodeDataCompressor')
+_logger = logging.getLogger("UnicodeDataCompressor")
 def _init_logging(verbose: int):
@@ -22,7 +22,6 @@ def _init_logging(verbose: int):
 class UnicodeDataCompressor(object):
     def __init__(self, entries: UnicodeDataEntries):
         self._entries = entries
@@ -62,16 +61,23 @@ class UnicodeDataCompressor(object):
             assert entry.value < (1 << value_bits)
             assert entry.count > 0
             combined = ((entry.count - 1) << value_bits) | entry.value
-            _logger.debug('%04X %s=%d: %d -> %X', entry.min,
-                          entries.values_for_int()[entry.value], entry.value,
-                          entry.count, combined)
+            _logger.debug(
+                "%04X %s=%d: %d -> %X",
+                entry.min,
+                entries.values_for_int()[entry.value],
+                entry.value,
+                entry.count,
+                combined,
+            )
             bytes.extend(self._to_bytes(combined))
         return bytes
-    def substitute_template(self,
-                            template: pathlib.Path,
-                            output: Optional[pathlib.Path] = None,
-                            name: Optional[str] = None) -> str:
+    def substitute_template(
+        self,
+        template: pathlib.Path,
+        output: Optional[pathlib.Path] = None,
+        name: Optional[str] = None,
+    ) -> str:
         entries = self._entries
         bytes = self.compress()
         base64bytes = base64.b64encode(bytes)
@@ -79,15 +85,20 @@ class UnicodeDataCompressor(object):
         value_bits = self._bitsize
         name = name or entries.name
         assert name
-        _logger.info('%s: Bytes=%d, Base64=%d, #values=%d (%d bits)', name,
-                     len(bytes), len(base64bytes), len(values_for_int),
-                     value_bits)
+        _logger.info(
+            "%s: Bytes=%d, Base64=%d, #values=%d (%d bits)",
+            name,
+            len(bytes),
+            len(base64bytes),
+            len(values_for_int),
+            value_bits,
+        )
         mapping = {
-            'NAME': name,
-            'BASE64BYTES': base64bytes.decode('ascii'),
-            'VALUE_BITS': str(value_bits),
-            'VALUE_MASK': str((1 << value_bits) - 1),
-            'VALUE_LIST': ','.join(f'"{v}"' for v in values_for_int),
+            "NAME": name,
+            "BASE64BYTES": base64bytes.decode("ascii"),
+            "VALUE_BITS": str(value_bits),
+            "VALUE_MASK": str((1 << value_bits) - 1),
+            "VALUE_LIST": ",".join(f'"{v}"' for v in values_for_int),
         }
         text = template.read_text()
@@ -95,13 +106,13 @@ class UnicodeDataCompressor(object):
         text = text.substitute(mapping)
         if output:
-            if str(output) == '-':
+            if str(output) == "-":
                 sys.stdout.write(text)
             else:
                 if output.is_dir():
-                    output = output / f'{name}{template.suffix}'
-                output.write_text(text, newline='\n')
-                _logger.info('Saved to %s', output)
+                    output = output / f"{name}{template.suffix}"
+                output.write_text(text, newline="\n")
+                _logger.info("Saved to %s", output)
         return text
@@ -109,16 +120,14 @@ class UnicodeDataCompressor(object):
 def main():
     this_dir = pathlib.Path(__file__).resolve().parent
     parser = argparse.ArgumentParser()
-    parser.add_argument('--name', default='LineBreak')
-    parser.add_argument('--template',
-                        type=pathlib.Path,
-                        default=this_dir.parent / 'js' / 'template.js')
-    parser.add_argument('-o', '--output', type=pathlib.Path)
-    parser.add_argument('-v',
-                        '--verbose',
-                        help='increase output verbosity',
-                        action='count',
-                        default=0)
+    parser.add_argument("--name", default="LineBreak")
+    parser.add_argument(
+        "--template", type=pathlib.Path, default=this_dir.parent / "js" / "template.js"
+    )
+    parser.add_argument("-o", "--output", type=pathlib.Path)
+    parser.add_argument(
+        "-v", "--verbose", help="increase output verbosity", action="count", default=0
+    )
     args = parser.parse_args()
     _init_logging(args.verbose)
@@ -130,8 +139,9 @@ def main():
     output = args.output
     compressor = UnicodeDataCompressor(entries)
     compressor.substitute_template(
-        template, output=output if output else template.parent, name=args.name)
+        template, output=output if output else template.parent, name=args.name
+    )
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

unicodedata_reader-1.3.9/src/unicodedata_reader/east_asian_width.py ADDED Viewed

@@ -0,0 +1,29 @@
+#!/usr/bin/env python3
+import unicodedata
+from typing import Any
+from typing import Callable
+from typing import Dict
+from unicodedata_reader import *
+class UnicodeEastAsianWidthDataCli(UnicodeDataCli):
+    def __init__(self):
+        super().__init__()
+        self._entries = UnicodeDataReader.default.east_asian_width()
+    def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
+        return {
+            "EA": lambda code, ch: self._entries.value(code),
+            "GC": lambda code, ch: unicodedata.category(ch),
+            "EAW": lambda code, ch: unicodedata.east_asian_width(ch),
+            "cp932": lambda code, ch: u_enc(ch, "cp932"),
+            "sjis04": lambda code, ch: u_enc(ch, "sjis_2004"),
+            "cp936": lambda code, ch: u_enc(ch, "cp936"),
+            "cp949": lambda code, ch: u_enc(ch, "cp949"),
+            "cp950": lambda code, ch: u_enc(ch, "cp950"),
+        }
+if __name__ == "__main__":
+    UnicodeEastAsianWidthDataCli().main()

{unicodedata_reader-1.3.8 → unicodedata_reader-1.3.9}/src/unicodedata_reader/east_asian_width_common.py RENAMED Viewed

@@ -24,8 +24,7 @@ def dump_east_asian_width():
         "Bidi_Paired_Bracket_Type": lambda code, ch: bidi_brackets_type(code),
         "EAW": lambda code, ch: unicodedata.east_asian_width(ch),
         "Script": lambda code, ch: scripts.get(code),
-        "ScriptExt":
-        lambda code, ch: " ".join(script_extensions.get(code, [])),
+        "ScriptExt": lambda code, ch: " ".join(script_extensions.get(code, [])),
     }
     sep = "\t"
     print(f"# {sep.join(columns.keys())},Name")
@@ -38,6 +37,7 @@ def dump_east_asian_width():
         if script != "Common":
             continue
         values = (func(code, ch) for func in columns.values())
+        values = ("" if v is None else str(v) for v in values)
         output = sep.join(values)
         try:
             output += f"{sep}{unicodedata.name(chr(code))}"

unicodedata_reader-1.3.9/src/unicodedata_reader/emoji.py ADDED Viewed

@@ -0,0 +1,32 @@
+#!/usr/bin/env python3
+from typing import Any
+from typing import Callable
+from typing import Dict
+from unicodedata_reader import *
+class UnicodeEmojiDataCli(UnicodeDataCli):
+    def __init__(self):
+        super().__init__()
+        self._entries = UnicodeDataReader.default.emoji()
+    def _emoji_flag_func(self, mask: EmojiType):
+        return lambda code, ch: 1 if self._entries.value(code) & mask else 0
+    def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
+        return {
+            "Emoji": self._emoji_flag_func(EmojiType.Emoji),
+            "Emoji_Presentation": self._emoji_flag_func(EmojiType.Emoji_Presentation),
+            "Emoji_Modifier": self._emoji_flag_func(EmojiType.Emoji_Modifier),
+            "Emoji_Modifier_Base": self._emoji_flag_func(EmojiType.Emoji_Modifier_Base),
+            "Emoji_Component": self._emoji_flag_func(EmojiType.Emoji_Component),
+            "Extended_Pictographic": self._emoji_flag_func(
+                EmojiType.Extended_Pictographic
+            ),
+            "EmojiCombined": lambda code, ch: self._entries.value(code),
+        }
+if __name__ == "__main__":
+    UnicodeEmojiDataCli().main()

unicodedata-reader 1.3.8__tar.gz → 1.3.9__tar.gz

unicodedata-reader 1.3.8 (tar.gz) → 1.3.9 (tar.gz)