unicodedata-reader 1.3.4__tar.gz → 1.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. unicodedata_reader-1.3.6/.github/dependabot.yml +10 -0
  2. unicodedata_reader-1.3.6/.github/workflows/ci.yml +36 -0
  3. unicodedata_reader-1.3.6/.github/workflows/publish.yml +35 -0
  4. unicodedata_reader-1.3.6/.gitignore +131 -0
  5. unicodedata_reader-1.3.6/.yapfignore +4 -0
  6. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/PKG-INFO +7 -18
  7. unicodedata_reader-1.3.6/js/GeneralCategory.js +26 -0
  8. unicodedata_reader-1.3.6/js/LineBreak.html +19 -0
  9. unicodedata_reader-1.3.6/js/LineBreak.js +26 -0
  10. unicodedata_reader-1.3.6/js/template.js +26 -0
  11. unicodedata_reader-1.3.6/precommit.sh +9 -0
  12. unicodedata_reader-1.3.6/pyproject.toml +34 -0
  13. unicodedata_reader-1.3.6/tests/__init__.py +0 -0
  14. unicodedata_reader-1.3.6/tests/cli_test.py +31 -0
  15. unicodedata_reader-1.3.6/tests/conftest.py +6 -0
  16. unicodedata_reader-1.3.6/tests/entry_test.py +166 -0
  17. unicodedata_reader-1.3.6/tests/line_break_test.py +61 -0
  18. unicodedata_reader-1.3.6/tests/reader_test.py +30 -0
  19. unicodedata_reader-1.3.6/tests/set_test.py +98 -0
  20. unicodedata_reader-1.3.6/tox.ini +12 -0
  21. unicodedata_reader-1.3.6/unicodedata_reader/set.py +84 -0
  22. unicodedata_reader-1.3.6/uv.lock +716 -0
  23. unicodedata_reader-1.3.4/pyproject.toml +0 -31
  24. unicodedata_reader-1.3.4/unicodedata_reader/set.py +0 -66
  25. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/LICENSE +0 -0
  26. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/README.md +0 -0
  27. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/__init__.py +0 -0
  28. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/__main__.py +0 -0
  29. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/bidi_brackets.py +0 -0
  30. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/cli.py +0 -0
  31. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/compressor.py +0 -0
  32. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/east_asian_width.py +0 -0
  33. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/emoji.py +0 -0
  34. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/entry.py +0 -0
  35. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/general_category.py +0 -0
  36. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/line_break.py +0 -0
  37. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/reader.py +0 -0
  38. {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/vertical_orientation.py +0 -0
@@ -0,0 +1,10 @@
1
+ # Please see the documentation for all configuration options:
2
+ # https://docs.github.com/en/code-security/dependabot/ecosystems-supported-by-dependabot/supported-ecosystems-and-repositories
3
+
4
+ version: 2
5
+ updates:
6
+ - package-ecosystem: "uv"
7
+ directory: "/"
8
+ schedule:
9
+ interval: "daily"
10
+ open-pull-requests-limit: 10
@@ -0,0 +1,36 @@
1
+ # This workflow will install Python dependencies and run tests with a variety of Python versions
2
+ # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3
+
4
+ name: Python package CI
5
+
6
+ on:
7
+ push:
8
+ branches: [ "main" ]
9
+ pull_request:
10
+ branches: [ "main" ]
11
+
12
+ jobs:
13
+ build:
14
+
15
+ runs-on: ubuntu-latest
16
+ strategy:
17
+ fail-fast: false
18
+ matrix:
19
+ python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
20
+
21
+ steps:
22
+ - uses: actions/checkout@v4
23
+
24
+ # https://docs.astral.sh/uv/guides/integration/github/
25
+ - name: Install uv and set up Python ${{ matrix.python-version }}
26
+ uses: astral-sh/setup-uv@v5
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+
30
+ - name: Install dependencies
31
+ run: |
32
+ uv sync --all-extras --dev
33
+
34
+ - name: Test with pytest
35
+ run: |
36
+ uv run pytest
@@ -0,0 +1,35 @@
1
+ name: Publish
2
+
3
+ # Controls when the action will run.
4
+ on:
5
+ release:
6
+ types: [created]
7
+
8
+ # Allows you to run this workflow manually from the Actions tab
9
+ workflow_dispatch:
10
+
11
+ jobs:
12
+ publish:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+
17
+ - name: Set up Python
18
+ uses: actions/setup-python@v5
19
+ with:
20
+ python-version: '3.x'
21
+
22
+ # https://docs.astral.sh/uv/guides/integration/github/
23
+ - name: Install uv
24
+ uses: astral-sh/setup-uv@v5
25
+
26
+ - name: Install Dependencies
27
+ run: |
28
+ uv sync --all-extras --dev
29
+
30
+ - name: Build and publish
31
+ run: |
32
+ uv build
33
+ uv publish
34
+ env:
35
+ UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
@@ -0,0 +1,131 @@
1
+ .vscode/
2
+
3
+ # Byte-compiled / optimized / DLL files
4
+ __pycache__/
5
+ *.py[cod]
6
+ *$py.class
7
+
8
+ # C extensions
9
+ *.so
10
+
11
+ # Distribution / packaging
12
+ .Python
13
+ build/
14
+ develop-eggs/
15
+ dist/
16
+ downloads/
17
+ eggs/
18
+ .eggs/
19
+ lib/
20
+ lib64/
21
+ parts/
22
+ sdist/
23
+ var/
24
+ wheels/
25
+ pip-wheel-metadata/
26
+ share/python-wheels/
27
+ *.egg-info/
28
+ .installed.cfg
29
+ *.egg
30
+ MANIFEST
31
+
32
+ # PyInstaller
33
+ # Usually these files are written by a python script from a template
34
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
35
+ *.manifest
36
+ *.spec
37
+
38
+ # Installer logs
39
+ pip-log.txt
40
+ pip-delete-this-directory.txt
41
+
42
+ # Unit test / coverage reports
43
+ htmlcov/
44
+ .tox/
45
+ .nox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ *.py,cover
53
+ .hypothesis/
54
+ .pytest_cache/
55
+
56
+ # Translations
57
+ *.mo
58
+ *.pot
59
+
60
+ # Django stuff:
61
+ *.log
62
+ local_settings.py
63
+ db.sqlite3
64
+ db.sqlite3-journal
65
+
66
+ # Flask stuff:
67
+ instance/
68
+ .webassets-cache
69
+
70
+ # Scrapy stuff:
71
+ .scrapy
72
+
73
+ # Sphinx documentation
74
+ docs/_build/
75
+
76
+ # PyBuilder
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ .python-version
88
+
89
+ # pipenv
90
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
91
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
92
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
93
+ # install all needed dependencies.
94
+ #Pipfile.lock
95
+
96
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
97
+ __pypackages__/
98
+
99
+ # Celery stuff
100
+ celerybeat-schedule
101
+ celerybeat.pid
102
+
103
+ # SageMath parsed files
104
+ *.sage.py
105
+
106
+ # Environments
107
+ .env
108
+ .venv
109
+ env/
110
+ venv/
111
+ ENV/
112
+ env.bak/
113
+ venv.bak/
114
+
115
+ # Spyder project settings
116
+ .spyderproject
117
+ .spyproject
118
+
119
+ # Rope project settings
120
+ .ropeproject
121
+
122
+ # mkdocs documentation
123
+ /site
124
+
125
+ # mypy
126
+ .mypy_cache/
127
+ .dmypy.json
128
+ dmypy.json
129
+
130
+ # Pyre type checker
131
+ .pyre/
@@ -0,0 +1,4 @@
1
+ .eggs
2
+ .tox
3
+ .venv
4
+ .vscode
@@ -1,21 +1,11 @@
1
- Metadata-Version: 2.1
1
+ Metadata-Version: 2.4
2
2
  Name: unicodedata-reader
3
- Version: 1.3.4
4
- Summary:
5
- Home-page: https://github.com/kojiishi/unicodedata-reader
6
- License: Apache-2.0
7
- Author: Koji Ishii
8
- Author-email: kojii@chromium.org
9
- Requires-Python: >=3.8
10
- Classifier: License :: OSI Approved :: Apache Software License
11
- Classifier: Programming Language :: Python :: 3
12
- Classifier: Programming Language :: Python :: 3.8
13
- Classifier: Programming Language :: Python :: 3.9
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Requires-Dist: platformdirs (>=2.2,<5.0)
18
- Project-URL: Repository, https://github.com/kojiishi/unicodedata-reader
3
+ Version: 1.3.6
4
+ Author-email: Koji Ishii <kojii@chromium.org>
5
+ License-Expression: Apache-2.0
6
+ License-File: LICENSE
7
+ Requires-Python: >=3.9
8
+ Requires-Dist: platformdirs>=4.3.8
19
9
  Description-Content-Type: text/markdown
20
10
 
21
11
  [![CI](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml/badge.svg)](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)
@@ -100,4 +90,3 @@ unicodedata-reader lb -t js/template.js
100
90
  [GeneralCategory.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/GeneralCategory.js
101
91
  [LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html
102
92
  [LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js
103
-
@@ -0,0 +1,26 @@
1
+ const uGeneralCategoryAsInt = (function () {
2
+ const bytes = atob("h2ABQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQaIAAECYw0CCg0ODwYQDQoNBjEKDCIKEQ4SUQKFSQaBSYVsBoFsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCSwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDCkMCQwJTCkMCQwpDEksaQwpDElMKQwpDAkMCQwpDAksCQwpDEkMCQwpLA4JTG4JEwwJEwwJEwwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAksCRMMCQxJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCYFMKQwpLAkMaQwJDAkMCQwJkQwOhkyENGqCdIMqgRSBShQKFIQKm3UJDAkMFAoJDDYUTAIJdioJAkkWCRYpDIQJFoIJiEwJLElMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmBDAkMBgkMKSyMSYtsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA2BFTcJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwpDAkMCQwJDAkMCQwJLAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDBaJKTYUgSKKDAIHNi0DFosVBxUCNQI1AhWBdoZOdm4iglaBMEYiAyItglUCEEKHbhSCLoUVgihiLhWYTgIOgVUQDYE1NDUNdS6CKE4tDoMiFhAOFYcuhlU2lg6CVQ6DNoIoiA6CFTQNQhQ2FSOFLnUUghUUVRSBFTaDQhaGDlU2AhaCToEWhW4KgS4WMIEWghWKDhSFdRCHdRiNLhUYFQ5YgXV4FTgOgVWCLjUigigCFINOFTgWgW42LjaFLhaBThYOVm42FQ5YdTY4NjgVDoF2GHYuFk41NoIoLiOBMQ0DDgIVNjUYFoEudi42hS4WgU4WLhYuFi42FRZYNXY1NlVWFYFWbhYOgVaCKDVOFQKCNjUYFoIOFk4WhS4WgU4WLhaBDjYVDliBFRY1GBY4FTYOg1YuNTaCKAIDgVYOgTUWFTgWgW42LjaFLhaBThYuFoEONhUOGBUYdTY4NjgVgVY1GHYuFk41NoIoDQ6BMYI2FQ4WgS5WThZuVi4WDhYuVi5WTlaCbnY4FThWWBZYFTYOgTYYgzaCKFGBLQMNgRYVWBWBbhZOFoVOFoNuNhUOVXgWVRZ1gVY1Fk42DjYuNTaCKIFWAoFRDQ4VOAKBbhZOFoVOFoIuFoEONhUOGBWBGBYVOBY4NYFWOIE2LhYuNTaCKBYuGIJ2NTiCDhZOFooONQ5YdRZYFlgVDg12ThiBUU41NoIoghENgS4WFTgWhC5WhW4Wgg4WDjaBTlYVdlhVFhUWgXiBNoIoNjgCgnaLbhUugVV2A4EuFIF1AoIoIokWLhYOFoEOFoVuFg4Wgi4VLoIVDjaBDhYUFoFVFoIoNm6Hdg5Ng0INAk01gS2CKIIxDRUNFQ0VBAUEBTiBbhaIbnaDNRiBFQI1gQ6CVRaIdRaBbRWBLRYtgQJtIokWik44dRiBNRg1ODUOgiiBIoEuODVuVQ5YLoFYTnWDDhU4NYE4FQ4YgihYFS2JKRYJgRYJNopMAhRM0g4WbjaBThYOFm42ig4WbjaIDhZuNoFOFg4WbjaDThaODhZuNpBONlWCAoRxVoNugi2BNpUpNoEsNgeBmm4NAoQOAYYuBAVWkk5CWYFugVaELlUYghaETj
UYIoIWhC41gnaDDhZOFjWCdoxuNRiBVYF4FTiCVUIUQgMOFTaCKIE2gjGBNoEiB2JVEBWCKIE2iE4UjQ6BVoEONYguFQ6BFpEugjaHThZVeDVYdjgVgThVdg1WIoIohy42gQ6CVopudoYugTaCKBFWiC2FTjU4FTYijQ4YFRiBVRYVGBU4gXWBOII1NhWCKIE2giiBNoFCFIEiNoM1F4N1jBZ1GItOFRiBFRgVgRgVOIFuFiKCKIFCgi2CFYINQjUYhy4YdTg1GFUugiiKbhUYNVgVGFU4gXZiiG6BeIF1ODVWgQKCKFZOgiiHLoE0IoIMCQyBFopJNkmBYoF2VQKDFRiBVW4VgS4VLhg1DoEWimyPVIMMFIgsiRSPdQkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJggwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJggyBaYEsNoEpNoFsgWmBbIFpgSw2gSk2gWwWCRYJFgkWCYFsgWmDLDaBbIFzgWyBc4FsgXOBDBYsaRMKDEpMFixpE0psNixpFkqBbIEJSjZMFixpEyoWgkGBEIEnIg8SBC8SBA+BYhobgRABggIPEmIrQgYEBYJCBgILgiIBgRAWgjARFDaBMUYEBRSCMUYEBRaDFFaIA4NWgxV3FVeCdYNWLQltCS0MSSxJDA0JLQaBCYEtCQ0JDQkNaQ0MaQxuDC0sKYEGCWwNBi0MDYNxiFkJDHkRLXaBBoENJm0GLQYtBoFNBodNJi0GDQaHTcJmgW0EBQQFhG0mgU0EBZQNBocthgaJbYEmkW2FNoJNhRaOcZMthTGtTQaCDQaNLYFmm00GvW0EBQQFBAUEBQQFBAUEBYcxim2BBgQFh0YEBQQFBAUEBQQFg2a/baBGBAUEBQQFBAUEBQQFBAUEBQQFBAUEBY9GBAUEBYdmBAXAJotthQYtgSaJTTaHbRaaDYtpi2wJDEksCQwJDAkMaQwJLAmBLDRJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAksgS0JDAkMVQkMgRZiESKJLBYMgRYMNo1ugVYUAoM2FYVOghaBThaBThaBThaBThaBThaBThaBThaBThaHdSIPEg8SQg8SAg8SggIHIgcCDxIiDxIEBQQFBAUEBYECFIIiJ2IHAgSDAi1CBAUEBQQFBAUHiDaGLRaWDYJ2tS2GNoNtAUINFA4ZBAUEBQQFBAUEBS0EBQQFBAUEBQcEJQ2CGXU4B4EULVkUDgItFpUuNjUqNA4Hli4CVA6BFopOFpcuFi1xgi2HboktghYNg26HTRaCMYctgXENg1GHbYIxiU2DUc9tjO9uj22phQ4Ugp1OVo1NghaJboE0IsJuFEKDboIoLoR2CQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA4VVwKCNQIUCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDDQ1kS6COTWBIoF2hUqCFCoJDAkMCQwJDAkMCQwJTAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMFIFsCQwJDCkMCQwJDAkMCQwUKgkMCQwOCQwJTA
kMCQwJDAkMCQwJDAkMCQwJDAkMgQkMgQkMCQwJDAkMCQwJDAkMCQxpDAkMKQw2CQwWDBYMCQwJDAkMCYUWVAkMDjQMgU4VThVuFYVOODUYbRVWgTEtAw2BNoxuYoF2OIwug3g1gXYigiiBNoQ1gS5CDgIuFYIohm6BdSKFToJVOIJWAocOVlUYi04VOHU4NViDAhYUgih2IoEOFRSCDoIogQ4Wig6BNTg1ODWCFk4VgW4VGDaCKDZig24UgS5NDhgVGIwuFQ5VLjWBDjUOFQ6Fdi4UIoJOGDU4Ig40GBWCNoEuNoEuNoEughaBThaBThaKTAp0ggwUKnaTbIhOOBU4FTgCGBU2giiBNpXoboJ2hU52jA52g/98jL992y42mi6JNoFMgnaBDIEWDhWCLgaDDhaBDhYOFi4WLhaaboQKg3baTgUEg22PbjaNLoFWDYd2gm4DTYN1gUIEBQKBNoN1AicrBAUEBQQFBAUEBQQFBAUEBSIEBWJLQhZiBwQFBAUEBUIGB0YWAgMidoEOFqFONhAWQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQYEBQIEBSKCLhSLDjSHTlaBLjaBLjaBLjZOViMGCg0jFg1mLYI2UC02gm4Whi4WhE4WLhaDTjaDLog2nk6BFkJ2ixFWgg2NGXGEDTFNFoMNVg2LVosNFaA2hw5WjA6DVhWGUXaHbnGCFoRuGYFuGYEWiS6BFYEWhy4WAohudoFuAoEZijaJaYlsky42giiBNohpdohsdolugXaMboJWAoJJFoNJFoFJFikWgkwWg0wWgUwWLFaMboJ2zU6CFoUugjaBboV2gTQWijQWghSRFoEuNg4Wim4WLlYONoVOFgKBcYVOLYFRh06BdoIRi3aEThYugRaBEYUugTFWAoYugRYCj3aNbnYxLoNxNosxDlUWNYEWdW4WThaHDjZVdhWCEYFWggKBVocOMQKHDlGHdoFuDYZuNXaBEYFCghaNLlaBQoUuNoFxhE6BFoFxhC6BVmKCdoFRk3aSDo1WjEmDFoxMgVaBMYhudYF2giiBNoIobhQOhSlWgRUHFIUsgXYms3aHURaKLhY1BzYug3ZOjVZ1hw6CMQ6BdoUuglVxgQKFNoQudWKJNoUOgVGEdoVOghYYFRiNDoNVgUJ2hHGCKBUuNQ6CFlUYiw5YdTg1IhBiFYI2EDaGDoFWgiiBNlWIboEVGIF1FoIoYg44DoF2iE4VIg6CFjUYi25YghU4bmJ1AhgVgigOAg5CFoRxglaELhaGDlhVOBUYNYEiFS4VjzaBThYOFm4Wg04Wgi4CgTaLThVYgXWBFoIogTY1OBaBbjYuNoUuFoFOFi4WgQ4WNQ44FXg2ODZYNg6BNhiBFoEOODaBVVaBFYJWgi4WDjYOFokuFg5YgTUWGDYYFngWOBUYFQ4VDiIWIoF2NYcWjQ5YgXU4VRgVboECgigiFgIVToc2i25YgTUYFXg1GDUuAg6BdoIoqTaLTlh1Nng1GDWFQm41iDaLbliBdTgVGDVCDoJWgiiBNoMChFaKThUYFTiBNRgVDgKBNoIogTaEaIZ2hk42FRgVOHUYgRV2gigxQg2BTq4Wim5YghUYNQKYdodph2yCKIIRgnaBbjYONoFuFi4WhW6BOBY4NjUYFQ4YDhgVQoIWgiiRNoFuNolOWHU2NXgVDgIOGIZWDoI1iW6BNRgOdYFiFYF2DoE1OFWLLoMVGDVCDoECgxaSDoFWgiKtNogOAoM2giiBNoIOFokOGIFVFoE1GBUOgQKCNoIohFFWIocuNoU1FhiBVRg1GDWSFoFOFi4WiS6BNVYVFjUWgVUOFYF2giiBNoEuFi4Wh26BGBY1FjgVGBUOgVaCKM02hE41OCKBVjUOGIMOFoguOIEVVjgVGBWDAoIoFZUWDoNWhRGBbWOEDYMWAoHmLpk2m1kWgQKCVrBuhZJ2mA4igxaCi26DcBWBLoNVgjaH5k6BFoGRTo2uFocugn
VYVYIog7E2gY4OgVaHThaCKHYik04WgiiBNocuNoEVAoI2i26BVYECbXQCDYI2gigWgVEWhQ6BFoRO63ZUiW40QoIosTaHaYdshVFimRaSTnYVDo1YgVZ1gxSPdjQCFBWCVjiDNov9boF2grUuihaCLpG5VnQWgVQWNBbIToNWDocWTjYOgzZugXbiboTAdppOgRaDDlaCDoFWgi42DTUCcIfWdrttgiiBNuxtknaLNTaFVYIWnG2Odr0tgjaJTTaObThVTYE4gXCBdS2BVYctdY8NhRaQLVUNnjaEcYJ2hHGCdpVNghaGEaFWhimGLIYpgUwWhCyGKYYsCRYpNgk2KTZpFoFpbBYMFoFMFoJMhimGLCkWaTaBaRaBSRaGLCkWaRaBCRYJVoFJFoYshimGLIYphiyGKYYshimGLIYphiyGKYZsNoYJBoYMBoEshgkGhgwGgSyGCQaGDAaBLIYJBoYMBoEshgkGhgwGgSwJDDaMKP9tjVVtjDWBbRWDLRUtgQKDVoEVFoNVgpN2giwOhGyBNoEstRaBVRaEFTaBVRY1FoEVgRaPNIgWFZt2iw5WgVWBVDaCKHYODc92hy4VhBaKbnWCKIEWA/N2hk4UdYIotTaHLjUOgih2Avd2gU4WbhYuFoNOFrEONoIRgVWKFogpiCyBVRR2gih2IoHEFo5RDVEDcZJ2ixENg1GwNm4Whk4WLhYONg4Wgi4WbhYOFg6BNg52DhYOFg4WThYuFg42DhYOFg4WDhYOFi4WDjZuFoFOFm4WbhYOFoIuFoQOgRZOFoEOFoQOjHYmwzaKbXaYbYJ2g002g00Wg00WiQ2CNoMRqA2NdocNgxaKbXaCDYFWLYM2gS2mNr5NgQqBtW12hA1Wgw1WnU12l02BNoJtdg2DVoJtdo1tgXaCLYE2iW2BdoctNoJtdi2PNtRtgnaDLTaDDVaCLYEWjW2BVoNNNoJNgTaCDYFWpE0Wlm2CKIKBNtO3bod2iI4ugTa3LjaLoC6DNo7MDoNWgZsuhOg2gYcugvg2idJOgRaIl26K7pQWEIc2l3Cfdrt1/4N2//89Nv//PTY=");
3
+ const len = bytes.length;
4
+ const entries = []
5
+ let value = 0;
6
+ for (let i = 0; i < len; ++i) {
7
+ const byte = bytes.charCodeAt(i);
8
+ if (byte & 0x80) {
9
+ value = (value | (byte & 0x7F)) << 7;
10
+ continue;
11
+ }
12
+ value |= byte;
13
+ entries.push((value >> 5) + 1);
14
+ entries.push(value & 31);
15
+ value = 0;
16
+ }
17
+ return function (c) {
18
+ for (let i = 0; i < entries.length; i += 2) {
19
+ c -= entries[i];
20
+ if (c < 0)
21
+ return entries[i + 1];
22
+ }
23
+ }
24
+ })();
25
+ const uGeneralCategoryValues = ["Cc","Zs","Po","Sc","Ps","Pe","Sm","Pd","Nd","Lu","Sk","Pc","Ll","So","Lo","Pi","Cf","No","Pf","Lt","Lm","Mn","Cn","Me","Mc","Nl","Zl","Zp","Cs","Co"];
26
+ function uGeneralCategory(c) { return uGeneralCategoryValues[uGeneralCategoryAsInt(c)]; }
@@ -0,0 +1,19 @@
1
+ <!DOCTYPE html>
2
+ <script src="LineBreak.js"></script>
3
+ <body>
4
+ <div>
5
+ <input id="input" autofocus style="width: 100%">
6
+ </div>
7
+ <pre id="result"></pre>
8
+ <script>
9
+ input.addEventListener('input', (e) => { // `input` is not declared in this script; presumably the browser global for the id="input" element
10
+ const text = input.value;
11
+ const results = [];
12
+ for (const ch of text) { // string iteration yields whole code points, not UTF-16 code units
13
+ const code = ch.codePointAt(0);
14
+ results.push(`${ch} U+${code.toString(16)}: ${uLineBreak(code)}`); // uLineBreak comes from LineBreak.js, loaded by the script tag above
15
+ }
16
+ result.textContent = results.join('\n'); // one output line per character, written to the id="result" element
17
+ });
18
+ </script>
19
+ </body>
@@ -0,0 +1,26 @@
1
+ const uLineBreakAsInt = (function () {
2
+ const bytes = atob("hAABAkMEiEAFBgcICQoIBwsMCAkNDg0PhFBNgQgGjQgLCQyOCAsBEQiCQBKMQBMLCoEJCFQIFAcIAUgKCVQVCIIUB4EUC4sIFI8IFIHnCBQVgRQVFEgUgwiBVAgUCBWPSKcAE4VAgxOGAINIVoFIDQiBVoMIFggWiUgW70iDANJIFpJIVpdIDQFWSAkWlgABAAhACEAGAINWjReBVoFXSIUWglCBCIEKTUiFAAYAgQaVCIoAhFAKUIEIALEIBgiDABAIgkBIQAiBQEiEUIlIFkgAjkiNAFasCIUACIZWhFCQCIQAgUgNBghWAEmKSIFACIQACIEACIIAVocIFowIgQBWCBaFCIIWjwgWUIIWhACUSItAEJAAmkiBAAiIQAiDAIRIQEGEUIgIgQAWg0hWSFaKSBaDCBYIgRaBSFYACIMAVkBWgQAIg1YAgVZIFoEIQFaEUEhKgggKCAlIAFaBABaCSIFWSFaKSBaDCBZIFkgWSFYAFoIAgVZAVoEAgRYAgxaBSBYIgxaEUECBCAAIhFaBABaECBaBCBaKSBaDCBZIFoIIVgAIg0AWgQAWgQBWCIcWSEBWhFAICYMWCIJAFoEAFoNIVkhWikgWgwgWSBaCCFYACIMAVkBWgQCDFoEAgVZIFoEIQFaEUINIhFYACBaCSIEWgQgWgUiBFkgWCBZIgRZIgRaBCIEWhUiBVoIAgRaBABaBQFYIglYAhlaEUIQICQiCFoIAg0gWgQgWiwgWh0hWAAiDABaBABaBQIMWQBaBCFYIVkhAVoRQgxYVhAiBABWDSBaBCBaLCBaESBaCCFYACIMAFoEAFoFAgxZAglZIFkhAVoRQFkgAhVaBQIQIFoEIFpQIQAiDABaBABaBQEiBVoEIAIRIQFaEUIQICoJIFoEAFohIgRaLSBaECBYIVoMIgRYAgVaCQBYAFoNAglaEUFZACIVWnFiBVgmHGAiEUEGSFlgWGBaCGBaLWBYYFosYVoIYFhgWgxgWhFBWgViPVgiBVQhVE1UBE4IGEwgGgQhAgkiEUIRIAQAIAAgACxELEUCDSBaRSIFWhkABggABQIIIhQAWkUAWQYJIAIJIFkhVARWCCFOSFp9YhFBBgUifWIRQgliSSBYIghYIVpdIr1mjWqtbpAgWgUhWgwgWCBaBSFaUCBaBSFaQCBaBSFaDCBYIFoFIVocIFpwIFoFIVqEIVoEACAGNCIEWjEiCVqpIVoJIVgGCvwgBjEgLEYEWpQiBAYUIgxaISIFAhBaJCIEAQYQWiEhAhVaGCBaBCBZAhVapWEEcGAEIAQlYVoRQglaESIJWSEZBFQhGCIEAEwCEUIJWrAiDFoIIQJBIAAiCFqJIhFaPCBaFQIFWhUCBVgiBFkaEUI5YVoIYhRaVWIFWjFiCVoUQgRZYmwiCAFZInxgWjhhWAIRQglaEUIJWhlhWjwCYFoIAlx2HQB6DXRZBhF9BIIFBhGCEAIQggQGBAI5IhgBIhFCCSJJfhUBhg1aTSIlAgRaCAYRQgRaBCIRQkUhBhQiCFpUIVoUIg1aBAAiKAIFIAIJIAEiBAAiCFt9IhgATlkATgQCBikhWgkhWkkhWgkhWg0gWCBYIFggWjwhWmggWhwgWhkhWgkgWiQhWgQgWgwgVCBaDAROBASIAI0ABE0EkVAhHC4EHCwdUSIElAUOCABODSghHFFyCSA0LEYEchUgBCoFBCIEBJoFIFoRASFYUg0gLERQIgVSDSAsRFoYIgRaDCQqGSQqBSQpJCgkKhwmQAIcWgQgKCBSBCAqECBRICYRIVINIFJFIhxQIhVSBSIRUhwgUSIFWhFSbSBQIFJUIFAhUgQhUSBSBCBQIFEkIFIFIFEiBVEgUCBQIglQIFIIIgVSBSFSESBSBCBSCCBSGCFRIgVRIVEhUiEhUSFSGCBSBCBSFCBSMCBSXCCWLSAsRCxGCSBSDCGCGCAsR4giBYJpIilaFCIoWzxQIpV
SBSJIUhQiHVEiBVIRIVAiDFINIVEhUgUhUSFSBSIEUSBRIgVSHSIFUhAgUh0iBYAhUSBSBSFSBSGBUIAiBICdgjAiBIIFIFAgUjghUCIEUCBQgVAhUCBSHCCCOSFSOCIVggVQgCIEgFGCBFGBUIFSBIAgUgUhUIIJUgiAUYCcgVINggQhggWekCBSBCIJHCEYggQgLEQsRCxELEQsRCxELEY5UmAgLEY8ICxELEQsRCxELEYHJCAsRCxELEQsRCxELEQsRCxELEQsRCxGfCAsRCxGPSAsRgasIghSMSFaPSBaBq0iBAEiCFgaBAQgGAZJIFgiCFghWm0iDFggBhlYAiwiEFoMIFoMIFoMIFoMIFoMIFoMIFoMIFoMIFo9AhkeDQQgBCwFIR0hHCxELEQsRCxGBQQYIQQhBgghkgQEIQQuDQQgBCEGBCEYLDAsMCwwLDAGQVoxgFqwghVbqYIxWh2ABUWAcYAsRCxELEQsRCxFgCxELEQsRCxEcC1GEYIJAgiAAgiBcgSAWKCAoICggKCAojCAojyAoICggKIJgKIJgaFZAgVwgHCggKCAoICggKIwgKI8gKCAoICiCYCiCYGiBYBwoXCCCFpUgFq5gFqpghBYgh2iPIBaTYINUm7dgn0jSiiAchLsggRabIIQWlkhBgYYIAQYBh0iEUEiJVpcIgUAIhECPSECnSEAIggGDVuZIVkgWCBaDSIoWh0gAgQgAgUgAiwiCAIFIAIEWg0gKCIJWmUhVRoNWQJhIiECDVkGEUIJWiECESBVIAIRQjUiDQEGLCIYAhRYIjhmBFoFAlx2GAB6CYIEBgWAWAYRfgVZgh1iEUIIYFpQfhkCEFoEBAINBQFaEX1YggQGxGItWghiFCIIAQYEIQIRWgkhWgkhWgkiEFoMIFoMIFp1IgVa5CINAAUBWhFCCVimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKi
mNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKoVWixqBVpgbgVaH/2uY/1aB/2CDCIVWggiCFhcAhFcIhhcWghcWFxZXFlcWhFe5CIdWgbUIEQunSFaaSIMWCI9WhUgKgQiHQIERXEYLESWCVhMAEwATAFMAEwATAFMAgiALEQsRCxELEQsRCxELEQsRYAsRgyARIBEWXEYgCxELEQsRg2AWIAkKIIFWgggWwwhWJhYGYAkKYAsRYBEgEYUgXIEgBo0gCyARjiALIBEgC1ELURwghGiWIFyPIIEWgmBWgmBWgmBWgSCBFgoJgSBJFoMIhFaBACwUVoVIFoxIFokIFkgWhwhWhkiQVr0IghaBAYFWlgiBFqtIFoYIgRYIlxaWCADAVo4IgRaYCIcWAI0IgVaRSIQWjkiCFpJIggCCFo5IFgGRSIFWg0gBggiUVs5IVoRQglaRSIFWkUiBVpNIg1aZSIUWhUgWhwgWgwgWSBaFCBaHCBaDCBZIgRaZSIVWgZsIhBaKSIRWg0iLVoJIFpRIFoQIohaCSFYIFpVIFkiBFghWiwgWAaMIg1aECJdWiQgWSIIWkAiBFgGMSIIWCJ9Wm0iBVolIVpcIgQAWQIIWgUCBSBaBCBaOCFaBAIFWAIQIgxaDQQiDFp9Ij1aSCECBVoIIgkElhBaaSIEWgwGKSFaNCIIWjEiDFoFIhVaDCKdWpAibFpkIhhaZCIMWlEiBQINWhFCCVoRQjUiBFoIAAYsIg1ZI51aPCBaUSBZAAVZIh1aBCJsWgUCTSINWikiFAIQIilaISIFAgUiSVo1IiVaLCIQWgQBtmR2GQB5BgiCBVolghF8AXUAdhBYTgQCWCIUASBCBQQCEVhBWjAiDFoRQglaBAJFIhkAWhFCBQQhACINWkQgACBUIhBaBAJdIhkCBSEEIAYFACECEUAgVCIEBFolIhRaISBaMCIVAQQhBCABIAJ5WgwgWCBaBSBaHCBaESAGCVpcIhUCCFoRQglaBQBaDXVZdVopdFoMdFl0Wgh0WQAGDAFZAVkAeVh+CVgCCFgFfXUBWgwCBFoIAhRaEXxYfVh8WX5FdFiCEABYAVgAWgUAWgUAeLQCBIBZgg1ZAjhaaCIhAgUiBQQiEUEEWCACBCI5Wl0iJQIFIg1aEUNJWlwiDAFaEABVBRoEIhwGBSECQVpdIiABBSIUWhFCCVoYViRaVCIYASIJWhFCCVolQjVaNGFaHGIFWhFBYgQGDWNwWlUiHAAixVp9IhFCECIVWCIMdVh1Wg10WXRaLXYJAFkBWgQAeLQAtQIEBhBaEX6JWg0hWkwiDAFaDAAgVCACNFgiEQJNIgwAIgUAVCIFBFQgAg1YIhQCWSIdAgQEIgRVBhhakCIMWhFXaVpBIhlaEUIJWhAgWkgiDQBaDQAiCAYRWhFCJCIEWFQaOSFaKQBaGQKQWgwgWSBaSSIJAgRYAFkAWgwAIAINWhFCCVoJIFkgWj0iCABZAFoIACIMWhFCBml
aIXwGBQEGDFkAtAIYdFpBdgwCBFoFAHkGFIIRfAKoWCIcWjgiBSogIhhYBg8xIsla3CBaCAYUW4UiKpVaxCIYWgqtIgQuBEZFIEYEICxELEfcIC1HZCAuDEwsRgRMLEQsRAIJIhwCEVo/NCIIWgeZICxG7CJrcFo5fiECEX4biVoKcCIMWjwgWhFCBVkGnCBaEUIJWjkhWggABhFaXSIMAgQGESAEIhFaEUBaDCBaKCIIWiQiB11aWSEGEUOJWqwhBSLIWpQiBVgAImwCDFoFAhgifVoFcE4UWQIZWl/tgg1aC/2CB6kiUFgiEIKLzFoFIFoMIFkgWgZEghxYojhaBKFYohlaBaINWgcVgiYFWtQiCFoYIgRaECIMWhEhWCEABgUCPrVb3SIRQglaB2UilVpZAVosAhBa5SJ1W+kiEVpMIVp1IggCBCIpASIMAjkiBQJ4IihagSIEACLxWiUiFVolIhVarCIQWjAjDFqoIFqMIFkhWCFZIVoFIFoVIFggWgwgWoAgWgUhWg0gWgwgWjUgWgUgWgggWCIEWgwgWgalIVoGRSFaYUIH/SJsAgUiYQINIAIZIAEiBQQiHFoIAFocAhKdWjwiCVoJI6haDABaIAFaDABZAFoIAghaeSJAWALdWlgiBFoMAgwhWhFCBVkiBn1aOSACIFpVIgUCEUIIWCYHnVo1IgUCEUOpWjkhACIRQgVYIge9WgwgWgUgWSBaHCBbiCFaECIMAlBahSIMACIFWhFCBVkuDiBadCAqBCAqBSKVWngjgVoFIFo0IFkgWCFYIFoRIFoFIFggWCIJWCIFWCBYIFggWgQgWSBYIVggWCBYIFggWCBZIFghWgUgWgwgWgUgWgUgWCBaESBaICIIWgQgWgggWiAiZVkiBhlb/YIYUgQiOVEicVIJInhQIm2CMboHCICeKYEiLIEiCIAiCIIEnYCdggSeWYIIvoGBnYIUniiCJJ4EgJ4FggScggSeDICcgJ4ZgCCAIIAiCICeBYAggSKYggwiHYIZIhiCLSJRgZ4FgJ4ogJ4FgZ54gg0iLYIJIpSCBJ4EggieSSIEHgRyBSJEgJ4dggSeEICeFICeZILlIhWCqCJUghUiBYJtIg2CESIJgk0iDYI5IYIVIgWBInmCFSCdgJ4Ngg2eCYCeEIIRnYIEnm2AnniBnIGcgJ4gggScghieQYKlItyCBJ5RghCeDIMkIFq1IhFCCVoP+YFaB//5gVoH//mCUgIEWAI5Wr0C/VvdAhf6GVg==");
3
+ const len = bytes.length;
4
+ const entries = []
5
+ let value = 0;
6
+ for (let i = 0; i < len; ++i) {
7
+ const byte = bytes.charCodeAt(i);
8
+ if (byte & 0x80) {
9
+ value = (value | (byte & 0x7F)) << 7;
10
+ continue;
11
+ }
12
+ value |= byte;
13
+ entries.push((value >> 6) + 1);
14
+ entries.push(value & 63);
15
+ value = 0;
16
+ }
17
+ return function (c) {
18
+ for (let i = 0; i < entries.length; i += 2) {
19
+ c -= entries[i];
20
+ if (c < 0)
21
+ return entries[i + 1];
22
+ }
23
+ }
24
+ })();
25
+ const uLineBreakValues = ["CM","BA","LF","BK","CR","SP","EX","QU","AL","PR","PO","OP","CP","IS","HY","SY","NU","CL","NL","GL","AI","BB","XX","HL","SA","JL","JV","JT","NS","AK","VI","AS","ID","VF","ZW","ZWJ","B2","IN","WJ","EB","CJ","H2","H3","SG","CB","AP","RI","EM"];
26
+ function uLineBreak(c) { return uLineBreakValues[uLineBreakAsInt(c)]; }
@@ -0,0 +1,26 @@
1
+ const u${NAME}AsInt = (function () { // builds the table once; the returned function maps a code point to an integer index
2
+ const bytes = atob("$BASE64BYTES"); // decode the base64 payload into a binary string
3
+ const len = bytes.length;
4
+ const entries = [] // flat list of pairs: run length, value index
5
+ let value = 0;
6
+ for (let i = 0; i < len; ++i) {
7
+ const byte = bytes.charCodeAt(i);
8
+ if (byte & 0x80) { // high bit set: more bytes of the same number follow
9
+ value = (value | (byte & 0x7F)) << 7; // keep the low 7 bits and make room for the next 7
10
+ continue;
11
+ }
12
+ value |= byte; // last byte of the number (high bit clear)
13
+ entries.push((value >> $VALUE_BITS) + 1); // upper bits: run length, stored minus one
14
+ entries.push(value & $VALUE_MASK); // lower bits: value index shared by the whole run
15
+ value = 0;
16
+ }
17
+ return function (c) {
18
+ for (let i = 0; i < entries.length; i += 2) {
19
+ c -= entries[i]; // subtract run lengths until c falls inside a run
20
+ if (c < 0)
21
+ return entries[i + 1];
22
+ }
23
+ } // implicitly returns undefined when c lies beyond the last run
24
+ })();
25
+ const u${NAME}Values = [$VALUE_LIST]; // names looked up by the integer index
26
+ function u${NAME}(c) { return u${NAME}Values[u${NAME}AsInt(c)]; } // code point to value name
@@ -0,0 +1,9 @@
1
+ #!/bin/bash
2
+ set -e # stop at the first command that fails
3
+
4
+ unicodedata-reader lb -fv -t js/template.js # line-break data rendered through the JS template; presumably refreshes js/LineBreak.js (TODO confirm what -f and -v do)
5
+ unicodedata-reader gc -fv -t js/template.js # same for general category; presumably refreshes js/GeneralCategory.js (TODO confirm)
6
+
7
+ yapf -ir -vv . # reformat the Python sources in place, recursively
8
+ tox -p # run the tox environments in parallel
9
+ pytype unicodedata_reader # static type check of the package
@@ -0,0 +1,34 @@
1
[project]
name = "unicodedata-reader"
version = "1.3.6"
description = ""
authors = [{name = "Koji Ishii", email="kojii@chromium.org"}]
readme = "README.md"
license = "Apache-2.0"
requires-python = ">=3.9"
dependencies = [
    "platformdirs>=4.3.8",
]

# `repository` is not a key of the [project] table, so the link never reached
# the built metadata (PKG-INFO for 1.3.6 has no Project-URL entry). Project
# links are declared in the urls table instead.
[project.urls]
Repository = "https://github.com/kojiishi/unicodedata-reader"
13
+
14
+ [dependency-groups]
15
+ dev = [
16
+ "pytest>=8.3.5",
17
+ "pytype>=2024.9.13",
18
+ "tox>=4.25.0",
19
+ "tox-uv>=1.25.0",
20
+ "yapf>=0.43.0",
21
+ ]
22
+
23
+ [project.scripts]
24
+ unicodedata-reader = 'unicodedata_reader.__main__:main'
25
+
26
+ [build-system]
27
+ requires = ["hatchling"]
28
+ build-backend = "hatchling.build"
29
+
30
+ [tool.pytest.ini_options]
31
+ testpaths = "tests"
32
+
33
+ [tool.yapf]
34
+ based_on_style = "pep8"
File without changes
@@ -0,0 +1,31 @@
1
+ from unicodedata_reader import *
2
+
3
+
4
def _to_unicodes(text):
    """Collect the result of ``to_unicodes(text)`` into a tuple.

    The tests compare against tuple literals, so the result is
    materialized here once instead of in every assertion.
    """
    code_points = to_unicodes(text)
    return tuple(code_points)
6
+
7
+
8
def test_to_unicodes():
    """Check the text forms accepted by ``to_unicodes``."""
    cases = (
        # Four hexadecimal digits form one code point.
        ('1234', (0x1234, )),
        ('12FE', (0x12FE, )),
        ('ABCD', (0xABCD, )),
        # Five hexadecimal digits are accepted as well.
        ('12345', (0x12345, )),
        # Optional "u" / "U+" prefixes.
        ('u0009', (0x9, )),
        ('u1234', (0x1234, )),
        ('U+1234', (0x1234, )),
        # Several code points separated by space and/or comma.
        ('1234 5678', (0x1234, 0x5678)),
        ('1234,5678', (0x1234, 0x5678)),
        ('1234, 5678', (0x1234, 0x5678)),
        # Text that is not a hex number yields each character's code point.
        ('xy', (ord('x'), ord('y'))),
    )
    for text, expected in cases:
        assert _to_unicodes(text) == expected
24
+
25
+
26
def test_to_unicodes_range():
    """A ``first-last`` range expands to every code point, ends included."""
    expected = (0x1234, 0x1235, 0x1236)
    actual = _to_unicodes('1234-1236')
    assert actual == expected
28
+
29
+
30
def test_to_unicodes_array():
    """A list of strings is accepted and yields one code point per item."""
    texts = ['1234', '5678']
    actual = _to_unicodes(texts)
    assert actual == (0x1234, 0x5678)
@@ -0,0 +1,6 @@
1
+ import pathlib
2
+ import sys
3
+
4
# Make the repository root importable so that ``import unicodedata_reader``
# can resolve to this checkout even when the package is not installed.
# The previous entry pointed at ``unicodedata_parser``, a directory that is
# not part of this package, so it had no effect.
tests_dir = pathlib.Path(__file__).parent
root_dir = tests_dir.parent
sys.path.append(str(root_dir))
@@ -0,0 +1,166 @@
1
+ import pytest
2
+
3
+ from unicodedata_reader import *
4
+
5
+
6
def test_entry_eq():
    """Entries are equal only when all three constructor arguments match."""
    reference = UnicodeDataEntry(1, 3, 'A')
    same = UnicodeDataEntry(1, 3, 'A')
    other_value = UnicodeDataEntry(1, 3, 'B')
    other_first = UnicodeDataEntry(2, 3, 'A')
    other_second = UnicodeDataEntry(1, 2, 'A')

    assert reference == same
    assert reference != other_value
    assert reference != other_first
    assert reference != other_second
11
+
12
+
13
def test_from_pairs():
    """Consecutive codes with the same value merge; a gap starts a new entry."""
    pairs = (
        (1, 'A'),
        (2, 'A'),
        (3, 'B'),
        (4, 'B'),
        (6, 'C'),
        (8, 'C'),
        (9, 'C'),
        (11, 'C'),
    )
    actual = tuple(UnicodeDataEntry.from_pairs(pairs))
    assert actual == (
        UnicodeDataEntry(1, 2, 'A'),
        UnicodeDataEntry(3, 4, 'B'),
        UnicodeDataEntry(6, 6, 'C'),
        UnicodeDataEntry(8, 9, 'C'),
        UnicodeDataEntry(11, 11, 'C'),
    )
29
+
30
+
31
def test_from_pairs_unsorted():
    """Out-of-order codes raise ``AssertionError`` when the result is consumed."""
    unsorted_pairs = (
        (1, 'A'),
        (3, 'A'),
        (2, 'A'),
    )
    # The call itself sits outside the ``raises`` block: the error is only
    # expected once the returned iterable is consumed.
    pending = UnicodeDataEntry.from_pairs(unsorted_pairs)
    with pytest.raises(AssertionError):
        tuple(pending)
39
+
40
+
41
def test_from_pairs_none():
    """Pairs whose value is ``None`` still produce entries of their own."""
    pairs = (
        (1, None),
        (2, 'A'),
        (3, 'A'),
        (5, None),
        (7, 'A'),
        (9, None),
    )
    actual = tuple(UnicodeDataEntry.from_pairs(pairs))
    assert actual == (
        UnicodeDataEntry(1, 1, None),
        UnicodeDataEntry(2, 3, 'A'),
        UnicodeDataEntry(5, 5, None),
        UnicodeDataEntry(7, 7, 'A'),
        UnicodeDataEntry(9, 9, None),
    )
55
+
56
+
57
def test_from_values_none():
    """``from_values`` emits no entry for positions whose value is ``None``."""
    values = (None, 'A', None, 'A', None, 'B')
    actual = tuple(UnicodeDataEntry.from_values(values))
    assert actual == (
        UnicodeDataEntry(1, 1, 'A'),
        UnicodeDataEntry(3, 3, 'A'),
        UnicodeDataEntry(5, 5, 'B'),
    )
63
+
64
+
65
def test_value():
    """Values can be read via ``value()``, indexing and ``values_for_code()``."""
    ranges = (
        UnicodeDataEntry(1, 3, 'A'),
        UnicodeDataEntry(5, 6, 'B'),
    )
    entries = UnicodeDataEntries(entries=ranges)
    expect = (None, 'A', 'A', 'A', None, 'B', 'B')

    for code, expected_value in enumerate(expect):
        assert entries.value(code) == expected_value
        assert entries[code] == expected_value

    # The first code past the last entry has no value.
    assert entries.value(len(expect)) is None

    assert tuple(entries.values_for_code()) == expect
79
+
80
+
81
def test_missing_directive():
    """An ``@missing`` line supplies the value for codes without an entry."""
    entries = UnicodeDataEntries(lines=[
        '# test\n',
        '# @missing: 0000..10FFFF; R\n',
        '0000..001F ; R\n',
        '3000 ; U\n',
    ])
    expected_by_code = (
        (0x001F, 'R'),
        (0x2FFF, 'R'),
        (0x3000, 'U'),
        (0x3001, 'R'),
    )
    for code, expected in expected_by_code:
        assert entries.value(code) == expected

    # The directive itself is kept as the first missing entry.
    whole_range = UnicodeDataEntry(0, 0x10FFFF, 'R')
    assert entries._missing_entries[0] == whole_range
94
+
95
+
96
def test_missing_directive_lb():
    """Block defaults in LineBreak comments override the ``@missing`` value."""
    entries = UnicodeLineBreakDataEntries(lines=[
        '# test\n',
        '# - The unassigned code points in the following blocks default to "ID":\n',
        '# CJK Unified Ideographs Extension A: U+3400..U+4DBF\n',
        '# - The unassigned code points in the following block default to "PR":\n',
        '# Currency Symbols: U+20A0..U+20CF\n',
        '# @missing: 0000..10FFFF; XX\n',
    ])
    # (first code, last code, default inside the block)
    blocks = (
        (0x3400, 0x4DBF, 'ID'),
        (0x20A0, 0x20CF, 'PR'),
    )
    for first, last, block_default in blocks:
        # Just outside the block the global default applies.
        assert entries.value(first - 1) == 'XX'
        for code in range(first, last + 1):
            assert entries.value(code) == block_default
        assert entries.value(last + 1) == 'XX'
114
+
115
+
116
def test_missing_directive_vo():
    """A block listed in VerticalOrientation comments defaults to ``U``."""
    entries = UnicodeVerticalOrientationDataEntries(lines=[
        '# test\n',
        '# Control Pictures & OCR U+2400..U+245F\n',
        '# @missing: 0000..10FFFF; R\n',
    ])
    first, last = 0x2400, 0x245F

    # Codes on either side of the block keep the global default.
    assert entries.value(first - 1) == 'R'
    for code in range(first, last + 1):
        assert entries.value(code) == 'U'
    assert entries.value(last + 1) == 'R'
127
+
128
+
129
def test_normalie_no_changes():
    """Filling missing values must not alter entries rebuilt from another set."""
    # NOTE(review): "normalie" in the test name looks like a typo for
    # "normalize"; the name is kept so the test ID stays stable.
    original = UnicodeDataEntries(entries=(
        UnicodeDataEntry(1, 3, 'A'),
        UnicodeDataEntry(5, 6, 'B'),
    ))
    normalized = UnicodeDataEntries(entries=original)
    normalized.fill_missing_values()
    assert tuple(original) == tuple(normalized)
137
+
138
+
139
def test_fill_missing_values():
    """A gap filled with the next entry's value is merged into that entry."""

    class AlwaysBEntries(UnicodeDataEntries):
        """Reports 'B' as the missing value for every code."""

        def missing_value(self, code: int):
            return 'B'

    initial = (
        UnicodeDataEntry(0, 10, 'A'),
        UnicodeDataEntry(12, 20, 'B'),
    )
    entries = AlwaysBEntries(entries=initial)
    entries.fill_missing_values()

    # Code 11 was the only gap; it joins the following 'B' range.
    expected = (
        UnicodeDataEntry(0, 10, 'A'),
        UnicodeDataEntry(11, 20, 'B'),
    )
    assert len(entries) == 2
    assert entries._entries == expected
154
+
155
+
156
def test_range_as_str():
    """``range_as_str`` formats one code or a ``first..last`` pair."""

    def as_decimal(code):
        return str(code)

    def constant(code):
        return 'XYZ'

    # A single-code entry prints just that code.
    single = UnicodeDataEntry(9, 9, 'A')
    assert single.range_as_str() == '0009'

    span = UnicodeDataEntry(9, 11, 'A')
    assert span.range_as_str() == '0009..000B'
    # A custom formatter replaces the default hexadecimal form.
    assert span.range_as_str(as_decimal) == '9..11'
    # Both ends formatting to the same text collapse to one item.
    assert span.range_as_str(constant) == 'XYZ'

    # Codes above U+FFFF are printed with five digits.
    beyond_bmp = UnicodeDataEntry(0xFFFF, 0x10001, 'A')
    assert beyond_bmp.range_as_str() == 'FFFF..10001'