unicodedata-reader 1.3.4__tar.gz → 1.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unicodedata_reader-1.3.6/.github/dependabot.yml +10 -0
- unicodedata_reader-1.3.6/.github/workflows/ci.yml +36 -0
- unicodedata_reader-1.3.6/.github/workflows/publish.yml +35 -0
- unicodedata_reader-1.3.6/.gitignore +131 -0
- unicodedata_reader-1.3.6/.yapfignore +4 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/PKG-INFO +7 -18
- unicodedata_reader-1.3.6/js/GeneralCategory.js +26 -0
- unicodedata_reader-1.3.6/js/LineBreak.html +19 -0
- unicodedata_reader-1.3.6/js/LineBreak.js +26 -0
- unicodedata_reader-1.3.6/js/template.js +26 -0
- unicodedata_reader-1.3.6/precommit.sh +9 -0
- unicodedata_reader-1.3.6/pyproject.toml +34 -0
- unicodedata_reader-1.3.6/tests/__init__.py +0 -0
- unicodedata_reader-1.3.6/tests/cli_test.py +31 -0
- unicodedata_reader-1.3.6/tests/conftest.py +6 -0
- unicodedata_reader-1.3.6/tests/entry_test.py +166 -0
- unicodedata_reader-1.3.6/tests/line_break_test.py +61 -0
- unicodedata_reader-1.3.6/tests/reader_test.py +30 -0
- unicodedata_reader-1.3.6/tests/set_test.py +98 -0
- unicodedata_reader-1.3.6/tox.ini +12 -0
- unicodedata_reader-1.3.6/unicodedata_reader/set.py +84 -0
- unicodedata_reader-1.3.6/uv.lock +716 -0
- unicodedata_reader-1.3.4/pyproject.toml +0 -31
- unicodedata_reader-1.3.4/unicodedata_reader/set.py +0 -66
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/LICENSE +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/README.md +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/__init__.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/__main__.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/bidi_brackets.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/cli.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/compressor.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/east_asian_width.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/emoji.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/entry.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/general_category.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/line_break.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/reader.py +0 -0
- {unicodedata_reader-1.3.4 → unicodedata_reader-1.3.6}/unicodedata_reader/vertical_orientation.py +0 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Please see the documentation for all configuration options:
|
|
2
|
+
# https://docs.github.com/en/code-security/dependabot/ecosystems-supported-by-dependabot/supported-ecosystems-and-repositories
|
|
3
|
+
|
|
4
|
+
version: 2
|
|
5
|
+
updates:
|
|
6
|
+
- package-ecosystem: "uv"
|
|
7
|
+
directory: "/"
|
|
8
|
+
schedule:
|
|
9
|
+
interval: "daily"
|
|
10
|
+
open-pull-requests-limit: 10
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# This workflow installs the project's dependencies with uv and runs the test suite with pytest on a variety of Python versions
|
|
2
|
+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
|
3
|
+
|
|
4
|
+
name: Python package CI
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
branches: [ "main" ]
|
|
9
|
+
pull_request:
|
|
10
|
+
branches: [ "main" ]
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build:
|
|
14
|
+
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: false
|
|
18
|
+
matrix:
|
|
19
|
+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v4
|
|
23
|
+
|
|
24
|
+
# https://docs.astral.sh/uv/guides/integration/github/
|
|
25
|
+
- name: Install uv and set up Python ${{ matrix.python-version }}
|
|
26
|
+
uses: astral-sh/setup-uv@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: ${{ matrix.python-version }}
|
|
29
|
+
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: |
|
|
32
|
+
uv sync --all-extras --dev
|
|
33
|
+
|
|
34
|
+
- name: Test with pytest
|
|
35
|
+
run: |
|
|
36
|
+
uv run pytest
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
# Controls when the action will run.
|
|
4
|
+
on:
|
|
5
|
+
release:
|
|
6
|
+
types: [created]
|
|
7
|
+
|
|
8
|
+
# Allows you to run this workflow manually from the Actions tab
|
|
9
|
+
workflow_dispatch:
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
publish:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: '3.x'
|
|
21
|
+
|
|
22
|
+
# https://docs.astral.sh/uv/guides/integration/github/
|
|
23
|
+
- name: Install uv
|
|
24
|
+
uses: astral-sh/setup-uv@v5
|
|
25
|
+
|
|
26
|
+
- name: Install Dependencies
|
|
27
|
+
run: |
|
|
28
|
+
uv sync --all-extras --dev
|
|
29
|
+
|
|
30
|
+
- name: Build and publish
|
|
31
|
+
run: |
|
|
32
|
+
uv build
|
|
33
|
+
uv publish
|
|
34
|
+
env:
|
|
35
|
+
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
.vscode/
|
|
2
|
+
|
|
3
|
+
# Byte-compiled / optimized / DLL files
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.py[cod]
|
|
6
|
+
*$py.class
|
|
7
|
+
|
|
8
|
+
# C extensions
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# Distribution / packaging
|
|
12
|
+
.Python
|
|
13
|
+
build/
|
|
14
|
+
develop-eggs/
|
|
15
|
+
dist/
|
|
16
|
+
downloads/
|
|
17
|
+
eggs/
|
|
18
|
+
.eggs/
|
|
19
|
+
lib/
|
|
20
|
+
lib64/
|
|
21
|
+
parts/
|
|
22
|
+
sdist/
|
|
23
|
+
var/
|
|
24
|
+
wheels/
|
|
25
|
+
pip-wheel-metadata/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py,cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
|
|
56
|
+
# Translations
|
|
57
|
+
*.mo
|
|
58
|
+
*.pot
|
|
59
|
+
|
|
60
|
+
# Django stuff:
|
|
61
|
+
*.log
|
|
62
|
+
local_settings.py
|
|
63
|
+
db.sqlite3
|
|
64
|
+
db.sqlite3-journal
|
|
65
|
+
|
|
66
|
+
# Flask stuff:
|
|
67
|
+
instance/
|
|
68
|
+
.webassets-cache
|
|
69
|
+
|
|
70
|
+
# Scrapy stuff:
|
|
71
|
+
.scrapy
|
|
72
|
+
|
|
73
|
+
# Sphinx documentation
|
|
74
|
+
docs/_build/
|
|
75
|
+
|
|
76
|
+
# PyBuilder
|
|
77
|
+
target/
|
|
78
|
+
|
|
79
|
+
# Jupyter Notebook
|
|
80
|
+
.ipynb_checkpoints
|
|
81
|
+
|
|
82
|
+
# IPython
|
|
83
|
+
profile_default/
|
|
84
|
+
ipython_config.py
|
|
85
|
+
|
|
86
|
+
# pyenv
|
|
87
|
+
.python-version
|
|
88
|
+
|
|
89
|
+
# pipenv
|
|
90
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
91
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
92
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
93
|
+
# install all needed dependencies.
|
|
94
|
+
#Pipfile.lock
|
|
95
|
+
|
|
96
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
97
|
+
__pypackages__/
|
|
98
|
+
|
|
99
|
+
# Celery stuff
|
|
100
|
+
celerybeat-schedule
|
|
101
|
+
celerybeat.pid
|
|
102
|
+
|
|
103
|
+
# SageMath parsed files
|
|
104
|
+
*.sage.py
|
|
105
|
+
|
|
106
|
+
# Environments
|
|
107
|
+
.env
|
|
108
|
+
.venv
|
|
109
|
+
env/
|
|
110
|
+
venv/
|
|
111
|
+
ENV/
|
|
112
|
+
env.bak/
|
|
113
|
+
venv.bak/
|
|
114
|
+
|
|
115
|
+
# Spyder project settings
|
|
116
|
+
.spyderproject
|
|
117
|
+
.spyproject
|
|
118
|
+
|
|
119
|
+
# Rope project settings
|
|
120
|
+
.ropeproject
|
|
121
|
+
|
|
122
|
+
# mkdocs documentation
|
|
123
|
+
/site
|
|
124
|
+
|
|
125
|
+
# mypy
|
|
126
|
+
.mypy_cache/
|
|
127
|
+
.dmypy.json
|
|
128
|
+
dmypy.json
|
|
129
|
+
|
|
130
|
+
# Pyre type checker
|
|
131
|
+
.pyre/
|
|
@@ -1,21 +1,11 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: unicodedata-reader
|
|
3
|
-
Version: 1.3.4
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
License:
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Requires-Python: >=3.8
|
|
10
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Requires-Dist: platformdirs (>=2.2,<5.0)
|
|
18
|
-
Project-URL: Repository, https://github.com/kojiishi/unicodedata-reader
|
|
3
|
+
Version: 1.3.6
|
|
4
|
+
Author-email: Koji Ishii <kojii@chromium.org>
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
License-File: LICENSE
|
|
7
|
+
Requires-Python: >=3.9
|
|
8
|
+
Requires-Dist: platformdirs>=4.3.8
|
|
19
9
|
Description-Content-Type: text/markdown
|
|
20
10
|
|
|
21
11
|
[![CI](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml/badge.svg)](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)
|
|
@@ -100,4 +90,3 @@ unicodedata-reader lb -t js/template.js
|
|
|
100
90
|
[GeneralCategory.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/GeneralCategory.js
|
|
101
91
|
[LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html
|
|
102
92
|
[LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js
|
|
103
|
-
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uGeneralCategoryAsInt = (function () {
|
|
2
|
+
const bytes = atob("h2ABQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQaIAAECYw0CCg0ODwYQDQoNBjEKDCIKEQ4SUQKFSQaBSYVsBoFsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCSwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDCkMCQwJTCkMCQwpDEksaQwpDElMKQwpDAkMCQwpDAksCQwpDEkMCQwpLA4JTG4JEwwJEwwJEwwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAksCRMMCQxJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCYFMKQwpLAkMaQwJDAkMCQwJkQwOhkyENGqCdIMqgRSBShQKFIQKm3UJDAkMFAoJDDYUTAIJdioJAkkWCRYpDIQJFoIJiEwJLElMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmBDAkMBgkMKSyMSYtsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA2BFTcJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwpDAkMCQwJDAkMCQwJLAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDBaJKTYUgSKKDAIHNi0DFosVBxUCNQI1AhWBdoZOdm4iglaBMEYiAyItglUCEEKHbhSCLoUVgihiLhWYTgIOgVUQDYE1NDUNdS6CKE4tDoMiFhAOFYcuhlU2lg6CVQ6DNoIoiA6CFTQNQhQ2FSOFLnUUghUUVRSBFTaDQhaGDlU2AhaCToEWhW4KgS4WMIEWghWKDhSFdRCHdRiNLhUYFQ5YgXV4FTgOgVWCLjUigigCFINOFTgWgW42LjaFLhaBThYOVm42FQ5YdTY4NjgVDoF2GHYuFk41NoIoLiOBMQ0DDgIVNjUYFoEudi42hS4WgU4WLhYuFi42FRZYNXY1NlVWFYFWbhYOgVaCKDVOFQKCNjUYFoIOFk4WhS4WgU4WLhaBDjYVDliBFRY1GBY4FTYOg1YuNTaCKAIDgVYOgTUWFTgWgW42LjaFLhaBThYuFoEONhUOGBUYdTY4NjgVgVY1GHYuFk41NoIoDQ6BMYI2FQ4WgS5WThZuVi4WDhYuVi5WTlaCbnY4FThWWBZYFTYOgTYYgzaCKFGBLQMNgRYVWBWBbhZOFoVOFoNuNhUOVXgWVRZ1gVY1Fk42DjYuNTaCKIFWAoFRDQ4VOAKBbhZOFoVOFoIuFoEONhUOGBWBGBYVOBY4NYFWOIE2LhYuNTaCKBYuGIJ2NTiCDhZOFooONQ5YdRZYFlgVDg12ThiBUU41NoIoghENgS4WFTgWhC5WhW4Wgg4WDjaBTlYVdlhVFhUWgXiBNoIoNjgCgnaLbhUugVV2A4EuFIF1AoIoIokWLhYOFoEOFoVuFg4Wgi4VLoIVDjaBDhYUFoFVFoIoNm6Hdg5Ng0INAk01gS2CKIIxDRUNFQ0VBAUEBTiBbhaIbnaDNRiBFQI1gQ6CVRaIdRaBbRWBLRYtgQJtIokWik44dRiBNRg1ODUOgiiBIoEuODVuVQ5YLoFYTnWDDhU4NYE4FQ4YgihYFS2JKRYJgRYJNopMAhRM0g4WbjaBThYOFm42ig4WbjaIDhZuNoFOFg4WbjaDThaODhZuNpBONlWCAoRxVoNugi2BNpUpNoEsNgeBmm4NAoQOAYYuBAVWkk5CWYFugVaELlUYghaETjUY
IoIWhC41gnaDDhZOFjWCdoxuNRiBVYF4FTiCVUIUQgMOFTaCKIE2gjGBNoEiB2JVEBWCKIE2iE4UjQ6BVoEONYguFQ6BFpEugjaHThZVeDVYdjgVgThVdg1WIoIohy42gQ6CVopudoYugTaCKBFWiC2FTjU4FTYijQ4YFRiBVRYVGBU4gXWBOII1NhWCKIE2giiBNoFCFIEiNoM1F4N1jBZ1GItOFRiBFRgVgRgVOIFuFiKCKIFCgi2CFYINQjUYhy4YdTg1GFUugiiKbhUYNVgVGFU4gXZiiG6BeIF1ODVWgQKCKFZOgiiHLoE0IoIMCQyBFopJNkmBYoF2VQKDFRiBVW4VgS4VLhg1DoEWimyPVIMMFIgsiRSPdQkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJggwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJggyBaYEsNoEpNoFsgWmBbIFpgSw2gSk2gWwWCRYJFgkWCYFsgWmDLDaBbIFzgWyBc4FsgXOBDBYsaRMKDEpMFixpE0psNixpFkqBbIEJSjZMFixpEyoWgkGBEIEnIg8SBC8SBA+BYhobgRABggIPEmIrQgYEBYJCBgILgiIBgRAWgjARFDaBMUYEBRSCMUYEBRaDFFaIA4NWgxV3FVeCdYNWLQltCS0MSSxJDA0JLQaBCYEtCQ0JDQkNaQ0MaQxuDC0sKYEGCWwNBi0MDYNxiFkJDHkRLXaBBoENJm0GLQYtBoFNBodNJi0GDQaHTcJmgW0EBQQFhG0mgU0EBZQNBocthgaJbYEmkW2FNoJNhRaOcZMthTGtTQaCDQaNLYFmm00GvW0EBQQFBAUEBQQFBAUEBYcxim2BBgQFh0YEBQQFBAUEBQQFg2a/baBGBAUEBQQFBAUEBQQFBAUEBQQFBAUEBY9GBAUEBYdmBAXAJotthQYtgSaJTTaHbRaaDYtpi2wJDEksCQwJDAkMaQwJLAmBLDRJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAksgS0JDAkMVQkMgRZiESKJLBYMgRYMNo1ugVYUAoM2FYVOghaBThaBThaBThaBThaBThaBThaBThaBThaHdSIPEg8SQg8SAg8SggIHIgcCDxIiDxIEBQQFBAUEBYECFIIiJ2IHAgSDAi1CBAUEBQQFBAUHiDaGLRaWDYJ2tS2GNoNtAUINFA4ZBAUEBQQFBAUEBS0EBQQFBAUEBQcEJQ2CGXU4B4EULVkUDgItFpUuNjUqNA4Hli4CVA6BFopOFpcuFi1xgi2HboktghYNg26HTRaCMYctgXENg1GHbYIxiU2DUc9tjO9uj22phQ4Ugp1OVo1NghaJboE0IsJuFEKDboIoLoR2CQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA4VVwKCNQIUCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDDQ1kS6COTWBIoF2hUqCFCoJDAkMCQwJDAkMCQwJTAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMFIFsCQwJDCkMCQwJDAkMCQwUKgkMCQwOCQwJTAkM
CQwJDAkMCQwJDAkMCQwJDAkMgQkMgQkMCQwJDAkMCQwJDAkMCQxpDAkMKQw2CQwWDBYMCQwJDAkMCYUWVAkMDjQMgU4VThVuFYVOODUYbRVWgTEtAw2BNoxuYoF2OIwug3g1gXYigiiBNoQ1gS5CDgIuFYIohm6BdSKFToJVOIJWAocOVlUYi04VOHU4NViDAhYUgih2IoEOFRSCDoIogQ4Wig6BNTg1ODWCFk4VgW4VGDaCKDZig24UgS5NDhgVGIwuFQ5VLjWBDjUOFQ6Fdi4UIoJOGDU4Ig40GBWCNoEuNoEuNoEughaBThaBThaKTAp0ggwUKnaTbIhOOBU4FTgCGBU2giiBNpXoboJ2hU52jA52g/98jL992y42mi6JNoFMgnaBDIEWDhWCLgaDDhaBDhYOFi4WLhaaboQKg3baTgUEg22PbjaNLoFWDYd2gm4DTYN1gUIEBQKBNoN1AicrBAUEBQQFBAUEBQQFBAUEBSIEBWJLQhZiBwQFBAUEBUIGB0YWAgMidoEOFqFONhAWQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQYEBQIEBSKCLhSLDjSHTlaBLjaBLjaBLjZOViMGCg0jFg1mLYI2UC02gm4Whi4WhE4WLhaDTjaDLog2nk6BFkJ2ixFWgg2NGXGEDTFNFoMNVg2LVosNFaA2hw5WjA6DVhWGUXaHbnGCFoRuGYFuGYEWiS6BFYEWhy4WAohudoFuAoEZijaJaYlsky42giiBNohpdohsdolugXaMboJWAoJJFoNJFoFJFikWgkwWg0wWgUwWLFaMboJ2zU6CFoUugjaBboV2gTQWijQWghSRFoEuNg4Wim4WLlYONoVOFgKBcYVOLYFRh06BdoIRi3aEThYugRaBEYUugTFWAoYugRYCj3aNbnYxLoNxNosxDlUWNYEWdW4WThaHDjZVdhWCEYFWggKBVocOMQKHDlGHdoFuDYZuNXaBEYFCghaNLlaBQoUuNoFxhE6BFoFxhC6BVmKCdoFRk3aSDo1WjEmDFoxMgVaBMYhudYF2giiBNoIobhQOhSlWgRUHFIUsgXYms3aHURaKLhY1BzYug3ZOjVZ1hw6CMQ6BdoUuglVxgQKFNoQudWKJNoUOgVGEdoVOghYYFRiNDoNVgUJ2hHGCKBUuNQ6CFlUYiw5YdTg1IhBiFYI2EDaGDoFWgiiBNlWIboEVGIF1FoIoYg44DoF2iE4VIg6CFjUYi25YghU4bmJ1AhgVgigOAg5CFoRxglaELhaGDlhVOBUYNYEiFS4VjzaBThYOFm4Wg04Wgi4CgTaLThVYgXWBFoIogTY1OBaBbjYuNoUuFoFOFi4WgQ4WNQ44FXg2ODZYNg6BNhiBFoEOODaBVVaBFYJWgi4WDjYOFokuFg5YgTUWGDYYFngWOBUYFQ4VDiIWIoF2NYcWjQ5YgXU4VRgVboECgigiFgIVToc2i25YgTUYFXg1GDUuAg6BdoIoqTaLTlh1Nng1GDWFQm41iDaLbliBdTgVGDVCDoJWgiiBNoMChFaKThUYFTiBNRgVDgKBNoIogTaEaIZ2hk42FRgVOHUYgRV2gigxQg2BTq4Wim5YghUYNQKYdodph2yCKIIRgnaBbjYONoFuFi4WhW6BOBY4NjUYFQ4YDhgVQoIWgiiRNoFuNolOWHU2NXgVDgIOGIZWDoI1iW6BNRgOdYFiFYF2DoE1OFWLLoMVGDVCDoECgxaSDoFWgiKtNogOAoM2giiBNoIOFokOGIFVFoE1GBUOgQKCNoIohFFWIocuNoU1FhiBVRg1GDWSFoFOFi4WiS6BNVYVFjUWgVUOFYF2giiBNoEuFi4Wh26BGBY1FjgVGBUOgVaCKM02hE41OCKBVjUOGIMOFoguOIEVVjgVGBWDAoIoFZUWDoNWhRGBbWOEDYMWAoHmLpk2m1kWgQKCVrBuhZJ2mA4igxaCi26DcBWBLoNVgjaH5k6BFoGRTo2uFocugnVY
VYIog7E2gY4OgVaHThaCKHYik04WgiiBNocuNoEVAoI2i26BVYECbXQCDYI2gigWgVEWhQ6BFoRO63ZUiW40QoIosTaHaYdshVFimRaSTnYVDo1YgVZ1gxSPdjQCFBWCVjiDNov9boF2grUuihaCLpG5VnQWgVQWNBbIToNWDocWTjYOgzZugXbiboTAdppOgRaDDlaCDoFWgi42DTUCcIfWdrttgiiBNuxtknaLNTaFVYIWnG2Odr0tgjaJTTaObThVTYE4gXCBdS2BVYctdY8NhRaQLVUNnjaEcYJ2hHGCdpVNghaGEaFWhimGLIYpgUwWhCyGKYYsCRYpNgk2KTZpFoFpbBYMFoFMFoJMhimGLCkWaTaBaRaBSRaGLCkWaRaBCRYJVoFJFoYshimGLIYphiyGKYYshimGLIYphiyGKYZsNoYJBoYMBoEshgkGhgwGgSyGCQaGDAaBLIYJBoYMBoEshgkGhgwGgSwJDDaMKP9tjVVtjDWBbRWDLRUtgQKDVoEVFoNVgpN2giwOhGyBNoEstRaBVRaEFTaBVRY1FoEVgRaPNIgWFZt2iw5WgVWBVDaCKHYODc92hy4VhBaKbnWCKIEWA/N2hk4UdYIotTaHLjUOgih2Avd2gU4WbhYuFoNOFrEONoIRgVWKFogpiCyBVRR2gih2IoHEFo5RDVEDcZJ2ixENg1GwNm4Whk4WLhYONg4Wgi4WbhYOFg6BNg52DhYOFg4WThYuFg42DhYOFg4WDhYOFi4WDjZuFoFOFm4WbhYOFoIuFoQOgRZOFoEOFoQOjHYmwzaKbXaYbYJ2g002g00Wg00WiQ2CNoMRqA2NdocNgxaKbXaCDYFWLYM2gS2mNr5NgQqBtW12hA1Wgw1WnU12l02BNoJtdg2DVoJtdo1tgXaCLYE2iW2BdoctNoJtdi2PNtRtgnaDLTaDDVaCLYEWjW2BVoNNNoJNgTaCDYFWpE0Wlm2CKIKBNtO3bod2iI4ugTa3LjaLoC6DNo7MDoNWgZsuhOg2gYcugvg2idJOgRaIl26K7pQWEIc2l3Cfdrt1/4N2//89Nv//PTY=");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 5) + 1);
|
|
14
|
+
entries.push(value & 31);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uGeneralCategoryValues = ["Cc","Zs","Po","Sc","Ps","Pe","Sm","Pd","Nd","Lu","Sk","Pc","Ll","So","Lo","Pi","Cf","No","Pf","Lt","Lm","Mn","Cn","Me","Mc","Nl","Zl","Zp","Cs","Co"];
|
|
26
|
+
function uGeneralCategory(c) { return uGeneralCategoryValues[uGeneralCategoryAsInt(c)]; }
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<script src="LineBreak.js"></script>
|
|
3
|
+
<body>
|
|
4
|
+
<div>
|
|
5
|
+
<input id="input" autofocus style="width: 100%">
|
|
6
|
+
</div>
|
|
7
|
+
<pre id="result"></pre>
|
|
8
|
+
<script>
|
|
9
|
+
input.addEventListener('input', (e) => {
|
|
10
|
+
const text = input.value;
|
|
11
|
+
const results = [];
|
|
12
|
+
for (const ch of text) {
|
|
13
|
+
const code = ch.codePointAt(0);
|
|
14
|
+
results.push(`${ch} U+${code.toString(16)}: ${uLineBreak(code)}`);
|
|
15
|
+
}
|
|
16
|
+
result.textContent = results.join('\n');
|
|
17
|
+
});
|
|
18
|
+
</script>
|
|
19
|
+
</body>
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uLineBreakAsInt = (function () {
|
|
2
|
+
const bytes = atob("hAABAkMEiEAFBgcICQoIBwsMCAkNDg0PhFBNgQgGjQgLCQyOCAsBEQiCQBKMQBMLCoEJCFQIFAcIAUgKCVQVCIIUB4EUC4sIFI8IFIHnCBQVgRQVFEgUgwiBVAgUCBWPSKcAE4VAgxOGAINIVoFIDQiBVoMIFggWiUgW70iDANJIFpJIVpdIDQFWSAkWlgABAAhACEAGAINWjReBVoFXSIUWglCBCIEKTUiFAAYAgQaVCIoAhFAKUIEIALEIBgiDABAIgkBIQAiBQEiEUIlIFkgAjkiNAFasCIUACIZWhFCQCIQAgUgNBghWAEmKSIFACIQACIEACIIAVocIFowIgQBWCBaFCIIWjwgWUIIWhACUSItAEJAAmkiBAAiIQAiDAIRIQEGEUIgIgQAWg0hWSFaKSBaDCBYIgRaBSFYACIMAVkBWgQAIg1YAgVZIFoEIQFaEUEhKgggKCAlIAFaBABaCSIFWSFaKSBaDCBZIFkgWSFYAFoIAgVZAVoEAgRYAgxaBSBYIgxaEUECBCAAIhFaBABaECBaBCBaKSBaDCBZIFoIIVgAIg0AWgQAWgQBWCIcWSEBWhFAICYMWCIJAFoEAFoNIVkhWikgWgwgWSBaCCFYACIMAVkBWgQCDFoEAgVZIFoEIQFaEUINIhFYACBaCSIEWgQgWgUiBFkgWCBZIgRZIgRaBCIEWhUiBVoIAgRaBABaBQFYIglYAhlaEUIQICQiCFoIAg0gWgQgWiwgWh0hWAAiDABaBABaBQIMWQBaBCFYIVkhAVoRQgxYVhAiBABWDSBaBCBaLCBaESBaCCFYACIMAFoEAFoFAgxZAglZIFkhAVoRQFkgAhVaBQIQIFoEIFpQIQAiDABaBABaBQEiBVoEIAIRIQFaEUIQICoJIFoEAFohIgRaLSBaECBYIVoMIgRYAgVaCQBYAFoNAglaEUFZACIVWnFiBVgmHGAiEUEGSFlgWGBaCGBaLWBYYFosYVoIYFhgWgxgWhFBWgViPVgiBVQhVE1UBE4IGEwgGgQhAgkiEUIRIAQAIAAgACxELEUCDSBaRSIFWhkABggABQIIIhQAWkUAWQYJIAIJIFkhVARWCCFOSFp9YhFBBgUifWIRQgliSSBYIghYIVpdIr1mjWqtbpAgWgUhWgwgWCBaBSFaUCBaBSFaQCBaBSFaDCBYIFoFIVocIFpwIFoFIVqEIVoEACAGNCIEWjEiCVqpIVoJIVgGCvwgBjEgLEYEWpQiBAYUIgxaISIFAhBaJCIEAQYQWiEhAhVaGCBaBCBZAhVapWEEcGAEIAQlYVoRQglaESIJWSEZBFQhGCIEAEwCEUIJWrAiDFoIIQJBIAAiCFqJIhFaPCBaFQIFWhUCBVgiBFkaEUI5YVoIYhRaVWIFWjFiCVoUQgRZYmwiCAFZInxgWjhhWAIRQglaEUIJWhlhWjwCYFoIAlx2HQB6DXRZBhF9BIIFBhGCEAIQggQGBAI5IhgBIhFCCSJJfhUBhg1aTSIlAgRaCAYRQgRaBCIRQkUhBhQiCFpUIVoUIg1aBAAiKAIFIAIJIAEiBAAiCFt9IhgATlkATgQCBikhWgkhWkkhWgkhWg0gWCBYIFggWjwhWmggWhwgWhkhWgkgWiQhWgQgWgwgVCBaDAROBASIAI0ABE0EkVAhHC4EHCwdUSIElAUOCABODSghHFFyCSA0LEYEchUgBCoFBCIEBJoFIFoRASFYUg0gLERQIgVSDSAsRFoYIgRaDCQqGSQqBSQpJCgkKhwmQAIcWgQgKCBSBCAqECBRICYRIVINIFJFIhxQIhVSBSIRUhwgUSIFWhFSbSBQIFJUIFAhUgQhUSBSBCBQIFEkIFIFIFEiBVEgUCBQIglQIFIIIgVSBSFSESBSBCBSCCBSGCFRIgVRIVEhUiEhUSFSGCBSBCBSFCBSMCBSXCCWLSAsRCxGCSBSDCGCGCAsR4giBYJpIilaFCIoWzxQIpVSB
SJIUhQiHVEiBVIRIVAiDFINIVEhUgUhUSFSBSIEUSBRIgVSHSIFUhAgUh0iBYAhUSBSBSFSBSGBUIAiBICdgjAiBIIFIFAgUjghUCIEUCBQgVAhUCBSHCCCOSFSOCIVggVQgCIEgFGCBFGBUIFSBIAgUgUhUIIJUgiAUYCcgVINggQhggWekCBSBCIJHCEYggQgLEQsRCxELEQsRCxELEY5UmAgLEY8ICxELEQsRCxELEYHJCAsRCxELEQsRCxELEQsRCxELEQsRCxGfCAsRCxGPSAsRgasIghSMSFaPSBaBq0iBAEiCFgaBAQgGAZJIFgiCFghWm0iDFggBhlYAiwiEFoMIFoMIFoMIFoMIFoMIFoMIFoMIFoMIFo9AhkeDQQgBCwFIR0hHCxELEQsRCxGBQQYIQQhBgghkgQEIQQuDQQgBCEGBCEYLDAsMCwwLDAGQVoxgFqwghVbqYIxWh2ABUWAcYAsRCxELEQsRCxFgCxELEQsRCxEcC1GEYIJAgiAAgiBcgSAWKCAoICggKCAojCAojyAoICggKIJgKIJgaFZAgVwgHCggKCAoICggKIwgKI8gKCAoICiCYCiCYGiBYBwoXCCCFpUgFq5gFqpghBYgh2iPIBaTYINUm7dgn0jSiiAchLsggRabIIQWlkhBgYYIAQYBh0iEUEiJVpcIgUAIhECPSECnSEAIggGDVuZIVkgWCBaDSIoWh0gAgQgAgUgAiwiCAIFIAIEWg0gKCIJWmUhVRoNWQJhIiECDVkGEUIJWiECESBVIAIRQjUiDQEGLCIYAhRYIjhmBFoFAlx2GAB6CYIEBgWAWAYRfgVZgh1iEUIIYFpQfhkCEFoEBAINBQFaEX1YggQGxGItWghiFCIIAQYEIQIRWgkhWgkhWgkiEFoMIFoMIFp1IgVa5CINAAUBWhFCCVimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimN
KimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKoVWixqBVpgbgVaH/2uY/1aB/2CDCIVWggiCFhcAhFcIhhcWghcWFxZXFlcWhFe5CIdWgbUIEQunSFaaSIMWCI9WhUgKgQiHQIERXEYLESWCVhMAEwATAFMAEwATAFMAgiALEQsRCxELEQsRCxELEQsRYAsRgyARIBEWXEYgCxELEQsRg2AWIAkKIIFWgggWwwhWJhYGYAkKYAsRYBEgEYUgXIEgBo0gCyARjiALIBEgC1ELURwghGiWIFyPIIEWgmBWgmBWgmBWgSCBFgoJgSBJFoMIhFaBACwUVoVIFoxIFokIFkgWhwhWhkiQVr0IghaBAYFWlgiBFqtIFoYIgRYIlxaWCADAVo4IgRaYCIcWAI0IgVaRSIQWjkiCFpJIggCCFo5IFgGRSIFWg0gBggiUVs5IVoRQglaRSIFWkUiBVpNIg1aZSIUWhUgWhwgWgwgWSBaFCBaHCBaDCBZIgRaZSIVWgZsIhBaKSIRWg0iLVoJIFpRIFoQIohaCSFYIFpVIFkiBFghWiwgWAaMIg1aECJdWiQgWSIIWkAiBFgGMSIIWCJ9Wm0iBVolIVpcIgQAWQIIWgUCBSBaBCBaOCFaBAIFWAIQIgxaDQQiDFp9Ij1aSCECBVoIIgkElhBaaSIEWgwGKSFaNCIIWjEiDFoFIhVaDCKdWpAibFpkIhhaZCIMWlEiBQINWhFCCVoRQjUiBFoIAAYsIg1ZI51aPCBaUSBZAAVZIh1aBCJsWgUCTSINWikiFAIQIilaISIFAgUiSVo1IiVaLCIQWgQBtmR2GQB5BgiCBVolghF8AXUAdhBYTgQCWCIUASBCBQQCEVhBWjAiDFoRQglaBAJFIhkAWhFCBQQhACINWkQgACBUIhBaBAJdIhkCBSEEIAYFACECEUAgVCIEBFolIhRaISBaMCIVAQQhBCABIAJ5WgwgWCBaBSBaHCBaESAGCVpcIhUCCFoRQglaBQBaDXVZdVopdFoMdFl0Wgh0WQAGDAFZAVkAeVh+CVgCCFgFfXUBWgwCBFoIAhRaEXxYfVh8WX5FdFiCEABYAVgAWgUAWgUAeLQCBIBZgg1ZAjhaaCIhAgUiBQQiEUEEWCACBCI5Wl0iJQIFIg1aEUNJWlwiDAFaEABVBRoEIhwGBSECQVpdIiABBSIUWhFCCVoYViRaVCIYASIJWhFCCVolQjVaNGFaHGIFWhFBYgQGDWNwWlUiHAAixVp9IhFCECIVWCIMdVh1Wg10WXRaLXYJAFkBWgQAeLQAtQIEBhBaEX6JWg0hWkwiDAFaDAAgVCACNFgiEQJNIgwAIgUAVCIFBFQgAg1YIhQCWSIdAgQEIgRVBhhakCIMWhFXaVpBIhlaEUIJWhAgWkgiDQBaDQAiCAYRWhFCJCIEWFQaOSFaKQBaGQKQWgwgWSBaSSIJAgRYAFkAWgwAIAINWhFCCVoJIFkgWj0iCABZAFoIACIMWhFCBmlaI
XwGBQEGDFkAtAIYdFpBdgwCBFoFAHkGFIIRfAKoWCIcWjgiBSogIhhYBg8xIsla3CBaCAYUW4UiKpVaxCIYWgqtIgQuBEZFIEYEICxELEfcIC1HZCAuDEwsRgRMLEQsRAIJIhwCEVo/NCIIWgeZICxG7CJrcFo5fiECEX4biVoKcCIMWjwgWhFCBVkGnCBaEUIJWjkhWggABhFaXSIMAgQGESAEIhFaEUBaDCBaKCIIWiQiB11aWSEGEUOJWqwhBSLIWpQiBVgAImwCDFoFAhgifVoFcE4UWQIZWl/tgg1aC/2CB6kiUFgiEIKLzFoFIFoMIFkgWgZEghxYojhaBKFYohlaBaINWgcVgiYFWtQiCFoYIgRaECIMWhEhWCEABgUCPrVb3SIRQglaB2UilVpZAVosAhBa5SJ1W+kiEVpMIVp1IggCBCIpASIMAjkiBQJ4IihagSIEACLxWiUiFVolIhVarCIQWjAjDFqoIFqMIFkhWCFZIVoFIFoVIFggWgwgWoAgWgUhWg0gWgwgWjUgWgUgWgggWCIEWgwgWgalIVoGRSFaYUIH/SJsAgUiYQINIAIZIAEiBQQiHFoIAFocAhKdWjwiCVoJI6haDABaIAFaDABZAFoIAghaeSJAWALdWlgiBFoMAgwhWhFCBVkiBn1aOSACIFpVIgUCEUIIWCYHnVo1IgUCEUOpWjkhACIRQgVYIge9WgwgWgUgWSBaHCBbiCFaECIMAlBahSIMACIFWhFCBVkuDiBadCAqBCAqBSKVWngjgVoFIFo0IFkgWCFYIFoRIFoFIFggWCIJWCIFWCBYIFggWgQgWSBYIVggWCBYIFggWCBZIFghWgUgWgwgWgUgWgUgWCBaESBaICIIWgQgWgggWiAiZVkiBhlb/YIYUgQiOVEicVIJInhQIm2CMboHCICeKYEiLIEiCIAiCIIEnYCdggSeWYIIvoGBnYIUniiCJJ4EgJ4FggScggSeDICcgJ4ZgCCAIIAiCICeBYAggSKYggwiHYIZIhiCLSJRgZ4FgJ4ogJ4FgZ54gg0iLYIJIpSCBJ4EggieSSIEHgRyBSJEgJ4dggSeEICeFICeZILlIhWCqCJUghUiBYJtIg2CESIJgk0iDYI5IYIVIgWBInmCFSCdgJ4Ngg2eCYCeEIIRnYIEnm2AnniBnIGcgJ4gggScghieQYKlItyCBJ5RghCeDIMkIFq1IhFCCVoP+YFaB//5gVoH//mCUgIEWAI5Wr0C/VvdAhf6GVg==");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 6) + 1);
|
|
14
|
+
entries.push(value & 63);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uLineBreakValues = ["CM","BA","LF","BK","CR","SP","EX","QU","AL","PR","PO","OP","CP","IS","HY","SY","NU","CL","NL","GL","AI","BB","XX","HL","SA","JL","JV","JT","NS","AK","VI","AS","ID","VF","ZW","ZWJ","B2","IN","WJ","EB","CJ","H2","H3","SG","CB","AP","RI","EM"];
|
|
26
|
+
function uLineBreak(c) { return uLineBreakValues[uLineBreakAsInt(c)]; }
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const u${NAME}AsInt = (function () {
|
|
2
|
+
const bytes = atob("$BASE64BYTES");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> $VALUE_BITS) + 1);
|
|
14
|
+
entries.push(value & $VALUE_MASK);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const u${NAME}Values = [$VALUE_LIST];
|
|
26
|
+
function u${NAME}(c) { return u${NAME}Values[u${NAME}AsInt(c)]; }
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "unicodedata-reader"
|
|
3
|
+
version = "1.3.6"
|
|
4
|
+
description = ""
|
|
5
|
+
authors = [{name = "Koji Ishii", email="kojii@chromium.org"}]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
repository = "https://github.com/kojiishi/unicodedata-reader"
|
|
8
|
+
license = "Apache-2.0"
|
|
9
|
+
requires-python = ">=3.9"
|
|
10
|
+
dependencies = [
|
|
11
|
+
"platformdirs>=4.3.8",
|
|
12
|
+
]
|
|
13
|
+
|
|
14
|
+
[dependency-groups]
|
|
15
|
+
dev = [
|
|
16
|
+
"pytest>=8.3.5",
|
|
17
|
+
"pytype>=2024.9.13",
|
|
18
|
+
"tox>=4.25.0",
|
|
19
|
+
"tox-uv>=1.25.0",
|
|
20
|
+
"yapf>=0.43.0",
|
|
21
|
+
]
|
|
22
|
+
|
|
23
|
+
[project.scripts]
|
|
24
|
+
unicodedata-reader = 'unicodedata_reader.__main__:main'
|
|
25
|
+
|
|
26
|
+
[build-system]
|
|
27
|
+
requires = ["hatchling"]
|
|
28
|
+
build-backend = "hatchling.build"
|
|
29
|
+
|
|
30
|
+
[tool.pytest.ini_options]
|
|
31
|
+
testpaths = "tests"
|
|
32
|
+
|
|
33
|
+
[tool.yapf]
|
|
34
|
+
based_on_style = "pep8"
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from unicodedata_reader import *
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _to_unicodes(text):
|
|
5
|
+
return tuple(to_unicodes(text))
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_to_unicodes():
|
|
9
|
+
assert _to_unicodes('1234') == (0x1234, )
|
|
10
|
+
assert _to_unicodes('12FE') == (0x12FE, )
|
|
11
|
+
assert _to_unicodes('ABCD') == (0xABCD, )
|
|
12
|
+
|
|
13
|
+
assert _to_unicodes('12345') == (0x12345, )
|
|
14
|
+
|
|
15
|
+
assert _to_unicodes('u0009') == (0x9, )
|
|
16
|
+
assert _to_unicodes('u1234') == (0x1234, )
|
|
17
|
+
assert _to_unicodes('U+1234') == (0x1234, )
|
|
18
|
+
|
|
19
|
+
assert _to_unicodes('1234 5678') == (0x1234, 0x5678)
|
|
20
|
+
assert _to_unicodes('1234,5678') == (0x1234, 0x5678)
|
|
21
|
+
assert _to_unicodes('1234, 5678') == (0x1234, 0x5678)
|
|
22
|
+
|
|
23
|
+
assert _to_unicodes('xy') == (ord('x'), ord('y'))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_to_unicodes_range():
|
|
27
|
+
assert _to_unicodes('1234-1236') == (0x1234, 0x1235, 0x1236)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_to_unicodes_array():
|
|
31
|
+
assert _to_unicodes(['1234', '5678']) == (0x1234, 0x5678)
|
|
@@ -0,0 +1,166 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from unicodedata_reader import *
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def test_entry_eq():
|
|
7
|
+
assert UnicodeDataEntry(1, 3, 'A') == UnicodeDataEntry(1, 3, 'A')
|
|
8
|
+
assert UnicodeDataEntry(1, 3, 'A') != UnicodeDataEntry(1, 3, 'B')
|
|
9
|
+
assert UnicodeDataEntry(1, 3, 'A') != UnicodeDataEntry(2, 3, 'A')
|
|
10
|
+
assert UnicodeDataEntry(1, 3, 'A') != UnicodeDataEntry(1, 2, 'A')
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def test_from_pairs():
|
|
14
|
+
entries = UnicodeDataEntry.from_pairs((
|
|
15
|
+
(1, 'A'),
|
|
16
|
+
(2, 'A'),
|
|
17
|
+
(3, 'B'),
|
|
18
|
+
(4, 'B'),
|
|
19
|
+
(6, 'C'),
|
|
20
|
+
(8, 'C'),
|
|
21
|
+
(9, 'C'),
|
|
22
|
+
(11, 'C'),
|
|
23
|
+
))
|
|
24
|
+
entries = tuple(entries)
|
|
25
|
+
expects = (UnicodeDataEntry(1, 2, 'A'), UnicodeDataEntry(3, 4, 'B'),
|
|
26
|
+
UnicodeDataEntry(6, 6, 'C'), UnicodeDataEntry(8, 9, 'C'),
|
|
27
|
+
UnicodeDataEntry(11, 11, 'C'))
|
|
28
|
+
assert entries == expects
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def test_from_pairs_unsorted():
|
|
32
|
+
entries = UnicodeDataEntry.from_pairs((
|
|
33
|
+
(1, 'A'),
|
|
34
|
+
(3, 'A'),
|
|
35
|
+
(2, 'A'),
|
|
36
|
+
))
|
|
37
|
+
with pytest.raises(AssertionError):
|
|
38
|
+
entries = tuple(entries)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_from_pairs_none():
    """Pairs whose value is ``None`` still produce entries of their own."""
    pairs = (
        (1, None),
        (2, 'A'),
        (3, 'A'),
        (5, None),
        (7, 'A'),
        (9, None),
    )
    actual = tuple(UnicodeDataEntry.from_pairs(pairs))
    assert actual == (
        UnicodeDataEntry(1, 1, None),
        UnicodeDataEntry(2, 3, 'A'),
        UnicodeDataEntry(5, 5, None),
        UnicodeDataEntry(7, 7, 'A'),
        UnicodeDataEntry(9, 9, None),
    )
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def test_from_values_none():
    """``None`` values yield no entry; others are keyed by their position."""
    values = (None, 'A', None, 'A', None, 'B')
    actual = tuple(UnicodeDataEntry.from_values(values))
    # Positions 1, 3 and 5 hold the non-``None`` values.
    assert actual == (
        UnicodeDataEntry(1, 1, 'A'),
        UnicodeDataEntry(3, 3, 'A'),
        UnicodeDataEntry(5, 5, 'B'),
    )
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def test_value():
    """Lookups by code return the covering entry's value, else ``None``."""
    first = UnicodeDataEntry(1, 3, 'A')
    second = UnicodeDataEntry(5, 6, 'B')
    entries = UnicodeDataEntries(entries=(first, second))

    # Expected value for every code from 0 through 6.
    expect = (None, 'A', 'A', 'A', None, 'B', 'B')
    for code, expected_value in enumerate(expect):
        # The method and the subscript form must agree.
        assert entries.value(code) == expected_value
        assert entries[code] == expected_value

    # The first code past the last entry has no value.
    assert entries.value(len(expect)) is None

    assert tuple(entries.values_for_code()) == expect
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_missing_directive():
    """The ``@missing`` line supplies the value for codes without an entry."""
    source = [
        '# test\n',
        '# @missing: 0000..10FFFF; R\n',
        '0000..001F ; R\n',
        '3000 ; U\n',
    ]
    entries = UnicodeDataEntries(lines=source)

    expectations = (
        (0x001F, 'R'),  # covered by an explicit range
        (0x2FFF, 'R'),  # no explicit entry, falls back to the default
        (0x3000, 'U'),  # explicit single code
        (0x3001, 'R'),  # no explicit entry, falls back to the default
    )
    for code, expected_value in expectations:
        assert entries.value(code) == expected_value

    # The directive itself is recorded as an entry spanning its range.
    full_range_default = UnicodeDataEntry(0, 0x10FFFF, 'R')
    assert entries._missing_entries[0] == full_range_default
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def test_missing_directive_lb():
    """Block defaults named in header comments override ``@missing``.

    Codes just outside each block are expected to keep the ``@missing``
    value 'XX'; every code inside takes the block's own default.
    """
    source = [
        '# test\n',
        '# - The unassigned code points in the following blocks default to "ID":\n',
        '# CJK Unified Ideographs Extension A: U+3400..U+4DBF\n',
        '# - The unassigned code points in the following block default to "PR":\n',
        '# Currency Symbols: U+20A0..U+20CF\n',
        '# @missing: 0000..10FFFF; XX\n',
    ]
    entries = UnicodeLineBreakDataEntries(lines=source)

    # (first code, last code, expected value inside the block)
    blocks = (
        (0x3400, 0x4DBF, 'ID'),
        (0x20A0, 0x20CF, 'PR'),
    )
    for first, last, block_value in blocks:
        assert entries.value(first - 1) == 'XX'
        for code in range(first, last + 1):
            assert entries.value(code) == block_value
        assert entries.value(last + 1) == 'XX'
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def test_missing_directive_vo():
    """The block named in the header comment resolves to 'U', not the default.

    Codes directly before and after the block are expected to keep the
    ``@missing`` value 'R'.
    """
    source = [
        '# test\n',
        '# Control Pictures & OCR U+2400..U+245F\n',
        '# @missing: 0000..10FFFF; R\n',
    ]
    entries = UnicodeVerticalOrientationDataEntries(lines=source)

    first, last = 0x2400, 0x245F
    assert entries.value(first - 1) == 'R'
    for code in range(first, last + 1):
        assert entries.value(code) == 'U'
    assert entries.value(last + 1) == 'R'
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def test_normalie_no_changes():
    """``fill_missing_values`` on a copy yields the same entry sequence."""
    source_entries = (
        UnicodeDataEntry(1, 3, 'A'),
        UnicodeDataEntry(5, 6, 'B'),
    )
    entries = UnicodeDataEntries(entries=source_entries)

    # Build a second collection from the first, then fill it in place.
    normalized_entries = UnicodeDataEntries(entries=entries)
    normalized_entries.fill_missing_values()

    assert tuple(entries) == tuple(normalized_entries)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def test_fill_missing_values():
    """A gap whose missing value equals the next entry's value is absorbed.

    Code 11 is not covered by the input; with a missing value of 'B' the
    second entry is expected to start at 11 and no new entry is added.
    """

    class ConstantMissingEntries(UnicodeDataEntries):
        # Report 'B' as the missing value for every code.
        def missing_value(self, code: int):
            return 'B'

    before = (
        UnicodeDataEntry(0, 10, 'A'),
        UnicodeDataEntry(12, 20, 'B'),
    )
    after = (
        UnicodeDataEntry(0, 10, 'A'),
        UnicodeDataEntry(11, 20, 'B'),
    )

    entries = ConstantMissingEntries(entries=before)
    entries.fill_missing_values()

    assert len(entries) == 2
    assert entries._entries == after
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
def test_range_as_str():
    """``range_as_str`` formats single codes, ranges and custom converters."""

    def as_decimal(code):
        return str(code)

    def as_constant(code):
        return 'XYZ'

    # A single-code entry prints one value, padded to four hex digits.
    single = UnicodeDataEntry(9, 9, 'A')
    assert single.range_as_str() == '0009'

    span = UnicodeDataEntry(9, 11, 'A')
    assert span.range_as_str() == '0009..000B'
    assert span.range_as_str(as_decimal) == '9..11'
    # When both ends convert to the same text, only one value is printed.
    assert span.range_as_str(as_constant) == 'XYZ'

    # Codes above FFFF use as many digits as they need.
    wide = UnicodeDataEntry(0xFFFF, 0x10001, 'A')
    assert wide.range_as_str() == 'FFFF..10001'
|