unicodedata-reader 1.3.5__tar.gz → 1.3.7__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- unicodedata_reader-1.3.7/.github/dependabot.yml +10 -0
- unicodedata_reader-1.3.7/.github/workflows/ci.yml +40 -0
- unicodedata_reader-1.3.7/.github/workflows/publish.yml +35 -0
- unicodedata_reader-1.3.7/.gitignore +131 -0
- unicodedata_reader-1.3.7/.yapfignore +4 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7}/PKG-INFO +8 -18
- unicodedata_reader-1.3.7/js/GeneralCategory.js +26 -0
- unicodedata_reader-1.3.7/js/LineBreak.html +19 -0
- unicodedata_reader-1.3.7/js/LineBreak.js +26 -0
- unicodedata_reader-1.3.7/js/template.js +26 -0
- unicodedata_reader-1.3.7/precommit.sh +16 -0
- unicodedata_reader-1.3.7/pyproject.toml +43 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/entry.py +9 -6
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/general_category.py +0 -1
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/reader.py +2 -2
- unicodedata_reader-1.3.7/tests/__init__.py +0 -0
- unicodedata_reader-1.3.7/tests/cli_test.py +31 -0
- unicodedata_reader-1.3.7/tests/conftest.py +6 -0
- unicodedata_reader-1.3.7/tests/entry_test.py +166 -0
- unicodedata_reader-1.3.7/tests/line_break_test.py +61 -0
- unicodedata_reader-1.3.7/tests/reader_test.py +30 -0
- unicodedata_reader-1.3.7/tests/set_test.py +98 -0
- unicodedata_reader-1.3.7/tox.ini +12 -0
- unicodedata_reader-1.3.7/uv.lock +829 -0
- unicodedata_reader-1.3.5/pyproject.toml +0 -31
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7}/LICENSE +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7}/README.md +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/__init__.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/__main__.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/bidi_brackets.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/cli.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/compressor.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/east_asian_width.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/emoji.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/line_break.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/set.py +0 -0
- {unicodedata_reader-1.3.5 → unicodedata_reader-1.3.7/src}/unicodedata_reader/vertical_orientation.py +0 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Please see the documentation for all configuration options:
|
|
2
|
+
# https://docs.github.com/en/code-security/dependabot/ecosystems-supported-by-dependabot/supported-ecosystems-and-repositories
|
|
3
|
+
|
|
4
|
+
version: 2
|
|
5
|
+
updates:
|
|
6
|
+
- package-ecosystem: "uv"
|
|
7
|
+
directory: "/"
|
|
8
|
+
schedule:
|
|
9
|
+
interval: "daily"
|
|
10
|
+
open-pull-requests-limit: 10
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
|
|
2
|
+
# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
|
|
3
|
+
|
|
4
|
+
name: Python package CI
|
|
5
|
+
|
|
6
|
+
on:
|
|
7
|
+
push:
|
|
8
|
+
branches: [ "main" ]
|
|
9
|
+
pull_request:
|
|
10
|
+
branches: [ "main" ]
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
build:
|
|
14
|
+
|
|
15
|
+
runs-on: ubuntu-latest
|
|
16
|
+
strategy:
|
|
17
|
+
fail-fast: false
|
|
18
|
+
matrix:
|
|
19
|
+
python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
|
|
20
|
+
|
|
21
|
+
steps:
|
|
22
|
+
- uses: actions/checkout@v4
|
|
23
|
+
|
|
24
|
+
# https://docs.astral.sh/uv/guides/integration/github/
|
|
25
|
+
- name: Install uv and set up Python ${{ matrix.python-version }}
|
|
26
|
+
uses: astral-sh/setup-uv@v5
|
|
27
|
+
with:
|
|
28
|
+
python-version: ${{ matrix.python-version }}
|
|
29
|
+
|
|
30
|
+
- name: Install dependencies
|
|
31
|
+
run: |
|
|
32
|
+
uv sync --all-extras --dev
|
|
33
|
+
|
|
34
|
+
- name: Test with pytest
|
|
35
|
+
run: |
|
|
36
|
+
uv run pytest
|
|
37
|
+
|
|
38
|
+
- name: lint with ruff
|
|
39
|
+
run: |
|
|
40
|
+
uv run ruff check
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
name: Publish
|
|
2
|
+
|
|
3
|
+
# Controls when the action will run.
|
|
4
|
+
on:
|
|
5
|
+
release:
|
|
6
|
+
types: [created]
|
|
7
|
+
|
|
8
|
+
# Allows you to run this workflow manually from the Actions tab
|
|
9
|
+
workflow_dispatch:
|
|
10
|
+
|
|
11
|
+
jobs:
|
|
12
|
+
publish:
|
|
13
|
+
runs-on: ubuntu-latest
|
|
14
|
+
steps:
|
|
15
|
+
- uses: actions/checkout@v4
|
|
16
|
+
|
|
17
|
+
- name: Set up Python
|
|
18
|
+
uses: actions/setup-python@v5
|
|
19
|
+
with:
|
|
20
|
+
python-version: '3.x'
|
|
21
|
+
|
|
22
|
+
# https://docs.astral.sh/uv/guides/integration/github/
|
|
23
|
+
- name: Install uv
|
|
24
|
+
uses: astral-sh/setup-uv@v5
|
|
25
|
+
|
|
26
|
+
- name: Install Dependencies
|
|
27
|
+
run: |
|
|
28
|
+
uv sync --all-extras --dev
|
|
29
|
+
|
|
30
|
+
- name: Build and publish
|
|
31
|
+
run: |
|
|
32
|
+
uv build
|
|
33
|
+
uv publish
|
|
34
|
+
env:
|
|
35
|
+
UV_PUBLISH_TOKEN: ${{ secrets.PYPI_TOKEN }}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
.vscode/
|
|
2
|
+
|
|
3
|
+
# Byte-compiled / optimized / DLL files
|
|
4
|
+
__pycache__/
|
|
5
|
+
*.py[cod]
|
|
6
|
+
*$py.class
|
|
7
|
+
|
|
8
|
+
# C extensions
|
|
9
|
+
*.so
|
|
10
|
+
|
|
11
|
+
# Distribution / packaging
|
|
12
|
+
.Python
|
|
13
|
+
build/
|
|
14
|
+
develop-eggs/
|
|
15
|
+
dist/
|
|
16
|
+
downloads/
|
|
17
|
+
eggs/
|
|
18
|
+
.eggs/
|
|
19
|
+
lib/
|
|
20
|
+
lib64/
|
|
21
|
+
parts/
|
|
22
|
+
sdist/
|
|
23
|
+
var/
|
|
24
|
+
wheels/
|
|
25
|
+
pip-wheel-metadata/
|
|
26
|
+
share/python-wheels/
|
|
27
|
+
*.egg-info/
|
|
28
|
+
.installed.cfg
|
|
29
|
+
*.egg
|
|
30
|
+
MANIFEST
|
|
31
|
+
|
|
32
|
+
# PyInstaller
|
|
33
|
+
# Usually these files are written by a python script from a template
|
|
34
|
+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
35
|
+
*.manifest
|
|
36
|
+
*.spec
|
|
37
|
+
|
|
38
|
+
# Installer logs
|
|
39
|
+
pip-log.txt
|
|
40
|
+
pip-delete-this-directory.txt
|
|
41
|
+
|
|
42
|
+
# Unit test / coverage reports
|
|
43
|
+
htmlcov/
|
|
44
|
+
.tox/
|
|
45
|
+
.nox/
|
|
46
|
+
.coverage
|
|
47
|
+
.coverage.*
|
|
48
|
+
.cache
|
|
49
|
+
nosetests.xml
|
|
50
|
+
coverage.xml
|
|
51
|
+
*.cover
|
|
52
|
+
*.py,cover
|
|
53
|
+
.hypothesis/
|
|
54
|
+
.pytest_cache/
|
|
55
|
+
|
|
56
|
+
# Translations
|
|
57
|
+
*.mo
|
|
58
|
+
*.pot
|
|
59
|
+
|
|
60
|
+
# Django stuff:
|
|
61
|
+
*.log
|
|
62
|
+
local_settings.py
|
|
63
|
+
db.sqlite3
|
|
64
|
+
db.sqlite3-journal
|
|
65
|
+
|
|
66
|
+
# Flask stuff:
|
|
67
|
+
instance/
|
|
68
|
+
.webassets-cache
|
|
69
|
+
|
|
70
|
+
# Scrapy stuff:
|
|
71
|
+
.scrapy
|
|
72
|
+
|
|
73
|
+
# Sphinx documentation
|
|
74
|
+
docs/_build/
|
|
75
|
+
|
|
76
|
+
# PyBuilder
|
|
77
|
+
target/
|
|
78
|
+
|
|
79
|
+
# Jupyter Notebook
|
|
80
|
+
.ipynb_checkpoints
|
|
81
|
+
|
|
82
|
+
# IPython
|
|
83
|
+
profile_default/
|
|
84
|
+
ipython_config.py
|
|
85
|
+
|
|
86
|
+
# pyenv
|
|
87
|
+
.python-version
|
|
88
|
+
|
|
89
|
+
# pipenv
|
|
90
|
+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
|
|
91
|
+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
|
|
92
|
+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
|
|
93
|
+
# install all needed dependencies.
|
|
94
|
+
#Pipfile.lock
|
|
95
|
+
|
|
96
|
+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
|
|
97
|
+
__pypackages__/
|
|
98
|
+
|
|
99
|
+
# Celery stuff
|
|
100
|
+
celerybeat-schedule
|
|
101
|
+
celerybeat.pid
|
|
102
|
+
|
|
103
|
+
# SageMath parsed files
|
|
104
|
+
*.sage.py
|
|
105
|
+
|
|
106
|
+
# Environments
|
|
107
|
+
.env
|
|
108
|
+
.venv
|
|
109
|
+
env/
|
|
110
|
+
venv/
|
|
111
|
+
ENV/
|
|
112
|
+
env.bak/
|
|
113
|
+
venv.bak/
|
|
114
|
+
|
|
115
|
+
# Spyder project settings
|
|
116
|
+
.spyderproject
|
|
117
|
+
.spyproject
|
|
118
|
+
|
|
119
|
+
# Rope project settings
|
|
120
|
+
.ropeproject
|
|
121
|
+
|
|
122
|
+
# mkdocs documentation
|
|
123
|
+
/site
|
|
124
|
+
|
|
125
|
+
# mypy
|
|
126
|
+
.mypy_cache/
|
|
127
|
+
.dmypy.json
|
|
128
|
+
dmypy.json
|
|
129
|
+
|
|
130
|
+
# Pyre type checker
|
|
131
|
+
.pyre/
|
|
@@ -1,21 +1,12 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: unicodedata-reader
|
|
3
|
-
Version: 1.3.5
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
License: Apache-2.0
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
Requires-
|
|
10
|
-
Classifier: License :: OSI Approved :: Apache Software License
|
|
11
|
-
Classifier: Programming Language :: Python :: 3
|
|
12
|
-
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
-
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
-
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Requires-Dist: platformdirs (>=2.2,<5.0)
|
|
18
|
-
Project-URL: Repository, https://github.com/kojiishi/unicodedata-reader
|
|
3
|
+
Version: 1.3.7
|
|
4
|
+
Project-URL: repository, https://github.com/kojiishi/unicodedata-reader
|
|
5
|
+
Author-email: Koji Ishii <kojii@chromium.org>
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: >=3.9
|
|
9
|
+
Requires-Dist: platformdirs>=4.3.8
|
|
19
10
|
Description-Content-Type: text/markdown
|
|
20
11
|
|
|
21
12
|
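[![Python package CI](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml/badge.svg)](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)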
|
|
@@ -100,4 +91,3 @@ unicodedata-reader lb -t js/template.js
|
|
|
100
91
|
[GeneralCategory.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/GeneralCategory.js
|
|
101
92
|
[LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html
|
|
102
93
|
[LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js
|
|
103
|
-
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uGeneralCategoryAsInt = (function () {
|
|
2
|
+
const bytes = atob("h2ABQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQaIAAECYw0CCg0ODwYQDQoNBjEKDCIKEQ4SUQKFSQaBSYVsBoFsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCSwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDCkMCQwJTCkMCQwpDEksaQwpDElMKQwpDAkMCQwpDAksCQwpDEkMCQwpLA4JTG4JEwwJEwwJEwwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAksCRMMCQxJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCYFMKQwpLAkMaQwJDAkMCQwJkQwOhkyENGqCdIMqgRSBShQKFIQKm3UJDAkMFAoJDDYUTAIJdioJAkkWCRYpDIQJFoIJiEwJLElMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmBDAkMBgkMKSyMSYtsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA2BFTcJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwpDAkMCQwJDAkMCQwJLAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDBaJKTYUgSKKDAIHNi0DFosVBxUCNQI1AhWBdoZOdm4iglaBMEYiAyItglUCEEKHbhSCLoUVgihiLhWYTgIOgVUQDYE1NDUNdS6CKE4tDoMiFhAOFYcuhlU2lg6CVQ6DNoIoiA6CFTQNQhQ2FSOFLnUUghUUVRSBFTaDQhaGDlU2AhaCToEWhW4KgS4WMIEWghWKDhSFdRCHdRiNLhUYFQ5YgXV4FTgOgVWCLjUigigCFINOFTgWgW42LjaFLhaBThYOVm42FQ5YdTY4NjgVDoF2GHYuFk41NoIoLiOBMQ0DDgIVNjUYFoEudi42hS4WgU4WLhYuFi42FRZYNXY1NlVWFYFWbhYOgVaCKDVOFQKCNjUYFoIOFk4WhS4WgU4WLhaBDjYVDliBFRY1GBY4FTYOg1YuNTaCKAIDgVYOgTUWFTgWgW42LjaFLhaBThYuFoEONhUOGBUYdTY4NjgVgVY1GHYuFk41NoIoDQ6BMYI2FQ4WgS5WThZuVi4WDhYuVi5WTlaCbnY4FThWWBZYFTYOgTYYgzaCKFGBLQMNgRYVWBWBbhZOFoVOFoNuNhUOVXgWVRZ1gVY1Fk42DjYuNTaCKIFWAoFRDQ4VOAKBbhZOFoVOFoIuFoEONhUOGBWBGBYVOBY4NYFWOIE2LhYuNTaCKBYuGIJ2NTiCDhZOFooONQ5YdRZYFlgVDg12ThiBUU41NoIoghENgS4WFTgWhC5WhW4Wgg4WDjaBTlYVdlhVFhUWgXiBNoIoNjgCgnaLbhUugVV2A4EuFIF1AoIoIokWLhYOFoEOFoVuFg4Wgi4VLoIVDjaBDhYUFoFVFoIoNm6Hdg5Ng0INAk01gS2CKIIxDRUNFQ0VBAUEBTiBbhaIbnaDNRiBFQI1gQ6CVRaIdRaBbRWBLRYtgQJtIokWik44dRiBNRg1ODUOgiiBIoEuODVuVQ5YLoFYTnWDDhU4NYE4FQ4YgihYFS2JKRYJgRYJNopMAhRM0g4WbjaBThYOFm42ig4WbjaIDhZuNoFOFg4WbjaDThaODhZuNpBONlWCAoRxVoNugi2BNpUpNoEsNgeBmm4NAoQOAYYuBAVWkk5CWYFugVaELlUYghaETjUY
IoIWhC41gnaDDhZOFjWCdoxuNRiBVYF4FTiCVUIUQgMOFTaCKIE2gjGBNoEiB2JVEBWCKIE2iE4UjQ6BVoEONYguFQ6BFpEugjaHThZVeDVYdjgVgThVdg1WIoIohy42gQ6CVopudoYugTaCKBFWiC2FTjU4FTYijQ4YFRiBVRYVGBU4gXWBOII1NhWCKIE2giiBNoFCFIEiNoM1F4N1jBZ1GItOFRiBFRgVgRgVOIFuFiKCKIFCgi2CFYINQjUYhy4YdTg1GFUugiiKbhUYNVgVGFU4gXZiiG6BeIF1ODVWgQKCKFZOgiiHLoE0IoIMCQyBFopJNkmBYoF2VQKDFRiBVW4VgS4VLhg1DoEWimyPVIMMFIgsiRSPdQkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJggwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJggyBaYEsNoEpNoFsgWmBbIFpgSw2gSk2gWwWCRYJFgkWCYFsgWmDLDaBbIFzgWyBc4FsgXOBDBYsaRMKDEpMFixpE0psNixpFkqBbIEJSjZMFixpEyoWgkGBEIEnIg8SBC8SBA+BYhobgRABggIPEmIrQgYEBYJCBgILgiIBgRAWgjARFDaBMUYEBRSCMUYEBRaDFFaIA4NWgxV3FVeCdYNWLQltCS0MSSxJDA0JLQaBCYEtCQ0JDQkNaQ0MaQxuDC0sKYEGCWwNBi0MDYNxiFkJDHkRLXaBBoENJm0GLQYtBoFNBodNJi0GDQaHTcJmgW0EBQQFhG0mgU0EBZQNBocthgaJbYEmkW2FNoJNhRaOcZMthTGtTQaCDQaNLYFmm00GvW0EBQQFBAUEBQQFBAUEBYcxim2BBgQFh0YEBQQFBAUEBQQFg2a/baBGBAUEBQQFBAUEBQQFBAUEBQQFBAUEBY9GBAUEBYdmBAXAJotthQYtgSaJTTaHbRaaDYtpi2wJDEksCQwJDAkMaQwJLAmBLDRJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAksgS0JDAkMVQkMgRZiESKJLBYMgRYMNo1ugVYUAoM2FYVOghaBThaBThaBThaBThaBThaBThaBThaBThaHdSIPEg8SQg8SAg8SggIHIgcCDxIiDxIEBQQFBAUEBYECFIIiJ2IHAgSDAi1CBAUEBQQFBAUHiDaGLRaWDYJ2tS2GNoNtAUINFA4ZBAUEBQQFBAUEBS0EBQQFBAUEBQcEJQ2CGXU4B4EULVkUDgItFpUuNjUqNA4Hli4CVA6BFopOFpcuFi1xgi2HboktghYNg26HTRaCMYctgXENg1GHbYIxiU2DUc9tjO9uj22phQ4Ugp1OVo1NghaJboE0IsJuFEKDboIoLoR2CQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA4VVwKCNQIUCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDDQ1kS6COTWBIoF2hUqCFCoJDAkMCQwJDAkMCQwJTAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMFIFsCQwJDCkMCQwJDAkMCQwUKgkMCQwOCQwJTAkM
CQwJDAkMCQwJDAkMCQwJDAkMgQkMgQkMCQwJDAkMCQwJDAkMCQxpDAkMKQw2CQwWDBYMCQwJDAkMCYUWVAkMDjQMgU4VThVuFYVOODUYbRVWgTEtAw2BNoxuYoF2OIwug3g1gXYigiiBNoQ1gS5CDgIuFYIohm6BdSKFToJVOIJWAocOVlUYi04VOHU4NViDAhYUgih2IoEOFRSCDoIogQ4Wig6BNTg1ODWCFk4VgW4VGDaCKDZig24UgS5NDhgVGIwuFQ5VLjWBDjUOFQ6Fdi4UIoJOGDU4Ig40GBWCNoEuNoEuNoEughaBThaBThaKTAp0ggwUKnaTbIhOOBU4FTgCGBU2giiBNpXoboJ2hU52jA52g/98jL992y42mi6JNoFMgnaBDIEWDhWCLgaDDhaBDhYOFi4WLhaaboQKg3baTgUEg22PbjaNLoFWDYd2gm4DTYN1gUIEBQKBNoN1AicrBAUEBQQFBAUEBQQFBAUEBSIEBWJLQhZiBwQFBAUEBUIGB0YWAgMidoEOFqFONhAWQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQYEBQIEBSKCLhSLDjSHTlaBLjaBLjaBLjZOViMGCg0jFg1mLYI2UC02gm4Whi4WhE4WLhaDTjaDLog2nk6BFkJ2ixFWgg2NGXGEDTFNFoMNVg2LVosNFaA2hw5WjA6DVhWGUXaHbnGCFoRuGYFuGYEWiS6BFYEWhy4WAohudoFuAoEZijaJaYlsky42giiBNohpdohsdolugXaMboJWAoJJFoNJFoFJFikWgkwWg0wWgUwWLFaMboJ2zU6CFoUugjaBboV2gTQWijQWghSRFoEuNg4Wim4WLlYONoVOFgKBcYVOLYFRh06BdoIRi3aEThYugRaBEYUugTFWAoYugRYCj3aNbnYxLoNxNosxDlUWNYEWdW4WThaHDjZVdhWCEYFWggKBVocOMQKHDlGHdoFuDYZuNXaBEYFCghaNLlaBQoUuNoFxhE6BFoFxhC6BVmKCdoFRk3aSDo1WjEmDFoxMgVaBMYhudYF2giiBNoIobhQOhSlWgRUHFIUsgXYms3aHURaKLhY1BzYug3ZOjVZ1hw6CMQ6BdoUuglVxgQKFNoQudWKJNoUOgVGEdoVOghYYFRiNDoNVgUJ2hHGCKBUuNQ6CFlUYiw5YdTg1IhBiFYI2EDaGDoFWgiiBNlWIboEVGIF1FoIoYg44DoF2iE4VIg6CFjUYi25YghU4bmJ1AhgVgigOAg5CFoRxglaELhaGDlhVOBUYNYEiFS4VjzaBThYOFm4Wg04Wgi4CgTaLThVYgXWBFoIogTY1OBaBbjYuNoUuFoFOFi4WgQ4WNQ44FXg2ODZYNg6BNhiBFoEOODaBVVaBFYJWgi4WDjYOFokuFg5YgTUWGDYYFngWOBUYFQ4VDiIWIoF2NYcWjQ5YgXU4VRgVboECgigiFgIVToc2i25YgTUYFXg1GDUuAg6BdoIoqTaLTlh1Nng1GDWFQm41iDaLbliBdTgVGDVCDoJWgiiBNoMChFaKThUYFTiBNRgVDgKBNoIogTaEaIZ2hk42FRgVOHUYgRV2gigxQg2BTq4Wim5YghUYNQKYdodph2yCKIIRgnaBbjYONoFuFi4WhW6BOBY4NjUYFQ4YDhgVQoIWgiiRNoFuNolOWHU2NXgVDgIOGIZWDoI1iW6BNRgOdYFiFYF2DoE1OFWLLoMVGDVCDoECgxaSDoFWgiKtNogOAoM2giiBNoIOFokOGIFVFoE1GBUOgQKCNoIohFFWIocuNoU1FhiBVRg1GDWSFoFOFi4WiS6BNVYVFjUWgVUOFYF2giiBNoEuFi4Wh26BGBY1FjgVGBUOgVaCKM02hE41OCKBVjUOGIMOFoguOIEVVjgVGBWDAoIoFZUWDoNWhRGBbWOEDYMWAoHmLpk2m1kWgQKCVrBuhZJ2mA4igxaCi26DcBWBLoNVgjaH5k6BFoGRTo2uFocugnVY
VYIog7E2gY4OgVaHThaCKHYik04WgiiBNocuNoEVAoI2i26BVYECbXQCDYI2gigWgVEWhQ6BFoRO63ZUiW40QoIosTaHaYdshVFimRaSTnYVDo1YgVZ1gxSPdjQCFBWCVjiDNov9boF2grUuihaCLpG5VnQWgVQWNBbIToNWDocWTjYOgzZugXbiboTAdppOgRaDDlaCDoFWgi42DTUCcIfWdrttgiiBNuxtknaLNTaFVYIWnG2Odr0tgjaJTTaObThVTYE4gXCBdS2BVYctdY8NhRaQLVUNnjaEcYJ2hHGCdpVNghaGEaFWhimGLIYpgUwWhCyGKYYsCRYpNgk2KTZpFoFpbBYMFoFMFoJMhimGLCkWaTaBaRaBSRaGLCkWaRaBCRYJVoFJFoYshimGLIYphiyGKYYshimGLIYphiyGKYZsNoYJBoYMBoEshgkGhgwGgSyGCQaGDAaBLIYJBoYMBoEshgkGhgwGgSwJDDaMKP9tjVVtjDWBbRWDLRUtgQKDVoEVFoNVgpN2giwOhGyBNoEstRaBVRaEFTaBVRY1FoEVgRaPNIgWFZt2iw5WgVWBVDaCKHYODc92hy4VhBaKbnWCKIEWA/N2hk4UdYIotTaHLjUOgih2Avd2gU4WbhYuFoNOFrEONoIRgVWKFogpiCyBVRR2gih2IoHEFo5RDVEDcZJ2ixENg1GwNm4Whk4WLhYONg4Wgi4WbhYOFg6BNg52DhYOFg4WThYuFg42DhYOFg4WDhYOFi4WDjZuFoFOFm4WbhYOFoIuFoQOgRZOFoEOFoQOjHYmwzaKbXaYbYJ2g002g00Wg00WiQ2CNoMRqA2NdocNgxaKbXaCDYFWLYM2gS2mNr5NgQqBtW12hA1Wgw1WnU12l02BNoJtdg2DVoJtdo1tgXaCLYE2iW2BdoctNoJtdi2PNtRtgnaDLTaDDVaCLYEWjW2BVoNNNoJNgTaCDYFWpE0Wlm2CKIKBNtO3bod2iI4ugTa3LjaLoC6DNo7MDoNWgZsuhOg2gYcugvg2idJOgRaIl26K7pQWEIc2l3Cfdrt1/4N2//89Nv//PTY=");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 5) + 1);
|
|
14
|
+
entries.push(value & 31);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uGeneralCategoryValues = ["Cc","Zs","Po","Sc","Ps","Pe","Sm","Pd","Nd","Lu","Sk","Pc","Ll","So","Lo","Pi","Cf","No","Pf","Lt","Lm","Mn","Cn","Me","Mc","Nl","Zl","Zp","Cs","Co"];
|
|
26
|
+
function uGeneralCategory(c) { return uGeneralCategoryValues[uGeneralCategoryAsInt(c)]; }
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
<!DOCTYPE html>
|
|
2
|
+
<script src="LineBreak.js"></script>
|
|
3
|
+
<body>
|
|
4
|
+
<div>
|
|
5
|
+
<input id="input" autofocus style="width: 100%">
|
|
6
|
+
</div>
|
|
7
|
+
<pre id="result"></pre>
|
|
8
|
+
<script>
|
|
9
|
+
input.addEventListener('input', (e) => {
|
|
10
|
+
const text = input.value;
|
|
11
|
+
const results = [];
|
|
12
|
+
for (const ch of text) {
|
|
13
|
+
const code = ch.codePointAt(0);
|
|
14
|
+
results.push(`${ch} U+${code.toString(16)}: ${uLineBreak(code)}`);
|
|
15
|
+
}
|
|
16
|
+
result.textContent = results.join('\n');
|
|
17
|
+
});
|
|
18
|
+
</script>
|
|
19
|
+
</body>
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uLineBreakAsInt = (function () {
|
|
2
|
+
const bytes = atob("hAABAkMEiEAFBgcICQoIBwsMCAkNDg0PhFBNgQgGjQgLCQyOCAsBEQiCQBKMQBMLCoEJCFQIFAcIAUgKCVQVCIIUB4EUC4sIFI8IFIHnCBQVgRQVFEgUgwiBVAgUCBWPSKcAE4VAgxOGAINIVoFIDQiBVoMIFggWiUgW70iDANJIFpJIVpdIDQFWSAkWlgABAAhACEAGAINWjReBVoFXSIUWglCBCIEKTUiFAAYAgQaVCIoAhFAKUIEIALEIBgiDABAIgkBIQAiBQEiEUIlIFkgAjkiNAFasCIUACIZWhFCQCIQAgUgNBghWAEmKSIFACIQACIEACIIAVocIFowIgQBWCBaFCIIWjwgWUIIWhACUSItAEJAAmkiBAAiIQAiDAIRIQEGEUIgIgQAWg0hWSFaKSBaDCBYIgRaBSFYACIMAVkBWgQAIg1YAgVZIFoEIQFaEUEhKgggKCAlIAFaBABaCSIFWSFaKSBaDCBZIFkgWSFYAFoIAgVZAVoEAgRYAgxaBSBYIgxaEUECBCAAIhFaBABaECBaBCBaKSBaDCBZIFoIIVgAIg0AWgQAWgQBWCIcWSEBWhFAICYMWCIJAFoEAFoNIVkhWikgWgwgWSBaCCFYACIMAVkBWgQCDFoEAgVZIFoEIQFaEUINIhFYACBaCSIEWgQgWgUiBFkgWCBZIgRZIgRaBCIEWhUiBVoIAgRaBABaBQFYIglYAhlaEUIQICQiCFoIAg0gWgQgWiwgWh0hWAAiDABaBABaBQIMWQBaBCFYIVkhAVoRQgxYVhAiBABWDSBaBCBaLCBaESBaCCFYACIMAFoEAFoFAgxZAglZIFkhAVoRQFkgAhVaBQIQIFoEIFpQIQAiDABaBABaBQEiBVoEIAIRIQFaEUIQICoJIFoEAFohIgRaLSBaECBYIVoMIgRYAgVaCQBYAFoNAglaEUFZACIVWnFiBVgmHGAiEUEGSFlgWGBaCGBaLWBYYFosYVoIYFhgWgxgWhFBWgViPVgiBVQhVE1UBE4IGEwgGgQhAgkiEUIRIAQAIAAgACxELEUCDSBaRSIFWhkABggABQIIIhQAWkUAWQYJIAIJIFkhVARWCCFOSFp9YhFBBgUifWIRQgliSSBYIghYIVpdIr1mjWqtbpAgWgUhWgwgWCBaBSFaUCBaBSFaQCBaBSFaDCBYIFoFIVocIFpwIFoFIVqEIVoEACAGNCIEWjEiCVqpIVoJIVgGCvwgBjEgLEYEWpQiBAYUIgxaISIFAhBaJCIEAQYQWiEhAhVaGCBaBCBZAhVapWEEcGAEIAQlYVoRQglaESIJWSEZBFQhGCIEAEwCEUIJWrAiDFoIIQJBIAAiCFqJIhFaPCBaFQIFWhUCBVgiBFkaEUI5YVoIYhRaVWIFWjFiCVoUQgRZYmwiCAFZInxgWjhhWAIRQglaEUIJWhlhWjwCYFoIAlx2HQB6DXRZBhF9BIIFBhGCEAIQggQGBAI5IhgBIhFCCSJJfhUBhg1aTSIlAgRaCAYRQgRaBCIRQkUhBhQiCFpUIVoUIg1aBAAiKAIFIAIJIAEiBAAiCFt9IhgATlkATgQCBikhWgkhWkkhWgkhWg0gWCBYIFggWjwhWmggWhwgWhkhWgkgWiQhWgQgWgwgVCBaDAROBASIAI0ABE0EkVAhHC4EHCwdUSIElAUOCABODSghHFFyCSA0LEYEchUgBCoFBCIEBJoFIFoRASFYUg0gLERQIgVSDSAsRFoYIgRaDCQqGSQqBSQpJCgkKhwmQAIcWgQgKCBSBCAqECBRICYRIVINIFJFIhxQIhVSBSIRUhwgUSIFWhFSbSBQIFJUIFAhUgQhUSBSBCBQIFEkIFIFIFEiBVEgUCBQIglQIFIIIgVSBSFSESBSBCBSCCBSGCFRIgVRIVEhUiEhUSFSGCBSBCBSFCBSMCBSXCCWLSAsRCxGCSBSDCGCGCAsR4giBYJpIilaFCIoWzxQIpVSB
SJIUhQiHVEiBVIRIVAiDFINIVEhUgUhUSFSBSIEUSBRIgVSHSIFUhAgUh0iBYAhUSBSBSFSBSGBUIAiBICdgjAiBIIFIFAgUjghUCIEUCBQgVAhUCBSHCCCOSFSOCIVggVQgCIEgFGCBFGBUIFSBIAgUgUhUIIJUgiAUYCcgVINggQhggWekCBSBCIJHCEYggQgLEQsRCxELEQsRCxELEY5UmAgLEY8ICxELEQsRCxELEYHJCAsRCxELEQsRCxELEQsRCxELEQsRCxGfCAsRCxGPSAsRgasIghSMSFaPSBaBq0iBAEiCFgaBAQgGAZJIFgiCFghWm0iDFggBhlYAiwiEFoMIFoMIFoMIFoMIFoMIFoMIFoMIFoMIFo9AhkeDQQgBCwFIR0hHCxELEQsRCxGBQQYIQQhBgghkgQEIQQuDQQgBCEGBCEYLDAsMCwwLDAGQVoxgFqwghVbqYIxWh2ABUWAcYAsRCxELEQsRCxFgCxELEQsRCxEcC1GEYIJAgiAAgiBcgSAWKCAoICggKCAojCAojyAoICggKIJgKIJgaFZAgVwgHCggKCAoICggKIwgKI8gKCAoICiCYCiCYGiBYBwoXCCCFpUgFq5gFqpghBYgh2iPIBaTYINUm7dgn0jSiiAchLsggRabIIQWlkhBgYYIAQYBh0iEUEiJVpcIgUAIhECPSECnSEAIggGDVuZIVkgWCBaDSIoWh0gAgQgAgUgAiwiCAIFIAIEWg0gKCIJWmUhVRoNWQJhIiECDVkGEUIJWiECESBVIAIRQjUiDQEGLCIYAhRYIjhmBFoFAlx2GAB6CYIEBgWAWAYRfgVZgh1iEUIIYFpQfhkCEFoEBAINBQFaEX1YggQGxGItWghiFCIIAQYEIQIRWgkhWgkhWgkiEFoMIFoMIFp1IgVa5CINAAUBWhFCCVimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimN
KimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKimNKoVWixqBVpgbgVaH/2uY/1aB/2CDCIVWggiCFhcAhFcIhhcWghcWFxZXFlcWhFe5CIdWgbUIEQunSFaaSIMWCI9WhUgKgQiHQIERXEYLESWCVhMAEwATAFMAEwATAFMAgiALEQsRCxELEQsRCxELEQsRYAsRgyARIBEWXEYgCxELEQsRg2AWIAkKIIFWgggWwwhWJhYGYAkKYAsRYBEgEYUgXIEgBo0gCyARjiALIBEgC1ELURwghGiWIFyPIIEWgmBWgmBWgmBWgSCBFgoJgSBJFoMIhFaBACwUVoVIFoxIFokIFkgWhwhWhkiQVr0IghaBAYFWlgiBFqtIFoYIgRYIlxaWCADAVo4IgRaYCIcWAI0IgVaRSIQWjkiCFpJIggCCFo5IFgGRSIFWg0gBggiUVs5IVoRQglaRSIFWkUiBVpNIg1aZSIUWhUgWhwgWgwgWSBaFCBaHCBaDCBZIgRaZSIVWgZsIhBaKSIRWg0iLVoJIFpRIFoQIohaCSFYIFpVIFkiBFghWiwgWAaMIg1aECJdWiQgWSIIWkAiBFgGMSIIWCJ9Wm0iBVolIVpcIgQAWQIIWgUCBSBaBCBaOCFaBAIFWAIQIgxaDQQiDFp9Ij1aSCECBVoIIgkElhBaaSIEWgwGKSFaNCIIWjEiDFoFIhVaDCKdWpAibFpkIhhaZCIMWlEiBQINWhFCCVoRQjUiBFoIAAYsIg1ZI51aPCBaUSBZAAVZIh1aBCJsWgUCTSINWikiFAIQIilaISIFAgUiSVo1IiVaLCIQWgQBtmR2GQB5BgiCBVolghF8AXUAdhBYTgQCWCIUASBCBQQCEVhBWjAiDFoRQglaBAJFIhkAWhFCBQQhACINWkQgACBUIhBaBAJdIhkCBSEEIAYFACECEUAgVCIEBFolIhRaISBaMCIVAQQhBCABIAJ5WgwgWCBaBSBaHCBaESAGCVpcIhUCCFoRQglaBQBaDXVZdVopdFoMdFl0Wgh0WQAGDAFZAVkAeVh+CVgCCFgFfXUBWgwCBFoIAhRaEXxYfVh8WX5FdFiCEABYAVgAWgUAWgUAeLQCBIBZgg1ZAjhaaCIhAgUiBQQiEUEEWCACBCI5Wl0iJQIFIg1aEUNJWlwiDAFaEABVBRoEIhwGBSECQVpdIiABBSIUWhFCCVoYViRaVCIYASIJWhFCCVolQjVaNGFaHGIFWhFBYgQGDWNwWlUiHAAixVp9IhFCECIVWCIMdVh1Wg10WXRaLXYJAFkBWgQAeLQAtQIEBhBaEX6JWg0hWkwiDAFaDAAgVCACNFgiEQJNIgwAIgUAVCIFBFQgAg1YIhQCWSIdAgQEIgRVBhhakCIMWhFXaVpBIhlaEUIJWhAgWkgiDQBaDQAiCAYRWhFCJCIEWFQaOSFaKQBaGQKQWgwgWSBaSSIJAgRYAFkAWgwAIAINWhFCCVoJIFkgWj0iCABZAFoIACIMWhFCBmlaI
XwGBQEGDFkAtAIYdFpBdgwCBFoFAHkGFIIRfAKoWCIcWjgiBSogIhhYBg8xIsla3CBaCAYUW4UiKpVaxCIYWgqtIgQuBEZFIEYEICxELEfcIC1HZCAuDEwsRgRMLEQsRAIJIhwCEVo/NCIIWgeZICxG7CJrcFo5fiECEX4biVoKcCIMWjwgWhFCBVkGnCBaEUIJWjkhWggABhFaXSIMAgQGESAEIhFaEUBaDCBaKCIIWiQiB11aWSEGEUOJWqwhBSLIWpQiBVgAImwCDFoFAhgifVoFcE4UWQIZWl/tgg1aC/2CB6kiUFgiEIKLzFoFIFoMIFkgWgZEghxYojhaBKFYohlaBaINWgcVgiYFWtQiCFoYIgRaECIMWhEhWCEABgUCPrVb3SIRQglaB2UilVpZAVosAhBa5SJ1W+kiEVpMIVp1IggCBCIpASIMAjkiBQJ4IihagSIEACLxWiUiFVolIhVarCIQWjAjDFqoIFqMIFkhWCFZIVoFIFoVIFggWgwgWoAgWgUhWg0gWgwgWjUgWgUgWgggWCIEWgwgWgalIVoGRSFaYUIH/SJsAgUiYQINIAIZIAEiBQQiHFoIAFocAhKdWjwiCVoJI6haDABaIAFaDABZAFoIAghaeSJAWALdWlgiBFoMAgwhWhFCBVkiBn1aOSACIFpVIgUCEUIIWCYHnVo1IgUCEUOpWjkhACIRQgVYIge9WgwgWgUgWSBaHCBbiCFaECIMAlBahSIMACIFWhFCBVkuDiBadCAqBCAqBSKVWngjgVoFIFo0IFkgWCFYIFoRIFoFIFggWCIJWCIFWCBYIFggWgQgWSBYIVggWCBYIFggWCBZIFghWgUgWgwgWgUgWgUgWCBaESBaICIIWgQgWgggWiAiZVkiBhlb/YIYUgQiOVEicVIJInhQIm2CMboHCICeKYEiLIEiCIAiCIIEnYCdggSeWYIIvoGBnYIUniiCJJ4EgJ4FggScggSeDICcgJ4ZgCCAIIAiCICeBYAggSKYggwiHYIZIhiCLSJRgZ4FgJ4ogJ4FgZ54gg0iLYIJIpSCBJ4EggieSSIEHgRyBSJEgJ4dggSeEICeFICeZILlIhWCqCJUghUiBYJtIg2CESIJgk0iDYI5IYIVIgWBInmCFSCdgJ4Ngg2eCYCeEIIRnYIEnm2AnniBnIGcgJ4gggScghieQYKlItyCBJ5RghCeDIMkIFq1IhFCCVoP+YFaB//5gVoH//mCUgIEWAI5Wr0C/VvdAhf6GVg==");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 6) + 1);
|
|
14
|
+
entries.push(value & 63);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uLineBreakValues = ["CM","BA","LF","BK","CR","SP","EX","QU","AL","PR","PO","OP","CP","IS","HY","SY","NU","CL","NL","GL","AI","BB","XX","HL","SA","JL","JV","JT","NS","AK","VI","AS","ID","VF","ZW","ZWJ","B2","IN","WJ","EB","CJ","H2","H3","SG","CB","AP","RI","EM"];
|
|
26
|
+
function uLineBreak(c) { return uLineBreakValues[uLineBreakAsInt(c)]; }
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const u${NAME}AsInt = (function () {
|
|
2
|
+
const bytes = atob("$BASE64BYTES");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> $VALUE_BITS) + 1);
|
|
14
|
+
entries.push(value & $VALUE_MASK);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const u${NAME}Values = [$VALUE_LIST];
|
|
26
|
+
function u${NAME}(c) { return u${NAME}Values[u${NAME}AsInt(c)]; }
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
if [[ -z "$VIRTUAL_ENV" ]]; then
|
|
3
|
+
echo "Activating the virtual environment and rerunning..."
|
|
4
|
+
uv run "$0" "$@"
|
|
5
|
+
exit $?
|
|
6
|
+
fi
|
|
7
|
+
|
|
8
|
+
set -e
|
|
9
|
+
|
|
10
|
+
unicodedata-reader lb -fv -t js/template.js
|
|
11
|
+
unicodedata-reader gc -fv -t js/template.js
|
|
12
|
+
|
|
13
|
+
yapf -ir -vv .
|
|
14
|
+
tox -p
|
|
15
|
+
pytype src/unicodedata_reader
|
|
16
|
+
ruff check
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "unicodedata-reader"
|
|
3
|
+
version = "1.3.7"
|
|
4
|
+
description = ""
|
|
5
|
+
authors = [{name = "Koji Ishii", email="kojii@chromium.org"}]
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
license = "Apache-2.0"
|
|
8
|
+
requires-python = ">=3.9"
|
|
9
|
+
dependencies = [
|
|
10
|
+
"platformdirs>=4.3.8",
|
|
11
|
+
]
|
|
12
|
+
|
|
13
|
+
[project.urls]
|
|
14
|
+
repository = "https://github.com/kojiishi/unicodedata-reader"
|
|
15
|
+
|
|
16
|
+
[dependency-groups]
|
|
17
|
+
dev = [
|
|
18
|
+
"pytest>=8.3.5",
|
|
19
|
+
"pytype>=2024.9.13",
|
|
20
|
+
"ruff>=0.11.10",
|
|
21
|
+
"tox>=4.25.0",
|
|
22
|
+
"tox-uv>=1.25.0",
|
|
23
|
+
"yapf>=0.43.0",
|
|
24
|
+
]
|
|
25
|
+
|
|
26
|
+
[project.scripts]
|
|
27
|
+
unicodedata-reader = 'unicodedata_reader.__main__:main'
|
|
28
|
+
|
|
29
|
+
[build-system]
|
|
30
|
+
requires = ["hatchling"]
|
|
31
|
+
build-backend = "hatchling.build"
|
|
32
|
+
|
|
33
|
+
[tool.ruff.lint]
|
|
34
|
+
ignore = [
|
|
35
|
+
"F403", # `from ... import *` used; unable to detect undefined names
|
|
36
|
+
"F405", # `...` may be undefined, or defined from star imports
|
|
37
|
+
]
|
|
38
|
+
|
|
39
|
+
[tool.pytest.ini_options]
|
|
40
|
+
testpaths = "tests"
|
|
41
|
+
|
|
42
|
+
[tool.yapf]
|
|
43
|
+
based_on_style = "pep8"
|
|
@@ -237,16 +237,19 @@ class UnicodeDataEntries(object):
|
|
|
237
237
|
return None
|
|
238
238
|
|
|
239
239
|
def _is_contiguous(self):
|
|
240
|
-
|
|
241
|
-
return all(
|
|
240
|
+
entries = self._entries
|
|
241
|
+
return all(entries[i].max + 1 == entries[i + 1].min
|
|
242
|
+
for i in range(len(entries) - 1))
|
|
242
243
|
|
|
243
244
|
def _is_distinct(self):
|
|
244
|
-
|
|
245
|
-
return all(
|
|
245
|
+
entries = self._entries
|
|
246
|
+
return all(entries[i].max < entries[i + 1].min
|
|
247
|
+
for i in range(len(entries) - 1))
|
|
246
248
|
|
|
247
249
|
def _is_sorted(self):
|
|
248
|
-
|
|
249
|
-
return all(
|
|
250
|
+
entries = self._entries
|
|
251
|
+
return all(entries[i].min <= entries[i + 1].min
|
|
252
|
+
for i in range(len(entries) - 1))
|
|
250
253
|
|
|
251
254
|
def sort(self):
|
|
252
255
|
self._entries = sorted(self._entries, key=lambda e: e.min)
|
|
@@ -127,12 +127,12 @@ class UnicodeDataCachedReader(UnicodeDataReader):
|
|
|
127
127
|
cache = UnicodeDataCachedReader._cache_dir / name
|
|
128
128
|
if UnicodeDataCachedReader.is_caching_allowed and cache.exists():
|
|
129
129
|
_logger.debug('Reading cache %s', cache)
|
|
130
|
-
return cache.read_text().splitlines(keepends=True)
|
|
130
|
+
return cache.read_text(encoding='utf-8').splitlines(keepends=True)
|
|
131
131
|
|
|
132
132
|
lines = self._reader.read_lines(name)
|
|
133
133
|
|
|
134
134
|
cache.parent.mkdir(parents=True, exist_ok=True)
|
|
135
|
-
with cache.open('w') as file:
|
|
135
|
+
with cache.open('w', encoding='utf-8') as file:
|
|
136
136
|
_logger.debug('Writing cache %s', cache)
|
|
137
137
|
file.writelines(lines)
|
|
138
138
|
|
|
File without changes
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from unicodedata_reader import *
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def _to_unicodes(text):
|
|
5
|
+
return tuple(to_unicodes(text))
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def test_to_unicodes():
|
|
9
|
+
assert _to_unicodes('1234') == (0x1234, )
|
|
10
|
+
assert _to_unicodes('12FE') == (0x12FE, )
|
|
11
|
+
assert _to_unicodes('ABCD') == (0xABCD, )
|
|
12
|
+
|
|
13
|
+
assert _to_unicodes('12345') == (0x12345, )
|
|
14
|
+
|
|
15
|
+
assert _to_unicodes('u0009') == (0x9, )
|
|
16
|
+
assert _to_unicodes('u1234') == (0x1234, )
|
|
17
|
+
assert _to_unicodes('U+1234') == (0x1234, )
|
|
18
|
+
|
|
19
|
+
assert _to_unicodes('1234 5678') == (0x1234, 0x5678)
|
|
20
|
+
assert _to_unicodes('1234,5678') == (0x1234, 0x5678)
|
|
21
|
+
assert _to_unicodes('1234, 5678') == (0x1234, 0x5678)
|
|
22
|
+
|
|
23
|
+
assert _to_unicodes('xy') == (ord('x'), ord('y'))
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def test_to_unicodes_range():
|
|
27
|
+
assert _to_unicodes('1234-1236') == (0x1234, 0x1235, 0x1236)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def test_to_unicodes_array():
|
|
31
|
+
assert _to_unicodes(['1234', '5678']) == (0x1234, 0x5678)
|