unicodedata-reader 1.3.6__tar.gz → 1.3.8__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unicodedata_reader-1.3.8/.github/dependabot.yml +25 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/.github/workflows/ci.yml +17 -5
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/.github/workflows/publish.yml +3 -3
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/PKG-INFO +4 -3
- unicodedata_reader-1.3.8/Taskfile.yml +35 -0
- unicodedata_reader-1.3.8/js/GeneralCategory.js +26 -0
- unicodedata_reader-1.3.8/js/LineBreak.js +26 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/pyproject.toml +16 -7
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/compressor.py +1 -1
- unicodedata_reader-1.3.8/src/unicodedata_reader/east_asian_width_common.py +50 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/entry.py +9 -6
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/general_category.py +0 -1
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/reader.py +31 -21
- unicodedata_reader-1.3.8/tests/cache/EastAsianWidth +2721 -0
- unicodedata_reader-1.3.8/tests/cache/LineBreak +3709 -0
- unicodedata_reader-1.3.8/tests/cache/ScriptExtensions +235 -0
- unicodedata_reader-1.3.8/tests/cache/Scripts +3182 -0
- unicodedata_reader-1.3.8/tests/cache/extracted/DerivedGeneralCategory +4368 -0
- unicodedata_reader-1.3.8/tests/cache/extracted/DerivedName +45828 -0
- unicodedata_reader-1.3.8/tests/conftest.py +20 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/tests/line_break_test.py +3 -4
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/tests/reader_test.py +4 -4
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/tests/set_test.py +6 -6
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/tox.ini +1 -1
- unicodedata_reader-1.3.8/uv.lock +790 -0
- unicodedata_reader-1.3.6/.github/dependabot.yml +0 -10
- unicodedata_reader-1.3.6/js/GeneralCategory.js +0 -26
- unicodedata_reader-1.3.6/js/LineBreak.js +0 -26
- unicodedata_reader-1.3.6/precommit.sh +0 -9
- unicodedata_reader-1.3.6/tests/conftest.py +0 -6
- unicodedata_reader-1.3.6/uv.lock +0 -716
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/.gitignore +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/.yapfignore +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/LICENSE +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/README.md +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/js/LineBreak.html +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/js/template.js +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/__init__.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/__main__.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/bidi_brackets.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/cli.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/east_asian_width.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/emoji.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/line_break.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/set.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8/src}/unicodedata_reader/vertical_orientation.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/tests/__init__.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/tests/cli_test.py +0 -0
- {unicodedata_reader-1.3.6 → unicodedata_reader-1.3.8}/tests/entry_test.py +0 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Please see the documentation for all configuration options:
|
|
2
|
+
# https://docs.github.com/en/code-security/dependabot/ecosystems-supported-by-dependabot/supported-ecosystems-and-repositories
|
|
3
|
+
|
|
4
|
+
version: 2
|
|
5
|
+
updates:
|
|
6
|
+
- package-ecosystem: "uv"
|
|
7
|
+
directory: "/"
|
|
8
|
+
schedule:
|
|
9
|
+
interval: "weekly"
|
|
10
|
+
groups:
|
|
11
|
+
dependencies:
|
|
12
|
+
patterns:
|
|
13
|
+
- '*'
|
|
14
|
+
update-types:
|
|
15
|
+
- "minor"
|
|
16
|
+
- "patch"
|
|
17
|
+
|
|
18
|
+
- package-ecosystem: "github-actions"
|
|
19
|
+
directory: "/"
|
|
20
|
+
schedule:
|
|
21
|
+
interval: "weekly"
|
|
22
|
+
groups:
|
|
23
|
+
actions:
|
|
24
|
+
patterns:
|
|
25
|
+
- '*'
|
|
@@ -16,21 +16,33 @@ jobs:
|
|
|
16
16
|
strategy:
|
|
17
17
|
fail-fast: false
|
|
18
18
|
matrix:
|
|
19
|
-
python-version: ["3.
|
|
19
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
20
20
|
|
|
21
21
|
steps:
|
|
22
|
-
- uses: actions/checkout@
|
|
22
|
+
- uses: actions/checkout@v6
|
|
23
23
|
|
|
24
24
|
# https://docs.astral.sh/uv/guides/integration/github/
|
|
25
25
|
- name: Install uv and set up Python ${{ matrix.python-version }}
|
|
26
|
-
uses: astral-sh/setup-uv@
|
|
26
|
+
uses: astral-sh/setup-uv@v7
|
|
27
27
|
with:
|
|
28
28
|
python-version: ${{ matrix.python-version }}
|
|
29
29
|
|
|
30
|
+
# https://taskfile.dev/docs/installation#github-actions
|
|
31
|
+
- name: Install Task
|
|
32
|
+
uses: go-task/setup-task@v1
|
|
33
|
+
|
|
30
34
|
- name: Install dependencies
|
|
31
35
|
run: |
|
|
32
36
|
uv sync --all-extras --dev
|
|
33
37
|
|
|
34
|
-
- name: Test
|
|
38
|
+
- name: Test
|
|
39
|
+
run: |
|
|
40
|
+
task test
|
|
41
|
+
|
|
42
|
+
- name: Lint
|
|
43
|
+
run: |
|
|
44
|
+
task lint
|
|
45
|
+
|
|
46
|
+
- name: Format check
|
|
35
47
|
run: |
|
|
36
|
-
|
|
48
|
+
task fmtchk
|
|
@@ -12,16 +12,16 @@ jobs:
|
|
|
12
12
|
publish:
|
|
13
13
|
runs-on: ubuntu-latest
|
|
14
14
|
steps:
|
|
15
|
-
- uses: actions/checkout@
|
|
15
|
+
- uses: actions/checkout@v6
|
|
16
16
|
|
|
17
17
|
- name: Set up Python
|
|
18
|
-
uses: actions/setup-python@
|
|
18
|
+
uses: actions/setup-python@v6
|
|
19
19
|
with:
|
|
20
20
|
python-version: '3.x'
|
|
21
21
|
|
|
22
22
|
# https://docs.astral.sh/uv/guides/integration/github/
|
|
23
23
|
- name: Install uv
|
|
24
|
-
uses: astral-sh/setup-uv@
|
|
24
|
+
uses: astral-sh/setup-uv@v7
|
|
25
25
|
|
|
26
26
|
- name: Install Dependencies
|
|
27
27
|
run: |
|
|
@@ -1,11 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unicodedata-reader
|
|
3
|
-
Version: 1.3.6
|
|
3
|
+
Version: 1.3.8
|
|
4
|
+
Project-URL: repository, https://github.com/kojiishi/unicodedata-reader
|
|
4
5
|
Author-email: Koji Ishii <kojii@chromium.org>
|
|
5
6
|
License-Expression: Apache-2.0
|
|
6
7
|
License-File: LICENSE
|
|
7
|
-
Requires-Python: >=3.
|
|
8
|
-
Requires-Dist: platformdirs>=4.
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: platformdirs>=4.10.0
|
|
9
10
|
Description-Content-Type: text/markdown
|
|
10
11
|
|
|
11
12
|
[![CI](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml/badge.svg)](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
# yaml-language-server: $schema=https://taskfile.dev/schema.json
|
|
2
|
+
|
|
3
|
+
version: '3'
|
|
4
|
+
|
|
5
|
+
tasks:
|
|
6
|
+
default:
|
|
7
|
+
deps: [check]
|
|
8
|
+
|
|
9
|
+
check:
|
|
10
|
+
- task: tests
|
|
11
|
+
- task: lint
|
|
12
|
+
- task: fmtchk
|
|
13
|
+
- git diff --exit-code
|
|
14
|
+
|
|
15
|
+
fix: "{{.PYRUN}} ruff check --fix {{.CLI_ARGS}}"
|
|
16
|
+
fmt: "{{.PYRUN}} yapf -ir -vv . {{.CLI_ARGS}}"
|
|
17
|
+
fmtchk: "{{.PYRUN}} yapf -qr -vv . {{.CLI_ARGS}}"
|
|
18
|
+
lint: "{{.PYRUN}} ruff check {{.CLI_ARGS}}"
|
|
19
|
+
test: "{{.PYRUN}} pytest tests {{.CLI_ARGS}}"
|
|
20
|
+
tests: "{{.PYRUN}} tox -p {{.CLI_ARGS}}"
|
|
21
|
+
|
|
22
|
+
gen:
|
|
23
|
+
- "{{.PYRUN}} unicodedata-reader lb -fv -t js/template.js"
|
|
24
|
+
- "{{.PYRUN}} unicodedata-reader gc -fv -t js/template.js"
|
|
25
|
+
|
|
26
|
+
install-git-hooks:
|
|
27
|
+
desc: Create git hooks
|
|
28
|
+
cmds:
|
|
29
|
+
- echo '#!/bin/sh' > .git/hooks/pre-push
|
|
30
|
+
- echo 'task check' >> .git/hooks/pre-push
|
|
31
|
+
- cmd: chmod +x .git/hooks/pre-push
|
|
32
|
+
platforms: [linux, darwin]
|
|
33
|
+
|
|
34
|
+
vars:
|
|
35
|
+
PYRUN: uv run
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uGeneralCategoryAsInt = (function () {
|
|
2
|
+
const bytes = atob("h2ABQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQaIAAECYw0CCg0ODwYQDQoNBjEKDCIKEQ4SUQKFSQaBSYVsBoFsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCSwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDCkMCQwJTCkMCQwpDEksaQwpDElMKQwpDAkMCQwpDAksCQwpDEkMCQwpLA4JTG4JEwwJEwwJEwwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAksCRMMCQxJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCYFMKQwpLAkMaQwJDAkMCQwJkQwuhiyENGqCdIMqgRSBShQKFIQKm3UJDAkMFAoJDDYUTAIJdioJAkkWCRYpDIQJFoIJiEwJLElMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmBDAkMBgkMKSyMSYtsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA2BFTcJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwpDAkMCQwJDAkMCQwJLAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDBaJKTYUgSKKDAIHNi0DFosVBxUCNQI1AhWBdoZOdm4iglaBMEYiAyItglUCEEKHbhSCLoUVgihiLhWYTgIOgVUQDYE1NDUNdS6CKE4tDoMiFhAOFYcuhlU2lg6CVQ6DNoIoiA6CFTQNQhQ2FSOFLnUUghUUVRSBFTaDQhaGDlU2AhaCToEWhW4KgU4wgRaCFYoOFIV1EId1GI0uFRgVDliBdXgVOA6BVYIuNSKCKAIUg04VOBaBbjYuNoUuFoFOFg5WbjYVDlh1Njg2OBUOgXYYdi4WTjU2giguI4ExDQMOAhU2NRgWgS52LjaFLhaBThYuFi4WLjYVFlg1djU2VVYVgVZuFg6BVoIoNU4VAoI2NRgWgg4WThaFLhaBThYuFoEONhUOWIEVFjUYFjgVNg6DVi41NoIoAgOBVg6BNRYVOBaBbjYuNoUuFoFOFi4WgQ42FQ4YFRh1Njg2OBWBVjUYdi4WTjU2gigNDoExgjYVDhaBLlZOFm5WLhYOFi5WLlZOVoJudjgVOFZYFlgVNg6BNhiDNoIoUYEtAw2BFhVYFYFuFk4WhU4Wg242FQ5VeBZVFnWBVjUWThYuNi41NoIogVYCgVENDhU4AoFuFk4WhU4Wgi4WgQ42FQ4YFYEYFhU4Fjg1gVY4gRZOFi41NoIoFi4YgnY1OIIOFk4Wig41Dlh1FlgWWBUODXZOGIFRTjU2giiCEQ2BLhYVOBaELlaFbhaCDhYONoFOVhV2WFUWFRaBeIE2gig2OAKCdotuFS6BVXYDgS4UgXUCgigiiRYuFg4WgQ4WhW4WDhaCLhUughUONoEOFhQWgVUWgig2bod2Dk2DQg0CTTWBLYIogjENFQ0VDRUEBQQFOIFuFohudoM1GIEVAjWBDoJVFoh1FoFtFYEtFi2BAm0iiRaKTjh1GIE1GDU4NQ6CKIEigS44NW5VDlgugVhOdYMOFTg1gTgVDhiCKFgVLYkpFgmBFgk2ikwCFEzSDhZuNoFOFg4WbjaKDhZuNogOFm42gU4WDhZuNoNOFo4OFm42kE42VYIChHFWg26CLYE2lSk2gSw2B4Gabg0ChA4Bhi4EBVaSTkJZgW6BVoQuVRiCFoRONRgi
ghaELjWCdoMOFk4WNYJ2jG41GIFVgXgVOIJVQhRCAw4VNoIogTaCMYE2gSIHYlUQFYIogTaIThSNDoFWgQ41iC4VDoEWkS6CNodOFlV4NVh2OBWBOFV2DVYigiiHLjaBDoJWim52hi6BNoIoEVaILYVONTgVNiKNDhgVGIFVFhUYFTiBdYE4gjU2FYIogTaCKIE2gUIUgSI2gzUXh1U2gnWEdnUYi04VGIEVGBWBGBU4gW4WIoIogUKCLYIVgg1CNRiHLhh1ODUYVS6CKIpuFRg1WBUYVTiBdmKIboF4gXU4NVaBAoIoVk6CKIcugTQiggwJDIEWikk2SYFigXZVAoMVGIFVbhWBLhUuGDUOgRaKbI9UgwwUiCyJFI91CQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmCDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmCDIFpgSw2gSk2gWyBaYFsgWmBLDaBKTaBbBYJFgkWCRYJgWyBaYMsNoFsgXOBbIFzgWyBc4EMFixpEwoMSkwWLGkTSmw2LGkWSoFsgQlKNkwWLGkTKhaCQYEQgSciDxIELxIED4FiGhuBEAGCAg8SYitCBgQFgkIGAguCIgGBEBaCMBEUNoExRgQFFIIxRgQFFoMUVogjgzaDFXcVV4J1g1YtCW0JLQxJLEkMDQktBoEJgS0JDQkNCQ1pDQxpDG4MLSwpgQYJbA0GLQwNg3GIWQkMeREtdoEGgQ0mbQYtBi0GgU0Gh00mLQYNBodNwmaBbQQFBAWEbSaBTQQFlA0Ghy2GBoltgSaRbYU2gk2FFo5xky2FMa1NBoINBo0tgWabTQa9bQQFBAUEBQQFBAUEBQQFhzGKbYEGBAWHRgQFBAUEBQQFBAWDZr9toEYEBQQFBAUEBQQFBAUEBQQFBAUEBQQFj0YEBQQFh2YEBcAmi22FBi2BJolNNqIti2mLbAkMSSwJDAkMCQxpDAksCYEsNEkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCSyBLQkMCQxVCQyBFmIRIoksFgyBFgw2jW6BVhQCgzYVhU6CFoFOFoFOFoFOFoFOFoFOFoFOFoFOFoFOFod1Ig8SDxJCDxICDxKCAgciBwIPEiIPEgQFBAUEBQQFgQIUgiInYgcCBIMCLUIEBQQFBAUEBQeINoYtFpYNgna1LYY2g20BQg0UDhkEBQQFBAUEBQQFLQQFBAUEBQQFBwQlDYIZdTgHgRQtWRQOAi0WlS42NSo0DgeWLgJUDoEWik4Wly4WLXGCLYduiS2CFg2DbodNFoIxhy2BcQ2DUYdtgjGJTYNRz22M726PbamFDhSCnU5WjU2CFolugTQiwm4UQoNugiguhHYJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMDhVXAoI1AhQJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMNDWRLoI5NYEigXaFSoIUKgkMCQwJDAkMCQwJDAlMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwUgWwJDAkMKQwJDAkMCQwJDBQqCQwJDA4JDAlMCQwJ
DAkMCQwJDAkMCQwJDAkMCQyBCQyBCQwJDAkMCQwJDAkMCQwJDGkMCQwpDAkMCQwJDAkMCQwJDAkMCYR2dAkMDjQMgU4VThVuFYVOODUYbRVWgTEtAw2BNoxuYoF2OIwug3g1gXYigiiBNoQ1gS5CDgIuFYIohm6BdSKFToJVOIJWAocOVlUYi04VOHU4NViDAhYUgih2IoEOFRSCDoIogQ4Wig6BNTg1ODWCFk4VgW4VGDaCKDZig24UgS5NDhgVGIwuFQ5VLjWBDjUOFQ6Fdi4UIoJOGDU4Ig40GBWCNoEuNoEuNoEughaBThaBThaKTAp0ggwUKnaTbIhOOBU4FTgCGBU2giiBNpXoboJ2hU52jA52g/98jL992y42mi6JNoFMgnaBDIEWDhWCLgaDDhaBDhYOFi4WLhaaboQKg23aTgUEg22Pbi2NLoFth3aCbgNNg3WBQgQFAoE2g3UCJysEBQQFBAUEBQQFBAUEBQQFIgQFYktCFmIHBAUEBQQFQgYHRhYCAyJ2gQ4WoU42EBZCA0IEBQIGAgcigigiRiKGKQQCBQoLCoYsBAYFBgQFAgQFIoIuFIsONIdOVoEuNoEuNoEuNk5WIwYKDSMWDWYtgjZQLTaCbhaGLhaEThYuFoNONoMuiDaeToEWQnaLEVaCDY0ZcYQNMU0Wgw1WDYtWiw0VoDaHDlaMDoNWFYZRdoducYIWhG4ZgW4ZgRaJLoEVgRaHLhYCiG52gW4CgRmKNolpiWyTLjaCKIE2iGl2iGx2iW6BdoxuglYCgkkWg0kWgUkWKRaCTBaDTBaBTBYsVoxugnbNToIWhS6CNoFuhXaBNBaKNBaCFJEWgS42DhaKbhYuVg42hU4WAoFxhU4tgVGHToF2ghGLdoROFi6BFoERhS6BMVYChi6BFgKGLok2jW52MS6DcTaLMQ5VFjWBFnVuFk4Whw42VXYVghGBVoICgVaHDjEChw5Rh3aBbg2GbjV2gRGBQoIWjS5WgUKFLjaBcYROgRaBcYQugVZignaBUZN2kg6NVoxJgxaMTIFWgTGIbnWBdoIogTaCKG4UDoUpVoEVBxSFLIF2JrN2h1EWii4WNQc2LoN2ThQugXYCgW2IFoE1hw6CMQ6BdoUuglVxgQKFNoQudWKJNoUOgVGEdoVOghYYFRiNDoNVgUJ2hHGCKBUuNQ6CFlUYiw5YdTg1IhBiFYI2EDaGDoFWgiiBNlWIboEVGIF1FoIoYg44DoF2iE4VIg6CFjUYi25YghU4bmJ1AhgVgigOAg5CFoRxglaELhaGDlhVOBUYNYEiFS4VjzaBThYOFm4Wg04Wgi4CgTaLThVYgXWBFoIogTY1OBaBbjYuNoUuFoFOFi4WgQ4WNQ44FXg2ODZYNg6BNhiBFoEOODaBVVaBFYJWgi4WDjYOFokuFg5YgTUWGDYYFngWOBUYFQ4VDiIWIoF2NYcWjQ5YgXU4VRgVboECgigiFgIVToc2i25YgTUYFXg1GDUuAg6BdoIoqTaLTlh1Nng1GDWFQm41iDaLbliBdTgVGDVCDoJWgiiBNoMChFaKThUYFTiBNRgVDgKBNoIogTaEaIZ2hk42FRgVOHUYgRV2gigxQg2BTq4Wim5YghUYNQKYdodph2yCKIIRgnaBbjYONoFuFi4WhW6BOBY4NjUYFQ4YDhgVQoIWgiiRNoFuNolOWHU2NXgVDgIOGIZWDoI1iW6BNRgOdYFiFYF2DoE1OFWLLoMVGDVCDoECgxaSDoFWgiKVNhUYVRgVGJV2iA4CgzaCKIE2gg4WiQ4YgVUWgTUYFQ6BAoI2giiEUVYihy42hTUWGIFVGDUYNZIWgU4WLhaJLoE1VhUWNRaBVQ4VgXaCKIE2gS4WLhaHboEYFjUWOBUYFQ6BVoIogTaKDhQudoIovTaETjU4IoFWNQ4Ygw4WiC44gRVWOBUYFYMCgigVlRYOg1aFEYFtY4QNgxYCgeYumTabWRaBAoJWsG6FknaYDiKDFoKL
boNwFYEug1WCNofmToEWgZFOja4Why6CdVhVgiiDsTaBjg6BVodOFoIodiKTThaCKIE2hy42gRUCgjaLboFVgQJtdAINgjaCKBaBURaFDoEWhE7rdlSJbjRCgiixNodph2yFUWKBFoYJNoYMinaSTnYVDo1YgVZ1gxSPdjQCFBWCVjg0WYIWjrUuihaHbpgWnE6Q/xZ0FoFUFjQWyE6DVg6HFk42DoM2boF24m6EwHaaToEWgw5Wgg6BVoIuNg01AnCH1na7bYIoTVbsbYE2hU2DVoNtBoNWizU2hVWCFpxtjna9LYI2iU02jm04VU2BOIFwgXUtgVWHLXWPDYUWkC1VDZ42hHGCdoRxgnaVTYIWhhGhVoYphiyGKYFMFoQshimGLAkWKTYJNik2aRaBaWwWDBaBTBaCTIYphiwpFmk2gWkWgUkWhiwpFmkWgQkWCVaBSRaGLIYphiyGKYYshimGLIYphiyGKYYshimGbDaGCQaGDAaBLIYJBoYMBoEshgkGhgwGgSyGCQaGDAaBLIYJBoYMBoEsCQw2jCj/bY1VbYw1gW0Vgy0VLYECg1aBFRaDVYKTdoIsDoRsgTaBLLUWgVUWhBU2gVUWNRaBFYEWjzSIFhWbdosOVoFVgVQ2gih2Dg3PdocuFYQWim51giiBFgPzdoZOFHWCKLU2hy41DoIodgKvdodOFk4VLhWBTjWBDhWBdg4Ut3aBThZuFi4Wg04WsQ42ghGBVYoWiCmILIFVFHaCKHYigcQWjlENUQNxknaLEQ2DUbA2bhaGThYuFg42DhaCLhZuFg4WDoE2DnYOFg4WDhZOFi4WDjYOFg4WDhYOFg4WLhYONm4WgU4WbhZuFg4Wgi4WhA6BFk4WgQ4WhA6MdibDNoptdphtgnaDTTaDTRaDTRaJDYI2gxGoDY12hw2DFoptdoINgVYtgzaBLaY2vk2BCoG2DVaEDVaDDVa2LYE2gm12DYNWgm12jW2BdoItgTaJbYF2hy02gm12LYM2ggaJVtVtgXaDLTaDDVaCTVaODRYNdoNtNoJtdoItgVakTRaWbYIoDYKBFtO3bod2iMcuNoujLjaOzA6DVoGbLoToNoGHLoL4NonSToEWkMouiuXhVhCHNpdwn3a7df+Ddv//PTb//z02");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 5) + 1);
|
|
14
|
+
entries.push(value & 31);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uGeneralCategoryValues = ["Cc","Zs","Po","Sc","Ps","Pe","Sm","Pd","Nd","Lu","Sk","Pc","Ll","So","Lo","Pi","Cf","No","Pf","Lt","Lm","Mn","Cn","Me","Mc","Nl","Zl","Zp","Cs","Co"];
|
|
26
|
+
function uGeneralCategory(c) { return uGeneralCategoryValues[uGeneralCategoryAsInt(c)]; }
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uLineBreakAsInt = (function () {
|
|
2
|
+
const bytes = atob("hAABAkMEiEAFBgcICQoIBwsMCAkNDg0PhFBNgQgGjQgLCQyOCAsBEQiCQBKMQBMLCoEJCFQIFAcIAUgKCVQVCIIUB4EUC4sIFI8IFIHnCBQVgRQVFEgUgwiBVAgUCBWPSK1AgxOGAINIVoFIDQiBVoMIFggWiUgW70iDANJIFpJIVpdIDRdWSAkWlgAXAAhACEAGAINWjRiBVoFYSIUWglCBCIEKTUiFAAYAgQaVCIoAhFAKUIEIALEIBgiDABAIgkBIQAiBQEiEUIlIFkgAjkiNAFasCIUACIZWhFCQCIQAgUgNBghWAEmKSIFACIQACIEACIIAVocIFowIgQBWCBaFCIIWj0hQghaEAJRIi0AQkACaSIEACIhACIMAhEhAQYRQiAiBABaDSFZIVopIFoMIFgiBFoFIVgAIgwBWQFaBAAiDVgCBVkgWgQhAVoRQSEqCCAoICUgAVoEAFoJIgVZIVopIFoMIFkgWSBZIVgAWggCBVkBWgQCBFgCDFoFIFgiDFoRQQIEIAAiEVoEAFoQIFoEIFopIFoMIFkgWgghWAAiDQBaBABaBAFYIhxZIQFaEUAgJgxYIgkAWgQAWg0hWSFaKSBaDCBZIFoIIVgAIgwBWQFaBAIMWgQCBVkgWgQhAVoRQg0iEVgAIFoJIgRaBCBaBSIEWSBYIFkiBFkiBFoEIgRaFSIFWggCBFoEAFoFAVgiCVgCGVoRQhAgJCIIWggCDSBaBCBaLCBaHSFYACIMAFoEAFoFAgxZAFoEIFkhWSEBWhFCDFhWECIEAFYNIFoEIFosIFoRIFoIIVgAIgwAWgQAWgUCDFkCCFoEIFkhAVoRQFkgAhVaBQIQIFoEIFpQIQAiDABaBABaBQEiBVoEIAIRIQFaEUIQICoJIFoEAFohIgRaLSBaECBYIVoMIgRYAgVaCQBYAFoNAglaEUFZACIVWnFmBVgmHGQiEUEGSFlkWGRaCGRaLWRYZFosZVoIZFhkWgxkWhFBWgVmPVgiBVQhVE1UBE4IGEwgGgQhAgkiEUIRIAQAIAAgACxELEUCDSBaRSIFWhkABggABQIIIhQAWkUAWQYJIAIJIFkhVARWCCFOSFp9ZhFBBgUifWYRQglmSSBYIghYIVpdIr1qjW6tcpAgWgUhWgwgWCBaBSFaUCBaBSFaQCBaBSFaDCBYIFoFIVocIFpwIFoFIVqEIVoEACAGNCIEWjEiCVqpIVoJIVheCvwgBjEgLEYEWpQiBAYUIgxaISIFAhBaJCIEAQYQWiEhAhVaGCBaBCBZAhVapWUEdGQEIAQlZVoRQglaESIJWSEZBFQhGCIEAEwCEUIJWrAiDFoIIQJBIAAiCFqJIhFaPCBaFQIFWhUCBVgiBFkaEUI5ZVoIZhRaVWYFWjFmCVoUQgRZZmwiCAFZInxkWjhlWAIRQglaEUIJWhllWlkBWhQATiVaCAJceh0Afg14WQYRgQSGBQYRhhACEIYEBgQCOSIYASIRQgkiSYIVAYoNWk0iJQIEWggGEUIEWgQiEUJFIQYUIghaVCFaFCINWgQAIigCBSACCSABIgQAIghbfSIYAE5ZAE4EAgYpIVoJIVpJIVoJIVoNIFggWCBYIFo8IVpoIFocIFoZIVoJIFokIVoEIFoMIFQgWgwETgQEjACRAFxNXJVQIRwuBBwsHVEiBJgFDggATg0oIRxRdgkgNCxGBHYVIAQqBQQiBASeBSBaEQEhWFINICxEUCIFUg0gLERaGCIEWgwkKhkkKgUkKSQoJCocJkACHFoEICggUgQgKhAgUSAmESFSDSBSRSIcUCIVUgUiEVIcIFEiBVoRUm0gUCBSVCBQIVIEIVEgUgQgUCBRJCBSBSBRIgVRIFAgUCIJUCBSCCIFUgUhUhEgUgQgUgggUhghUSIFUSFRIVIhIVEhUhggUgQgUhQgUjAgUlwgmi0gLEQsRgkgUgwhhhggLEeIIgWGaSIpWhQiKFs8UCKVU
gUiSFIUIh1RIgVSESFQIgxSDSFRIVIFIVEhUgUiBFEgUSIFUh0iBVIQIFIdIgWEIVEgUgUhUgUhhVCEIgSEoYYwIgSGBSBQIFI4IVAiBFAgUIVQIVAgUhwghjkhUjgiFYYFUIQiBIRRhgRRhVCFUgSEIFIFIVCGCVIIhFGEoIVSDYYEIYYFopAgUgQiCRwhGIYEICxELEQsRCxELEQsRCxGOVJgICxGPCAsRCxELEQsRCxGHSAGBwEgLEQsRCxELEQsRCxELEQsRCxELEQsRnwgLEQsRj0gLEYGrCIIUjEhWgbwIgQBIghYGgQEIBgGSSBYIghYIVptIgxYIAYZWAIsIhBaDCBaDCBaDCBaDCBaDCBaDCBaDCBaDCBaPQIZHg0EIFwsBSEdIRwsRCxELEQsRgUEGCEEIQYIIZYEBCBcBC4NBCAEIQYEIRgsMCwwLDAsMF5BWjGEWrCGFVuphjFaHYQFRYR1hCxELEQsRCxELEWELEQsRCxELER0LUYRhgkCCIQCCIV2BIRYpISkhKSEpISmMISmPISkhKSEpgmEpgmFpVkCBXSEdKSEpISkhKSEpjCEpjyEpISkhKYJhKYJhaYFhHSldIYIWlSEWrmEWqmGEFiGHaY8hFpNhg1Sbt2GfSNKKIR2EuyGBFpshhBaWSEGBhggBBgGHSIRQSIlWlwiBQAiEQI9IQKdIQAiCAYNW7giJVogIAIEIAIFIAIsIggCBSACBFoNICgiCVplIVUaDVkCYSIhAg1ZBhFCCVohAhEgVSACEUI1Ig0BBiwiGAIUWCI4agRaBQJcehgAfgmGBAYFhFgGEYIFWYYdZhFCCGRaUIIZAhBaBAQCDQUBWhGBWIYEBsRmLVoIZhQiCAEGBCECEVoJIVoJIVoJIhBaDCBaDCBadSIFWuQiDQAFAVoRQglYqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsq
jSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSuFVosbgVaYHIFWh/9smP9Wgf9hgwiFVoIIghYYAIRYCIYYFoIYFhgWWBZYFoRYgfZIEQvHSI9WhUgKgQiHQIERXUYLESaCVhMAEwATAFMAEwATAFMAgiELEQsRCxELEQsRCxELEQsRYQsRgyERIREWXUYhCxELEQsRg2EWIQkKIYFWgggWwwhWJxYGYQkKYQsRYREhEYUhXYEhBo0hCyERjiELIREhC1ELUR0hhGmWIV2PIYEWgmFWgmFWgmFWgSGBFgoJgSFJFoMIhFaBAC0UVoVIFoxIFokIFkgWhwhWhkiQVr0IghaBAYFWlgiBFqtIFoYIgRYIlxaWCADAVo4IgRaYCIcWAI0IgVaRSIQWjkiCFpJIggCCFo5IFgGRSIFWg0gBggiUVs5IVoRQglaRSIFWkUiBVpNIg1aZSIUWhUgWhwgWgwgWSBaFCBaHCBaDCBZIgRaZSIVWgZsIhBaKSIRWg0iLVoJIFpRIFoQIohaCSFYIFpVIFkiBFghWiwgWAaMIg1aECJdWiQgWSIIWkAiBFgGMSIIWjQiSVptIgVaJSFaXCIEAFkCCFoFAgUgWgQgWjghWgQCBVgCECIMWg0EIgxafSI9WkghAgVaCCIJBJoQWmkiBFoMBikhWjQiCFoxIgxaBSIVWgwinVqQImxaZCIYWmQiDFpRIgUCDVoRQglaEUI1IgRaCABeLCINWSOdWjwgWlEgWQBdWSIdWgkiDVgGDSJAWgkCTSINWikiFAIQIilaISIFAgUiSVo1IiVaLCIQWgQBumR6GQB9BgiGBVolhhGAAXkAehBYTgQCWCIUASBCBQQCEVhBWjAiDFoRQglaBAJFIhkAWhFCBQQhACINWkQgACBUIhBaBAJdIhkCBSEEIAYFACECEUAgVCIEBFolIhRaISBaMCIVAQQhBCABIAJ5WgwgWCBaBSBaHCBaESAGCVpcIhUCCFoRQglaBQBaDXlZeVopeFoMeFl4Wgh4WQAGDAFZAVkAfViCCVgCCFgFgXkBWgwCBFoIAhRaEYBYgViAWYJFeFiGEABYAVgAWgUAWgUAfLgCBIRZhg1ZAjhaaCIhAgUiBQQiEUEEWCACBCI5Wl0iJQIFIg1aEUNJWlwiDAFaEABVBRoEIhwGBSECQVpdIiABBSIUWhFCCVoYViRaVCIYASIJWhFCCVolQjVaNGVaHGYFWhFBZgQGDWdwWlUiHAAixVp9IhFCECIVWCIMeVh5Wg14WXhaLXoJAFkBWgQAfLgAuQIEBhBaEYKJWg0hWkwiDAFaDAAgVCACNFgiEQJNIgwAIgUAVCIFBFQgAg1YIhQCWSIdAgQEIgRVBhhakCIMWhFWqVoNAq1aQSIZWhFCCVoQIFpIIg0AWg0AIggGEVoRQiQiBFhUGjkhWikAWhkCkFoMIFkgWkkiCQIEWABZAFoMACACDVoRQglaCSBZIFo9IggAWQBaCAAiDFoRQglaVSIFWhFD6
VohgAYFAQYMWQC4Ahh4WkF6DAIEWgUAfQYUhhGAAqhYIhxaOCIFKiAiGFgGDzEiyVrcIFoIBhRbhSIqlVrEIhhaCq0iBC4ERkUgRgQgLEQsR9wgLUdkIC4MTCxGBEwsRCxEAgkiHAIRWj80IghaB5kgLEbsImtwWjmCIQIRghuJWgpwIgxaPCBaEUIFWQacIFoRQglaOSFaCAAGEVpdIgwCBAYRIAQiEVoRQFoMIFooIghaJCIHXVpZIQYRQ4larCEFIghaMCFaMCJVWpQiBVgAImwCDFoFAhgifVoFdE4UWQF2BIYQWmv9hgepIlBYIjyGwFrkhof4WgUgWgwgWSBaBkSGHFimOFoEpVimGVoFpg1aBxWGJgVa1CIIWhgiBFoQIgxaESFYIQAGBQI+tVvdIhFCBCIEWgdlIglaLCIcWiAiHFpZAVosAhBa5SJ1W+kiEVpMIVp1IggCBCIpASIMAjkiBQJ4IihagSIEACLxWiUiFVolIhVarCIQWjAjDFqoIFqMIFkhWCFZIVoFIFoVIFggWgwgWoAgWgUhWg0gWgwgWjUgWgUgWgggWCIEWgwgWgalIVoGRSFaYUIH/SJsAgUiYQINIAIZIAEiBQQiHFoIAFocAhKdWjwiCVoJI6haDABaIAFaDABZAFoIAghaeSJAWALdWlgiBFoMAgwhWhFCBVkiBn1aOSACIFpVIgUCEUIIWCYHnVo1IgUCEUOpWjkhACIRQgVYI31aPCBaBCABIAIMIQIIIAINWSO9WgwgWgUgWSBaHCBbiCFaECIMAlBahSIMACIFWhFCBVkuDiBadCAqBCAqBSKVWngjgVoFIFo0IFkgWCFYIFoRIFoFIFggWCIJWCIFWCBYIFggWgQgWSBYIVggWCBYIFggWCBZIFghWgUgWgwgWgUgWgUgWCBaESBaICIIWgQgWgggWiAiZVkiBhlb/YYYUgQiOVEicVIJInhQIm2GMb4HCISiKYUiLIUiCIQiCIYEoYShhgSiWYYIwoGFoYYUoiiGJKIEhKIFhgSghgSiDISghKIZhCCEIIQiCISiBYQghSKYhgwiHYYZIhiGLSJRhaIFhKIohKIFhaJ4hg0iLYYJIpSGBKIEhgiiSSIEHgR2BSJEhKIdhgSiEISiFISiZIblIgSGBSIIhqgiVIYVIgWGbSINhhEiCYZNIg2GOSGGFSIFhSIZhhAiTIYVIKGEog2GDaIJhKIQhhGhhgSibYSieIWghaCEoiCGBKCGGKJBhq0i1IYEolGGEKIMhyQgWrUiEUAiCFoP+YVaB//5hVoH//mGUgIEWAI5Wr0C/VvdAhf6GVg==");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 6) + 1);
|
|
14
|
+
entries.push(value & 63);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uLineBreakValues = ["CM","BA","LF","BK","CR","SP","EX","QU","AL","PR","PO","OP","CP","IS","HY","SY","NU","CL","NL","GL","AI","BB","XX","HH","HL","SA","JL","JV","JT","NS","AK","VI","AS","ID","VF","ZW","ZWJ","B2","IN","WJ","EB","CJ","H2","H3","SG","CB","AP","RI","EM"];
|
|
26
|
+
function uLineBreak(c) { return uLineBreakValues[uLineBreakAsInt(c)]; }
|
|
@@ -1,22 +1,25 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "unicodedata-reader"
|
|
3
|
-
version = "1.3.6"
|
|
3
|
+
version = "1.3.8"
|
|
4
4
|
description = ""
|
|
5
5
|
authors = [{name = "Koji Ishii", email="kojii@chromium.org"}]
|
|
6
6
|
readme = "README.md"
|
|
7
|
-
repository = "https://github.com/kojiishi/unicodedata-reader"
|
|
8
7
|
license = "Apache-2.0"
|
|
9
|
-
requires-python = ">=3.
|
|
8
|
+
requires-python = ">=3.10"
|
|
10
9
|
dependencies = [
|
|
11
|
-
"platformdirs>=4.
|
|
10
|
+
"platformdirs>=4.10.0",
|
|
12
11
|
]
|
|
13
12
|
|
|
13
|
+
[project.urls]
|
|
14
|
+
repository = "https://github.com/kojiishi/unicodedata-reader"
|
|
15
|
+
|
|
14
16
|
[dependency-groups]
|
|
15
17
|
dev = [
|
|
16
|
-
"pytest>=
|
|
18
|
+
"pytest>=9.1.0",
|
|
17
19
|
"pytype>=2024.9.13",
|
|
18
|
-
"
|
|
19
|
-
"tox
|
|
20
|
+
"ruff>=0.15.17",
|
|
21
|
+
"tox>=4.55.1",
|
|
22
|
+
"tox-uv>=1.35.2",
|
|
20
23
|
"yapf>=0.43.0",
|
|
21
24
|
]
|
|
22
25
|
|
|
@@ -27,6 +30,12 @@ unicodedata-reader = 'unicodedata_reader.__main__:main'
|
|
|
27
30
|
requires = ["hatchling"]
|
|
28
31
|
build-backend = "hatchling.build"
|
|
29
32
|
|
|
33
|
+
[tool.ruff.lint]
|
|
34
|
+
ignore = [
|
|
35
|
+
"F403", # `from ... import *` used; unable to detect undefined names
|
|
36
|
+
"F405", # `...` may be undefined, or defined from star imports
|
|
37
|
+
]
|
|
38
|
+
|
|
30
39
|
[tool.pytest.ini_options]
|
|
31
40
|
testpaths = "tests"
|
|
32
41
|
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import unicodedata
|
|
3
|
+
|
|
4
|
+
from unicodedata_reader import *
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def dump_east_asian_width():
|
|
8
|
+
reader = UnicodeDataReader.default
|
|
9
|
+
blocks = reader.blocks().to_dict()
|
|
10
|
+
bidi_brackets = reader.bidi_brackets().to_dict()
|
|
11
|
+
scripts = reader.scripts().to_dict()
|
|
12
|
+
script_extensions = reader.script_extensions().to_dict()
|
|
13
|
+
gc = UnicodeDataReader.default.general_category()
|
|
14
|
+
|
|
15
|
+
def bidi_brackets_type(code):
|
|
16
|
+
bracket = bidi_brackets.get(code)
|
|
17
|
+
return bracket.type if bracket else ""
|
|
18
|
+
|
|
19
|
+
columns = {
|
|
20
|
+
"Block": lambda code, ch: str(blocks.get(code)),
|
|
21
|
+
"Code": lambda code, ch: "U+" + u_hex(code),
|
|
22
|
+
"Char": lambda code, ch: chr(code),
|
|
23
|
+
"GC": lambda code, ch: gc.value(code),
|
|
24
|
+
"Bidi_Paired_Bracket_Type": lambda code, ch: bidi_brackets_type(code),
|
|
25
|
+
"EAW": lambda code, ch: unicodedata.east_asian_width(ch),
|
|
26
|
+
"Script": lambda code, ch: scripts.get(code),
|
|
27
|
+
"ScriptExt":
|
|
28
|
+
lambda code, ch: " ".join(script_extensions.get(code, [])),
|
|
29
|
+
}
|
|
30
|
+
sep = "\t"
|
|
31
|
+
print(f"# {sep.join(columns.keys())},Name")
|
|
32
|
+
for code in blocks.keys():
|
|
33
|
+
ch = chr(code)
|
|
34
|
+
eaw = unicodedata.east_asian_width(ch)
|
|
35
|
+
if eaw is None or not (eaw == "F" or eaw == "W" or eaw == "H"):
|
|
36
|
+
continue
|
|
37
|
+
script = scripts.get(code)
|
|
38
|
+
if script != "Common":
|
|
39
|
+
continue
|
|
40
|
+
values = (func(code, ch) for func in columns.values())
|
|
41
|
+
output = sep.join(values)
|
|
42
|
+
try:
|
|
43
|
+
output += f"{sep}{unicodedata.name(chr(code))}"
|
|
44
|
+
except: # noqa: E722
|
|
45
|
+
pass
|
|
46
|
+
print(output)
|
|
47
|
+
|
|
48
|
+
|
|
49
|
+
if __name__ == "__main__":
|
|
50
|
+
dump_east_asian_width()
|
|
@@ -237,16 +237,19 @@ class UnicodeDataEntries(object):
|
|
|
237
237
|
return None
|
|
238
238
|
|
|
239
239
|
def _is_contiguous(self):
|
|
240
|
-
|
|
241
|
-
return all(
|
|
240
|
+
entries = self._entries
|
|
241
|
+
return all(entries[i].max + 1 == entries[i + 1].min
|
|
242
|
+
for i in range(len(entries) - 1))
|
|
242
243
|
|
|
243
244
|
def _is_distinct(self):
|
|
244
|
-
|
|
245
|
-
return all(
|
|
245
|
+
entries = self._entries
|
|
246
|
+
return all(entries[i].max < entries[i + 1].min
|
|
247
|
+
for i in range(len(entries) - 1))
|
|
246
248
|
|
|
247
249
|
def _is_sorted(self):
|
|
248
|
-
|
|
249
|
-
return all(
|
|
250
|
+
entries = self._entries
|
|
251
|
+
return all(entries[i].min <= entries[i + 1].min
|
|
252
|
+
for i in range(len(entries) - 1))
|
|
250
253
|
|
|
251
254
|
def sort(self):
|
|
252
255
|
self._entries = sorted(self._entries, key=lambda e: e.min)
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
import
|
|
2
|
+
from pathlib import Path
|
|
3
3
|
from typing import Iterable
|
|
4
|
+
from typing import Optional
|
|
4
5
|
import shutil
|
|
5
6
|
import urllib.request
|
|
6
7
|
|
|
@@ -111,40 +112,49 @@ class UnicodeDataReader(object):
|
|
|
111
112
|
class UnicodeDataCachedReader(UnicodeDataReader):
|
|
112
113
|
try:
|
|
113
114
|
import platformdirs
|
|
114
|
-
|
|
115
|
-
_logger.debug('cache_dir: %s',
|
|
115
|
+
_default_cache_dir = Path(platformdirs.user_cache_dir('UNIDATA'))
|
|
116
|
+
_logger.debug('cache_dir: %s', _default_cache_dir)
|
|
116
117
|
except ModuleNotFoundError:
|
|
117
|
-
|
|
118
|
+
_default_cache_dir = None
|
|
118
119
|
|
|
119
|
-
def __init__(self,
|
|
120
|
+
def __init__(self,
|
|
121
|
+
reader: UnicodeDataReader = UnicodeDataReader(),
|
|
122
|
+
cache_dir: Optional[Path] = None):
|
|
120
123
|
self._reader = reader
|
|
124
|
+
self._cache_dir = cache_dir
|
|
121
125
|
|
|
122
126
|
def read_lines(self, name: str) -> Iterable[str]:
|
|
123
|
-
|
|
124
|
-
if
|
|
125
|
-
return self._reader.read_lines(name)
|
|
126
|
-
|
|
127
|
-
cache = UnicodeDataCachedReader._cache_dir / name
|
|
128
|
-
if UnicodeDataCachedReader.is_caching_allowed and cache.exists():
|
|
127
|
+
cache = self._cache_path(name)
|
|
128
|
+
if cache and cache.exists():
|
|
129
129
|
_logger.debug('Reading cache %s', cache)
|
|
130
|
-
return cache.read_text().splitlines(keepends=True)
|
|
130
|
+
return cache.read_text(encoding='utf-8').splitlines(keepends=True)
|
|
131
131
|
|
|
132
132
|
lines = self._reader.read_lines(name)
|
|
133
133
|
|
|
134
|
-
cache
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
134
|
+
if cache:
|
|
135
|
+
cache.parent.mkdir(parents=True, exist_ok=True)
|
|
136
|
+
with cache.open('w', encoding='utf-8') as file:
|
|
137
|
+
_logger.debug('Writing cache %s', cache)
|
|
138
|
+
file.writelines(lines)
|
|
138
139
|
|
|
139
140
|
return lines
|
|
140
141
|
|
|
142
|
+
def _cache_path(self, name: str) -> Optional[Path]:
|
|
143
|
+
if not UnicodeDataCachedReader.is_caching_allowed:
|
|
144
|
+
return None
|
|
145
|
+
if self._cache_dir:
|
|
146
|
+
return self._cache_dir / name
|
|
147
|
+
cache_dir = UnicodeDataCachedReader._default_cache_dir
|
|
148
|
+
if cache_dir:
|
|
149
|
+
return cache_dir / name
|
|
150
|
+
return None
|
|
151
|
+
|
|
141
152
|
@staticmethod
|
|
142
153
|
def clear_cache(ignore_errors: bool = False):
|
|
143
|
-
cache_dir = UnicodeDataCachedReader.
|
|
144
|
-
if
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
shutil.rmtree(cache_dir, ignore_errors=ignore_errors)
|
|
154
|
+
cache_dir = UnicodeDataCachedReader._default_cache_dir
|
|
155
|
+
if cache_dir and cache_dir.exists():
|
|
156
|
+
_logger.debug('Deleting cache %s', cache_dir)
|
|
157
|
+
shutil.rmtree(cache_dir, ignore_errors=ignore_errors)
|
|
148
158
|
|
|
149
159
|
|
|
150
160
|
UnicodeDataReader.default = UnicodeDataCachedReader()
|