unicodedata-reader 1.3.7__tar.gz → 1.3.9__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- unicodedata_reader-1.3.9/.github/dependabot.yml +25 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/.github/workflows/ci.yml +18 -11
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/.github/workflows/publish.yml +3 -3
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/PKG-INFO +3 -3
- unicodedata_reader-1.3.9/Taskfile.yml +41 -0
- unicodedata_reader-1.3.9/js/GeneralCategory.js +26 -0
- unicodedata_reader-1.3.9/js/LineBreak.js +26 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/pyproject.toml +8 -9
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/__main__.py +9 -9
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/bidi_brackets.py +12 -11
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/cli.py +51 -35
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/compressor.py +43 -33
- unicodedata_reader-1.3.9/src/unicodedata_reader/east_asian_width.py +29 -0
- unicodedata_reader-1.3.9/src/unicodedata_reader/east_asian_width_common.py +50 -0
- unicodedata_reader-1.3.9/src/unicodedata_reader/emoji.py +32 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/entry.py +58 -54
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/general_category.py +2 -3
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/line_break.py +4 -5
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/reader.py +54 -42
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/set.py +16 -13
- unicodedata_reader-1.3.9/src/unicodedata_reader/vertical_orientation.py +29 -0
- unicodedata_reader-1.3.9/tests/cache/EastAsianWidth +2721 -0
- unicodedata_reader-1.3.9/tests/cache/LineBreak +3709 -0
- unicodedata_reader-1.3.9/tests/cache/ScriptExtensions +235 -0
- unicodedata_reader-1.3.9/tests/cache/Scripts +3182 -0
- unicodedata_reader-1.3.9/tests/cache/extracted/DerivedGeneralCategory +4368 -0
- unicodedata_reader-1.3.9/tests/cache/extracted/DerivedName +45828 -0
- unicodedata_reader-1.3.9/tests/cli_test.py +31 -0
- unicodedata_reader-1.3.9/tests/conftest.py +20 -0
- unicodedata_reader-1.3.9/tests/entry_test.py +190 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/tests/line_break_test.py +10 -11
- unicodedata_reader-1.3.9/tests/reader_test.py +30 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/tests/set_test.py +12 -12
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/tox.ini +1 -1
- unicodedata_reader-1.3.9/uv.lock +386 -0
- unicodedata_reader-1.3.7/.github/dependabot.yml +0 -10
- unicodedata_reader-1.3.7/js/GeneralCategory.js +0 -26
- unicodedata_reader-1.3.7/js/LineBreak.js +0 -26
- unicodedata_reader-1.3.7/precommit.sh +0 -16
- unicodedata_reader-1.3.7/src/unicodedata_reader/east_asian_width.py +0 -30
- unicodedata_reader-1.3.7/src/unicodedata_reader/emoji.py +0 -38
- unicodedata_reader-1.3.7/src/unicodedata_reader/vertical_orientation.py +0 -30
- unicodedata_reader-1.3.7/tests/cli_test.py +0 -31
- unicodedata_reader-1.3.7/tests/conftest.py +0 -6
- unicodedata_reader-1.3.7/tests/entry_test.py +0 -166
- unicodedata_reader-1.3.7/tests/reader_test.py +0 -30
- unicodedata_reader-1.3.7/uv.lock +0 -829
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/.gitignore +0 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/.yapfignore +0 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/LICENSE +0 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/README.md +0 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/js/LineBreak.html +0 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/js/template.js +0 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/__init__.py +0 -0
- {unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/tests/__init__.py +0 -0
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Please see the documentation for all configuration options:
|
|
2
|
+
# https://docs.github.com/en/code-security/dependabot/ecosystems-supported-by-dependabot/supported-ecosystems-and-repositories
|
|
3
|
+
|
|
4
|
+
version: 2
|
|
5
|
+
updates:
|
|
6
|
+
- package-ecosystem: "uv"
|
|
7
|
+
directory: "/"
|
|
8
|
+
schedule:
|
|
9
|
+
interval: "weekly"
|
|
10
|
+
groups:
|
|
11
|
+
dependencies:
|
|
12
|
+
patterns:
|
|
13
|
+
- '*'
|
|
14
|
+
update-types:
|
|
15
|
+
- "minor"
|
|
16
|
+
- "patch"
|
|
17
|
+
|
|
18
|
+
- package-ecosystem: "github-actions"
|
|
19
|
+
directory: "/"
|
|
20
|
+
schedule:
|
|
21
|
+
interval: "weekly"
|
|
22
|
+
groups:
|
|
23
|
+
actions:
|
|
24
|
+
patterns:
|
|
25
|
+
- '*'
|
|
@@ -16,25 +16,32 @@ jobs:
|
|
|
16
16
|
strategy:
|
|
17
17
|
fail-fast: false
|
|
18
18
|
matrix:
|
|
19
|
-
python-version: ["3.
|
|
19
|
+
python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
|
|
20
20
|
|
|
21
21
|
steps:
|
|
22
|
-
- uses: actions/checkout@
|
|
22
|
+
- uses: actions/checkout@v7
|
|
23
23
|
|
|
24
24
|
# https://docs.astral.sh/uv/guides/integration/github/
|
|
25
25
|
- name: Install uv and set up Python ${{ matrix.python-version }}
|
|
26
|
-
uses: astral-sh/setup-uv@
|
|
26
|
+
uses: astral-sh/setup-uv@v7
|
|
27
27
|
with:
|
|
28
28
|
python-version: ${{ matrix.python-version }}
|
|
29
29
|
|
|
30
|
+
# https://taskfile.dev/docs/installation#github-actions
|
|
31
|
+
- name: Install Task
|
|
32
|
+
uses: go-task/setup-task@v2
|
|
33
|
+
|
|
30
34
|
- name: Install dependencies
|
|
31
|
-
run:
|
|
32
|
-
|
|
35
|
+
run: uv sync --all-extras --dev
|
|
36
|
+
|
|
37
|
+
- name: Test
|
|
38
|
+
run: task test
|
|
39
|
+
|
|
40
|
+
- name: Type
|
|
41
|
+
run: task type
|
|
33
42
|
|
|
34
|
-
- name:
|
|
35
|
-
run:
|
|
36
|
-
uv run pytest
|
|
43
|
+
- name: Lint
|
|
44
|
+
run: task lint
|
|
37
45
|
|
|
38
|
-
- name:
|
|
39
|
-
run:
|
|
40
|
-
uv run ruff check
|
|
46
|
+
- name: Format check
|
|
47
|
+
run: task fmtchk
|
|
@@ -12,16 +12,16 @@ jobs:
|
|
|
12
12
|
publish:
|
|
13
13
|
runs-on: ubuntu-latest
|
|
14
14
|
steps:
|
|
15
|
-
- uses: actions/checkout@
|
|
15
|
+
- uses: actions/checkout@v7
|
|
16
16
|
|
|
17
17
|
- name: Set up Python
|
|
18
|
-
uses: actions/setup-python@
|
|
18
|
+
uses: actions/setup-python@v6
|
|
19
19
|
with:
|
|
20
20
|
python-version: '3.x'
|
|
21
21
|
|
|
22
22
|
# https://docs.astral.sh/uv/guides/integration/github/
|
|
23
23
|
- name: Install uv
|
|
24
|
-
uses: astral-sh/setup-uv@
|
|
24
|
+
uses: astral-sh/setup-uv@v7
|
|
25
25
|
|
|
26
26
|
- name: Install Dependencies
|
|
27
27
|
run: |
|
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unicodedata-reader
|
|
3
|
-
Version: 1.3.
|
|
3
|
+
Version: 1.3.9
|
|
4
4
|
Project-URL: repository, https://github.com/kojiishi/unicodedata-reader
|
|
5
5
|
Author-email: Koji Ishii <kojii@chromium.org>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
7
7
|
License-File: LICENSE
|
|
8
|
-
Requires-Python: >=3.
|
|
9
|
-
Requires-Dist: platformdirs>=4.
|
|
8
|
+
Requires-Python: >=3.10
|
|
9
|
+
Requires-Dist: platformdirs>=4.10.0
|
|
10
10
|
Description-Content-Type: text/markdown
|
|
11
11
|
|
|
12
12
|
[](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# yaml-language-server: $schema=https://taskfile.dev/schema.json
|
|
2
|
+
|
|
3
|
+
version: '3'
|
|
4
|
+
|
|
5
|
+
tasks:
|
|
6
|
+
default:
|
|
7
|
+
deps: [check]
|
|
8
|
+
|
|
9
|
+
check:
|
|
10
|
+
- task: tests
|
|
11
|
+
- task: type
|
|
12
|
+
- task: lint
|
|
13
|
+
- task: fmtchk
|
|
14
|
+
- git diff --exit-code
|
|
15
|
+
|
|
16
|
+
fix: "{{.RUN}} ruff check --fix {{.CLI_ARGS}}"
|
|
17
|
+
fmt: "{{.RUN}} ruff format {{.CLI_ARGS}}"
|
|
18
|
+
fmtchk: "{{.RUN}} ruff format --check {{.CLI_ARGS}}"
|
|
19
|
+
lint: "{{.RUN}} ruff check {{.CLI_ARGS}}"
|
|
20
|
+
test: "{{.RUN}} pytest tests {{.PYTEST}} {{.CLI_ARGS}}"
|
|
21
|
+
tests: "{{.RUN}} tox {{.TOX}} {{.CLI_ARGS}}"
|
|
22
|
+
type: "{{.RUN}} ty check {{.TY}} {{.CLI_ARGS}}"
|
|
23
|
+
|
|
24
|
+
gen:
|
|
25
|
+
- "{{.RUN}} unicodedata-reader lb -t js/template.js {{.GEN}} {{.CLI_ARGS}}"
|
|
26
|
+
- "{{.RUN}} unicodedata-reader gc -t js/template.js {{.GEN}} {{.CLI_ARGS}}"
|
|
27
|
+
|
|
28
|
+
install-git-hooks:
|
|
29
|
+
desc: Create git hooks
|
|
30
|
+
cmds:
|
|
31
|
+
- echo '#!/bin/sh' > .git/hooks/pre-push
|
|
32
|
+
- echo 'task check' >> .git/hooks/pre-push
|
|
33
|
+
- cmd: chmod +x .git/hooks/pre-push
|
|
34
|
+
platforms: [linux, darwin]
|
|
35
|
+
|
|
36
|
+
vars:
|
|
37
|
+
GEN: -fv
|
|
38
|
+
PYTEST:
|
|
39
|
+
RUN: uv run
|
|
40
|
+
TOX: -p
|
|
41
|
+
TY:
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uGeneralCategoryAsInt = (function () {
|
|
2
|
+
const bytes = atob("h2ABQgNCBAUCBgIHIoIoIkYihikEAgUKCwqGLAQGBQaIAAECYw0CCg0ODwYQDQoNBjEKDCIKEQ4SUQKFSQaBSYVsBoFsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCSwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDCkMCQwJTCkMCQwpDEksaQwpDElMKQwpDAkMCQwpDAksCQwpDEkMCQwpLA4JTG4JEwwJEwwJEwwJDAkMCQwJDAkMCQwJDAksCQwJDAkMCQwJDAkMCQwJDAksCRMMCQxJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCYFMKQwpLAkMaQwJDAkMCQwJkQwuhiyENGqCdIMqgRSBShQKFIQKm3UJDAkMFAoJDDYUTAIJdioJAkkWCRYpDIQJFoIJiEwJLElMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmBDAkMBgkMKSyMSYtsCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDA2BFTcJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwpDAkMCQwJDAkMCQwJLAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDBaJKTYUgSKKDAIHNi0DFosVBxUCNQI1AhWBdoZOdm4iglaBMEYiAyItglUCEEKHbhSCLoUVgihiLhWYTgIOgVUQDYE1NDUNdS6CKE4tDoMiFhAOFYcuhlU2lg6CVQ6DNoIoiA6CFTQNQhQ2FSOFLnUUghUUVRSBFTaDQhaGDlU2AhaCToEWhW4KgU4wgRaCFYoOFIV1EId1GI0uFRgVDliBdXgVOA6BVYIuNSKCKAIUg04VOBaBbjYuNoUuFoFOFg5WbjYVDlh1Njg2OBUOgXYYdi4WTjU2giguI4ExDQMOAhU2NRgWgS52LjaFLhaBThYuFi4WLjYVFlg1djU2VVYVgVZuFg6BVoIoNU4VAoI2NRgWgg4WThaFLhaBThYuFoEONhUOWIEVFjUYFjgVNg6DVi41NoIoAgOBVg6BNRYVOBaBbjYuNoUuFoFOFi4WgQ42FQ4YFRh1Njg2OBWBVjUYdi4WTjU2gigNDoExgjYVDhaBLlZOFm5WLhYOFi5WLlZOVoJudjgVOFZYFlgVNg6BNhiDNoIoUYEtAw2BFhVYFYFuFk4WhU4Wg242FQ5VeBZVFnWBVjUWThYuNi41NoIogVYCgVENDhU4AoFuFk4WhU4Wgi4WgQ42FQ4YFYEYFhU4Fjg1gVY4gRZOFi41NoIoFi4YgnY1OIIOFk4Wig41Dlh1FlgWWBUODXZOGIFRTjU2giiCEQ2BLhYVOBaELlaFbhaCDhYONoFOVhV2WFUWFRaBeIE2gig2OAKCdotuFS6BVXYDgS4UgXUCgigiiRYuFg4WgQ4WhW4WDhaCLhUughUONoEOFhQWgVUWgig2bod2Dk2DQg0CTTWBLYIogjENFQ0VDRUEBQQFOIFuFohudoM1GIEVAjWBDoJVFoh1FoFtFYEtFi2BAm0iiRaKTjh1GIE1GDU4NQ6CKIEigS44NW5VDlgugVhOdYMOFTg1gTgVDhiCKFgVLYkpFgmBFgk2ikwCFEzSDhZuNoFOFg4WbjaKDhZuNogOFm42gU4WDhZuNoNOFo4OFm42kE42VYIChHFWg26CLYE2lSk2gSw2B4Gabg0ChA4Bhi4EBVaSTkJZgW6BVoQuVRiCFoRONRgighaELjWCdoMOFk4WNYJ2jG41GIFVgXgVOIJVQhRCAw4VNoIogTaCMYE2gSIHYlUQFYIogTaIThSNDoFWgQ41iC4VDoEWkS6CNodOFlV4NVh2OBWBOFV2DVYigiiHLjaBDoJWim52hi6BNoIoEVaILYVONTgVNiKNDhgVGIFVFhUYFTiBdYE4gjU2FYIogTaCKIE2gUIUgSI2gzUXh1U2gnWEdnUYi04VGIEVGBWBGBU4gW4WIoIogUKCLYIVgg1CNRiHLhh1ODUYVS6CKIpuFRg1WBUYVTiBdmKIboF4gXU4NVaBAoIoVk6CKIcugTQiggwJDIEWikk2SYFigXZVAoMVGIFVbhWBLhUuGDUOgRaKbI9UgwwUiCyJFI91CQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmCDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAmCDIFpgSw2gSk2gWyBaYFsgWmBLDaBKTaBbBYJFgkWCRYJgWyBaYMsNoFsgXOBbIFzgWyBc4EMFixpEwoMSkwWLGkTSmw2LGkWSoFsgQlKNkwWLGkTKhaCQYEQgSciDxIELxIED4FiGhuBEAGCAg8SYitCBgQFgkIGAguCIgGBEBaCMBEUNoExRgQFFIIxRgQFFoMUVogjgzaDFXcVV4J1g1YtCW0JLQxJLEkMDQktBoEJgS0JDQkNCQ1pDQxpDG4MLSwpgQYJbA0GLQwNg3GIWQkMeREtdoEGgQ0mbQYtBi0GgU0Gh00mLQYNBodNwmaBbQQFBAWEbSaBTQQFlA0Ghy2GBoltgSaRbYU2gk2FFo5xky2FMa1NBoINBo0tgWabTQa9bQQFBAUEBQQFBAUEBQQFhzGKbYEGBAWHRgQFBAUEBQQFBAWDZr9toEYEBQQFBAUEBQQFBAUEBQQFBAUEBQQFj0YEBQQFh2YEBcAmi22FBi2BJolNNqIti2mLbAkMSSwJDAkMCQxpDAksCYEsNEkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCSyBLQkMCQxVCQyBFmIRIoksFgyBFgw2jW6BVhQCgzYVhU6CFoFOFoFOFoFOFoFOFoFOFoFOFoFOFoFOFod1Ig8SDxJCDxICDxKCAgciBwIPEiIPEgQFBAUEBQQFgQIUgiInYgcCBIMCLUIEBQQFBAUEBQeINoYtFpYNgna1LYY2g20BQg0UDhkEBQQFBAUEBQQFLQQFBAUEBQQFBwQlDYIZdTgHgRQtWRQOAi0WlS42NSo0DgeWLgJUDoEWik4Wly4WLXGCLYduiS2CFg2DbodNFoIxhy2BcQ2DUYdtgjGJTYNRz22M726PbamFDhSCnU5WjU2CFolugTQiwm4UQoNugiguhHYJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMDhVXAoI1AhQJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMNDWRLoI5NYEigXaFSoIUKgkMCQwJDAkMCQwJDAlMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwJDAkMCQwUgWwJDAkMKQwJDAkMCQwJDBQqCQwJDA4JDAlMCQwJDAkMCQwJDAkMCQwJDAkMCQyBCQyBCQwJDAkMCQwJDAkMCQwJDGkMCQwpDAkMCQwJDAkMCQwJDAkMCYR2dAkMDjQMgU4VThVuFYVOODUYbRVWgTEtAw2BNoxuYoF2OIwug3g1gXYigiiBNoQ1gS5CDgIuFYIohm6BdSKFToJVOIJWAocOVlUYi04VOHU4NViDAhYUgih2IoEOFRSCDoIogQ4Wig6BNTg1ODWCFk4VgW4VGDaCKDZig24UgS5NDhgVGIwuFQ5VLjWBDjUOFQ6Fdi4UIoJOGDU4Ig40GBWCNoEuNoEuNoEughaBThaBThaKTAp0ggwUKnaTbIhOOBU4FTgCGBU2giiBNpXoboJ2hU52jA52g/98jL992y42mi6JNoFMgnaBDIEWDhWCLgaDDhaBDhYOFi4WLhaaboQKg23aTgUEg22Pbi2NLoFth3aCbgNNg3WBQgQFAoE2g3UCJysEBQQFBAUEBQQFBAUEBQQFIgQFYktCFmIHBAUEBQQFQgYHRhYCAyJ2gQ4WoU42EBZCA0IEBQIGAgcigigiRiKGKQQCBQoLCoYsBAYFBgQFAgQFIoIuFIsONIdOVoEuNoEuNoEuNk5WIwYKDSMWDWYtgjZQLTaCbhaGLhaEThYuFoNONoMuiDaeToEWQnaLEVaCDY0ZcYQNMU0Wgw1WDYtWiw0VoDaHDlaMDoNWFYZRdoducYIWhG4ZgW4ZgRaJLoEVgRaHLhYCiG52gW4CgRmKNolpiWyTLjaCKIE2iGl2iGx2iW6BdoxuglYCgkkWg0kWgUkWKRaCTBaDTBaBTBYsVoxugnbNToIWhS6CNoFuhXaBNBaKNBaCFJEWgS42DhaKbhYuVg42hU4WAoFxhU4tgVGHToF2ghGLdoROFi6BFoERhS6BMVYChi6BFgKGLok2jW52MS6DcTaLMQ5VFjWBFnVuFk4Whw42VXYVghGBVoICgVaHDjEChw5Rh3aBbg2GbjV2gRGBQoIWjS5WgUKFLjaBcYROgRaBcYQugVZignaBUZN2kg6NVoxJgxaMTIFWgTGIbnWBdoIogTaCKG4UDoUpVoEVBxSFLIF2JrN2h1EWii4WNQc2LoN2ThQugXYCgW2IFoE1hw6CMQ6BdoUuglVxgQKFNoQudWKJNoUOgVGEdoVOghYYFRiNDoNVgUJ2hHGCKBUuNQ6CFlUYiw5YdTg1IhBiFYI2EDaGDoFWgiiBNlWIboEVGIF1FoIoYg44DoF2iE4VIg6CFjUYi25YghU4bmJ1AhgVgigOAg5CFoRxglaELhaGDlhVOBUYNYEiFS4VjzaBThYOFm4Wg04Wgi4CgTaLThVYgXWBFoIogTY1OBaBbjYuNoUuFoFOFi4WgQ4WNQ44FXg2ODZYNg6BNhiBFoEOODaBVVaBFYJWgi4WDjYOFokuFg5YgTUWGDYYFngWOBUYFQ4VDiIWIoF2NYcWjQ5YgXU4VRgVboECgigiFgIVToc2i25YgTUYFXg1GDUuAg6BdoIoqTaLTlh1Nng1GDWFQm41iDaLbliBdTgVGDVCDoJWgiiBNoMChFaKThUYFTiBNRgVDgKBNoIogTaEaIZ2hk42FRgVOHUYgRV2gigxQg2BTq4Wim5YghUYNQKYdodph2yCKIIRgnaBbjYONoFuFi4WhW6BOBY4NjUYFQ4YDhgVQoIWgiiRNoFuNolOWHU2NXgVDgIOGIZWDoI1iW6BNRgOdYFiFYF2DoE1OFWLLoMVGDVCDoECgxaSDoFWgiKVNhUYVRgVGJV2iA4CgzaCKIE2gg4WiQ4YgVUWgTUYFQ6BAoI2giiEUVYihy42hTUWGIFVGDUYNZIWgU4WLhaJLoE1VhUWNRaBVQ4VgXaCKIE2gS4WLhaHboEYFjUWOBUYFQ6BVoIogTaKDhQudoIovTaETjU4IoFWNQ4Ygw4WiC44gRVWOBUYFYMCgigVlRYOg1aFEYFtY4QNgxYCgeYumTabWRaBAoJWsG6FknaYDiKDFoKLboNwFYEug1WCNofmToEWgZFOja4Why6CdVhVgiiDsTaBjg6BVodOFoIodiKTThaCKIE2hy42gRUCgjaLboFVgQJtdAINgjaCKBaBURaFDoEWhE7rdlSJbjRCgiixNodph2yFUWKBFoYJNoYMinaSTnYVDo1YgVZ1gxSPdjQCFBWCVjg0WYIWjrUuihaHbpgWnE6Q/xZ0FoFUFjQWyE6DVg6HFk42DoM2boF24m6EwHaaToEWgw5Wgg6BVoIuNg01AnCH1na7bYIoTVbsbYE2hU2DVoNtBoNWizU2hVWCFpxtjna9LYI2iU02jm04VU2BOIFwgXUtgVWHLXWPDYUWkC1VDZ42hHGCdoRxgnaVTYIWhhGhVoYphiyGKYFMFoQshimGLAkWKTYJNik2aRaBaWwWDBaBTBaCTIYphiwpFmk2gWkWgUkWhiwpFmkWgQkWCVaBSRaGLIYphiyGKYYshimGLIYphiyGKYYshimGbDaGCQaGDAaBLIYJBoYMBoEshgkGhgwGgSyGCQaGDAaBLIYJBoYMBoEsCQw2jCj/bY1VbYw1gW0Vgy0VLYECg1aBFRaDVYKTdoIsDoRsgTaBLLUWgVUWhBU2gVUWNRaBFYEWjzSIFhWbdosOVoFVgVQ2gih2Dg3PdocuFYQWim51giiBFgPzdoZOFHWCKLU2hy41DoIodgKvdodOFk4VLhWBTjWBDhWBdg4Ut3aBThZuFi4Wg04WsQ42ghGBVYoWiCmILIFVFHaCKHYigcQWjlENUQNxknaLEQ2DUbA2bhaGThYuFg42DhaCLhZuFg4WDoE2DnYOFg4WDhZOFi4WDjYOFg4WDhYOFg4WLhYONm4WgU4WbhZuFg4Wgi4WhA6BFk4WgQ4WhA6MdibDNoptdphtgnaDTTaDTRaDTRaJDYI2gxGoDY12hw2DFoptdoINgVYtgzaBLaY2vk2BCoG2DVaEDVaDDVa2LYE2gm12DYNWgm12jW2BdoItgTaJbYF2hy02gm12LYM2ggaJVtVtgXaDLTaDDVaCTVaODRYNdoNtNoJtdoItgVakTRaWbYIoDYKBFtO3bod2iMcuNoujLjaOzA6DVoGbLoToNoGHLoL4NonSToEWkMouiuXhVhCHNpdwn3a7df+Ddv//PTb//z02");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 5) + 1);
|
|
14
|
+
entries.push(value & 31);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uGeneralCategoryValues = ["Cc","Zs","Po","Sc","Ps","Pe","Sm","Pd","Nd","Lu","Sk","Pc","Ll","So","Lo","Pi","Cf","No","Pf","Lt","Lm","Mn","Cn","Me","Mc","Nl","Zl","Zp","Cs","Co"];
|
|
26
|
+
function uGeneralCategory(c) { return uGeneralCategoryValues[uGeneralCategoryAsInt(c)]; }
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
const uLineBreakAsInt = (function () {
|
|
2
|
+
const bytes = atob("hAABAkMEiEAFBgcICQoIBwsMCAkNDg0PhFBNgQgGjQgLCQyOCAsBEQiCQBKMQBMLCoEJCFQIFAcIAUgKCVQVCIIUB4EUC4sIFI8IFIHnCBQVgRQVFEgUgwiBVAgUCBWPSK1AgxOGAINIVoFIDQiBVoMIFggWiUgW70iDANJIFpJIVpdIDRdWSAkWlgAXAAhACEAGAINWjRiBVoFYSIUWglCBCIEKTUiFAAYAgQaVCIoAhFAKUIEIALEIBgiDABAIgkBIQAiBQEiEUIlIFkgAjkiNAFasCIUACIZWhFCQCIQAgUgNBghWAEmKSIFACIQACIEACIIAVocIFowIgQBWCBaFCIIWj0hQghaEAJRIi0AQkACaSIEACIhACIMAhEhAQYRQiAiBABaDSFZIVopIFoMIFgiBFoFIVgAIgwBWQFaBAAiDVgCBVkgWgQhAVoRQSEqCCAoICUgAVoEAFoJIgVZIVopIFoMIFkgWSBZIVgAWggCBVkBWgQCBFgCDFoFIFgiDFoRQQIEIAAiEVoEAFoQIFoEIFopIFoMIFkgWgghWAAiDQBaBABaBAFYIhxZIQFaEUAgJgxYIgkAWgQAWg0hWSFaKSBaDCBZIFoIIVgAIgwBWQFaBAIMWgQCBVkgWgQhAVoRQg0iEVgAIFoJIgRaBCBaBSIEWSBYIFkiBFkiBFoEIgRaFSIFWggCBFoEAFoFAVgiCVgCGVoRQhAgJCIIWggCDSBaBCBaLCBaHSFYACIMAFoEAFoFAgxZAFoEIFkhWSEBWhFCDFhWECIEAFYNIFoEIFosIFoRIFoIIVgAIgwAWgQAWgUCDFkCCFoEIFkhAVoRQFkgAhVaBQIQIFoEIFpQIQAiDABaBABaBQEiBVoEIAIRIQFaEUIQICoJIFoEAFohIgRaLSBaECBYIVoMIgRYAgVaCQBYAFoNAglaEUFZACIVWnFmBVgmHGQiEUEGSFlkWGRaCGRaLWRYZFosZVoIZFhkWgxkWhFBWgVmPVgiBVQhVE1UBE4IGEwgGgQhAgkiEUIRIAQAIAAgACxELEUCDSBaRSIFWhkABggABQIIIhQAWkUAWQYJIAIJIFkhVARWCCFOSFp9ZhFBBgUifWYRQglmSSBYIghYIVpdIr1qjW6tcpAgWgUhWgwgWCBaBSFaUCBaBSFaQCBaBSFaDCBYIFoFIVocIFpwIFoFIVqEIVoEACAGNCIEWjEiCVqpIVoJIVheCvwgBjEgLEYEWpQiBAYUIgxaISIFAhBaJCIEAQYQWiEhAhVaGCBaBCBZAhVapWUEdGQEIAQlZVoRQglaESIJWSEZBFQhGCIEAEwCEUIJWrAiDFoIIQJBIAAiCFqJIhFaPCBaFQIFWhUCBVgiBFkaEUI5ZVoIZhRaVWYFWjFmCVoUQgRZZmwiCAFZInxkWjhlWAIRQglaEUIJWhllWlkBWhQATiVaCAJceh0Afg14WQYRgQSGBQYRhhACEIYEBgQCOSIYASIRQgkiSYIVAYoNWk0iJQIEWggGEUIEWgQiEUJFIQYUIghaVCFaFCINWgQAIigCBSACCSABIgQAIghbfSIYAE5ZAE4EAgYpIVoJIVpJIVoJIVoNIFggWCBYIFo8IVpoIFocIFoZIVoJIFokIVoEIFoMIFQgWgwETgQEjACRAFxNXJVQIRwuBBwsHVEiBJgFDggATg0oIRxRdgkgNCxGBHYVIAQqBQQiBASeBSBaEQEhWFINICxEUCIFUg0gLERaGCIEWgwkKhkkKgUkKSQoJCocJkACHFoEICggUgQgKhAgUSAmESFSDSBSRSIcUCIVUgUiEVIcIFEiBVoRUm0gUCBSVCBQIVIEIVEgUgQgUCBRJCBSBSBRIgVRIFAgUCIJUCBSCCIFUgUhUhEgUgQgUgggUhghUSIFUSFRIVIhIVEhUhggUgQgUhQgUjAgUlwgmi0gLEQsRgkgUgwhhhggLEeIIgWGaSIpWhQiKFs8UCKVUgUiSFIUIh1RIgVSESFQIgxSDSFRIVIFIVEhUgUiBFEgUSIFUh0iBVIQIFIdIgWEIVEgUgUhUgUhhVCEIgSEoYYwIgSGBSBQIFI4IVAiBFAgUIVQIVAgUhwghjkhUjgiFYYFUIQiBIRRhgRRhVCFUgSEIFIFIVCGCVIIhFGEoIVSDYYEIYYFopAgUgQiCRwhGIYEICxELEQsRCxELEQsRCxGOVJgICxGPCAsRCxELEQsRCxGHSAGBwEgLEQsRCxELEQsRCxELEQsRCxELEQsRnwgLEQsRj0gLEYGrCIIUjEhWgbwIgQBIghYGgQEIBgGSSBYIghYIVptIgxYIAYZWAIsIhBaDCBaDCBaDCBaDCBaDCBaDCBaDCBaDCBaPQIZHg0EIFwsBSEdIRwsRCxELEQsRgUEGCEEIQYIIZYEBCBcBC4NBCAEIQYEIRgsMCwwLDAsMF5BWjGEWrCGFVuphjFaHYQFRYR1hCxELEQsRCxELEWELEQsRCxELER0LUYRhgkCCIQCCIV2BIRYpISkhKSEpISmMISmPISkhKSEpgmEpgmFpVkCBXSEdKSEpISkhKSEpjCEpjyEpISkhKYJhKYJhaYFhHSldIYIWlSEWrmEWqmGEFiGHaY8hFpNhg1Sbt2GfSNKKIR2EuyGBFpshhBaWSEGBhggBBgGHSIRQSIlWlwiBQAiEQI9IQKdIQAiCAYNW7giJVogIAIEIAIFIAIsIggCBSACBFoNICgiCVplIVUaDVkCYSIhAg1ZBhFCCVohAhEgVSACEUI1Ig0BBiwiGAIUWCI4agRaBQJcehgAfgmGBAYFhFgGEYIFWYYdZhFCCGRaUIIZAhBaBAQCDQUBWhGBWIYEBsRmLVoIZhQiCAEGBCECEVoJIVoJIVoJIhBaDCBaDCBadSIFWuQiDQAFAVoRQglYqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSsqjSuFVosbgVaYHIFWh/9smP9Wgf9hgwiFVoIIghYYAIRYCIYYFoIYFhgWWBZYFoRYgfZIEQvHSI9WhUgKgQiHQIERXUYLESaCVhMAEwATAFMAEwATAFMAgiELEQsRCxELEQsRCxELEQsRYQsRgyERIREWXUYhCxELEQsRg2EWIQkKIYFWgggWwwhWJxYGYQkKYQsRYREhEYUhXYEhBo0hCyERjiELIREhC1ELUR0hhGmWIV2PIYEWgmFWgmFWgmFWgSGBFgoJgSFJFoMIhFaBAC0UVoVIFoxIFokIFkgWhwhWhkiQVr0IghaBAYFWlgiBFqtIFoYIgRYIlxaWCADAVo4IgRaYCIcWAI0IgVaRSIQWjkiCFpJIggCCFo5IFgGRSIFWg0gBggiUVs5IVoRQglaRSIFWkUiBVpNIg1aZSIUWhUgWhwgWgwgWSBaFCBaHCBaDCBZIgRaZSIVWgZsIhBaKSIRWg0iLVoJIFpRIFoQIohaCSFYIFpVIFkiBFghWiwgWAaMIg1aECJdWiQgWSIIWkAiBFgGMSIIWjQiSVptIgVaJSFaXCIEAFkCCFoFAgUgWgQgWjghWgQCBVgCECIMWg0EIgxafSI9WkghAgVaCCIJBJoQWmkiBFoMBikhWjQiCFoxIgxaBSIVWgwinVqQImxaZCIYWmQiDFpRIgUCDVoRQglaEUI1IgRaCABeLCINWSOdWjwgWlEgWQBdWSIdWgkiDVgGDSJAWgkCTSINWikiFAIQIilaISIFAgUiSVo1IiVaLCIQWgQBumR6GQB9BgiGBVolhhGAAXkAehBYTgQCWCIUASBCBQQCEVhBWjAiDFoRQglaBAJFIhkAWhFCBQQhACINWkQgACBUIhBaBAJdIhkCBSEEIAYFACECEUAgVCIEBFolIhRaISBaMCIVAQQhBCABIAJ5WgwgWCBaBSBaHCBaESAGCVpcIhUCCFoRQglaBQBaDXlZeVopeFoMeFl4Wgh4WQAGDAFZAVkAfViCCVgCCFgFgXkBWgwCBFoIAhRaEYBYgViAWYJFeFiGEABYAVgAWgUAWgUAfLgCBIRZhg1ZAjhaaCIhAgUiBQQiEUEEWCACBCI5Wl0iJQIFIg1aEUNJWlwiDAFaEABVBRoEIhwGBSECQVpdIiABBSIUWhFCCVoYViRaVCIYASIJWhFCCVolQjVaNGVaHGYFWhFBZgQGDWdwWlUiHAAixVp9IhFCECIVWCIMeVh5Wg14WXhaLXoJAFkBWgQAfLgAuQIEBhBaEYKJWg0hWkwiDAFaDAAgVCACNFgiEQJNIgwAIgUAVCIFBFQgAg1YIhQCWSIdAgQEIgRVBhhakCIMWhFWqVoNAq1aQSIZWhFCCVoQIFpIIg0AWg0AIggGEVoRQiQiBFhUGjkhWikAWhkCkFoMIFkgWkkiCQIEWABZAFoMACACDVoRQglaCSBZIFo9IggAWQBaCAAiDFoRQglaVSIFWhFD6VohgAYFAQYMWQC4Ahh4WkF6DAIEWgUAfQYUhhGAAqhYIhxaOCIFKiAiGFgGDzEiyVrcIFoIBhRbhSIqlVrEIhhaCq0iBC4ERkUgRgQgLEQsR9wgLUdkIC4MTCxGBEwsRCxEAgkiHAIRWj80IghaB5kgLEbsImtwWjmCIQIRghuJWgpwIgxaPCBaEUIFWQacIFoRQglaOSFaCAAGEVpdIgwCBAYRIAQiEVoRQFoMIFooIghaJCIHXVpZIQYRQ4larCEFIghaMCFaMCJVWpQiBVgAImwCDFoFAhgifVoFdE4UWQF2BIYQWmv9hgepIlBYIjyGwFrkhof4WgUgWgwgWSBaBkSGHFimOFoEpVimGVoFpg1aBxWGJgVa1CIIWhgiBFoQIgxaESFYIQAGBQI+tVvdIhFCBCIEWgdlIglaLCIcWiAiHFpZAVosAhBa5SJ1W+kiEVpMIVp1IggCBCIpASIMAjkiBQJ4IihagSIEACLxWiUiFVolIhVarCIQWjAjDFqoIFqMIFkhWCFZIVoFIFoVIFggWgwgWoAgWgUhWg0gWgwgWjUgWgUgWgggWCIEWgwgWgalIVoGRSFaYUIH/SJsAgUiYQINIAIZIAEiBQQiHFoIAFocAhKdWjwiCVoJI6haDABaIAFaDABZAFoIAghaeSJAWALdWlgiBFoMAgwhWhFCBVkiBn1aOSACIFpVIgUCEUIIWCYHnVo1IgUCEUOpWjkhACIRQgVYI31aPCBaBCABIAIMIQIIIAINWSO9WgwgWgUgWSBaHCBbiCFaECIMAlBahSIMACIFWhFCBVkuDiBadCAqBCAqBSKVWngjgVoFIFo0IFkgWCFYIFoRIFoFIFggWCIJWCIFWCBYIFggWgQgWSBYIVggWCBYIFggWCBZIFghWgUgWgwgWgUgWgUgWCBaESBaICIIWgQgWgggWiAiZVkiBhlb/YYYUgQiOVEicVIJInhQIm2GMb4HCISiKYUiLIUiCIQiCIYEoYShhgSiWYYIwoGFoYYUoiiGJKIEhKIFhgSghgSiDISghKIZhCCEIIQiCISiBYQghSKYhgwiHYYZIhiGLSJRhaIFhKIohKIFhaJ4hg0iLYYJIpSGBKIEhgiiSSIEHgR2BSJEhKIdhgSiEISiFISiZIblIgSGBSIIhqgiVIYVIgWGbSINhhEiCYZNIg2GOSGGFSIFhSIZhhAiTIYVIKGEog2GDaIJhKIQhhGhhgSibYSieIWghaCEoiCGBKCGGKJBhq0i1IYEolGGEKIMhyQgWrUiEUAiCFoP+YVaB//5hVoH//mGUgIEWAI5Wr0C/VvdAhf6GVg==");
|
|
3
|
+
const len = bytes.length;
|
|
4
|
+
const entries = []
|
|
5
|
+
let value = 0;
|
|
6
|
+
for (let i = 0; i < len; ++i) {
|
|
7
|
+
const byte = bytes.charCodeAt(i);
|
|
8
|
+
if (byte & 0x80) {
|
|
9
|
+
value = (value | (byte & 0x7F)) << 7;
|
|
10
|
+
continue;
|
|
11
|
+
}
|
|
12
|
+
value |= byte;
|
|
13
|
+
entries.push((value >> 6) + 1);
|
|
14
|
+
entries.push(value & 63);
|
|
15
|
+
value = 0;
|
|
16
|
+
}
|
|
17
|
+
return function (c) {
|
|
18
|
+
for (let i = 0; i < entries.length; i += 2) {
|
|
19
|
+
c -= entries[i];
|
|
20
|
+
if (c < 0)
|
|
21
|
+
return entries[i + 1];
|
|
22
|
+
}
|
|
23
|
+
}
|
|
24
|
+
})();
|
|
25
|
+
const uLineBreakValues = ["CM","BA","LF","BK","CR","SP","EX","QU","AL","PR","PO","OP","CP","IS","HY","SY","NU","CL","NL","GL","AI","BB","XX","HH","HL","SA","JL","JV","JT","NS","AK","VI","AS","ID","VF","ZW","ZWJ","B2","IN","WJ","EB","CJ","H2","H3","SG","CB","AP","RI","EM"];
|
|
26
|
+
function uLineBreak(c) { return uLineBreakValues[uLineBreakAsInt(c)]; }
|
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "unicodedata-reader"
|
|
3
|
-
version = "1.3.
|
|
3
|
+
version = "1.3.9"
|
|
4
4
|
description = ""
|
|
5
5
|
authors = [{name = "Koji Ishii", email="kojii@chromium.org"}]
|
|
6
6
|
readme = "README.md"
|
|
7
7
|
license = "Apache-2.0"
|
|
8
|
-
requires-python = ">=3.
|
|
8
|
+
requires-python = ">=3.10"
|
|
9
9
|
dependencies = [
|
|
10
|
-
"platformdirs>=4.
|
|
10
|
+
"platformdirs>=4.10.0",
|
|
11
11
|
]
|
|
12
12
|
|
|
13
13
|
[project.urls]
|
|
@@ -15,12 +15,11 @@ repository = "https://github.com/kojiishi/unicodedata-reader"
|
|
|
15
15
|
|
|
16
16
|
[dependency-groups]
|
|
17
17
|
dev = [
|
|
18
|
-
"pytest>=
|
|
19
|
-
"
|
|
20
|
-
"
|
|
21
|
-
"tox>=
|
|
22
|
-
"
|
|
23
|
-
"yapf>=0.43.0",
|
|
18
|
+
"pytest>=9.1.1",
|
|
19
|
+
"ruff>=0.15.18",
|
|
20
|
+
"tox>=4.55.1",
|
|
21
|
+
"tox-uv>=1.35.2",
|
|
22
|
+
"ty>=0.0.51",
|
|
24
23
|
]
|
|
25
24
|
|
|
26
25
|
[project.scripts]
|
|
@@ -12,12 +12,12 @@ import unicodedata_reader.vertical_orientation as vo
|
|
|
12
12
|
def main():
|
|
13
13
|
args = sys.argv
|
|
14
14
|
sub_commands = {
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
15
|
+
"bidi": lambda: bidi_brackets.dump_bidi_brackets(),
|
|
16
|
+
"ea": lambda: ea.UnicodeEastAsianWidthDataCli().main(),
|
|
17
|
+
"emoji": lambda: emoji.UnicodeEmojiDataCli().main(),
|
|
18
|
+
"gc": lambda: gc.UnicodeGeneralCategoryDataCli().main(),
|
|
19
|
+
"lb": lambda: lb.UnicodeLineBreakDataCli().main(),
|
|
20
|
+
"vo": lambda: vo.UnicodeVerticalOrientationDataCli().main(),
|
|
21
21
|
}
|
|
22
22
|
if len(args) > 1:
|
|
23
23
|
func = sub_commands.get(args[1])
|
|
@@ -27,9 +27,9 @@ def main():
|
|
|
27
27
|
return
|
|
28
28
|
|
|
29
29
|
name = pathlib.Path(args[0]).name
|
|
30
|
-
sub_commands =
|
|
31
|
-
print(f
|
|
30
|
+
sub_commands = "|".join(sub_commands.keys())
|
|
31
|
+
print(f"usage: {name} {sub_commands} [options...]", file=sys.stderr)
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
if __name__ ==
|
|
34
|
+
if __name__ == "__main__":
|
|
35
35
|
main()
|
{unicodedata_reader-1.3.7 → unicodedata_reader-1.3.9}/src/unicodedata_reader/bidi_brackets.py
RENAMED
|
@@ -13,27 +13,28 @@ def dump_bidi_brackets():
|
|
|
13
13
|
|
|
14
14
|
def bidi_brackets_type(code):
|
|
15
15
|
bracket = bidi_brackets.get(code)
|
|
16
|
-
return bracket.type if bracket else
|
|
16
|
+
return bracket.type if bracket else "x"
|
|
17
17
|
|
|
18
18
|
columns = {
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
19
|
+
"Code": lambda code, ch: u_hex(code),
|
|
20
|
+
"Char": lambda code, ch: chr(code),
|
|
21
|
+
"Bidi_Paired_Bracket_Type": lambda code, ch: bidi_brackets_type(code),
|
|
22
|
+
"EAW": lambda code, ch: unicodedata.east_asian_width(ch),
|
|
23
|
+
"Script": lambda code, ch: scripts.get(code),
|
|
24
|
+
"ScriptExt": lambda code, ch: str(script_extensions.get(code, [])),
|
|
25
25
|
}
|
|
26
|
-
print(f
|
|
26
|
+
print(f"# {' '.join(columns.keys())}")
|
|
27
27
|
last_block = None
|
|
28
28
|
for code in get_unicodes_from_args(bidi_brackets.keys()):
|
|
29
29
|
block = blocks[code]
|
|
30
30
|
if block != last_block:
|
|
31
|
-
print(f
|
|
31
|
+
print(f"# {block}")
|
|
32
32
|
last_block = block
|
|
33
33
|
ch = chr(code)
|
|
34
34
|
values = (func(code, ch) for func in columns.values())
|
|
35
|
-
|
|
35
|
+
values = ("" if v is None else str(v) for v in values)
|
|
36
|
+
print(f"{' '.join(values)} # {unicodedata.name(chr(code))}")
|
|
36
37
|
|
|
37
38
|
|
|
38
|
-
if __name__ ==
|
|
39
|
+
if __name__ == "__main__":
|
|
39
40
|
dump_bidi_brackets()
|
|
@@ -8,6 +8,7 @@ from typing import Callable
|
|
|
8
8
|
from typing import Dict
|
|
9
9
|
from typing import Iterable
|
|
10
10
|
from typing import Optional
|
|
11
|
+
from typing import Sequence
|
|
11
12
|
import unicodedata
|
|
12
13
|
|
|
13
14
|
from unicodedata_reader import *
|
|
@@ -16,7 +17,8 @@ from unicodedata_reader import *
|
|
|
16
17
|
def _to_unicodes_from_str(text):
|
|
17
18
|
while text:
|
|
18
19
|
match = re.match(
|
|
19
|
-
r
|
|
20
|
+
r"([uU]\+?)?([0-9a-fA-F]{4,5})(-([0-9a-fA-F]{4,5}))?,?\s*", text
|
|
21
|
+
)
|
|
20
22
|
if match:
|
|
21
23
|
prefix = match.group(1)
|
|
22
24
|
hex = match.group(2)
|
|
@@ -27,7 +29,7 @@ def _to_unicodes_from_str(text):
|
|
|
27
29
|
yield from range(code, int(hex_end, 16) + 1)
|
|
28
30
|
else:
|
|
29
31
|
yield code
|
|
30
|
-
text = text[match.end():]
|
|
32
|
+
text = text[match.end() :]
|
|
31
33
|
continue
|
|
32
34
|
code = ord(text[0])
|
|
33
35
|
yield code
|
|
@@ -42,7 +44,7 @@ def to_unicodes(text):
|
|
|
42
44
|
|
|
43
45
|
def get_unicodes_from_args(default=None):
|
|
44
46
|
parser = argparse.ArgumentParser()
|
|
45
|
-
parser.add_argument(
|
|
47
|
+
parser.add_argument("text", nargs="+" if default is None else "*")
|
|
46
48
|
args = parser.parse_args()
|
|
47
49
|
if args.text:
|
|
48
50
|
return to_unicodes(args.text)
|
|
@@ -51,8 +53,8 @@ def get_unicodes_from_args(default=None):
|
|
|
51
53
|
|
|
52
54
|
def u_printable_chr(ch):
|
|
53
55
|
gc = unicodedata.category(ch)
|
|
54
|
-
if gc ==
|
|
55
|
-
return
|
|
56
|
+
if gc == "Cc":
|
|
57
|
+
return ""
|
|
56
58
|
return ch
|
|
57
59
|
|
|
58
60
|
|
|
@@ -60,7 +62,7 @@ def u_name_or_empty(ch):
|
|
|
60
62
|
try:
|
|
61
63
|
return unicodedata.name(ch)
|
|
62
64
|
except ValueError:
|
|
63
|
-
return
|
|
65
|
+
return ""
|
|
64
66
|
|
|
65
67
|
|
|
66
68
|
def _init_logging(verbose):
|
|
@@ -73,6 +75,14 @@ def _init_logging(verbose):
|
|
|
73
75
|
|
|
74
76
|
|
|
75
77
|
class UnicodeDataCli(object):
|
|
78
|
+
text: Optional[Sequence[str]]
|
|
79
|
+
clear_cache: bool
|
|
80
|
+
no_cache: bool
|
|
81
|
+
name: Optional[str]
|
|
82
|
+
template: Optional[pathlib.Path]
|
|
83
|
+
output: Optional[pathlib.Path]
|
|
84
|
+
verbose: int
|
|
85
|
+
_entries: UnicodeDataEntries
|
|
76
86
|
|
|
77
87
|
def __init__(self):
|
|
78
88
|
self._parse_args()
|
|
@@ -80,39 +90,43 @@ class UnicodeDataCli(object):
|
|
|
80
90
|
def _columns(self) -> Dict[str, Callable[[int, str], Any]]:
|
|
81
91
|
columns = self._core_columns()
|
|
82
92
|
columns = dict(
|
|
83
|
-
itertools.chain(
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
93
|
+
itertools.chain(
|
|
94
|
+
{
|
|
95
|
+
"Code": lambda code, ch: "U" + u_hex(code),
|
|
96
|
+
"Char": lambda code, ch: u_printable_chr(ch),
|
|
97
|
+
}.items(),
|
|
98
|
+
columns.items(),
|
|
99
|
+
{
|
|
100
|
+
"Name": lambda code, ch: u_name_or_empty(ch),
|
|
101
|
+
}.items(),
|
|
102
|
+
)
|
|
103
|
+
)
|
|
89
104
|
return columns
|
|
90
105
|
|
|
91
106
|
def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
|
|
92
107
|
raise NotImplementedError()
|
|
93
108
|
|
|
94
|
-
def _unicodes(self) ->
|
|
109
|
+
def _unicodes(self) -> Iterable[int]:
|
|
95
110
|
if self.text:
|
|
96
111
|
return to_unicodes(self.text)
|
|
97
112
|
return self._default_unicodes()
|
|
98
113
|
|
|
99
|
-
def _default_unicodes(self) ->
|
|
114
|
+
def _default_unicodes(self) -> Iterable[int]:
|
|
100
115
|
return self._entries.unicodes()
|
|
101
116
|
|
|
102
117
|
def print(self):
|
|
103
118
|
columns = self._columns()
|
|
104
|
-
print(
|
|
119
|
+
print("\t".join(key for key in columns.keys()))
|
|
105
120
|
for code in self._unicodes():
|
|
106
121
|
try:
|
|
107
122
|
ch = chr(code)
|
|
108
123
|
values = (func(code, ch) for func in columns.values())
|
|
109
|
-
values = (
|
|
110
|
-
print(
|
|
124
|
+
values = ("" if v is None else str(v) for v in values)
|
|
125
|
+
print("\t".join(values))
|
|
111
126
|
except UnicodeEncodeError:
|
|
112
127
|
continue
|
|
113
128
|
|
|
114
|
-
def substitute_template(self, template: pathlib.Path,
|
|
115
|
-
output: pathlib.Path):
|
|
129
|
+
def substitute_template(self, template: pathlib.Path, output: pathlib.Path):
|
|
116
130
|
entries = self._entries
|
|
117
131
|
entries.fill_missing_values()
|
|
118
132
|
entries.map_values_to_int()
|
|
@@ -122,22 +136,24 @@ class UnicodeDataCli(object):
|
|
|
122
136
|
|
|
123
137
|
def _parse_args(self):
|
|
124
138
|
parser = argparse.ArgumentParser()
|
|
125
|
-
parser.add_argument(
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
parser.add_argument(
|
|
129
|
-
parser.add_argument(
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
parser.add_argument(
|
|
136
|
-
parser.add_argument(
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
139
|
+
parser.add_argument("text", nargs="*", help="show properties for the text")
|
|
140
|
+
parser.add_argument("-f", "--clear-cache", action="store_true")
|
|
141
|
+
parser.add_argument("-F", "--no-cache", action="store_true")
|
|
142
|
+
parser.add_argument("--name", help="$NAME in the template")
|
|
143
|
+
parser.add_argument(
|
|
144
|
+
"-t",
|
|
145
|
+
"--template",
|
|
146
|
+
type=pathlib.Path,
|
|
147
|
+
help="generate a file from the template",
|
|
148
|
+
)
|
|
149
|
+
parser.add_argument("-o", "--output", type=pathlib.Path)
|
|
150
|
+
parser.add_argument(
|
|
151
|
+
"-v",
|
|
152
|
+
"--verbose",
|
|
153
|
+
help="increase output verbosity",
|
|
154
|
+
action="count",
|
|
155
|
+
default=0,
|
|
156
|
+
)
|
|
141
157
|
parser.parse_args(namespace=self)
|
|
142
158
|
_init_logging(self.verbose) # pytype: disable=attribute-error
|
|
143
159
|
if self.clear_cache:
|
|
@@ -9,7 +9,7 @@ from typing import Optional
|
|
|
9
9
|
|
|
10
10
|
from unicodedata_reader import *
|
|
11
11
|
|
|
12
|
-
_logger = logging.getLogger(
|
|
12
|
+
_logger = logging.getLogger("UnicodeDataCompressor")
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
def _init_logging(verbose: int):
|
|
@@ -22,7 +22,6 @@ def _init_logging(verbose: int):
|
|
|
22
22
|
|
|
23
23
|
|
|
24
24
|
class UnicodeDataCompressor(object):
|
|
25
|
-
|
|
26
25
|
def __init__(self, entries: UnicodeDataEntries):
|
|
27
26
|
self._entries = entries
|
|
28
27
|
|
|
@@ -62,16 +61,23 @@ class UnicodeDataCompressor(object):
|
|
|
62
61
|
assert entry.value < (1 << value_bits)
|
|
63
62
|
assert entry.count > 0
|
|
64
63
|
combined = ((entry.count - 1) << value_bits) | entry.value
|
|
65
|
-
_logger.debug(
|
|
66
|
-
|
|
67
|
-
|
|
64
|
+
_logger.debug(
|
|
65
|
+
"%04X %s=%d: %d -> %X",
|
|
66
|
+
entry.min,
|
|
67
|
+
entries.values_for_int()[entry.value],
|
|
68
|
+
entry.value,
|
|
69
|
+
entry.count,
|
|
70
|
+
combined,
|
|
71
|
+
)
|
|
68
72
|
bytes.extend(self._to_bytes(combined))
|
|
69
73
|
return bytes
|
|
70
74
|
|
|
71
|
-
def substitute_template(
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
+
def substitute_template(
|
|
76
|
+
self,
|
|
77
|
+
template: pathlib.Path,
|
|
78
|
+
output: Optional[pathlib.Path] = None,
|
|
79
|
+
name: Optional[str] = None,
|
|
80
|
+
) -> str:
|
|
75
81
|
entries = self._entries
|
|
76
82
|
bytes = self.compress()
|
|
77
83
|
base64bytes = base64.b64encode(bytes)
|
|
@@ -79,15 +85,20 @@ class UnicodeDataCompressor(object):
|
|
|
79
85
|
value_bits = self._bitsize
|
|
80
86
|
name = name or entries.name
|
|
81
87
|
assert name
|
|
82
|
-
_logger.info(
|
|
83
|
-
|
|
84
|
-
|
|
88
|
+
_logger.info(
|
|
89
|
+
"%s: Bytes=%d, Base64=%d, #values=%d (%d bits)",
|
|
90
|
+
name,
|
|
91
|
+
len(bytes),
|
|
92
|
+
len(base64bytes),
|
|
93
|
+
len(values_for_int),
|
|
94
|
+
value_bits,
|
|
95
|
+
)
|
|
85
96
|
mapping = {
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
97
|
+
"NAME": name,
|
|
98
|
+
"BASE64BYTES": base64bytes.decode("ascii"),
|
|
99
|
+
"VALUE_BITS": str(value_bits),
|
|
100
|
+
"VALUE_MASK": str((1 << value_bits) - 1),
|
|
101
|
+
"VALUE_LIST": ",".join(f'"{v}"' for v in values_for_int),
|
|
91
102
|
}
|
|
92
103
|
|
|
93
104
|
text = template.read_text()
|
|
@@ -95,13 +106,13 @@ class UnicodeDataCompressor(object):
|
|
|
95
106
|
text = text.substitute(mapping)
|
|
96
107
|
|
|
97
108
|
if output:
|
|
98
|
-
if str(output) ==
|
|
109
|
+
if str(output) == "-":
|
|
99
110
|
sys.stdout.write(text)
|
|
100
111
|
else:
|
|
101
112
|
if output.is_dir():
|
|
102
|
-
output = output / f
|
|
103
|
-
output.write_text(text)
|
|
104
|
-
_logger.info(
|
|
113
|
+
output = output / f"{name}{template.suffix}"
|
|
114
|
+
output.write_text(text, newline="\n")
|
|
115
|
+
_logger.info("Saved to %s", output)
|
|
105
116
|
|
|
106
117
|
return text
|
|
107
118
|
|
|
@@ -109,16 +120,14 @@ class UnicodeDataCompressor(object):
|
|
|
109
120
|
def main():
|
|
110
121
|
this_dir = pathlib.Path(__file__).resolve().parent
|
|
111
122
|
parser = argparse.ArgumentParser()
|
|
112
|
-
parser.add_argument(
|
|
113
|
-
parser.add_argument(
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
parser.add_argument(
|
|
117
|
-
parser.add_argument(
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
action='count',
|
|
121
|
-
default=0)
|
|
123
|
+
parser.add_argument("--name", default="LineBreak")
|
|
124
|
+
parser.add_argument(
|
|
125
|
+
"--template", type=pathlib.Path, default=this_dir.parent / "js" / "template.js"
|
|
126
|
+
)
|
|
127
|
+
parser.add_argument("-o", "--output", type=pathlib.Path)
|
|
128
|
+
parser.add_argument(
|
|
129
|
+
"-v", "--verbose", help="increase output verbosity", action="count", default=0
|
|
130
|
+
)
|
|
122
131
|
args = parser.parse_args()
|
|
123
132
|
_init_logging(args.verbose)
|
|
124
133
|
|
|
@@ -130,8 +139,9 @@ def main():
|
|
|
130
139
|
output = args.output
|
|
131
140
|
compressor = UnicodeDataCompressor(entries)
|
|
132
141
|
compressor.substitute_template(
|
|
133
|
-
template, output=output if output else template.parent, name=args.name
|
|
142
|
+
template, output=output if output else template.parent, name=args.name
|
|
143
|
+
)
|
|
134
144
|
|
|
135
145
|
|
|
136
|
-
if __name__ ==
|
|
146
|
+
if __name__ == "__main__":
|
|
137
147
|
main()
|