unicodedata-reader 1.2.0__tar.gz → 1.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/PKG-INFO +2 -2
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/README.md +1 -1
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/pyproject.toml +2 -2
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/entry.py +32 -1
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/set.py +14 -21
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/LICENSE +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/__init__.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/__main__.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/bidi_brackets.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/cli.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/compressor.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/east_asian_width.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/emoji.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/general_category.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/line_break.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/reader.py +0 -0
- {unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/vertical_orientation.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: unicodedata-reader
|
|
3
|
-
Version: 1.2.0
|
|
3
|
+
Version: 1.3.1
|
|
4
4
|
Summary:
|
|
5
5
|
Home-page: https://github.com/kojiishi/unicodedata-reader
|
|
6
6
|
License: Apache-2.0
|
|
@@ -68,7 +68,7 @@ import unicodedata_reader
|
|
|
68
68
|
|
|
69
69
|
reader = unicodedata_reader.UnicodeDataReader.default
|
|
70
70
|
lb = reader.line_break()
|
|
71
|
-
print(lb
|
|
71
|
+
print(lb[0x41])
|
|
72
72
|
```
|
|
73
73
|
The example above prints `AL`,
|
|
74
74
|
the [Line_Break property] value for U+0041.
|
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "unicodedata-reader"
|
|
7
|
-
version = "1.2.0"
|
|
7
|
+
version = "1.3.1"
|
|
8
8
|
description = ""
|
|
9
9
|
authors = ["Koji Ishii <kojii@chromium.org>"]
|
|
10
10
|
readme = "README.md"
|
|
@@ -17,7 +17,7 @@ platformdirs = ">=2.2,<5.0"
|
|
|
17
17
|
|
|
18
18
|
[tool.poetry.dev-dependencies]
|
|
19
19
|
pytest = "*"
|
|
20
|
-
pytype =
|
|
20
|
+
pytype = "*"
|
|
21
21
|
tox = "^4.14.2"
|
|
22
22
|
yapf = "^0.40.2"
|
|
23
23
|
|
|
@@ -4,11 +4,13 @@ import logging
|
|
|
4
4
|
import re
|
|
5
5
|
import types
|
|
6
6
|
from typing import Any
|
|
7
|
+
from typing import Callable
|
|
7
8
|
from typing import Dict
|
|
8
9
|
from typing import Iterable
|
|
9
10
|
from typing import List
|
|
10
11
|
from typing import Optional
|
|
11
12
|
from typing import Sequence
|
|
13
|
+
from typing import Set
|
|
12
14
|
from typing import Union
|
|
13
15
|
from typing import Tuple
|
|
14
16
|
|
|
@@ -216,6 +218,9 @@ class UnicodeDataEntries(object):
|
|
|
216
218
|
self._ensure_multi_iterable()
|
|
217
219
|
return len(self._entries)
|
|
218
220
|
|
|
221
|
+
def __getitem__(self, code: int) -> Any:
|
|
222
|
+
return self.value(code)
|
|
223
|
+
|
|
219
224
|
def missing_value(self, code: int):
|
|
220
225
|
if self._missing_entries:
|
|
221
226
|
# `_missing_entries` can overlap, iterate all entries.
|
|
@@ -249,7 +254,7 @@ class UnicodeDataEntries(object):
|
|
|
249
254
|
self._ensure_multi_iterable()
|
|
250
255
|
return itertools.chain(*(e.range() for e in self._entries))
|
|
251
256
|
|
|
252
|
-
def value(self, code: int):
|
|
257
|
+
def value(self, code: int) -> Any:
|
|
253
258
|
"""Returns the value for the given code point."""
|
|
254
259
|
self._ensure_multi_iterable()
|
|
255
260
|
for entry in self._entries:
|
|
@@ -259,6 +264,32 @@ class UnicodeDataEntries(object):
|
|
|
259
264
|
return entry.value
|
|
260
265
|
return self.missing_value(code)
|
|
261
266
|
|
|
267
|
+
def filter(self, pred: Callable[[Any],
|
|
268
|
+
bool]) -> Iterable[UnicodeDataEntry]:
|
|
269
|
+
"""Returns an `Iterable` of `UnicodeDataEntry` for the given `pred`."""
|
|
270
|
+
return (entry for entry in self if pred(entry.value))
|
|
271
|
+
|
|
272
|
+
def codes_for(self, pred: Callable[[Any], bool]) -> Iterable[int]:
|
|
273
|
+
"""Returns an `Iterable` of Unicode code points for the given `pred`."""
|
|
274
|
+
return itertools.chain(*(e.range() for e in self.filter(pred)))
|
|
275
|
+
|
|
276
|
+
def add_to_set(self, pred: Callable[[Any], bool], set: Set[int]) -> None:
|
|
277
|
+
"""Add values `pred` returns `True` to `set[int]`."""
|
|
278
|
+
for code in self.codes_for(pred):
|
|
279
|
+
set.add(code)
|
|
280
|
+
|
|
281
|
+
def remove_from_set(self, pred: Callable[[Any], bool],
|
|
282
|
+
set: Set[int]) -> None:
|
|
283
|
+
"""Remove values `pred` returns `True` from `set[int]`."""
|
|
284
|
+
for code in self.codes_for(pred):
|
|
285
|
+
set.discard(code)
|
|
286
|
+
|
|
287
|
+
def to_set(self, pred: Callable[[Any], bool]) -> Set[int]:
|
|
288
|
+
"""Returns a `set[int]` of values `pred` returns `True`."""
|
|
289
|
+
s = set() # type: set[int]
|
|
290
|
+
self.add_to_set(pred, s)
|
|
291
|
+
return s
|
|
292
|
+
|
|
262
293
|
def values_for_code(self) -> Iterable[Any]:
|
|
263
294
|
"""Returns a list of values whose index is the Unicode code point.
|
|
264
295
|
|
|
@@ -1,18 +1,23 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
from typing import Callable
|
|
3
3
|
from typing import Iterable
|
|
4
|
+
from typing import Set
|
|
4
5
|
|
|
5
6
|
from unicodedata_reader.entry import *
|
|
6
7
|
from unicodedata_reader.reader import *
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class Set(object):
|
|
10
|
-
"""A simple set of Unicode code points."""
|
|
11
|
+
"""A simple wrapper of a `set` of Unicode code points."""
|
|
11
12
|
|
|
12
|
-
def __init__(self
|
|
13
|
-
|
|
13
|
+
def __init__(self,
|
|
14
|
+
entries: UnicodeDataEntries = None,
|
|
15
|
+
pred: Callable[[Any], bool] = None) -> None:
|
|
16
|
+
self.set = set() # type: Set[int]
|
|
17
|
+
if entries:
|
|
18
|
+
self.add_entries(entries, pred)
|
|
14
19
|
|
|
15
|
-
def
|
|
20
|
+
def contains(self, code_point: int) -> bool:
|
|
16
21
|
return code_point in self.set
|
|
17
22
|
|
|
18
23
|
def __iter__(self) -> Iterable[int]:
|
|
@@ -38,40 +43,28 @@ class Set(object):
|
|
|
38
43
|
|
|
39
44
|
def add_entries(self, entries: UnicodeDataEntries, pred: Callable[[Any],
|
|
40
45
|
bool]):
|
|
41
|
-
|
|
42
|
-
if pred(entry.value):
|
|
43
|
-
for code in entry.range():
|
|
44
|
-
self.set.add(code)
|
|
46
|
+
entries.add_to_set(pred, self.set)
|
|
45
47
|
|
|
46
48
|
@staticmethod
|
|
47
49
|
def east_asian_width(
|
|
48
50
|
value: str,
|
|
49
51
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
50
|
-
|
|
51
|
-
set.add_entries(reader.east_asian_width(), lambda v: v == value)
|
|
52
|
-
return set
|
|
52
|
+
return Set(reader.east_asian_width(), lambda v: v == value)
|
|
53
53
|
|
|
54
54
|
@staticmethod
|
|
55
55
|
def general_category(
|
|
56
56
|
value: str,
|
|
57
57
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
58
|
-
|
|
59
|
-
set.add_entries(reader.general_category(),
|
|
60
|
-
lambda v: v.startswith(value))
|
|
61
|
-
return set
|
|
58
|
+
return Set(reader.general_category(), lambda v: v.startswith(value))
|
|
62
59
|
|
|
63
60
|
@staticmethod
|
|
64
61
|
def scripts(
|
|
65
62
|
value: str,
|
|
66
63
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
67
|
-
|
|
68
|
-
set.add_entries(reader.scripts(), lambda v: v == value)
|
|
69
|
-
return set
|
|
64
|
+
return Set(reader.scripts(), lambda v: v == value)
|
|
70
65
|
|
|
71
66
|
@staticmethod
|
|
72
67
|
def script_extensions(
|
|
73
68
|
value: str,
|
|
74
69
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
75
|
-
|
|
76
|
-
set.add_entries(reader.script_extensions(), lambda v: value in v)
|
|
77
|
-
return set
|
|
70
|
+
return Set(reader.script_extensions(), lambda v: value in v)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/east_asian_width.py
RENAMED
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/general_category.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.2.0 → unicodedata_reader-1.3.1}/unicodedata_reader/vertical_orientation.py
RENAMED
|
File without changes
|