unicodedata-reader 1.3.3__tar.gz → 1.3.5__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/PKG-INFO +1 -1
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/pyproject.toml +1 -1
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/entry.py +14 -10
- unicodedata_reader-1.3.5/unicodedata_reader/set.py +84 -0
- unicodedata_reader-1.3.3/unicodedata_reader/set.py +0 -66
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/LICENSE +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/README.md +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/__init__.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/__main__.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/bidi_brackets.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/cli.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/compressor.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/east_asian_width.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/emoji.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/general_category.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/line_break.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/reader.py +0 -0
- {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/vertical_orientation.py +0 -0
|
@@ -16,7 +16,7 @@ from typing import Tuple
|
|
|
16
16
|
_logger = logging.getLogger('UnicodeDataEntry')
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def u_hex(value):
|
|
19
|
+
def u_hex(value: int) -> str:
|
|
20
20
|
return f'{value:04X}'
|
|
21
21
|
|
|
22
22
|
|
|
@@ -79,16 +79,24 @@ class UnicodeDataEntry(object):
|
|
|
79
79
|
def is_in_range(self, code: int) -> bool:
|
|
80
80
|
return code >= self.min and code <= self.max
|
|
81
81
|
|
|
82
|
+
@staticmethod
|
|
83
|
+
def to_codes(entries: Iterable['UnicodeDataEntry']):
|
|
84
|
+
return itertools.chain(*(e.range() for e in entries))
|
|
85
|
+
|
|
82
86
|
@property
|
|
83
87
|
def count(self):
|
|
84
88
|
self.assert_range()
|
|
85
89
|
return self.max - self.min + 1
|
|
86
90
|
|
|
87
|
-
def range_as_str(self):
|
|
91
|
+
def range_as_str(self, converter: Callable[[int], str] = u_hex):
|
|
88
92
|
self.assert_range()
|
|
93
|
+
min = converter(self.min)
|
|
89
94
|
if self.min == self.max:
|
|
90
|
-
return
|
|
91
|
-
|
|
95
|
+
return min
|
|
96
|
+
max = converter(self.max)
|
|
97
|
+
if min == max:
|
|
98
|
+
return min
|
|
99
|
+
return f'{min}..{max}'
|
|
92
100
|
|
|
93
101
|
def to_str(self, separator: str = ';'):
|
|
94
102
|
return separator.join((self.range_as_str(), str(self.value)))
|
|
@@ -268,18 +276,14 @@ class UnicodeDataEntries(object):
|
|
|
268
276
|
"""Returns an `Iterable` of `UnicodeDataEntry` for the given `pred`."""
|
|
269
277
|
return (entry for entry in self if pred(entry.value))
|
|
270
278
|
|
|
271
|
-
def codes_for(self, pred: Callable[[Any], bool]) -> Iterable[int]:
|
|
272
|
-
"""Returns an `Iterable` of Unicode code points for the given `pred`."""
|
|
273
|
-
return itertools.chain(*(e.range() for e in self.filter(pred)))
|
|
274
|
-
|
|
275
279
|
def add_to_set(self, pred: Callable[[Any], bool], set: set) -> None:
|
|
276
280
|
"""Add values `pred` returns `True` to `set[int]`."""
|
|
277
|
-
for code in self.codes_for(pred):
|
|
281
|
+
for code in UnicodeDataEntry.to_codes(self.filter(pred)):
|
|
278
282
|
set.add(code)
|
|
279
283
|
|
|
280
284
|
def remove_from_set(self, pred: Callable[[Any], bool], set: set) -> None:
|
|
281
285
|
"""Remove values `pred` returns `True` from `set[int]`."""
|
|
282
|
-
for code in self.codes_for(pred):
|
|
286
|
+
for code in UnicodeDataEntry.to_codes(self.filter(pred)):
|
|
283
287
|
set.discard(code)
|
|
284
288
|
|
|
285
289
|
def to_set(self, pred: Callable[[Any], bool]) -> set:
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from typing import Any
|
|
2
|
+
from typing import Callable
|
|
3
|
+
from typing import Iterable
|
|
4
|
+
from typing import Optional
|
|
5
|
+
|
|
6
|
+
from unicodedata_reader.entry import *
|
|
7
|
+
from unicodedata_reader.reader import *
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class Set(object):
|
|
11
|
+
"""A simple wrapper of a `set` of Unicode code points."""
|
|
12
|
+
|
|
13
|
+
def __init__(self,
|
|
14
|
+
entries: Optional[UnicodeDataEntries] = None,
|
|
15
|
+
predicate: Optional[Callable[[Any], bool]] = None) -> None:
|
|
16
|
+
self.set = set()
|
|
17
|
+
if entries:
|
|
18
|
+
entries.add_to_set(predicate, self.set)
|
|
19
|
+
|
|
20
|
+
@staticmethod
|
|
21
|
+
def east_asian_width(*values: str) -> 'Set':
|
|
22
|
+
entries = UnicodeDataReader.default.east_asian_width()
|
|
23
|
+
if len(values) == 1:
|
|
24
|
+
value = values[0]
|
|
25
|
+
return Set(entries, lambda v: v == value)
|
|
26
|
+
s = set(values)
|
|
27
|
+
return Set(entries, lambda v: v in s)
|
|
28
|
+
|
|
29
|
+
@staticmethod
|
|
30
|
+
def general_category(*values: str) -> 'Set':
|
|
31
|
+
entries = UnicodeDataReader.default.general_category()
|
|
32
|
+
if len(values) == 1:
|
|
33
|
+
value = values[0]
|
|
34
|
+
return Set(entries, lambda v: v.startswith(value))
|
|
35
|
+
|
|
36
|
+
def predicate(v: str) -> bool:
|
|
37
|
+
for value in values:
|
|
38
|
+
if v.startswith(value):
|
|
39
|
+
return True
|
|
40
|
+
return False
|
|
41
|
+
|
|
42
|
+
return Set(entries, predicate)
|
|
43
|
+
|
|
44
|
+
@staticmethod
|
|
45
|
+
def scripts(*values: str) -> 'Set':
|
|
46
|
+
entries = UnicodeDataReader.default.scripts()
|
|
47
|
+
if len(values) == 1:
|
|
48
|
+
value = values[0]
|
|
49
|
+
return Set(entries, lambda v: v == value)
|
|
50
|
+
s = set(values)
|
|
51
|
+
return Set(entries, lambda v: v in s)
|
|
52
|
+
|
|
53
|
+
@staticmethod
|
|
54
|
+
def script_extensions(*values: str) -> 'Set':
|
|
55
|
+
entries = UnicodeDataReader.default.script_extensions()
|
|
56
|
+
if len(values) == 1:
|
|
57
|
+
value = values[0]
|
|
58
|
+
return Set(entries, lambda v: value in v)
|
|
59
|
+
s = set(values)
|
|
60
|
+
return Set(entries, lambda v: len(set(v) & s))
|
|
61
|
+
|
|
62
|
+
def __contains__(self, code_point: int) -> bool:
|
|
63
|
+
return code_point in self.set
|
|
64
|
+
|
|
65
|
+
def __iter__(self) -> Iterable[int]:
|
|
66
|
+
return self.set.__iter__()
|
|
67
|
+
|
|
68
|
+
def __isub__(self, other: 'Set') -> 'Set':
|
|
69
|
+
self.set -= other.set
|
|
70
|
+
return self
|
|
71
|
+
|
|
72
|
+
def __iand__(self, other: 'Set') -> 'Set':
|
|
73
|
+
self.set &= other.set
|
|
74
|
+
return self
|
|
75
|
+
|
|
76
|
+
def __ior__(self, other: 'Set') -> 'Set':
|
|
77
|
+
self.set |= other.set
|
|
78
|
+
return self
|
|
79
|
+
|
|
80
|
+
def add(self, code: int) -> None:
|
|
81
|
+
self.set.add(code)
|
|
82
|
+
|
|
83
|
+
def remove(self, code: int) -> None:
|
|
84
|
+
self.set.discard(code)
|
|
@@ -1,66 +0,0 @@
|
|
|
1
|
-
from typing import Any
|
|
2
|
-
from typing import Callable
|
|
3
|
-
from typing import Iterable
|
|
4
|
-
from typing import Optional
|
|
5
|
-
|
|
6
|
-
from unicodedata_reader.entry import *
|
|
7
|
-
from unicodedata_reader.reader import *
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class Set(object):
|
|
11
|
-
"""A simple wrapper of a `set` of Unicode code points."""
|
|
12
|
-
|
|
13
|
-
def __init__(self,
|
|
14
|
-
entries: Optional[UnicodeDataEntries] = None,
|
|
15
|
-
pred: Optional[Callable[[Any], bool]] = None) -> None:
|
|
16
|
-
self.set = set()
|
|
17
|
-
if entries:
|
|
18
|
-
self.add_entries(entries, pred)
|
|
19
|
-
|
|
20
|
-
def __contains__(self, code_point: int) -> bool:
|
|
21
|
-
return code_point in self.set
|
|
22
|
-
|
|
23
|
-
def __iter__(self) -> Iterable[int]:
|
|
24
|
-
return self.set.__iter__()
|
|
25
|
-
|
|
26
|
-
def __isub__(self, other: 'Set') -> 'Set':
|
|
27
|
-
self.set -= other.set
|
|
28
|
-
return self
|
|
29
|
-
|
|
30
|
-
def __iand__(self, other: 'Set') -> 'Set':
|
|
31
|
-
self.set &= other.set
|
|
32
|
-
return self
|
|
33
|
-
|
|
34
|
-
def __ior__(self, other: 'Set') -> 'Set':
|
|
35
|
-
self.set |= other.set
|
|
36
|
-
return self
|
|
37
|
-
|
|
38
|
-
def add(self, code: int) -> None:
|
|
39
|
-
self.set.add(code)
|
|
40
|
-
|
|
41
|
-
def remove(self, code: int) -> None:
|
|
42
|
-
self.set.discard(code)
|
|
43
|
-
|
|
44
|
-
def add_entries(self, entries: UnicodeDataEntries, pred: Callable[[Any],
|
|
45
|
-
bool]):
|
|
46
|
-
entries.add_to_set(pred, self.set)
|
|
47
|
-
|
|
48
|
-
@staticmethod
|
|
49
|
-
def east_asian_width(value: str) -> 'Set':
|
|
50
|
-
reader = UnicodeDataReader.default
|
|
51
|
-
return Set(reader.east_asian_width(), lambda v: v == value)
|
|
52
|
-
|
|
53
|
-
@staticmethod
|
|
54
|
-
def general_category(value: str) -> 'Set':
|
|
55
|
-
reader = UnicodeDataReader.default
|
|
56
|
-
return Set(reader.general_category(), lambda v: v.startswith(value))
|
|
57
|
-
|
|
58
|
-
@staticmethod
|
|
59
|
-
def scripts(value: str) -> 'Set':
|
|
60
|
-
reader = UnicodeDataReader.default
|
|
61
|
-
return Set(reader.scripts(), lambda v: v == value)
|
|
62
|
-
|
|
63
|
-
@staticmethod
|
|
64
|
-
def script_extensions(value: str) -> 'Set':
|
|
65
|
-
reader = UnicodeDataReader.default
|
|
66
|
-
return Set(reader.script_extensions(), lambda v: value in v)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/east_asian_width.py
RENAMED
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/general_category.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/vertical_orientation.py
RENAMED
|
File without changes
|