unicodedata-reader 1.3.3__tar.gz → 1.3.5__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (18)
  1. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/PKG-INFO +1 -1
  2. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/pyproject.toml +1 -1
  3. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/entry.py +14 -10
  4. unicodedata_reader-1.3.5/unicodedata_reader/set.py +84 -0
  5. unicodedata_reader-1.3.3/unicodedata_reader/set.py +0 -66
  6. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/LICENSE +0 -0
  7. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/README.md +0 -0
  8. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/__init__.py +0 -0
  9. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/__main__.py +0 -0
  10. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/bidi_brackets.py +0 -0
  11. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/cli.py +0 -0
  12. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/compressor.py +0 -0
  13. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/east_asian_width.py +0 -0
  14. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/emoji.py +0 -0
  15. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/general_category.py +0 -0
  16. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/line_break.py +0 -0
  17. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/reader.py +0 -0
  18. {unicodedata_reader-1.3.3 → unicodedata_reader-1.3.5}/unicodedata_reader/vertical_orientation.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unicodedata-reader
3
- Version: 1.3.3
3
+ Version: 1.3.5
4
4
  Summary:
5
5
  Home-page: https://github.com/kojiishi/unicodedata-reader
6
6
  License: Apache-2.0
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "unicodedata-reader"
7
- version = "1.3.3"
7
+ version = "1.3.5"
8
8
  description = ""
9
9
  authors = ["Koji Ishii <kojii@chromium.org>"]
10
10
  readme = "README.md"
@@ -16,7 +16,7 @@ from typing import Tuple
16
16
  _logger = logging.getLogger('UnicodeDataEntry')
17
17
 
18
18
 
19
- def u_hex(value):
19
+ def u_hex(value: int) -> str:
20
20
  return f'{value:04X}'
21
21
 
22
22
 
@@ -79,16 +79,24 @@ class UnicodeDataEntry(object):
79
79
  def is_in_range(self, code: int) -> bool:
80
80
  return code >= self.min and code <= self.max
81
81
 
82
+ @staticmethod
83
+ def to_codes(entries: Iterable['UnicodeDataEntry']):
84
+ return itertools.chain(*(e.range() for e in entries))
85
+
82
86
  @property
83
87
  def count(self):
84
88
  self.assert_range()
85
89
  return self.max - self.min + 1
86
90
 
87
- def range_as_str(self):
91
+ def range_as_str(self, converter: Callable[[int], str] = u_hex):
88
92
  self.assert_range()
93
+ min = converter(self.min)
89
94
  if self.min == self.max:
90
- return u_hex(self.min)
91
- return f'{u_hex(self.min)}..{u_hex(self.max)}'
95
+ return min
96
+ max = converter(self.max)
97
+ if min == max:
98
+ return min
99
+ return f'{min}..{max}'
92
100
 
93
101
  def to_str(self, separator: str = ';'):
94
102
  return separator.join((self.range_as_str(), str(self.value)))
@@ -268,18 +276,14 @@ class UnicodeDataEntries(object):
268
276
  """Returns an `Iterable` of `UnicodeDataEntry` for the given `pred`."""
269
277
  return (entry for entry in self if pred(entry.value))
270
278
 
271
- def codes_for(self, pred: Callable[[Any], bool]) -> Iterable[int]:
272
- """Returns an `Iterable` of Unicode code points for the given `pred`."""
273
- return itertools.chain(*(e.range() for e in self.filter(pred)))
274
-
275
279
  def add_to_set(self, pred: Callable[[Any], bool], set: set) -> None:
276
280
  """Add values `pred` returns `True` to `set[int]`."""
277
- for code in self.codes_for(pred):
281
+ for code in UnicodeDataEntry.to_codes(self.filter(pred)):
278
282
  set.add(code)
279
283
 
280
284
  def remove_from_set(self, pred: Callable[[Any], bool], set: set) -> None:
281
285
  """Remove values `pred` returns `True` from `set[int]`."""
282
- for code in self.codes_for(pred):
286
+ for code in UnicodeDataEntry.to_codes(self.filter(pred)):
283
287
  set.discard(code)
284
288
 
285
289
  def to_set(self, pred: Callable[[Any], bool]) -> set:
@@ -0,0 +1,84 @@
1
+ from typing import Any
2
+ from typing import Callable
3
+ from typing import Iterable
4
+ from typing import Optional
5
+
6
+ from unicodedata_reader.entry import *
7
+ from unicodedata_reader.reader import *
8
+
9
+
10
+ class Set(object):
11
+ """A simple wrapper of a `set` of Unicode code points."""
12
+
13
+ def __init__(self,
14
+ entries: Optional[UnicodeDataEntries] = None,
15
+ predicate: Optional[Callable[[Any], bool]] = None) -> None:
16
+ self.set = set()
17
+ if entries:
18
+ entries.add_to_set(predicate, self.set)
19
+
20
+ @staticmethod
21
+ def east_asian_width(*values: str) -> 'Set':
22
+ entries = UnicodeDataReader.default.east_asian_width()
23
+ if len(values) == 1:
24
+ value = values[0]
25
+ return Set(entries, lambda v: v == value)
26
+ s = set(values)
27
+ return Set(entries, lambda v: v in s)
28
+
29
+ @staticmethod
30
+ def general_category(*values: str) -> 'Set':
31
+ entries = UnicodeDataReader.default.general_category()
32
+ if len(values) == 1:
33
+ value = values[0]
34
+ return Set(entries, lambda v: v.startswith(value))
35
+
36
+ def predicate(v: str) -> bool:
37
+ for value in values:
38
+ if v.startswith(value):
39
+ return True
40
+ return False
41
+
42
+ return Set(entries, predicate)
43
+
44
+ @staticmethod
45
+ def scripts(*values: str) -> 'Set':
46
+ entries = UnicodeDataReader.default.scripts()
47
+ if len(values) == 1:
48
+ value = values[0]
49
+ return Set(entries, lambda v: v == value)
50
+ s = set(values)
51
+ return Set(entries, lambda v: v in s)
52
+
53
+ @staticmethod
54
+ def script_extensions(*values: str) -> 'Set':
55
+ entries = UnicodeDataReader.default.script_extensions()
56
+ if len(values) == 1:
57
+ value = values[0]
58
+ return Set(entries, lambda v: value in v)
59
+ s = set(values)
60
+ return Set(entries, lambda v: len(set(v) & s))
61
+
62
+ def __contains__(self, code_point: int) -> bool:
63
+ return code_point in self.set
64
+
65
+ def __iter__(self) -> Iterable[int]:
66
+ return self.set.__iter__()
67
+
68
+ def __isub__(self, other: 'Set') -> 'Set':
69
+ self.set -= other.set
70
+ return self
71
+
72
+ def __iand__(self, other: 'Set') -> 'Set':
73
+ self.set &= other.set
74
+ return self
75
+
76
+ def __ior__(self, other: 'Set') -> 'Set':
77
+ self.set |= other.set
78
+ return self
79
+
80
+ def add(self, code: int) -> None:
81
+ self.set.add(code)
82
+
83
+ def remove(self, code: int) -> None:
84
+ self.set.discard(code)
@@ -1,66 +0,0 @@
1
- from typing import Any
2
- from typing import Callable
3
- from typing import Iterable
4
- from typing import Optional
5
-
6
- from unicodedata_reader.entry import *
7
- from unicodedata_reader.reader import *
8
-
9
-
10
- class Set(object):
11
- """A simple wrapper of a `set` of Unicode code points."""
12
-
13
- def __init__(self,
14
- entries: Optional[UnicodeDataEntries] = None,
15
- pred: Optional[Callable[[Any], bool]] = None) -> None:
16
- self.set = set()
17
- if entries:
18
- self.add_entries(entries, pred)
19
-
20
- def __contains__(self, code_point: int) -> bool:
21
- return code_point in self.set
22
-
23
- def __iter__(self) -> Iterable[int]:
24
- return self.set.__iter__()
25
-
26
- def __isub__(self, other: 'Set') -> 'Set':
27
- self.set -= other.set
28
- return self
29
-
30
- def __iand__(self, other: 'Set') -> 'Set':
31
- self.set &= other.set
32
- return self
33
-
34
- def __ior__(self, other: 'Set') -> 'Set':
35
- self.set |= other.set
36
- return self
37
-
38
- def add(self, code: int) -> None:
39
- self.set.add(code)
40
-
41
- def remove(self, code: int) -> None:
42
- self.set.discard(code)
43
-
44
- def add_entries(self, entries: UnicodeDataEntries, pred: Callable[[Any],
45
- bool]):
46
- entries.add_to_set(pred, self.set)
47
-
48
- @staticmethod
49
- def east_asian_width(value: str) -> 'Set':
50
- reader = UnicodeDataReader.default
51
- return Set(reader.east_asian_width(), lambda v: v == value)
52
-
53
- @staticmethod
54
- def general_category(value: str) -> 'Set':
55
- reader = UnicodeDataReader.default
56
- return Set(reader.general_category(), lambda v: v.startswith(value))
57
-
58
- @staticmethod
59
- def scripts(value: str) -> 'Set':
60
- reader = UnicodeDataReader.default
61
- return Set(reader.scripts(), lambda v: v == value)
62
-
63
- @staticmethod
64
- def script_extensions(value: str) -> 'Set':
65
- reader = UnicodeDataReader.default
66
- return Set(reader.script_extensions(), lambda v: value in v)