unicodedata-reader 0.1.6__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (19) hide show
  1. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/PKG-INFO +20 -5
  2. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/README.md +14 -2
  3. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/pyproject.toml +7 -7
  4. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/__init__.py +1 -0
  5. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/__main__.py +2 -0
  6. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/cli.py +16 -10
  7. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/compressor.py +3 -2
  8. unicodedata_reader-0.2.0/unicodedata_reader/east_asian_width.py +30 -0
  9. unicodedata_reader-0.2.0/unicodedata_reader/emoji.py +38 -0
  10. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/entry.py +29 -13
  11. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/general_category.py +1 -0
  12. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/line_break.py +1 -0
  13. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/reader.py +5 -0
  14. unicodedata_reader-0.2.0/unicodedata_reader/set.py +72 -0
  15. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/vertical_orientation.py +1 -0
  16. unicodedata-reader-0.1.6/setup.py +0 -34
  17. unicodedata-reader-0.1.6/unicodedata_reader/emoji.py +0 -21
  18. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/LICENSE +0 -0
  19. {unicodedata-reader-0.1.6 → unicodedata_reader-0.2.0}/unicodedata_reader/bidi_brackets.py +0 -0
@@ -1,17 +1,20 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unicodedata-reader
3
- Version: 0.1.6
3
+ Version: 0.2.0
4
4
  Summary:
5
5
  Home-page: https://github.com/kojiishi/unicodedata-reader
6
6
  License: Apache-2.0
7
7
  Author: Koji Ishii
8
8
  Author-email: kojii@chromium.org
9
- Requires-Python: >=3.8,<3.10
9
+ Requires-Python: >=3.8
10
10
  Classifier: License :: OSI Approved :: Apache Software License
11
11
  Classifier: Programming Language :: Python :: 3
12
12
  Classifier: Programming Language :: Python :: 3.8
13
13
  Classifier: Programming Language :: Python :: 3.9
14
- Requires-Dist: platformdirs (>=2.2.0,<3.0.0)
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Requires-Dist: platformdirs (>=2.2,<5.0)
15
18
  Project-URL: Repository, https://github.com/kojiishi/unicodedata-reader
16
19
  Description-Content-Type: text/markdown
17
20
 
@@ -36,6 +39,7 @@ that can read the property values of the [Unicode Character Database]
36
39
  in browsers.
37
40
  Please see the [JavaScript] section below.
38
41
 
42
+ [General_Category property]: http://unicode.org/reports/tr44/#General_Category
39
43
  [Line_Break property]: http://unicode.org/reports/tr44/#Line_Break
40
44
  [Unicode Character Database]: https://unicode.org/reports/tr44/
41
45
  [unicodedata]: https://docs.python.org/3/library/unicodedata.html
@@ -79,10 +83,21 @@ The [`UnicodeDataCompressor` class] in this package
79
83
  can generate JavaScript functions that can read the property values
80
84
  of the [Unicode Character Database] in browsers.
81
85
 
82
- Please see [LineBreak.js] for an example of the generated functions
83
- and [LineBreak.html] for an example usage.
86
+ The following examples are available in the "`js`" directory:
87
+ * [GeneralCategory.js] is a generated JavaScript file
88
+ for the Unicode [General_Category property].
89
+ * [LineBreak.js] is a generated JavaScript file
90
+ for the Unicode [Line_Break property].
91
+ * [LineBreak.html] is an example usage of [LineBreak.js].
92
+
93
+ The following command generates a JavaScript file for the [Line_Break property]
94
+ using `js/template.js` as the template file:
95
+ ```sh
96
+ unicodedata-reader lb -t js/template.js
97
+ ```
84
98
 
85
99
  [`UnicodeDataCompressor` class]: https://github.com/kojiishi/unicodedata-reader/blob/main/unicodedata_reader/compressor.py
100
+ [GeneralCategory.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/GeneralCategory.js
86
101
  [LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html
87
102
  [LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js
88
103
 
@@ -19,6 +19,7 @@ that can read the property values of the [Unicode Character Database]
19
19
  in browsers.
20
20
  Please see the [JavaScript] section below.
21
21
 
22
+ [General_Category property]: http://unicode.org/reports/tr44/#General_Category
22
23
  [Line_Break property]: http://unicode.org/reports/tr44/#Line_Break
23
24
  [Unicode Character Database]: https://unicode.org/reports/tr44/
24
25
  [unicodedata]: https://docs.python.org/3/library/unicodedata.html
@@ -62,9 +63,20 @@ The [`UnicodeDataCompressor` class] in this package
62
63
  can generate JavaScript functions that can read the property values
63
64
  of the [Unicode Character Database] in browsers.
64
65
 
65
- Please see [LineBreak.js] for an example of the generated functions
66
- and [LineBreak.html] for an example usage.
66
+ The following examples are available in the "`js`" directory:
67
+ * [GeneralCategory.js] is a generated JavaScript file
68
+ for the Unicode [General_Category property].
69
+ * [LineBreak.js] is a generated JavaScript file
70
+ for the Unicode [Line_Break property].
71
+ * [LineBreak.html] is an example usage of [LineBreak.js].
72
+
73
+ The following command generates a JavaScript file for the [Line_Break property]
74
+ using `js/template.js` as the template file:
75
+ ```sh
76
+ unicodedata-reader lb -t js/template.js
77
+ ```
67
78
 
68
79
  [`UnicodeDataCompressor` class]: https://github.com/kojiishi/unicodedata-reader/blob/main/unicodedata_reader/compressor.py
80
+ [GeneralCategory.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/GeneralCategory.js
69
81
  [LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html
70
82
  [LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "unicodedata-reader"
7
- version = "0.1.6"
7
+ version = "0.2.0"
8
8
  description = ""
9
9
  authors = ["Koji Ishii <kojii@chromium.org>"]
10
10
  readme = "README.md"
@@ -12,14 +12,14 @@ repository = "https://github.com/kojiishi/unicodedata-reader"
12
12
  license = "Apache-2.0"
13
13
 
14
14
  [tool.poetry.dependencies]
15
- python = ">=3.8,<3.10"
16
- platformdirs = "^2.2.0"
15
+ python = ">=3.8"
16
+ platformdirs = ">=2.2,<5.0"
17
17
 
18
18
  [tool.poetry.dev-dependencies]
19
- pytest = "^6.2.4"
20
- pytype = "*"
21
- tox = "^3.24.3"
22
- yapf = "^0.31.0"
19
+ pytest = "*"
20
+ pytype = {version = "*", python = "<3.10"}
21
+ tox = "^4.14.2"
22
+ yapf = "^0.40.2"
23
23
 
24
24
  [tool.poetry.scripts]
25
25
  unicodedata-reader = 'unicodedata_reader.__main__:main'
@@ -2,3 +2,4 @@ from .entry import *
2
2
  from .reader import *
3
3
  from .compressor import *
4
4
  from .cli import *
5
+ from .set import *
@@ -2,6 +2,7 @@ import pathlib
2
2
  import sys
3
3
 
4
4
  import unicodedata_reader.bidi_brackets as bidi_brackets
5
+ import unicodedata_reader.east_asian_width as ea
5
6
  import unicodedata_reader.emoji as emoji
6
7
  import unicodedata_reader.general_category as gc
7
8
  import unicodedata_reader.line_break as lb
@@ -12,6 +13,7 @@ def main():
12
13
  args = sys.argv
13
14
  sub_commands = {
14
15
  'bidi': lambda: bidi_brackets.dump_bidi_brackets(),
16
+ 'ea': lambda: ea.UnicodeEastAsianWidthDataCli().main(),
15
17
  'emoji': lambda: emoji.UnicodeEmojiDataCli().main(),
16
18
  'gc': lambda: gc.UnicodeGeneralCategoryDataCli().main(),
17
19
  'lb': lambda: lb.UnicodeLineBreakDataCli().main(),
@@ -73,14 +73,15 @@ def _init_logging(verbose):
73
73
 
74
74
 
75
75
  class UnicodeDataCli(object):
76
+
76
77
  def __init__(self):
77
- self.parse_args()
78
+ self._parse_args()
78
79
 
79
80
  def _columns(self) -> Dict[str, Callable[[int, str], Any]]:
80
81
  columns = self._core_columns()
81
82
  columns = dict(
82
83
  itertools.chain({
83
- 'Code': lambda code, ch: u_hex(code),
84
+ 'Code': lambda code, ch: 'U' + u_hex(code),
84
85
  'Char': lambda code, ch: u_printable_chr(ch),
85
86
  }.items(), columns.items(), {
86
87
  'Name': lambda code, ch: u_name_or_empty(ch),
@@ -119,17 +120,22 @@ class UnicodeDataCli(object):
119
120
  compressor = UnicodeDataCompressor(entries)
120
121
  compressor.substitute_template(template, name=self.name, output=output)
121
122
 
122
- def parse_args(self):
123
+ def _parse_args(self):
123
124
  parser = argparse.ArgumentParser()
124
- parser.add_argument('text', nargs='*')
125
+ parser.add_argument('text',
126
+ nargs='*',
127
+ help='show properties for the text')
125
128
  parser.add_argument('-f', '--no-cache', action='store_true')
126
- parser.add_argument('-n', '--name')
127
- parser.add_argument('-t', '--template', type=pathlib.Path)
129
+ parser.add_argument('--name', help='$NAME in the template')
130
+ parser.add_argument('-t',
131
+ '--template',
132
+ type=pathlib.Path,
133
+ help='generate a file from the template')
128
134
  parser.add_argument('-o', '--output', type=pathlib.Path)
129
- parser.add_argument("-v",
130
- "--verbose",
131
- help="increase output verbosity",
132
- action="count",
135
+ parser.add_argument('-v',
136
+ '--verbose',
137
+ help='increase output verbosity',
138
+ action='count',
133
139
  default=0)
134
140
  parser.parse_args(namespace=self)
135
141
  _init_logging(self.verbose) # pytype: disable=attribute-error
@@ -22,6 +22,7 @@ def _init_logging(verbose: int):
22
22
 
23
23
 
24
24
  class UnicodeDataCompressor(object):
25
+
25
26
  def __init__(self, entries: UnicodeDataEntries):
26
27
  self._entries = entries
27
28
 
@@ -82,8 +83,8 @@ class UnicodeDataCompressor(object):
82
83
  len(bytes), len(base64bytes), len(values_for_int),
83
84
  value_bits)
84
85
  mapping = {
85
- 'PROP_NAME': name,
86
- 'BASE64': base64bytes.decode('ascii'),
86
+ 'NAME': name,
87
+ 'BASE64BYTES': base64bytes.decode('ascii'),
87
88
  'VALUE_BITS': str(value_bits),
88
89
  'VALUE_MASK': str((1 << value_bits) - 1),
89
90
  'VALUE_LIST': ','.join(f'"{v}"' for v in values_for_int),
@@ -0,0 +1,30 @@
1
+ #!/usr/bin/env python3
2
+ import unicodedata
3
+ from typing import Any
4
+ from typing import Callable
5
+ from typing import Dict
6
+
7
+ from unicodedata_reader import *
8
+
9
+
10
class UnicodeEastAsianWidthDataCli(UnicodeDataCli):
    """CLI that tabulates East Asian Width data per code point.

    The entries come from `UnicodeDataReader.default.east_asian_width()`
    and are shown next to the values reported by the standard
    `unicodedata` module and the results of `u_enc` for several legacy
    encodings.
    """

    # (column title, codec name handed to `u_enc`), in display order.
    _ENCODING_COLUMNS = (
        ('cp932', 'cp932'),
        ('sjis04', 'sjis_2004'),
        ('cp936', 'cp936'),
        ('cp949', 'cp949'),
        ('cp950', 'cp950'),
    )

    def __init__(self):
        # The base initializer runs first, exactly as before; the data
        # entries are loaded only afterwards.
        super().__init__()
        self._entries = UnicodeDataReader.default.east_asian_width()

    def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
        """Return the property columns, keyed by column title.

        Each callable receives `(code, ch)` and returns the cell value.
        Insertion order is the column order.
        """

        def from_entries(code, ch):
            return self._entries.value(code)

        def general_category(code, ch):
            return unicodedata.category(ch)

        def builtin_width(code, ch):
            return unicodedata.east_asian_width(ch)

        def encoded_with(encoding):
            # Bind `encoding` per column so that every callable keeps
            # its own codec name.
            return lambda code, ch: u_enc(ch, encoding)

        columns = {
            'EA': from_entries,
            'GC': general_category,
            'EAW': builtin_width,
        }
        for title, encoding in self._ENCODING_COLUMNS:
            columns[title] = encoded_with(encoding)
        return columns
27
+
28
+
29
+ if __name__ == '__main__':
30
+ UnicodeEastAsianWidthDataCli().main()
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env python3
2
+ from typing import Any
3
+ from typing import Callable
4
+ from typing import Dict
5
+
6
+ from unicodedata_reader import *
7
+
8
+
9
class UnicodeEmojiDataCli(UnicodeDataCli):
    """CLI that tabulates the emoji properties per code point.

    The entries come from `UnicodeDataReader.default.emoji()`. Each
    `EmojiType` flag gets its own 0/1 column, followed by one column
    holding the raw combined value.
    """

    # Names of the `EmojiType` members shown as 0/1 columns, in display
    # order. The same text is used as the column title.
    _FLAG_NAMES = (
        'Emoji',
        'Emoji_Presentation',
        'Emoji_Modifier',
        'Emoji_Modifier_Base',
        'Emoji_Component',
        'Extended_Pictographic',
    )

    def __init__(self):
        # The base initializer runs first, exactly as before; the data
        # entries are loaded only afterwards.
        super().__init__()
        self._entries = UnicodeDataReader.default.emoji()

    def _emoji_flag_func(self, mask: EmojiType):
        """Return a column callable yielding 1 if `mask` is set, else 0."""

        def flag(code, ch):
            if self._entries.value(code) & mask:
                return 1
            return 0

        return flag

    def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
        """Return the property columns, keyed by column title.

        Each callable receives `(code, ch)` and returns the cell value.
        Insertion order is the column order.
        """
        columns = {}
        for flag_name in self._FLAG_NAMES:
            mask = getattr(EmojiType, flag_name)
            columns[flag_name] = self._emoji_flag_func(mask)

        def combined(code, ch):
            return self._entries.value(code)

        columns['EmojiCombined'] = combined
        return columns
35
+
36
+
37
+ if __name__ == '__main__':
38
+ UnicodeEmojiDataCli().main()
@@ -27,6 +27,7 @@ def u_enc(c, encoding):
27
27
 
28
28
 
29
29
  class BidiBrackets(object):
30
+
30
31
  def __init__(self, pair: int, type: str):
31
32
  self.pair = pair
32
33
  self.type = type
@@ -55,6 +56,9 @@ class UnicodeDataEntry(object):
55
56
 
56
57
  [Unicode character database]: https://unicode.org/reports/tr44/
57
58
  """
59
+
60
+ max_code_point = 0x10FFFF
61
+
58
62
  def __init__(self, min: int, max: int, value):
59
63
  self.min = min
60
64
  self.max = max
@@ -134,19 +138,22 @@ class UnicodeDataEntry(object):
134
138
  min = -1
135
139
  last_code = -1
136
140
  for code, value in values:
141
+ assert code > last_code
137
142
  if value == last_value and code == last_code + 1:
138
143
  last_code = code
139
144
  continue
140
- if min >= 0 and last_value is not None:
145
+ if min >= 0:
141
146
  yield UnicodeDataEntry(min, last_code, last_value)
142
147
  last_value = value
143
148
  min = last_code = code
144
- if min >= 0 and last_value is not None:
149
+ if min >= 0:
145
150
  yield UnicodeDataEntry(min, code, last_value)
146
151
 
147
152
  @staticmethod
148
153
  def from_values(values: Iterable[Any]):
149
- return UnicodeDataEntry.from_pairs(enumerate(values))
154
+ pairs = enumerate(values)
155
+ pairs = (p for p in pairs if p[1] is not None)
156
+ return UnicodeDataEntry.from_pairs(pairs)
150
157
 
151
158
  @staticmethod
152
159
  def values_for_code(entries, missing_value) -> Iterable[Any]:
@@ -163,6 +170,7 @@ class UnicodeDataEntries(object):
163
170
  or a list of `UnicodeDataEntry`.
164
171
  [Unicode character database]: https://unicode.org/reports/tr44/
165
172
  """
173
+
166
174
  def __init__(self,
167
175
  entries: Optional[Union[Iterable[UnicodeDataEntry],
168
176
  Sequence[UnicodeDataEntry]]] = None,
@@ -171,12 +179,15 @@ class UnicodeDataEntries(object):
171
179
  converter=None):
172
180
  self._missing_entries = self._default_missing_entries()
173
181
  self.name = name
182
+ self._values_for_int = None # type: list
183
+
174
184
  if entries is not None:
185
+ assert lines is None
186
+ assert converter is None
175
187
  self._entries = entries
176
188
  else:
177
189
  assert lines is not None
178
190
  self._load_lines(lines, converter=converter)
179
- self._values_for_int = None # type: list
180
191
 
181
192
  def _default_missing_entries(self) -> List[UnicodeDataEntry]:
182
193
  return []
@@ -193,16 +204,16 @@ class UnicodeDataEntries(object):
193
204
  self._missing_entries.extend(entries)
194
205
  assert self._missing_entries
195
206
 
196
- def ensure_multi_iterable(self):
207
+ def _ensure_multi_iterable(self):
197
208
  if isinstance(self._entries, types.GeneratorType):
198
209
  self._entries = tuple(self._entries)
199
210
 
200
211
  def __iter__(self):
201
- self.ensure_multi_iterable()
212
+ self._ensure_multi_iterable()
202
213
  return self._entries.__iter__()
203
214
 
204
215
  def __len__(self):
205
- self.ensure_multi_iterable()
216
+ self._ensure_multi_iterable()
206
217
  return len(self._entries)
207
218
 
208
219
  def missing_value(self, code: int):
@@ -235,12 +246,12 @@ class UnicodeDataEntries(object):
235
246
 
236
247
  def unicodes(self) -> Iterable[int]:
237
248
  """Returns a list of Unicode code points defined in this entries."""
238
- self.ensure_multi_iterable()
249
+ self._ensure_multi_iterable()
239
250
  return itertools.chain(*(e.range() for e in self._entries))
240
251
 
241
252
  def value(self, code: int):
242
253
  """Returns the value for the given code point."""
243
- self.ensure_multi_iterable()
254
+ self._ensure_multi_iterable()
244
255
  for entry in self._entries:
245
256
  if code < entry.min:
246
257
  return self.missing_value(code)
@@ -254,7 +265,7 @@ class UnicodeDataEntries(object):
254
265
  The list includes missing values,
255
266
  so that `tuple(values_for_code())[code]` is equal to `value(code)`.
256
267
  """
257
- self.ensure_multi_iterable()
268
+ self._ensure_multi_iterable()
258
269
  return UnicodeDataEntry.values_for_code(self._entries,
259
270
  self.missing_value)
260
271
 
@@ -278,8 +289,8 @@ class UnicodeDataEntries(object):
278
289
 
279
290
  On return, the original values are stored in `self.value_list`.
280
291
  """
281
- assert self.values_for_int() is None
282
- self.ensure_multi_iterable()
292
+ assert self._values_for_int is None
293
+ self._ensure_multi_iterable()
283
294
  value_map = {}
284
295
  for entry in self._entries:
285
296
  assert not isinstance(entry.value, int)
@@ -295,7 +306,7 @@ class UnicodeDataEntries(object):
295
306
 
296
307
  def to_dict(self) -> Dict[int, Any]:
297
308
  """Returns a `dict` of values with a Unicode code point as the key."""
298
- self.ensure_multi_iterable()
309
+ self._ensure_multi_iterable()
299
310
  dict = {}
300
311
  for entry in self._entries:
301
312
  for code in entry.range():
@@ -304,12 +315,14 @@ class UnicodeDataEntries(object):
304
315
 
305
316
 
306
317
  class UnicodeBidiBracketsDataEntries(UnicodeDataEntries):
318
+
307
319
  def _load_lines(self, lines: Iterable[str], converter=None):
308
320
  converter = converter or BidiBrackets.from_values
309
321
  super()._load_lines(lines, converter=converter)
310
322
 
311
323
 
312
324
  class UnicodeEmojiDataEntries(UnicodeDataEntries):
325
+
313
326
  def _load_lines(self, lines: Iterable[str], converter=None):
314
327
  converter = converter or (lambda v: EmojiType[v])
315
328
  super()._load_lines(lines, converter=converter)
@@ -334,6 +347,7 @@ class UnicodeEmojiDataEntries(UnicodeDataEntries):
334
347
 
335
348
 
336
349
  class UnicodeLineBreakDataEntries(UnicodeDataEntries):
350
+
337
351
  def _load_comment(self, comment: str, start_index: int):
338
352
  # Load missing value entries. See the comments in:
339
353
  # https://www.unicode.org/Public/UNIDATA/LineBreak.txt
@@ -356,12 +370,14 @@ class UnicodeLineBreakDataEntries(UnicodeDataEntries):
356
370
 
357
371
 
358
372
  class UnicodeScriptExtensionsDataEntries(UnicodeDataEntries):
373
+
359
374
  def _load_lines(self, lines: Iterable[str], converter=None):
360
375
  converter = converter or (lambda v: v.split())
361
376
  super()._load_lines(lines, converter=converter)
362
377
 
363
378
 
364
379
  class UnicodeVerticalOrientationDataEntries(UnicodeDataEntries):
380
+
365
381
  def _load_comment(self, comment: str, start_index: int):
366
382
  # Load missing value entries. See the comments in:
367
383
  # https://www.unicode.org/Public/UNIDATA/VerticalOrientation.txt
@@ -8,6 +8,7 @@ from unicodedata_reader import *
8
8
 
9
9
 
10
10
  class UnicodeGeneralCategoryDataCli(UnicodeDataCli):
11
+
11
12
  def __init__(self):
12
13
  super().__init__()
13
14
  self._entries = UnicodeDataReader.default.general_category()
@@ -8,6 +8,7 @@ from unicodedata_reader import *
8
8
 
9
9
 
10
10
  class UnicodeLineBreakDataCli(UnicodeDataCli):
11
+
11
12
  def __init__(self):
12
13
  super().__init__()
13
14
  self._entries = UnicodeDataReader.default.line_break()
@@ -33,6 +33,11 @@ class UnicodeDataReader(object):
33
33
  lines = self.read_lines(name)
34
34
  return UnicodeDataEntries(name=name, lines=lines)
35
35
 
36
def east_asian_width(self) -> UnicodeDataEntries:
    """Return the entries read from the 'EastAsianWidth' data.

    The lines are obtained from `self.read_lines` and wrapped in a
    `UnicodeDataEntries` that carries the same name.
    """
    data_name = 'EastAsianWidth'
    return UnicodeDataEntries(name=data_name,
                              lines=self.read_lines(data_name))
40
+
36
41
  def emoji(self) -> UnicodeDataEntries:
37
42
  lines = self.read_lines('emoji/emoji-data')
38
43
  return UnicodeEmojiDataEntries(name='Emoji', lines=lines)
@@ -0,0 +1,72 @@
1
+ from typing import Callable
2
+
3
+ from unicodedata_reader.entry import *
4
+ from unicodedata_reader.reader import *
5
+
6
+
7
class Set(object):
    """A simple set of Unicode code points.

    Wraps a builtin `set` of `int` (exposed as `self.set`) and adds
    factory methods that collect every code point whose property value
    matches a requested value.

    Supports `in`, iteration, and the in-place operators `-=`, `&=`
    and `|=` with another `Set`.
    """

    def __init__(self) -> None:
        self.set = set()  # type: set[int]

    def __contains__(self, code_point: int) -> bool:
        return code_point in self.set

    def __iter__(self) -> Iterable[int]:
        return iter(self.set)

    # The in-place operators must return the resulting object: Python
    # rebinds the left-hand name to whatever these methods return, so
    # returning `None` would turn `s -= t` into `s = None`.

    def __isub__(self, other: 'Set') -> 'Set':
        """Remove every code point that is in `other`; return `self`."""
        self.set -= other.set
        return self

    def __iand__(self, other: 'Set') -> 'Set':
        """Keep only code points that are also in `other`; return `self`."""
        self.set &= other.set
        return self

    def __ior__(self, other: 'Set') -> 'Set':
        """Add every code point that is in `other`; return `self`."""
        self.set |= other.set
        return self

    def add(self, code: int) -> None:
        """Add one code point."""
        self.set.add(code)

    def remove(self, code: int) -> None:
        """Remove one code point; do nothing if it is not present."""
        self.set.discard(code)

    def add_entries(self, entries: UnicodeDataEntries,
                    pred: Callable[[Any], bool]):
        """Add the code points of every entry whose value satisfies `pred`.

        Args:
            entries: The entries to scan. Each entry supplies `value`
                and `range()`.
            pred: Called with an entry's value. A true result adds the
                whole range of that entry.
        """
        for entry in entries:
            if pred(entry.value):
                self.set.update(entry.range())

    @staticmethod
    def east_asian_width(
        value: str,
        reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
        """Return the code points whose East Asian Width equals `value`."""
        result = Set()
        result.add_entries(reader.east_asian_width(), lambda v: v == value)
        return result

    @staticmethod
    def general_category(
        value: str,
        reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
        """Return the code points whose General Category starts with `value`.

        This is a prefix match, so a shorter `value` also selects every
        category value that begins with it.
        """
        result = Set()
        result.add_entries(reader.general_category(),
                           lambda v: v.startswith(value))
        return result

    @staticmethod
    def scripts(
        value: str,
        reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
        """Return the code points whose Script value equals `value`."""
        result = Set()
        result.add_entries(reader.scripts(), lambda v: v == value)
        return result

    @staticmethod
    def script_extensions(
        value: str,
        reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
        """Return the code points whose Script Extensions contain `value`.

        Uses `value in v`, so each entry value is presumably a
        collection of script codes (verify against the reader).
        """
        result = Set()
        result.add_entries(reader.script_extensions(), lambda v: value in v)
        return result
@@ -8,6 +8,7 @@ from unicodedata_reader import *
8
8
 
9
9
 
10
10
  class UnicodeVerticalOrientationDataCli(UnicodeDataCli):
11
+
11
12
  def __init__(self):
12
13
  super().__init__()
13
14
  self._entries = UnicodeDataReader.default.vertical_orientation()
@@ -1,34 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- from setuptools import setup
3
-
4
- packages = \
5
- ['unicodedata_reader']
6
-
7
- package_data = \
8
- {'': ['*']}
9
-
10
- install_requires = \
11
- ['platformdirs>=2.2.0,<3.0.0']
12
-
13
- entry_points = \
14
- {'console_scripts': ['unicodedata-reader = unicodedata_reader.__main__:main']}
15
-
16
- setup_kwargs = {
17
- 'name': 'unicodedata-reader',
18
- 'version': '0.1.6',
19
- 'description': '',
20
- 'long_description': '[![CI](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml/badge.svg)](https://github.com/kojiishi/unicodedata-reader/actions/workflows/ci.yml)\n[![PyPI](https://img.shields.io/pypi/v/unicodedata-reader.svg)](https://pypi.org/project/unicodedata-reader/)\n[![Dependencies](https://badgen.net/github/dependabot/kojiishi/unicodedata-reader)](https://github.com/kojiishi/unicodedata-reader/network/updates)\n\n\n# unicodedata-reader\n\nThis package reads and parses the [Unicode Character Database] files.\n\nMany of them are available in the [unicodedata] module,\nor in other 3rd party modules.\nWhen the desired data is not in any existing modules,\nsuch as the [Line_Break property] or the [Vertical_Orientation property],\nthis package can read the data files\nat <https://www.unicode.org/Public/UNIDATA/>.\n\nThis package can also generate JavaScript functions\nthat can read the property values of the [Unicode Character Database]\nin browsers.\nPlease see the [JavaScript] section below.\n\n[Line_Break property]: http://unicode.org/reports/tr44/#Line_Break\n[Unicode Character Database]: https://unicode.org/reports/tr44/\n[unicodedata]: https://docs.python.org/3/library/unicodedata.html\n[Vertical_Orientation property]: http://unicode.org/reports/tr44/#Vertical_Orientation\n\n## Install\n\n```sh\npip install unicodedata-reader\n```\nIf you want to clone and install using [poetry]:\n```sh\ngit clone https://github.com/kojiishi/unicodedata-reader\ncd unicodedata-reader\npoetry install\npoetry shell\n```\n\n[poetry]: https://github.com/python-poetry/poetry\n\n\n## Python\n\n```python\nimport unicodedata_reader\n\nreader = unicodedata_reader.UnicodeDataReader.default\nlb = reader.line_break()\nprint(lb.value(0x41))\n```\nThe example above prints `AL`,\nthe [Line_Break property] value for U+0041.\nPlease also see [line_break_test.py] for more usages.\n\n[line_break_test.py]: 
https://github.com/kojiishi/unicodedata-reader/blob/main/tests/line_break_test.py\n\n## JavaScript\n[JavaScript]: #javascript\n\nThe [`UnicodeDataCompressor` class] in this package\ncan generate JavaScript functions that can read the property values\nof the [Unicode Character Database] in browsers.\n\nPlease see [LineBreak.js] for an example of the generated functions\nand [LineBreak.html] for an example usage.\n\n[`UnicodeDataCompressor` class]: https://github.com/kojiishi/unicodedata-reader/blob/main/unicodedata_reader/compressor.py\n[LineBreak.html]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.html\n[LineBreak.js]: https://github.com/kojiishi/unicodedata-reader/blob/main/js/LineBreak.js\n',
21
- 'author': 'Koji Ishii',
22
- 'author_email': 'kojii@chromium.org',
23
- 'maintainer': None,
24
- 'maintainer_email': None,
25
- 'url': 'https://github.com/kojiishi/unicodedata-reader',
26
- 'packages': packages,
27
- 'package_data': package_data,
28
- 'install_requires': install_requires,
29
- 'entry_points': entry_points,
30
- 'python_requires': '>=3.8,<3.10',
31
- }
32
-
33
-
34
- setup(**setup_kwargs)
@@ -1,21 +0,0 @@
1
- #!/usr/bin/env python3
2
- from typing import Any
3
- from typing import Callable
4
- from typing import Dict
5
-
6
- from unicodedata_reader import *
7
-
8
-
9
- class UnicodeEmojiDataCli(UnicodeDataCli):
10
- def __init__(self):
11
- super().__init__()
12
- self._entries = UnicodeDataReader.default.emoji()
13
-
14
- def _core_columns(self) -> Dict[str, Callable[[int, str], Any]]:
15
- return {
16
- 'Emoji': lambda code, ch: self._entries.value(code),
17
- }
18
-
19
-
20
- if __name__ == '__main__':
21
- UnicodeEmojiDataCli().main()