unicodedata-reader 1.1.0__tar.gz → 1.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/PKG-INFO +1 -1
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/pyproject.toml +2 -2
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/cli.py +5 -2
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/entry.py +28 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/reader.py +19 -1
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/set.py +13 -20
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/LICENSE +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/README.md +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/__init__.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/__main__.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/bidi_brackets.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/compressor.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/east_asian_width.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/emoji.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/general_category.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/line_break.py +0 -0
- {unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/vertical_orientation.py +0 -0
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
|
4
4
|
|
|
5
5
|
[tool.poetry]
|
|
6
6
|
name = "unicodedata-reader"
|
|
7
|
-
version = "1.1.0"
|
|
7
|
+
version = "1.3.0"
|
|
8
8
|
description = ""
|
|
9
9
|
authors = ["Koji Ishii <kojii@chromium.org>"]
|
|
10
10
|
readme = "README.md"
|
|
@@ -17,7 +17,7 @@ platformdirs = ">=2.2,<5.0"
|
|
|
17
17
|
|
|
18
18
|
[tool.poetry.dev-dependencies]
|
|
19
19
|
pytest = "*"
|
|
20
|
-
pytype =
|
|
20
|
+
pytype = "*"
|
|
21
21
|
tox = "^4.14.2"
|
|
22
22
|
yapf = "^0.40.2"
|
|
23
23
|
|
|
@@ -125,7 +125,8 @@ class UnicodeDataCli(object):
|
|
|
125
125
|
parser.add_argument('text',
|
|
126
126
|
nargs='*',
|
|
127
127
|
help='show properties for the text')
|
|
128
|
-
parser.add_argument('-f', '--no-cache', action='store_true')
|
|
128
|
+
parser.add_argument('-f', '--clear-cache', action='store_true')
|
|
129
|
+
parser.add_argument('-F', '--no-cache', action='store_true')
|
|
129
130
|
parser.add_argument('--name', help='$NAME in the template')
|
|
130
131
|
parser.add_argument('-t',
|
|
131
132
|
'--template',
|
|
@@ -139,8 +140,10 @@ class UnicodeDataCli(object):
|
|
|
139
140
|
default=0)
|
|
140
141
|
parser.parse_args(namespace=self)
|
|
141
142
|
_init_logging(self.verbose) # pytype: disable=attribute-error
|
|
143
|
+
if self.clear_cache:
|
|
144
|
+
UnicodeDataCachedReader.clear_cache()
|
|
142
145
|
if self.no_cache:
|
|
143
|
-
UnicodeDataReader.
|
|
146
|
+
UnicodeDataReader.default = UnicodeDataReader()
|
|
144
147
|
|
|
145
148
|
def main(self):
|
|
146
149
|
if self.template:
|
|
@@ -4,11 +4,13 @@ import logging
|
|
|
4
4
|
import re
|
|
5
5
|
import types
|
|
6
6
|
from typing import Any
|
|
7
|
+
from typing import Callable
|
|
7
8
|
from typing import Dict
|
|
8
9
|
from typing import Iterable
|
|
9
10
|
from typing import List
|
|
10
11
|
from typing import Optional
|
|
11
12
|
from typing import Sequence
|
|
13
|
+
from typing import Set
|
|
12
14
|
from typing import Union
|
|
13
15
|
from typing import Tuple
|
|
14
16
|
|
|
@@ -259,6 +261,32 @@ class UnicodeDataEntries(object):
|
|
|
259
261
|
return entry.value
|
|
260
262
|
return self.missing_value(code)
|
|
261
263
|
|
|
264
|
+
def filter(self, pred: Callable[[Any],
|
|
265
|
+
bool]) -> Iterable[UnicodeDataEntry]:
|
|
266
|
+
"""Returns an `Iterable` of `UnicodeDataEntry` for the given `pred`."""
|
|
267
|
+
return (entry for entry in self if pred(entry.value))
|
|
268
|
+
|
|
269
|
+
def codes_for(self, pred: Callable[[Any], bool]) -> Iterable[int]:
|
|
270
|
+
"""Returns an `Iterable` of Unicode code points for the given `pred`."""
|
|
271
|
+
return itertools.chain(*(e.range() for e in self.filter(pred)))
|
|
272
|
+
|
|
273
|
+
def add_to_set(self, pred: Callable[[Any], bool], set: Set[int]) -> None:
|
|
274
|
+
"""Add values `pred` returns `True` to `set[int]`."""
|
|
275
|
+
for code in self.codes_for(pred):
|
|
276
|
+
set.add(code)
|
|
277
|
+
|
|
278
|
+
def remove_from_set(self, pred: Callable[[Any], bool],
|
|
279
|
+
set: Set[int]) -> None:
|
|
280
|
+
"""Remove values `pred` returns `True` from `set[int]`."""
|
|
281
|
+
for code in self.codes_for(pred):
|
|
282
|
+
set.discard(code)
|
|
283
|
+
|
|
284
|
+
def to_set(self, pred: Callable[[Any], bool]) -> Set[int]:
|
|
285
|
+
"""Returns a `set[int]` of values `pred` returns `True`."""
|
|
286
|
+
s = set() # type: set[int]
|
|
287
|
+
self.add_to_set(pred, s)
|
|
288
|
+
return s
|
|
289
|
+
|
|
262
290
|
def values_for_code(self) -> Iterable[Any]:
|
|
263
291
|
"""Returns a list of values whose index is the Unicode code point.
|
|
264
292
|
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import logging
|
|
2
2
|
import pathlib
|
|
3
3
|
from typing import Iterable
|
|
4
|
+
import shutil
|
|
4
5
|
import urllib.request
|
|
5
6
|
|
|
6
7
|
from unicodedata_reader.entry import *
|
|
@@ -23,6 +24,12 @@ class UnicodeDataReader(object):
|
|
|
23
24
|
default = None
|
|
24
25
|
is_caching_allowed = True
|
|
25
26
|
|
|
27
|
+
def __init__(
|
|
28
|
+
self,
|
|
29
|
+
url_template: str = 'https://www.unicode.org/Public/UNIDATA/{0}.txt'
|
|
30
|
+
) -> None:
|
|
31
|
+
self.url_template = url_template
|
|
32
|
+
|
|
26
33
|
def bidi_brackets(self) -> UnicodeDataEntries:
|
|
27
34
|
name = 'BidiBrackets'
|
|
28
35
|
lines = self.read_lines(name)
|
|
@@ -74,8 +81,11 @@ class UnicodeDataReader(object):
|
|
|
74
81
|
lines = self.read_lines(name)
|
|
75
82
|
return UnicodeVerticalOrientationDataEntries(name=name, lines=lines)
|
|
76
83
|
|
|
84
|
+
def get_url(self, name: str) -> str:
|
|
85
|
+
return self.url_template.format(name)
|
|
86
|
+
|
|
77
87
|
def read_lines(self, name: str) -> Iterable[str]:
|
|
78
|
-
url =
|
|
88
|
+
url = self.get_url(name)
|
|
79
89
|
_logger.debug('Downloading %s', url)
|
|
80
90
|
with urllib.request.urlopen(url) as response:
|
|
81
91
|
body = response.read().decode('utf-8')
|
|
@@ -112,5 +122,13 @@ class UnicodeDataCachedReader(UnicodeDataReader):
|
|
|
112
122
|
|
|
113
123
|
return lines
|
|
114
124
|
|
|
125
|
+
@staticmethod
|
|
126
|
+
def clear_cache(ignore_errors: bool = False):
|
|
127
|
+
cache_dir = UnicodeDataCachedReader._cache_dir
|
|
128
|
+
if not cache_dir or not cache_dir.exists():
|
|
129
|
+
return
|
|
130
|
+
_logger.debug('Deleting cache %s', cache_dir)
|
|
131
|
+
shutil.rmtree(cache_dir, ignore_errors=ignore_errors)
|
|
132
|
+
|
|
115
133
|
|
|
116
134
|
UnicodeDataReader.default = UnicodeDataCachedReader()
|
|
@@ -1,16 +1,21 @@
|
|
|
1
1
|
from typing import Any
|
|
2
2
|
from typing import Callable
|
|
3
3
|
from typing import Iterable
|
|
4
|
+
from typing import Set
|
|
4
5
|
|
|
5
6
|
from unicodedata_reader.entry import *
|
|
6
7
|
from unicodedata_reader.reader import *
|
|
7
8
|
|
|
8
9
|
|
|
9
10
|
class Set(object):
|
|
10
|
-
"""A simple set of Unicode code points."""
|
|
11
|
+
"""A simple wrapper of a `set` of Unicode code points."""
|
|
11
12
|
|
|
12
|
-
def __init__(self
|
|
13
|
-
|
|
13
|
+
def __init__(self,
|
|
14
|
+
entries: UnicodeDataEntries = None,
|
|
15
|
+
pred: Callable[[Any], bool] = None) -> None:
|
|
16
|
+
self.set = set() # type: Set[int]
|
|
17
|
+
if entries:
|
|
18
|
+
self.add_entries(entries, pred)
|
|
14
19
|
|
|
15
20
|
def __contains__(self, code_point: int) -> bool:
|
|
16
21
|
return code_point in self.set
|
|
@@ -38,40 +43,28 @@ class Set(object):
|
|
|
38
43
|
|
|
39
44
|
def add_entries(self, entries: UnicodeDataEntries, pred: Callable[[Any],
|
|
40
45
|
bool]):
|
|
41
|
-
|
|
42
|
-
if pred(entry.value):
|
|
43
|
-
for code in entry.range():
|
|
44
|
-
self.set.add(code)
|
|
46
|
+
entries.add_to_set(pred, self.set)
|
|
45
47
|
|
|
46
48
|
@staticmethod
|
|
47
49
|
def east_asian_width(
|
|
48
50
|
value: str,
|
|
49
51
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
50
|
-
|
|
51
|
-
set.add_entries(reader.east_asian_width(), lambda v: v == value)
|
|
52
|
-
return set
|
|
52
|
+
return Set(reader.east_asian_width(), lambda v: v == value)
|
|
53
53
|
|
|
54
54
|
@staticmethod
|
|
55
55
|
def general_category(
|
|
56
56
|
value: str,
|
|
57
57
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
58
|
-
|
|
59
|
-
set.add_entries(reader.general_category(),
|
|
60
|
-
lambda v: v.startswith(value))
|
|
61
|
-
return set
|
|
58
|
+
return Set(reader.general_category(), lambda v: v.startswith(value))
|
|
62
59
|
|
|
63
60
|
@staticmethod
|
|
64
61
|
def scripts(
|
|
65
62
|
value: str,
|
|
66
63
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
67
|
-
|
|
68
|
-
set.add_entries(reader.scripts(), lambda v: v == value)
|
|
69
|
-
return set
|
|
64
|
+
return Set(reader.scripts(), lambda v: v == value)
|
|
70
65
|
|
|
71
66
|
@staticmethod
|
|
72
67
|
def script_extensions(
|
|
73
68
|
value: str,
|
|
74
69
|
reader: UnicodeDataReader = UnicodeDataReader.default) -> 'Set':
|
|
75
|
-
|
|
76
|
-
set.add_entries(reader.script_extensions(), lambda v: value in v)
|
|
77
|
-
return set
|
|
70
|
+
return Set(reader.script_extensions(), lambda v: value in v)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/east_asian_width.py
RENAMED
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/general_category.py
RENAMED
|
File without changes
|
|
File without changes
|
{unicodedata_reader-1.1.0 → unicodedata_reader-1.3.0}/unicodedata_reader/vertical_orientation.py
RENAMED
|
File without changes
|