totolo 0.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
totolo/__init__.py ADDED
@@ -0,0 +1,15 @@
1
+ """
2
+ The Python interface to themeontology.org.
3
+ """
4
+
5
+ __version__ = "0.1"
6
+
7
+ from totolo.api import TORemote, empty, files
8
+
9
# Ready-made loader instance: calling ``totolo.remote()`` fetches and parses
# an ontology archive (see ``TORemote.__call__``).
remote = TORemote()

# Names exported by ``from totolo import *``. Python only honours the
# lowercase ``__all__`` spelling, and its items must be strings.
__all__ = [
    "empty",
    "files",
    "remote",
]
totolo/api.py ADDED
@@ -0,0 +1,42 @@
1
+ import json
2
+ import re
3
+ import urllib.request
4
+
5
+ from .impl.parser import TOParser
6
+ from .themeontology import ThemeOntology
7
+
8
+ DEFAULT_URL = "https://github.com/theme-ontology/theming/"
9
+ API_URL = "https://api.github.com/repos/theme-ontology/theming/"
10
+
11
+
12
def files(paths=None):
    """
    Create a new ontology and populate it from local files.

    Args:
        paths: a single path or an iterable of paths, handed on to
            ``TOParser.add_files``. ``None`` (the default) means "no paths".
    Returns: whatever ``TOParser.add_files`` returns for the new ontology.
    """
    # Iterating ``None`` would raise a TypeError inside the parser, so the
    # documented default is normalised to an empty list first.
    if paths is None:
        paths = []
    return TOParser.add_files(empty(), paths)
14
+
15
+
16
def empty():
    """Create and return a fresh ``ThemeOntology`` instance."""
    ontology = ThemeOntology()
    return ontology
18
+
19
+
20
class TORemote:
    """
    Loader for ontology snapshots published in the theming GitHub repository.

    Calling the instance downloads a tar archive and parses it; ``version``
    selects a tagged release and ``versions`` lists the available releases.
    """

    def __call__(self, url: str = "") -> "ThemeOntology":
        """
        Download and parse the archive at ``url``.

        Args:
            url: archive location; when empty the master branch archive
                under ``DEFAULT_URL`` is used.
        Returns: the ontology populated by ``TOParser.add_url``.
        """
        if not url:
            url = DEFAULT_URL + "archive/refs/heads/master.tar.gz"
        return TOParser.add_url(empty(), url)

    def version(self, version: str = ""):
        """
        Load a tagged release.

        Args:
            version: a tag of the form ``vX.Y.Z`` or ``v20YY.MM``; when empty
                the default archive is loaded instead.
        Raises:
            ValueError: if ``version`` matches neither tag format.
        """
        if not version:
            return self()
        # Both accepted tag styles live under the same archive path.
        is_semver = re.match(r"v\d+\.\d+\.\d+$", version)
        is_dated = re.match(r"v20\d{2}\.\d{2}$", version)
        if not (is_semver or is_dated):
            raise ValueError(f"Unknown version format {version}.")
        return self(DEFAULT_URL + f"archive/refs/tags/{version}.tar.gz")

    def versions(self):
        """
        Yield ``(tag_name, name)`` for each item in the releases listing
        fetched from ``API_URL``.
        """
        url = API_URL + "releases"
        # Read the body exactly once: a second read() on the same response
        # returns nothing, which would leave no JSON to parse.
        with urllib.request.urlopen(url) as response:
            payload = response.read()
        for item in json.loads(payload):
            yield item["tag_name"], item["name"]
File without changes
totolo/impl/core.py ADDED
@@ -0,0 +1,92 @@
1
+ from collections import OrderedDict
2
+ from copy import deepcopy
3
+
4
+
5
class TOAttr:
    """
    Declaration of an attribute on a TOObject subclass.

    Args:
        default: value recorded as the attribute's default.
        private: flag recorded on the declaration; the metaclass skips
            private declarations when assigning defaults to new instances.
    """

    def __init__(self, default="", private=False):
        self.default, self.private = default, private
9
+
10
+
11
class TOStoredAttr(TOAttr):
    """
    Declaration of a stored field; always registered as a private attribute.

    Args:
        datatype: name of the field's datatype (e.g. "blob", "list").
        default: default value; when ``None`` it becomes an empty list for
            datatypes whose name contains "list" and an empty string otherwise.
        required: flag recorded on the declaration.
    """

    def __init__(self, datatype="blob", default=None, required=False):
        if default is None:
            if "list" in datatype:
                default = []
            else:
                default = ""
        super().__init__(default, private=True)
        self.datatype = datatype
        self.required = required
18
+
19
+
20
class TOObjectMeta(type):
    """
    Metaclass that gathers TOAttr declarations into a ``_to_attrs`` mapping.

    Declarations are removed from the class namespace and kept, in order, in
    ``_to_attrs``; new instances then receive a deep copy of the default of
    every non-private declaration they have not set themselves.
    """

    def __new__(meta, name, bases, attr):
        collected = OrderedDict()
        # Start from the declarations of the bases, except those the new
        # class body redefines.
        for parent in bases:
            inherited = getattr(parent, "_to_attrs", {})
            collected.update(
                (key, spec) for key, spec in inherited.items() if key not in attr
            )
        # Move this class's own declarations out of the namespace. Stored
        # attributes are registered with underscores turned into spaces.
        for key in list(attr):
            spec = attr[key]
            if isinstance(spec, TOStoredAttr):
                collected[key.replace("_", " ")] = attr.pop(key)
            elif isinstance(spec, TOAttr):
                collected[key] = attr.pop(key)
        attr["_to_attrs"] = collected
        return super().__new__(meta, name, bases, attr)

    def __call__(cls, *args, **kwargs):
        instance = super().__call__(*args, **kwargs)
        for key, spec in instance._to_attrs.items():
            if spec.private or hasattr(instance, key):
                continue
            # deepcopy so instances never share a mutable default
            setattr(instance, key, deepcopy(spec.default))
        return instance

    @classmethod
    def __prepare__(mcls, _cls, _bases):
        # class bodies are evaluated into an ordered namespace
        return OrderedDict()
48
+
49
+
50
def a(*args, **kwargs):
    """Shorthand constructor: forward all arguments to ``TOAttr``."""
    declaration = TOAttr(*args, **kwargs)
    return declaration
52
+
53
+
54
def sa(*args, **kwargs):
    """Shorthand constructor: forward all arguments to ``TOStoredAttr``."""
    declaration = TOStoredAttr(*args, **kwargs)
    return declaration
56
+
57
+
58
class TOObject(metaclass=TOObjectMeta):
    """
    Base class for objects whose attributes are declared with TOAttr.

    Positional arguments are assigned to the declared attributes in
    declaration order; keyword arguments are assigned by name.
    """

    def __init__(self, *args, **kwargs):
        super().__init__()
        for key, arg in zip(self._to_attrs, args):
            setattr(self, key, arg)
        for key in kwargs:
            setattr(self, key, kwargs[key])
        # expose the declaration lookup directly as an instance attribute
        self.get_attr = self._to_attrs.get

    def iter_attrs(self):
        """Yield ``(name, declaration)`` for every declared attribute."""
        yield from self._to_attrs.items()

    def iter_stored(self):
        """Yield ``(name, declaration)`` for the TOStoredAttr declarations only."""
        for pair in self.iter_attrs():
            if isinstance(pair[1], TOStoredAttr):
                yield pair

    def field_type(self, key):
        """Return the declared datatype of ``key``, or "unknown" if it has none."""
        try:
            spec = self.get_attr(key)
            return spec.datatype
        except (KeyError, AttributeError):
            return "unknown"

    def field_required(self, key):
        """Return the same value as ``field_type`` (see the note below)."""
        # NOTE(review): this returns the declaration's ``datatype``, exactly
        # like field_type(), rather than its ``required`` flag; every result
        # (including "unknown") is a truthy string. Looks unintended —
        # confirm against callers before changing.
        try:
            spec = self.get_attr(key)
            return spec.datatype
        except (KeyError, AttributeError):
            return "unknown"

    def __str__(self):
        return "[TOObject]"

    def __repr__(self):
        return "{}<{}>".format(type(self).__name__, str(self))
totolo/impl/entry.py ADDED
@@ -0,0 +1,120 @@
1
+ from collections import OrderedDict
2
+
3
+ from .core import TOObject, a
4
+ from .field import TOField
5
+
6
+
7
class TOEntry(TOObject):
    """
    A named entry holding an ordered mapping of field name -> field.

    ``parents`` and ``children`` hold entry names; ``_lookup`` supplies the
    name -> entry mapping used to traverse them (empty for this base class).
    """
    name = a("")
    fields = a(OrderedDict())
    parents = a(set())
    children = a(set())
    source = a([])
    # NOTE(review): the trailing ")" looks like a typo for "<api>"; left
    # untouched because the value is observable at runtime.
    source_location = a("<api>)")
    ontology = a()

    def __str__(self):
        # Drop non-ASCII characters, then decode again so the text (and not
        # a bytes repr such as b'...') ends up in the result.
        ascii_name = self.name.encode("ascii", "ignore").decode("ascii")
        return "{}[{}]".format(ascii_name, len(self.fields))

    def __getitem__(self, key):
        return self.get(key)

    def __setitem__(self, key, value):
        self.fields[key] = value

    def __delitem__(self, key):
        del self.fields[key]

    def iter_fields(self, reorder=False, skipunknown=False):
        """
        Yield the fields present on this entry.

        Args:
            reorder: iterate in stored-attribute declaration order instead of
                the order in which the fields were added.
            skipunknown: leave out fields whose type is "unknown".
        """
        if reorder:
            order = [fn for fn, _ in self.iter_stored()]
        else:
            order = self.fields.keys()
        for fieldname in order:
            field = self.fields.get(fieldname, None)
            if field is None:
                continue
            if skipunknown and self.field_type(fieldname) == "unknown":
                continue
            yield field

    def ancestors(self):
        """Yield this entry's own name, then the names reachable via ``parents``."""
        yield from self._dfs("parents", self._lookup())

    def descendants(self):
        """Yield this entry's own name, then the names reachable via ``children``."""
        yield from self._dfs("children", self._lookup())

    def validate(self):
        """Yield warning strings about junk header lines and unknown fields."""
        junklines = []
        for idx, line in enumerate(self.source):
            # the first two source lines are skipped; the scan stops at the
            # first line starting with "::"
            if idx > 1:
                if line.startswith("::"):
                    break
                elif line.strip():
                    junklines.append(line)
        if junklines:
            junkmsg = '/'.join(junklines)
            if len(junkmsg) > 13:
                junkmsg = junkmsg[:10] + "..."
            yield f"{self.name}: junk in entry header: {junkmsg}"
        for field in self.fields.values():
            if self.field_type(field.name) == "unknown":
                yield f"{self.name}: unknown field '{field.name}'"

    def text_canonical(self):
        """
        Render the entry with known fields only, in declaration order.

        A field is written when its declaration is marked required or when
        it has any non-empty part.
        """
        lines = [self.name, "=" * len(self.name), ""]
        for field in self.iter_fields(reorder=True, skipunknown=True):
            # Read the flag from the declaration and test the parts directly
            # so the decision does not depend on helper return conventions.
            spec = self.get_attr(field.name)
            required = bool(getattr(spec, "required", False))
            if required or any(field.parts):
                lines.append(field.text_canonical())
                lines.append("")
        return "\n".join(lines)

    def text_original(self):
        """Render the entry with every field, in the order the fields were added."""
        lines = [self.name, "=" * len(self.name), ""]
        for field in self.iter_fields(reorder=False, skipunknown=False):
            lines.append(field.text_canonical())
            lines.append("")
        return "\n".join(lines)

    def get(self, fieldname):
        """Get field, returning a frozen default field if it doesn't exist."""
        field = self.fields.get(fieldname, None)
        if field is not None:
            return field
        fieldtype = self.field_type(fieldname)
        return TOField(fieldtype=fieldtype, name=fieldname).freeze()

    def setdefault(self, fieldname):
        """Get field, creating it first if it doesn't exist."""
        field = self.fields.get(fieldname, None)
        if field is not None:
            return field
        fieldtype = self.field_type(fieldname)
        field = TOField(fieldtype=fieldtype, name=fieldname)
        self.fields[fieldname] = field
        return field

    def delete(self, fieldname):
        """Remove the field if present; do nothing otherwise."""
        if fieldname in self.fields:
            del self.fields[fieldname]

    def print(self):
        """Print the stripped canonical text of the entry to stdout."""
        print(self.text_canonical().strip())

    def _lookup(self):
        return {}

    def _dfs(self, attr, lookup):
        """
        Depth-first walk over the names stored in ``attr`` of each entry.

        Yields each name once, beginning with this entry's own name. Names
        missing from ``lookup`` are yielded but not expanded.
        """
        visited = set()
        pending = [self.name]
        visited.update(pending)
        while pending:
            name = pending.pop()
            yield name
            item = lookup.get(name)
            if item is None:
                # e.g. an entry that is not attached to an ontology
                continue
            for nitem in getattr(item, attr):
                if nitem not in visited:
                    pending.append(nitem)
                    visited.add(nitem)
totolo/impl/field.py ADDED
@@ -0,0 +1,109 @@
1
+ from .core import TOObject, a
2
+ from .keyword import TOKeyword
3
+
4
+
5
class TOField(TOObject):
    """
    A single field of an entry: its name, type, raw lines and parsed parts.

    A frozen field rejects attribute assignment (see ``assert_mutable``).
    """
    name = a("")
    fieldtype = a("")
    source = a([])
    data = a([])
    parts = a([])
    frozen = a(False)

    def __setattr__(self, name, value):
        # "frozen" itself must stay assignable so freeze() can set it
        if name != "frozen":
            self.assert_mutable()
        super().__setattr__(name, value)

    def __repr__(self):
        # Drop non-ASCII characters, then decode again so the text (and not
        # a bytes repr such as b'...') ends up in the result.
        ascii_name = self.name.encode("ascii", "ignore").decode("ascii")
        return "{}<{}>[{}]".format(
            type(self).__name__,
            ascii_name,
            len(self.data)
        )

    def __str__(self):
        return self.text_canonical_contents()

    def __iter__(self):
        yield from self.iter_parts()

    def freeze(self):
        """Turn the list attributes into tuples, mark the field frozen, return it."""
        if not self.frozen:
            self.source = tuple(self.source)
            self.data = tuple(self.data)
            self.parts = tuple(self.parts)
            self.frozen = True
        return self

    def assert_mutable(self):
        """Return True, or raise AttributeError if the field is frozen."""
        if getattr(self, "frozen", False):
            raise AttributeError(
                "Object is frozen, indicating it is detached from an ontology.")
        return True

    def str(self):
        return str(self)

    def list(self):
        return list(self.parts)

    def empty(self):
        """Return True when the field has no parts, or only falsy ones."""
        return not any(self.parts)

    def iter_parts(self):
        for part in self.parts:
            yield part

    def text_canonical_contents(self):
        """Return the parts as text, one per line, without the field header."""
        parts = [str(x) for x in self.iter_parts()]
        return "\n".join(parts)

    def text_canonical(self):
        """Return the ":: name" header line followed by the parts, one per line."""
        parts = [f":: {self.name}"]
        parts.extend(str(x) for x in self.iter_parts())
        return "\n".join(parts)

    def delete_kw(self, kw):
        """
        Delete a keyword.

        Only acts on "kwlist" fields. Returns the smallest index removed, or
        the number of parts when nothing was removed.
        """
        self.assert_mutable()
        fieldtype = self.fieldtype
        todelete = set()
        if fieldtype == "kwlist":
            for idx, part in enumerate(self.parts):
                if part.keyword == kw:
                    todelete.add(idx)
            self.parts = [p for idx, p in enumerate(self.parts) if idx not in todelete]
        return min(todelete) if todelete else len(self.parts)

    def update_kw(self, kw, keyword=None, motivation=None, capacity=None, notes=None):
        """Edit a keyword: overwrite every non-None value on each part matching ``kw``."""
        self.assert_mutable()
        assert self.fieldtype == "kwlist"
        for part in self.parts:
            if part.keyword == kw:
                if keyword is not None:
                    part.keyword = keyword
                if motivation is not None:
                    part.motivation = motivation
                if capacity is not None:
                    part.capacity = capacity
                if notes is not None:
                    part.notes = notes

    def insert_kw(self, idx=None, keyword="", motivation="", capacity="", notes=""):
        """Insert a keyword at ``idx`` (appended at the end when ``idx`` is None)."""
        self.assert_mutable()
        assert self.fieldtype == "kwlist"
        if idx is None:
            idx = len(self.parts)
        self.parts.insert(
            idx,
            TOKeyword(
                keyword,
                capacity=capacity,
                motivation=motivation,
                notes=notes
            )
        )
totolo/impl/keyword.py ADDED
@@ -0,0 +1,14 @@
1
+ from .core import TOObject, a
2
+
3
+
4
class TOKeyword(TOObject):
    """A keyword together with its optional capacity, motivation and notes."""
    keyword = a("")
    capacity = a("")
    motivation = a("")
    notes = a("")

    def __str__(self):
        # Render as: keyword <capacity> [motivation] {notes}, leaving out
        # whichever annotations are empty.
        pieces = [f"{self.keyword}"]
        if self.capacity:
            pieces.append(f" <{self.capacity}>")
        if self.motivation:
            pieces.append(f" [{self.motivation}]")
        if self.notes:
            pieces.append(f" {{{self.notes}}}")
        return "".join(pieces)
totolo/impl/parser.py ADDED
@@ -0,0 +1,230 @@
1
+ import os
2
+ import re
3
+ import weakref
4
+ from typing import Generator, Iterable, List, Tuple
5
+
6
+ import totolo.lib.files
7
+ import totolo.lib.textformat
8
+
9
+ from ..story import TOStory
10
+ from ..theme import TOTheme
11
+ from .field import TOField
12
+ from .keyword import TOKeyword
13
+
14
+
15
class TOParser:
    """Helpers that turn ontology text (lines, files, archives) into entries."""

    @staticmethod
    def iter_entries(lines: Iterable[str]) -> Generator[List[str], None, None]:
        """
        Iterate through the "entries" in a text file. An entry is a block of lines
        that starts with a title line, followed by a line starting with "===".
        Each entry is yielded as a list of right-stripped lines.
        """
        linebuffer = []
        for line in lines:
            line = line.rstrip()
            if line.startswith("===") and linebuffer:
                # the line just before "===" is the title of the next entry
                prevlines = linebuffer[:-1]
                if any(x for x in prevlines):
                    yield prevlines
                linebuffer = [linebuffer[-1]]
            linebuffer.append(line)
        if linebuffer and any(line for line in linebuffer):
            yield linebuffer

    @staticmethod
    def iter_fields(lines: Iterable[str]) -> Generator[List[str], None, None]:
        """
        Iterate through the fields of an entry. Fields are blocks starting with ::
        Lines before the first field are ignored.
        """
        linebuffer = []
        for line in lines:
            if line.startswith("::"):
                if linebuffer:
                    yield linebuffer
                linebuffer = [line]
            elif linebuffer:
                linebuffer.append(line)
        if linebuffer:
            yield linebuffer

    @staticmethod
    def iter_listitems(lines: Iterable[str]) -> Generator[str, None, None]:
        """
        Turn a list of strings into items: one stripped item per non-blank line.
        """
        for line in lines:
            # note: once upon a time we used to have multiple items separated by commas
            # on a single line but that is no longer permitted.
            item = line.strip()
            if item:
                yield item

    @staticmethod
    def iter_kwitems(
        lines: Iterable[str]
    ) -> Generator[Tuple[str, str, str, str], None, None]:
        """
        Turn a list of strings into keyword items. Items may be newline or comma
        separated. Items may contain data in [] <> {} brackets.

        Yields (keyword, capacity, motivation, notes) tuples.
        Raises AssertionError on a closing bracket that does not match the
        bracket currently open.
        """
        def dict2row(tokendict):
            tkw = tokendict.get("", "").strip()
            tmotivation = tokendict.get("[", "").strip()
            tcapacity = tokendict.get("<", "").strip()
            tnotes = tokendict.get("{", "").strip()
            return tkw, tcapacity, tmotivation, tnotes

        field = "\n".join(lines)
        token = {}
        delcorr = {"[": "]", "{": "}", "<": ">"}
        farr = re.split("([\\[\\]\\{\\}\\<\\>,\\n])", field)
        state = ""
        splitters = ",\n"

        for part in farr:
            if not part:
                # re.split emits "" between adjacent delimiters (e.g. "]{");
                # "" is a substring of every string, so it must not reach
                # the separator test below and end the item prematurely.
                continue
            if part in delcorr:
                state = part
            elif part in delcorr.values():
                if delcorr.get(state, None) == part:
                    state = ""
                else:
                    raise AssertionError(
                        "Malformed field (bracket mismatch):\n  %s" % field
                    )
            elif part in splitters and not state:
                tokrow = dict2row(token)
                # an empty keyword just means two separators in a row:
                # we allow splitting by both newline and comma
                if tokrow[0].strip():
                    yield tokrow
                token = {}
            else:
                token[state] = token.get(state, "") + part

        tokrow = dict2row(token)
        if tokrow[0].strip():
            yield tokrow

    @classmethod
    def make_field(cls, lines, fieldtype):
        """
        Build a TOField from the lines of one field; the first line is the
        ":: name" header and the rest is data parsed according to ``fieldtype``.
        """
        field = TOField(
            fieldtype=fieldtype,
            name=lines[0].strip(": "),
            data=lines[1:],
            source=list(lines),
        )
        if fieldtype == "kwlist":
            for kwtuple in TOParser.iter_kwitems(field.data):
                field.parts.append(TOKeyword(*kwtuple))
        elif fieldtype == "list":
            for item in TOParser.iter_listitems(field.data):
                field.parts.append(item)
        elif fieldtype == "text":
            field.parts.append(
                totolo.lib.textformat.add_wordwrap(
                    "\n".join(
                        field.data)).strip())
        else:
            field.parts.append('\n'.join(field.data))
        return field

    @classmethod
    def populate_entry(cls, entry, lines):
        """
        Fill ``entry`` with the name and fields found in ``lines``.

        Raises AssertionError("missing name") when the second cleaned line
        is not a "===" underline.
        """
        entry.source.extend(lines)
        cleaned = []
        for line in lines:
            cline = line.strip()
            if cline or (cleaned and cleaned[-1]):
                cleaned.append(cline)  # no more than one blank line in a row
        # Raised explicitly (same exception type as before) so the check is
        # not stripped when Python runs with -O.
        if not (len(cleaned) > 1 and cleaned[1].startswith("===")):
            raise AssertionError("missing name")
        while cleaned and not cleaned[-1]:
            cleaned.pop()
        entry.name = cleaned[0]
        for fieldlines in cls.iter_fields(cleaned):
            while fieldlines and not fieldlines[-1]:
                fieldlines.pop()
            name = fieldlines[0].strip(": ")
            fieldtype = entry.field_type(name)
            field = cls.make_field(fieldlines, fieldtype)
            entry[field.name] = field
        return entry

    @classmethod
    def make_story(cls, lines):
        return cls.populate_entry(TOStory(), lines)

    @classmethod
    def make_theme(cls, lines):
        return cls.populate_entry(TOTheme(), lines)

    @classmethod
    def parse_stories(cls, lines):
        """
        Parse story entries from a string or an iterable of lines.

        When the first entry lists its own id first under "Collections", the
        ids of all following entries are appended to its "Component Stories".
        """
        collection_entry = None
        entries = []
        if isinstance(lines, str):
            lines = lines.splitlines()
        for idx, entrylines in enumerate(TOParser.iter_entries(lines)):
            entry = cls.make_story(entrylines)
            if idx == 0:
                mycols = entry.get("Collections").parts
                if mycols and mycols[0] == entry.sid:
                    collection_entry = entry
            if idx > 0 and collection_entry:
                field = collection_entry.setdefault("Component Stories")
                field.parts.append(entry.sid)
            entries.append(entry)
        return entries

    @classmethod
    def parse_themes(cls, lines):
        """Parse theme entries from a string or an iterable of lines."""
        if isinstance(lines, str):
            lines = lines.splitlines()
        return [
            cls.make_theme(entrylines)
            for entrylines in TOParser.iter_entries(lines)
        ]

    @classmethod
    def add_url(cls, to, url):
        """
        Download the tar archive at ``url`` and add its files to ``to``.

        Raises ValueError when ``url`` does not end in ".tar" or ".tar.gz".
        """
        suffixes = [".tar", ".tar.gz"]
        if any(url.endswith(x) for x in suffixes):
            with totolo.lib.files.remote_tar(url) as dirname:
                cls.add_files(to, dirname)
        else:
            raise ValueError(f"Expected url ending in one of {suffixes}")
        return to

    @classmethod
    def add_files(cls, to, paths):
        """
        Add files to ``to``; directories are searched for *.st.txt / *.th.txt.

        ``paths`` may be a single path, an iterable of paths, or None (no
        paths). Returns the result of ``to.refresh_relations()``.
        """
        if paths is None:
            paths = []
        elif isinstance(paths, str):
            paths = [paths]
        for path in paths:
            to.basepaths.add(path)
            if os.path.isdir(path):
                for filepath in totolo.lib.files.walk(path, r".*\.(st|th)\.txt$"):
                    cls._add_file(to, filepath)
            else:
                cls._add_file(to, path)
        return to.refresh_relations()

    @classmethod
    def _add_file(cls, to, path):
        """Parse one file (by its suffix) and register its entries on ``to``."""
        target = {}
        with open(path, "r", encoding='utf-8') as fh:
            entry_iterable = []
            if path.endswith(".th.txt"):
                entry_iterable = cls.parse_themes(fh)
                target = to.theme
            elif path.endswith(".st.txt"):
                entry_iterable = cls.parse_stories(fh)
                target = to.story
            for entry in entry_iterable:
                entry.source_location = path
                # weak reference back to the owning ontology
                entry.ontology = weakref.ref(to)
                to.entries.setdefault(path, [])
                to.entries[path].append(entry)
                target[entry.name] = entry
        return to
totolo/lib/__init__.py ADDED
File without changes
totolo/lib/files.py ADDED
@@ -0,0 +1,38 @@
1
+ import contextlib
2
+ import os
3
+ import os.path
4
+ import re
5
+ import tarfile
6
+ import tempfile
7
+ import urllib.request
8
+ from io import BytesIO
9
+
10
+
11
@contextlib.contextmanager
def remote_tar(url: str):
    """
    Download the tar archive at ``url`` and unpack it into a temporary directory.

    Yields the path of the single top-level directory when the archive
    contains exactly one, otherwise the temporary directory itself. The
    directory is removed when the context exits.
    """
    with tempfile.TemporaryDirectory() as dirname:
        with urllib.request.urlopen(url) as response:
            archive = BytesIO(response.read())
        with tarfile.open(name=None, fileobj=archive) as tar:
            # The archive comes from the network: where the interpreter
            # supports extraction filters, use the "data" filter so members
            # cannot be written outside ``dirname``.
            if hasattr(tarfile, "data_filter"):
                tar.extractall(dirname, filter="data")
            else:
                tar.extractall(dirname)
        paths = [os.path.join(dirname, x) for x in os.listdir(dirname)]
        # we expect a single (irrelevant) dir here as git wraps everything thus
        if len(paths) == 1 and os.path.isdir(paths[0]):
            yield paths[0]
        else:
            yield dirname
23
+
24
+
25
def walk(path: str, pattern: str = ".*", levels: int = -1):
    """
    Yield paths of files under ``path`` whose file name matches ``pattern``.

    Files directly inside ``path`` come first, then those of each
    subdirectory. ``levels`` is decremented on every descent and recursion
    stops when it is 0, so a negative value never stops it.
    """
    matcher = re.compile(pattern)
    # first pass: matching files in this directory
    for name in os.listdir(path):
        candidate = os.path.join(path, name)
        if matcher.match(name) and os.path.isfile(candidate):
            yield candidate
    # second pass: descend into subdirectories
    for name in os.listdir(path):
        child = os.path.join(path, name)
        if levels == 0 or not os.path.isdir(child):
            continue
        yield from walk(child, pattern, levels - 1)
@@ -0,0 +1,47 @@
1
+ import textwrap
2
+
3
+
4
def remove_wordwrap(text):
    """
    Join the lines of each paragraph with single spaces.

    Paragraphs are runs of non-blank lines; every line is stripped, and the
    resulting paragraphs are separated by exactly one blank line ('\n\n').
    Args:
        text: string
    Returns: string
    """
    paragraphs = []
    current = []

    for raw_line in text.split("\n"):
        stripped = raw_line.strip()
        if stripped:
            current.append(stripped)
            continue
        # blank line: close the paragraph collected so far, if any
        if current:
            paragraphs.append(" ".join(current))
            current = []

    if current:
        paragraphs.append(" ".join(current))

    return "\n\n".join(paragraphs)
29
+
30
def add_wordwrap(text, wrap_length=78):
    """
    Re-wrap text so that each line is at most 'wrap_length' characters long.

    Existing wrapping is removed first (see remove_wordwrap); every paragraph
    is then filled with textwrap and followed by one empty line.
    Args:
        text: string
        wrap_length: integer
    Returns: string
    """
    output = []

    for paragraph in remove_wordwrap(text).split("\n\n"):
        filled = textwrap.fill(paragraph, wrap_length)
        output.extend(line.strip() for line in filled.split("\n"))
        output.append("")

    return "\n".join(output)
totolo/story.py ADDED
@@ -0,0 +1,128 @@
1
+ import re
2
+
3
+ from .impl.core import sa
4
+ from .impl.entry import TOEntry
5
+
6
+
7
class TOStory(TOEntry):
    """A story entry with its declared fields and convenience accessors."""
    Title = sa("text", required=True)
    Date = sa("date", required=True)
    Description = sa("text")
    Authors = sa("blob")
    Variation = sa("blob")
    References = sa("list")
    Ratings = sa("list")
    Collections = sa("list")
    Component_Stories = sa("list")
    Related_Stories = sa("list")
    Choice_Themes = sa("kwlist")
    Major_Themes = sa("kwlist")
    Minor_Themes = sa("kwlist")
    Not_Themes = sa("kwlist")
    Other_Keywords = sa("kwlist")

    def iter_theme_entries(self):
        """
        Yield (weight, TOKeyword) pairs. TOKeyword contains the comment and other
        metadata associated with a theme entry in a story.
        """
        for weight in ["Choice Themes", "Major Themes", "Minor Themes", "Not Themes"]:
            field = self.get(weight)
            if field:
                for part in field.iter_parts():
                    yield weight, part

    def iter_themes(self):
        """
        Iterate over the theme objects associated with this story object,
        yielding (weight, theme) pairs.

        Raises RuntimeError when the story has no ontology to look themes up in.
        """
        if not getattr(self, "ontology", None):
            raise RuntimeError(
                "Story must be associated with an ontology to look up themes.")
        to = self.ontology()
        # ``ontology`` is assigned as a weak reference by the parser; calling
        # a dead reference gives None.
        if to is None:
            raise RuntimeError(
                "Story must be associated with an ontology to look up themes.")
        for weight, part in self.iter_theme_entries():
            theme = to.theme[part.keyword]
            yield weight, theme

    @property
    def date(self):
        """
        Return the date entry verbatim, as it is recorded in the text file.
        """
        return self.get("Date").text_canonical_contents().strip()

    @property
    def year(self):
        """
        Returns the year of the story, or the earliest year for a collection.
        A positive number is the year AD.
        A negative number is the year BC.
        Zero indicates that the information is missing (there is no year zero in
        AD/BC notation). Dates can be entered in a variety of ways but the year should
        always be present. If this function returns zero for a story the story's data
        entry is considered to be faulty.
        """
        date = self.date
        # the year is taken from the digits at the very start of the date
        yearmatch = re.match(r"\d+", date)
        if not yearmatch:
            return 0
        year = int(yearmatch.group())
        if "bc" in date.lower():
            year *= -1
        return year

    @property
    def sid(self):
        """The story id, which is the entry name."""
        return self.name

    @property
    def title(self):
        return self.get("Title").text_canonical_contents().strip()

    def verbose_description(self):
        """
        A description that combines the Description and References fields.
        """
        description = str(self.get("Description"))
        references = str(self.get("References")).strip()
        if references:
            description += "\n\nReferences:\n"
            for line in references.split("\n"):
                line = line.strip()
                if line:
                    description += line + "\n"
        return description

    def html_description(self):
        """
        Turn the verbose description into html.
        """
        import html
        # escape before wrapping so field contents cannot inject markup
        description = html.escape(str(self.get("Description")))
        references = html.escape(str(self.get("References")).strip())
        description = '<P class="obj-description"><BR>\n' + description
        description += "</P>\n"
        if references:
            description += '<P class="obj-description"><b>References:</b><BR>\n'
            for line in references.split("\n"):
                line = line.strip()
                if line:
                    aline = '<A href="{}">{}</A>'.format(line, line)
                    description += aline + "\n"
            description += "</P>\n"
        return description

    def html_short_description(self):
        """
        A limited length short description without embellishments like "references".
        """
        import html
        description = str(self.get("Description"))[:256]
        return html.escape(description)

    def _lookup(self):
        """Return the ontology's story mapping, or {} when there is no ontology."""
        try:
            return self.ontology().story
        except (AttributeError, TypeError):
            # TypeError: ``ontology`` still holds its non-callable default.
            # AttributeError: the weak reference is dead and returned None.
            return {}
totolo/theme.py ADDED
@@ -0,0 +1,71 @@
1
+ from .impl.core import sa
2
+ from .impl.entry import TOEntry
3
+
4
+
5
class TOTheme(TOEntry):
    """A theme entry with its declared fields and description helpers."""
    Description = sa("text", required=True)
    Parents = sa("list")
    Notes = sa("text")
    Examples = sa("text")
    References = sa("list")
    Aliases = sa("list")

    def verbose_description(self):
        """
        A description that combines the Description, Notes, Examples, Aliases
        and References fields; empty sections are left out.
        """
        description = str(self.get("Description"))
        examples = str(self.get("Examples")).strip()
        aliases = str(self.get("Aliases")).strip()
        notes = str(self.get("Notes")).strip()
        references = str(self.get("References")).strip()
        if notes:
            description += "\n\nNotes:\n" + notes
        if examples:
            description += "\n\nExamples:\n" + examples
        if aliases:
            description += "\n\nAliases:\n" + aliases
        if references:
            description += "\n\nReferences:\n"
            for line in references.split("\n"):
                line = line.strip()
                if line:
                    description += line + "\n"
        return description

    def html_description(self):
        """
        Turn the verbose description into html.
        """
        import html
        # escape before wrapping so field contents cannot inject markup
        description = html.escape(str(self.get("Description")))
        examples = html.escape(str(self.get("Examples")).strip())
        aliases = html.escape(str(self.get("Aliases")).strip())
        notes = html.escape(str(self.get("Notes")).strip())
        references = html.escape(str(self.get("References")).strip())
        description = '<P class="obj-description"><BR>\n' + description
        description += "</P>\n"
        if notes:
            description += '<P class="obj-description"><b>Notes:</b><BR>\n' + notes
            description += "</P>\n"
        if examples:
            description += '<P class="obj-description"><b>Examples:</b><BR>\n' + examples
            description += "</P>\n"
        if aliases:
            aliases = ', '.join(aliases.split("\n"))
            description += '<P class="obj-description"><b>Aliases:</b><BR>\n' + aliases
            description += "</P>\n"
        if references:
            description += '<P class="obj-description"><b>References:</b><BR>\n'
            for line in references.split("\n"):
                line = line.strip()
                if line:
                    aline = '<A href="{}">{}</A>'.format(line, line)
                    description += aline + "\n"
            description += "</P>\n"
        return description

    def html_short_description(self):
        """
        The first 256 characters of the description, html-escaped.
        """
        import html
        description = str(self.get("Description"))[:256]
        return html.escape(description)

    def _lookup(self):
        """Return the ontology's theme mapping, or {} when there is no ontology."""
        try:
            return self.ontology().theme
        except (AttributeError, TypeError):
            # TypeError: ``ontology`` still holds its non-callable default.
            # AttributeError: the weak reference is dead and returned None.
            return {}
@@ -0,0 +1,179 @@
1
+ import copy
2
+ import os.path
3
+ import random
4
+ from collections import defaultdict
5
+
6
+ from .impl.core import TOObject, a
7
+
8
+
9
class TODict(dict):
    """A plain ``dict`` subclass; it adds no behaviour of its own."""
11
+
12
+
13
class ThemeOntology(TOObject):
    """
    Container for themes and stories.

    ``theme`` and ``story`` map entry names to entries, ``entries`` maps each
    source path to the list of entries read from it, and ``basepaths`` holds
    the paths the ontology was loaded from.
    """
    theme = a(TODict())
    story = a(TODict())
    entries = a({})
    basepaths = a(set())

    def __len__(self):
        return sum(len(v) for v in self.entries.values())

    def __str__(self):
        return f"<{len(self.theme)} themes, {len(self.story)} stories>"

    def stories(self):
        for story in self.story.values():
            yield story

    def themes(self):
        for theme in self.theme.values():
            yield theme

    def astory(self):
        """Return one story picked at random."""
        # random.sample needs a sequence; a dict view is rejected by
        # Python 3.11 and later.
        return random.sample(list(self.story.values()), 1)[0]

    def atheme(self):
        """Return one theme picked at random."""
        return random.sample(list(self.theme.values()), 1)[0]

    def dataframe(self, implied_themes=True):
        """
        Return a pandas DataFrame with one row per (story, theme entry) and
        the columns story_id, title, date, theme, weight.
        """
        import pandas as pd
        data = []
        for story in self.stories():
            for weight, part in story.iter_theme_entries():
                # NOTE(review): ``themes`` (the keyword plus its ancestors)
                # is collected but never used for the rows below, so
                # ``implied_themes`` has no effect on the result — confirm
                # the intended output before changing this.
                themes = [part.keyword]
                if implied_themes and part.keyword in self.theme:
                    theme = self.theme[part.keyword]
                    themes.extend(theme.ancestors())
                data.append([story.name, story["Title"],
                            story["Date"], part.keyword, weight])
        return pd.DataFrame(
            data, columns=["story_id", "title", "date", "theme", "weight"])

    def validate(self):
        """Yield all warnings from the individual validate_* methods."""
        yield from self.validate_entries()
        yield from self.validate_storythemes()
        yield from self.validate_cycles()

    def validate_entries(self):
        """Validate basic format of theme and story entries."""
        seen = defaultdict(set)
        for path, entries in self.entries.items():
            for entry in entries:
                for warning in entry.validate():
                    yield f"{path}: {warning}"
                if entry.name in seen[type(entry)]:
                    yield f"{path}: Multiple {type(entry)} with name '{entry.name}'"
                # record the name so a later duplicate can be detected
                seen[type(entry)].add(entry.name)

    def validate_storythemes(self):
        """Detect undefined themes used in stories."""
        for story in self.stories():
            for weight in ["choice", "major", "minor", "not"]:
                field = f"{weight.capitalize()} Themes"
                for kwf in story.get(field):
                    if kwf.keyword not in self.theme:
                        name, kw = story.name, kwf.keyword
                        yield f"{name}: Undefined '{weight} theme' with name '{kw}'"

    def validate_cycles(self):
        """Detect cycles (stops after first cycle encountered)."""
        parents = {}
        for theme in self.themes():
            parents[theme.name] = list(theme.get("Parents"))

        def dfs(current, tpath=None):
            tpath = tpath or []
            if current in tpath:
                cycle = tpath[tpath.index(current):]
                return f"Cycle: {cycle}"
            tpath.append(current)
            # a parent naming an undefined theme has no parents of its own
            for parent in parents.get(current, ()):
                msg = dfs(parent, tpath)
                if msg:
                    return msg
            tpath.pop()
            return None

        for theme in self.themes():
            msg = dfs(theme.name)
            if msg:
                yield msg
                break

    def write_clean(self, verbose=False):
        """
        Write the ontology back to its source file while cleaning up syntax and
        omitting unknown field names.
        """
        self.write(cleaned=True, verbose=verbose)

    def write(self, prefix=None, cleaned=False, verbose=False):
        """
        Write every entry list back to a file.

        Args:
            prefix: when given, files are written under this directory,
                keeping their path relative to the common base path.
            cleaned: write canonical text instead of the original text.
            verbose: print each path after writing it.
        """
        old_prefix = "" if prefix is None else os.path.commonpath(self.basepaths)
        for path, entries in self.entries.items():
            if prefix is not None:
                rel_path = os.path.relpath(path, old_prefix)
                path = os.path.join(prefix, rel_path)
            self._writefile(path, entries, cleaned)
            if verbose:
                print(f"wrote: {path}")

    def print_warnings(self):
        """
        Run validate and print warnings to stdout.
        """
        for msg in self.validate():
            print(msg)
        return self

    def refresh_relations(self):
        """
        The ontology keeps track of parent/child relations in order to facilitate
        quicker traversal of this hierarchy in both directions, for both themes
        and stories. This method is invoked when the ontology has changed. It is
        invoked automatically by the parser and usually doesn't need to be invoked
        manually.
        """
        for theme in self.themes():
            theme.parents.clear()
            theme.children.clear()
        for story in self.stories():
            story.parents.clear()
            story.children.clear()
        for theme in self.themes():
            for ptheme_name in theme["Parents"]:
                if ptheme_name in self.theme:
                    theme.parents.add(ptheme_name)
                    self.theme[ptheme_name].children.add(theme.name)
        for story in self.stories():
            for pstory_name in story["Collections"]:
                if pstory_name in self.story:
                    story.parents.add(pstory_name)
                    self.story[pstory_name].children.add(story.name)
            for cstory_name in story["Component Stories"]:
                if cstory_name in self.story:
                    story.children.add(cstory_name)
                    self.story[cstory_name].parents.add(story.name)
        return self

    def _writefile(self, path, entries, cleaned):
        """Write ``entries`` to ``path``, creating its directory if needed."""
        cskey = "Component Stories"
        dirname = os.path.dirname(path)
        # a bare file name has no directory part, and makedirs("") fails
        if dirname:
            os.makedirs(dirname, exist_ok=True)
        with open(path, "w", encoding='utf-8') as fh:
            sids = set(e.name for e in entries)
            for idx, entry in enumerate(entries):
                if idx == 0 and entry.name in entry["Collections"]:
                    # Component stories that live in this same file are not
                    # written out; work on a copy so the entry in memory
                    # keeps its full list.
                    field = entry.get(cskey)
                    parts = [x for x in field.parts if x not in sids]
                    if parts != list(field.parts):
                        entry = copy.deepcopy(entry)
                        if parts:
                            field = entry.setdefault(cskey)
                            field.parts = parts
                        else:
                            entry.delete(cskey)
                lines = entry.text_canonical() if cleaned else entry.text_original()
                fh.write(lines)
                fh.write("\n\n")
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Theme Ontology
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,16 @@
1
+ Metadata-Version: 2.1
2
+ Name: totolo
3
+ Version: 0.1
4
+ Summary: The Python interface to themeontology.org.
5
+ Author-email: Mikael Onsjö <mikael@odinlake.net>
6
+ Description-Content-Type: text/markdown
7
+ Classifier: License :: OSI Approved :: MIT License
8
+ Project-URL: Documentation, https://github.com/theme-ontology/theming
9
+ Project-URL: Home, https://www.themeontology.org/
10
+ Project-URL: Source, https://github.com/theme-ontology/python-totolo
11
+
12
+ # python totolo
13
+
14
+ This repository contains a Python package, totolo,
15
+ for working with data from [the theme repository](https://github.com/theme-ontology/theming/).
16
+
@@ -0,0 +1,18 @@
1
+ totolo/__init__.py,sha256=lsBiNOLPDdQjpedhdBAwzzyAH5lWE0lfSTURkR6CsfI,189
2
+ totolo/api.py,sha256=Xx63I9PYmL8hxy9NRSQa3rlYqjqBhXMAFCVqnvrFkt4,1247
3
+ totolo/story.py,sha256=NSx36RDe3R8qruXSBYZLYFBBaDBXPjPXmS5xqCz7PQc,4242
4
+ totolo/theme.py,sha256=PxnFINPT4_UDiJElg6GIMHV8ifbKv-oL4WeOgMRgaUo,2665
5
+ totolo/themeontology.py,sha256=CKjghNVbC9yoxm_NQlhkm1jScQ1ugMh5ZEdmWA94Wkg,6588
6
+ totolo/impl/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ totolo/impl/core.py,sha256=QTsdkQBgBQcXFoiO1F3qStBeQMujJ_jKWXjZH5U5VnU,2731
8
+ totolo/impl/entry.py,sha256=kmfm5zbH7yHH0CmnOSu7Jm27atWrCw6xJpOc5XxneRw,3833
9
+ totolo/impl/field.py,sha256=4vub-YUL1bAWxdDCifa1VG-Djfr6aZhTKyeoBxN5PxY,3159
10
+ totolo/impl/keyword.py,sha256=oNRa7-71gAInNMQozW4EfKZhxo2N-RkQWWdx9KM3s7Q,424
11
+ totolo/impl/parser.py,sha256=kKmlrA-vIGVsKmstthYtje1aMCC8trsLCc0YYPfSb6Y,7980
12
+ totolo/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ totolo/lib/files.py,sha256=Q-9PESBqoetdJcE0NMUuLJQ_OblOXaDVgpiOPz-vRvk,1192
14
+ totolo/lib/textformat.py,sha256=bv8bmsGIHsqccXSwrTqiPLhMbgRwhhlAEt11oMFDu7M,1408
15
+ totolo-0.1.dist-info/LICENSE,sha256=aQxSEqA2gmQp240XgIJiSZZ_lYvjAk80lmTsdYVl5mY,1071
16
+ totolo-0.1.dist-info/WHEEL,sha256=Sgu64hAMa6g5FdzHxXv9Xdse9yxpGGMeagVtPMWpJQY,99
17
+ totolo-0.1.dist-info/METADATA,sha256=Sd2FDB3i6CC75ZUuQiuonSAwCwPXhTDn4EkkBOAHmhU,595
18
+ totolo-0.1.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: flit 3.9.0
3
+ Root-Is-Purelib: true
4
+ Tag: py2-none-any
5
+ Tag: py3-none-any