superpage 1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superpage-1/LICENSE +7 -0
- superpage-1/PKG-INFO +20 -0
- superpage-1/README.md +4 -0
- superpage-1/pyproject.toml +30 -0
- superpage-1/setup.cfg +4 -0
- superpage-1/superpage/__init__.py +16 -0
- superpage-1/superpage/__main__.py +56 -0
- superpage-1/superpage/book.py +445 -0
- superpage-1/superpage/builder.py +380 -0
- superpage-1/superpage/config.py +289 -0
- superpage-1/superpage/event.py +294 -0
- superpage-1/superpage/linter.py +197 -0
- superpage-1/superpage/meta.py +3 -0
- superpage-1/superpage/monitor.py +77 -0
- superpage-1/superpage/navigation.py +627 -0
- superpage-1/superpage/page.py +212 -0
- superpage-1/superpage/reference.py +66 -0
- superpage-1/superpage/resources/connection.js +85 -0
- superpage-1/superpage/resources/nav.js +226 -0
- superpage-1/superpage/resources/resources.js +39 -0
- superpage-1/superpage/resources/style.css +147 -0
- superpage-1/superpage/server.py +261 -0
- superpage-1/superpage/summary.py +48 -0
- superpage-1/superpage/tasks.py +90 -0
- superpage-1/superpage/watcher.py +86 -0
- superpage-1/superpage.egg-info/PKG-INFO +20 -0
- superpage-1/superpage.egg-info/SOURCES.txt +28 -0
- superpage-1/superpage.egg-info/dependency_links.txt +1 -0
- superpage-1/superpage.egg-info/requires.txt +3 -0
- superpage-1/superpage.egg-info/top_level.txt +1 -0
superpage-1/LICENSE
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
Copyright 2025 Klas Lindberg
|
|
2
|
+
|
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
|
4
|
+
|
|
5
|
+
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
|
6
|
+
|
|
7
|
+
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
superpage-1/PKG-INFO
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: superpage
|
|
3
|
+
Version: 1
|
|
4
|
+
Summary: Lightweight Markdown builder
|
|
5
|
+
Author-email: Klas Lindberg <flimango@protonmail.com>
|
|
6
|
+
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Mysingen/superpage
|
|
8
|
+
Project-URL: Issues, https://github.com/Mysingen/superpage/issues
|
|
9
|
+
Requires-Python: >=3.11
|
|
10
|
+
Description-Content-Type: text/markdown
|
|
11
|
+
License-File: LICENSE
|
|
12
|
+
Requires-Dist: pyyaml>=6.0.3
|
|
13
|
+
Requires-Dist: watchdog>=6.0.0
|
|
14
|
+
Requires-Dist: markdown>=3.10
|
|
15
|
+
Dynamic: license-file
|
|
16
|
+
|
|
17
|
+
# Superpage
|
|
18
|
+
|
|
19
|
+
Superpage is a lightweight Markdown processor that offers snappy live editing,
|
|
20
|
+
and human readable HTML output with very few dependencies.
|
superpage-1/README.md
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "superpage"
|
|
3
|
+
version = "1"
|
|
4
|
+
authors = [
|
|
5
|
+
{ name="Klas Lindberg", email="flimango@protonmail.com" },
|
|
6
|
+
]
|
|
7
|
+
description = "Lightweight Markdown builder"
|
|
8
|
+
readme = "README.md"
|
|
9
|
+
license = "MIT"
|
|
10
|
+
license-files = ["LICENSE"]
|
|
11
|
+
requires-python = ">=3.11"
|
|
12
|
+
dependencies = [
|
|
13
|
+
"pyyaml>=6.0.3",
|
|
14
|
+
"watchdog>=6.0.0",
|
|
15
|
+
"markdown>=3.10",
|
|
16
|
+
]
|
|
17
|
+
|
|
18
|
+
[project.urls]
|
|
19
|
+
Homepage = "https://github.com/Mysingen/superpage"
|
|
20
|
+
Issues = "https://github.com/Mysingen/superpage/issues"
|
|
21
|
+
|
|
22
|
+
[build-system]
|
|
23
|
+
requires = ["setuptools>=60"]
|
|
24
|
+
build-backend = "setuptools.build_meta"
|
|
25
|
+
|
|
26
|
+
[tool.setuptools]
|
|
27
|
+
packages = ["superpage"]
|
|
28
|
+
|
|
29
|
+
[tool.setuptools.package-data]
|
|
30
|
+
superpage = ["resources/*"]
|
superpage-1/setup.cfg
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
|
|
3
|
+
from .book import Book
|
|
4
|
+
from .builder import Builder
|
|
5
|
+
from .config import Config, Flags
|
|
6
|
+
from .linter import LinkProblem, HtmlAnchorExtractor
|
|
7
|
+
from .meta import PKG_ROOT
|
|
8
|
+
from .monitor import Logger
|
|
9
|
+
from .navigation import NavData
|
|
10
|
+
from .page import Page
|
|
11
|
+
from .event import Event, EventKind, SeverityLevel
|
|
12
|
+
from .reference import Reference, ReferenceExtractor
|
|
13
|
+
from .server import Server, Client, ServerObserveKind
|
|
14
|
+
from .summary import Summary
|
|
15
|
+
from .tasks import BuilderTask, ServerTask
|
|
16
|
+
from .watcher import Watcher
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
import sys
|
|
3
|
+
import threading
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
# make superpage importable as package
|
|
7
|
+
PKG_ROOT = pathlib.Path(__file__).parent
|
|
8
|
+
sys.path.insert(0, str(PKG_ROOT.parent))
|
|
9
|
+
|
|
10
|
+
for p in sys.path:
|
|
11
|
+
print(' ', p)
|
|
12
|
+
|
|
13
|
+
import superpage
|
|
14
|
+
|
|
15
|
+
if __name__ == '__main__':
    # Command line: <command> <config file> [options...]
    # Supported commands are 'build' and 'serve'; the only option looked at
    # is 'clean' (build command only).
    if len(sys.argv) < 3:
        # sys.argv[2] is the *second* argument, not the third
        print('first argument must be a command, second a config file')
        sys.exit(1)
    command = sys.argv[1]
    cfg_file = pathlib.Path(sys.argv[2])
    config = superpage.Config(cfg_file.parent)
    flags = superpage.Flags(auto_dot_pages=True)

    # print every event produced while loading the config file and stop at
    # the first one that is reported as a problem:
    for o in config.from_file(cfg_file).get_events():
        print(o)
        if o.is_problem():
            sys.exit(1)

    if command == 'build':
        # options follow the config file; the config file path itself must
        # not be mistaken for an option
        options = sys.argv[3:]

        logger = superpage.Logger(
            filter=superpage.SeverityLevel.INFO)
        builder = superpage.Builder(logger, flags=flags)

        # perf_counter() is monotonic, so the reported duration cannot be
        # skewed by wall clock adjustments
        begin = time.perf_counter()
        builder.recover(cfg_file)
        if 'clean' in options:
            builder.clean()
        num_pages = builder.build()
        end = time.perf_counter()
        print(f'{num_pages} pages built in {end - begin} seconds')
        sys.exit(0)

    if command == 'serve':
        queue = superpage.monitor.EventQueue()
        logger = superpage.Logger()
        builder = superpage.BuilderTask(cfg_file, queue, logger)
        server = superpage.ServerTask(cfg_file, queue, logger)
        client = superpage.Client(cfg_file, logger)
        client.connect(timeout=5)
        del client

        server.join()
        builder.join()
        sys.exit(0)

    # both known commands exit above, so getting here means the command was
    # not recognized; report it instead of silently exiting with status 0
    print(f'unknown command: {command}')
    sys.exit(1)
|
|
@@ -0,0 +1,445 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import pathlib
|
|
3
|
+
|
|
4
|
+
from superpage.config import Config, Flags
|
|
5
|
+
from superpage.linter import HtmlAnchorExtractor, SeverityLevel
|
|
6
|
+
from superpage.event import Event, EventKind
|
|
7
|
+
from superpage.monitor import Logger
|
|
8
|
+
from superpage.navigation import DotPages, is_link_loop
|
|
9
|
+
from superpage.page import Page
|
|
10
|
+
from superpage.summary import Summary
|
|
11
|
+
|
|
12
|
+
def all_files(
    path: pathlib.Path
) -> list[pathlib.Path]:
    """Return every entry found below *path*, sorted, minus link loops.

    The tree is searched with ``path.rglob('*')``. Entries for which
    ``is_link_loop()`` is true are left out of the result.
    """
    kept = []
    for entry in sorted(path.rglob('*')):
        if is_link_loop(entry):
            continue
        kept.append(entry)
    return kept
|
|
16
|
+
|
|
17
|
+
def all_md_files(
    path: pathlib.Path
) -> list[pathlib.Path]:
    """Return the sorted ``*.md`` paths below *path* that are not link loops."""
    markdown_paths = sorted(path.rglob('*.md'))
    result = []
    for candidate in markdown_paths:
        if not is_link_loop(candidate):
            result.append(candidate)
    return result
|
|
21
|
+
|
|
22
|
+
def all_html_files(
    path: pathlib.Path
) -> list[pathlib.Path]:
    """Return the sorted ``*.html`` paths below *path* that are not link loops."""
    found = sorted(path.rglob('*.html'))
    # filter after sorting so the result keeps the sorted order
    return [html for html in found if not is_link_loop(html)]
|
|
26
|
+
|
|
27
|
+
class Book:
|
|
28
|
+
flags : Flags
|
|
29
|
+
root_dir : pathlib.Path
|
|
30
|
+
config : Config
|
|
31
|
+
logger : Logger
|
|
32
|
+
path : pathlib.Path
|
|
33
|
+
title : str
|
|
34
|
+
_pages : list [ Page ] # all book pages. shares objects with dot_pages
|
|
35
|
+
dot_pages : DotPages # subset of all pages known for the book
|
|
36
|
+
site_path : pathlib.Path
|
|
37
|
+
|
|
38
|
+
def __init__(
|
|
39
|
+
self,
|
|
40
|
+
root_dir : pathlib.Path,
|
|
41
|
+
config : Config,
|
|
42
|
+
logger : Logger,
|
|
43
|
+
path : pathlib.Path,
|
|
44
|
+
title : str,
|
|
45
|
+
*,
|
|
46
|
+
flags : Flags
|
|
47
|
+
):
|
|
48
|
+
self.flags = flags
|
|
49
|
+
assert(root_dir.is_absolute())
|
|
50
|
+
assert(path.is_absolute())
|
|
51
|
+
self.root_dir = root_dir
|
|
52
|
+
self.config = config
|
|
53
|
+
self.logger = logger
|
|
54
|
+
self.path = path
|
|
55
|
+
self._pages = []
|
|
56
|
+
self.title = title
|
|
57
|
+
self.dot_pages = None
|
|
58
|
+
|
|
59
|
+
self.site_path = None
|
|
60
|
+
if self.config.site_dir:
|
|
61
|
+
rel_site_path = self.path.relative_to(self.root_dir)
|
|
62
|
+
self.site_path = self.config.site_dir / rel_site_path
|
|
63
|
+
|
|
64
|
+
def from_dot_pages(
    self
):
    """Load, or refresh, the book's root ``.pages`` data.

    A new DotPages object is always prepared. If the root ``.pages`` file
    does not exist, any previously held DotPages object is dropped and,
    under the ``auto_dot_pages`` flag, the new object is written to disk
    after logging an INFO event. Afterwards an already held DotPages object
    is updated from the file; otherwise the new object is adopted and
    loaded from the file.
    """
    dot_pages_file = self.path / '.pages'
    fresh = DotPages(
        self.root_dir,
        self.config,
        self.logger,
        flags=self.flags)

    if not dot_pages_file.exists():
        # the member must be instantiated for building to work at all, but
        # saving it to disk is optional. reset it first in case this is a
        # recovery after the file was deleted:
        self.dot_pages = None
        if self.flags.auto_dot_pages:
            self.logger.put(Event(
                SeverityLevel.INFO,
                dot_pages_file, None,
                'automatically creating .pages file',
                EventKind.AUTO_ADD_DOT_PAGES))
            fresh.to_file(dot_pages_file)

    if not self.dot_pages:
        self.dot_pages = fresh
        fresh.from_file(dot_pages_file, table=self.dot_pages)
    else:
        self.dot_pages.update_from_file(dot_pages_file, table=self.dot_pages)
|
|
92
|
+
|
|
93
|
+
def clean(
    self
):
    """Delete every ``.html`` file found below the book's site path.

    An INFO event of kind CLEAN_OUTPUT is logged for each file before it is
    unlinked. Does nothing when the book has no site path.
    """
    if self.site_path is None:
        # __init__ leaves site_path as None when the config has no site_dir;
        # there is no output tree to search in that case
        return

    for html in all_html_files(self.site_path):
        self.logger.put(Event(
            SeverityLevel.INFO,
            pathlib.PurePosixPath(html), None,
            '-',
            EventKind.CLEAN_OUTPUT))
        os.unlink(html)
|
|
105
|
+
|
|
106
|
+
def add_page(
|
|
107
|
+
self,
|
|
108
|
+
page : Page
|
|
109
|
+
) -> None:
|
|
110
|
+
if page not in self._pages:
|
|
111
|
+
self._pages.append(page)
|
|
112
|
+
return True
|
|
113
|
+
return False
|
|
114
|
+
|
|
115
|
+
def remove_page(
|
|
116
|
+
self,
|
|
117
|
+
page : Page
|
|
118
|
+
) -> None:
|
|
119
|
+
if page not in self._pages:
|
|
120
|
+
return
|
|
121
|
+
self._pages.remove(page)
|
|
122
|
+
|
|
123
|
+
def patch_dot_pages(
    self,
    path: pathlib.Path,
    *,
    table: DotPages
) -> Page | DotPages:
    """Attach an unmentioned ``.md`` or ``.pages`` file to the root DotPages.

    A WARNING event is always logged first. A ``.md`` file becomes a new
    Page that is added with a guessed title and ``auto_added=True``. A
    ``.pages`` file becomes a new DotPages object that is loaded from the
    file (with *table* passed along) and added under a title made from the
    directory parts relative to the root directory. Under the
    ``auto_dot_pages`` flag the root DotPages object is also written back to
    its own path.

    Returns the newly created Page or DotPages entry.
    Raises AssertionError for any other kind of path.
    """
    self.logger.put(Event(
        SeverityLevel.WARNING,
        path, self.path / '.pages',
        'source file is not mentioned',
        EventKind.NO_DOT_PAGES_MENTION))

    # add orphan .md and .pages files to the book's nav tree:
    if path.suffix == '.md':
        entry = Page(self.root_dir, self.config, self.logger, path)
        title = entry.guess_title()
        self.dot_pages.add_page(title, entry, auto_added=True)

    elif path.name == '.pages':
        # need an awkward default for the title but can make something from
        # the file path:
        rel_dir_path = path.relative_to(self.root_dir).parent
        title = ' / '.join(rel_dir_path.parts)
        entry = DotPages(
            self.root_dir, self.config, self.logger, flags=self.flags)
        entry.from_file(path, table=table)
        self.dot_pages.add_dot_pages(title, entry)

    else:
        # raised explicitly so that the message is kept and the check is not
        # stripped when Python runs with -O
        raise AssertionError('unreachable code')

    if self.flags.auto_dot_pages:
        # also save the root .pages file under the auto flag
        self.logger.put(Event(
            SeverityLevel.INFO,
            path, self.path / '.pages',
            'source file added automatically',
            EventKind.AUTO_ADD_DOT_PAGES))
        self.dot_pages.to_file(self.dot_pages.path)

    return entry
|
|
166
|
+
|
|
167
|
+
def build_page(
|
|
168
|
+
self,
|
|
169
|
+
md_path : pathlib.Path,
|
|
170
|
+
*,
|
|
171
|
+
table : DotPages
|
|
172
|
+
):
|
|
173
|
+
assert(md_path.is_absolute())
|
|
174
|
+
|
|
175
|
+
# the same source file may be mentioned multiple times in a .pages
|
|
176
|
+
# file so we can get more than one page back:
|
|
177
|
+
page_list = self.dot_pages.get_pages(md_path)
|
|
178
|
+
|
|
179
|
+
# the page is not referenced by any .pages that is reachable through
|
|
180
|
+
# the root .pages file, or there is no root .pages file at all:
|
|
181
|
+
if not page_list:
|
|
182
|
+
page_list.append(
|
|
183
|
+
self.patch_dot_pages(md_path, table=table))
|
|
184
|
+
|
|
185
|
+
for page in page_list:
|
|
186
|
+
self.add_page(page) # in case we never saw it before
|
|
187
|
+
# this is not the time or place to worry about build avoidance. just
|
|
188
|
+
# call build() on the same content multiple times.
|
|
189
|
+
page.build()
|
|
190
|
+
|
|
191
|
+
def recover_page(
|
|
192
|
+
self,
|
|
193
|
+
path : pathlib.Path,
|
|
194
|
+
*,
|
|
195
|
+
table : DotPages
|
|
196
|
+
):
|
|
197
|
+
assert(path.is_absolute())
|
|
198
|
+
|
|
199
|
+
# the same source file may be mentioned multiple times in a .pages
|
|
200
|
+
# file so we can get more than one page back:
|
|
201
|
+
page_list = self.dot_pages.get_pages(path)
|
|
202
|
+
|
|
203
|
+
# the page is not referenced by any .pages that is reachable through
|
|
204
|
+
# the root .pages file, or there is no root .pages file at all:
|
|
205
|
+
if not page_list:
|
|
206
|
+
page_list.append(
|
|
207
|
+
self.patch_dot_pages(path, table=table))
|
|
208
|
+
|
|
209
|
+
for page in page_list:
|
|
210
|
+
self.add_page(page) # in case we never saw it before
|
|
211
|
+
page.recover()
|
|
212
|
+
|
|
213
|
+
def remove_any(
    self,
    path: pathlib.Path
):
    """Forget everything the book knows about the removed file *path*.

    *path* must be absolute. All entries for it are removed from the root
    DotPages object. For each Page entry the page is dropped from the flat
    page list and the ``.html`` file next to *path* is deleted if it exists
    (logging a CLEAN_OUTPUT event). If a DotPages entry was removed, or
    *path* is the root ``.pages`` file itself, the DotPages data is reset
    and the whole book is recovered.
    """
    assert path.is_absolute()

    entry_list = self.dot_pages.remove_any(path)
    do_recovery = False

    for e in entry_list:
        reference = e.reference

        if type(reference) is Page:
            self.remove_page(reference)
            html = path.with_suffix('.html')
            # a missing output file must not end the whole call: the same
            # source can be listed several times, so later entries still
            # need to be removed and the recovery check below must still run
            if html.exists():
                self.logger.put(Event(
                    SeverityLevel.INFO,
                    pathlib.PurePosixPath(html), None,
                    '-',
                    EventKind.CLEAN_OUTPUT))
                os.unlink(html)

        if type(reference) is DotPages:
            do_recovery = True

    if path == self.dot_pages.path:
        do_recovery = True

    if do_recovery:
        self.dot_pages = None
        self.recover()
|
|
245
|
+
|
|
246
|
+
def move_any(
    self,
    src: pathlib.Path,
    dst: pathlib.Path,
    *,
    table: DotPages
) -> None:
    """Handle a file that moved from *src* to *dst* (both absolute).

    All entries for *src* are removed from the root DotPages object. For
    each Page entry the page is dropped from the flat page list, a
    CLEAN_OUTPUT event is logged for the ``.html`` file next to *src*, that
    file is deleted if present, and *dst* is built. The DotPages data is
    then reset and the book recovered if any Page or DotPages entry was
    removed, or if either path is the root ``.pages`` file.
    """
    assert src.is_absolute()
    assert dst.is_absolute()

    removed = self.dot_pages.remove_any(src)
    needs_recovery = False

    for entry in removed:
        kind = type(entry.reference)

        if kind == Page:
            self.remove_page(entry.reference)
            stale_html = src.with_suffix('.html')
            self.logger.put(Event(
                SeverityLevel.INFO,
                pathlib.PurePosixPath(stale_html), None,
                '-',
                EventKind.CLEAN_OUTPUT))
            if stale_html.is_file():
                os.unlink(stale_html)
            self.build_page(dst, table=self.dot_pages)
            needs_recovery = True

        if kind == DotPages:
            needs_recovery = True

    root_file = self.dot_pages.path
    if src == root_file or dst == root_file:
        needs_recovery = True

    if not needs_recovery:
        return
    self.dot_pages = None
    self.recover()
|
|
283
|
+
|
|
284
|
+
def build_nav(
    self,
    dot_pages_path: pathlib.Path
):
    """Re-read a changed ``.pages`` file and reorder the book's pages.

    Overall strategy: reorder Page and DotPages objects within the existing
    DotPages object based on the order they are seen in the changed .pages
    file, then reorder the flat page list based on the order pages are seen
    in the updated DotPages tree. Page objects already in the flat list are
    reused (matched by path); pages seen for the first time are added.
    """
    assert self.dot_pages  # books at least always have fake .pages assigned

    dps = self.dot_pages.get_dot_pages(dot_pages_path)
    if dot_pages_path == self.dot_pages.path:
        # the root .pages file should not have aliases in the navigation
        # data since it is not referenced by any other .pages file and must
        # not reference itself:
        assert len(dps) == 1

    # pass in a table of all existing entries to not recreate entries that
    # have already been built before. the table is passed by keyword, like
    # the update_from_file() call in from_dot_pages().
    table = self.dot_pages
    dps[0].update_from_file(dot_pages_path, table=table)

    # first known page object per path, so each lookup below is a single
    # dict access instead of a list search
    known = {}
    for page in self._pages:
        known.setdefault(page.path, page)

    # update the book's flat page list ordering and build reachable pages
    updated = []
    for d in dps:
        for p in d.list_pages(recursive=True) or []:
            updated.append(known.get(p.path, p))
            # NOTE(review): build() is taken to run for every reachable
            # page, not only new ones - confirm against the indented source
            p.build()  # do build avoidance in there, not here

    self._pages = updated
|
|
323
|
+
|
|
324
|
+
def build(
    self
) -> int:
    """Build every ``.md`` file below the book's path.

    Returns the number of markdown files found.
    """
    assert self.path.is_absolute()

    # two things can go wrong: .md files that no .pages file lists, and
    # .pages entries without an .md file behind them. DotPages.from_file()
    # checks that files referenced in .pages files exist. here *all* md in
    # the project is built and a problem is logged for any file that no
    # .pages entry mentions.
    sources = all_md_files(self.path)
    for source in sources:
        self.build_page(self.path / source, table=self.dot_pages)

    return len(sources)
|
|
340
|
+
|
|
341
|
+
def recover(
    self
) -> int:
    """Rebuild the book's in-memory state from the files on disk.

    Loads the root ``.pages`` data, then walks all files below the book's
    path: markdown files are recovered as pages, and ``.pages`` files that
    the root DotPages object does not know yet are patched in.

    Returns the number of markdown files seen.
    """
    assert self.path.is_absolute()

    # this will discover all, none or some of the book's .pages files:
    self.from_dot_pages()

    seen_md = 0
    for found in all_files(self.path):
        candidate = self.path / found

        if candidate.suffix == '.md':
            # sets title=None but the next file could be a .pages that sets
            # the title. order of discovery is awkward...
            self.recover_page(candidate, table=self.dot_pages)
            seen_md += 1
            continue

        if candidate.name != '.pages':
            continue

        # only patch in .pages files that from_dot_pages() did not discover
        if not self.dot_pages.get_dot_pages(candidate):
            self.patch_dot_pages(candidate, table=self.dot_pages)

    return seen_md
|
|
364
|
+
|
|
365
|
+
def summarize(
|
|
366
|
+
self
|
|
367
|
+
) -> Summary:
|
|
368
|
+
summary = self.dot_pages.summarize()
|
|
369
|
+
summary.all_dot_pages_count += 1 # not self-counted by the call
|
|
370
|
+
return summary
|
|
371
|
+
|
|
372
|
+
def get_page(
|
|
373
|
+
self,
|
|
374
|
+
path : pathlib.Path
|
|
375
|
+
) -> Page:
|
|
376
|
+
assert(path.is_absolute())
|
|
377
|
+
for page in self._pages:
|
|
378
|
+
if page.path == path:
|
|
379
|
+
return page
|
|
380
|
+
return None
|
|
381
|
+
|
|
382
|
+
# a .pages file is allowed to reference the same path multiple times, in
|
|
383
|
+
# which case the returned list will hold multiple representations of the
|
|
384
|
+
# same .pages file.
|
|
385
|
+
def get_dot_pages(
|
|
386
|
+
self,
|
|
387
|
+
path : pathlib.Path
|
|
388
|
+
) -> list [ DotPages ]:
|
|
389
|
+
assert(path.is_absolute())
|
|
390
|
+
if path == self.dot_pages.path:
|
|
391
|
+
return [self.dot_pages]
|
|
392
|
+
return self.dot_pages.get_dot_pages(path)
|
|
393
|
+
|
|
394
|
+
def lint_links(
    self
):
    """Re-check the anchor hazards recorded on every page of the book.

    Each hazard must be of an anchorage kind. A hazard that is confirmed is
    appended to the owning page's ``link_problems`` and put on the logger:

    * NO_SUCH_ANCHOR: the anchor is not listed by the target page (the page
      for the linked ``.md`` file, or the linking page itself when the link
      has no path).
    * HTML_ANCHORAGE: the anchor is not found by HtmlAnchorExtractor in the
      linked file.
    * NO_CLEAR_ANCHORAGE: always reported.

    Every page's ``link_hazards`` list is emptied afterwards.
    """
    for page in self._pages:
        for hazard in page.link_hazards:
            # there should only be anchorage hazards to look into a
            # second time after all pages have been translated:
            assert hazard.is_anchorage_kind()
            path, anchor = hazard.get_path_and_anchor()

            if hazard.kind == EventKind.NO_SUCH_ANCHOR:
                # find the page that represents the markdown file targeted
                # by the link. then look inside its metadata to check if
                # the wanted anchor was seen during translation.
                if path:
                    assert path.endswith('.md')
                    resolved = (page.path.parent / path).resolve()
                    target = self.get_page(resolved)
                    if not target:
                        # report the path that was looked up; target itself
                        # is always empty here and says nothing useful
                        print(f'INTERNAL: {resolved} not indexed by {self.title}')
                        continue
                else:
                    target = page  # in-page reference
                confirmed = anchor not in target.list_anchors()

            elif hazard.kind == EventKind.HTML_ANCHORAGE:
                # parse the target file to find anchor definitions
                parser = HtmlAnchorExtractor()
                resolved = (page.path.parent / path).resolve()
                with open(resolved, encoding='utf-8') as f:
                    parser.feed(f.read())
                parser.close()
                confirmed = anchor not in parser.anchors

            elif hazard.kind == EventKind.NO_CLEAR_ANCHORAGE:
                # not checking anchor links into "too alien" file formats
                # even though at least some of them (PDF files) should be
                # supportable:
                confirmed = True

            else:
                confirmed = False

            if confirmed:
                page.link_problems.append(hazard)
                self.logger.put(hazard)

        page.link_hazards = []  # reclaim memory
|