superpage 1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
superpage-1/LICENSE ADDED
@@ -0,0 +1,7 @@
1
+ Copyright 2025 Klas Lindberg
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
4
+
5
+ The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
6
+
7
+ THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
superpage-1/PKG-INFO ADDED
@@ -0,0 +1,20 @@
1
+ Metadata-Version: 2.4
2
+ Name: superpage
3
+ Version: 1
4
+ Summary: Lightweight Markdown builder
5
+ Author-email: Klas Lindberg <flimango@protonmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Mysingen/superpage
8
+ Project-URL: Issues, https://github.com/Mysingen/superpage/issues
9
+ Requires-Python: >=3.11
10
+ Description-Content-Type: text/markdown
11
+ License-File: LICENSE
12
+ Requires-Dist: pyyaml>=6.0.3
13
+ Requires-Dist: watchdog>=6.0.0
14
+ Requires-Dist: markdown>=3.10
15
+ Dynamic: license-file
16
+
17
+ # Superpage
18
+
19
+ Superpage is a lightweight Markdown processor that offers snappy live editing,
20
+ and human-readable HTML output with very few dependencies.
superpage-1/README.md ADDED
@@ -0,0 +1,4 @@
1
+ # Superpage
2
+
3
+ Superpage is a lightweight Markdown processor that offers snappy live editing,
4
+ and human-readable HTML output with very few dependencies.
@@ -0,0 +1,30 @@
1
+ [project]
2
+ name = "superpage"
3
+ version = "1"
4
+ authors = [
5
+ { name="Klas Lindberg", email="flimango@protonmail.com" },
6
+ ]
7
+ description = "Lightweight Markdown builder"
8
+ readme = "README.md"
9
+ license = "MIT"
10
+ license-files = ["LICENSE"]
11
+ requires-python = ">=3.11"
12
+ dependencies = [
13
+ "pyyaml>=6.0.3",
14
+ "watchdog>=6.0.0",
15
+ "markdown>=3.10",
16
+ ]
17
+
18
+ [project.urls]
19
+ Homepage = "https://github.com/Mysingen/superpage"
20
+ Issues = "https://github.com/Mysingen/superpage/issues"
21
+
22
+ [build-system]
23
+ requires = ["setuptools>=60"]
24
+ build-backend = "setuptools.build_meta"
25
+
26
+ [tool.setuptools]
27
+ packages = ["superpage"]
28
+
29
+ [tool.setuptools.package-data]
30
+ superpage = ["resources/*"]
superpage-1/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,16 @@
1
+ import pathlib
2
+
3
+ from .book import Book
4
+ from .builder import Builder
5
+ from .config import Config, Flags
6
+ from .linter import LinkProblem, HtmlAnchorExtractor
7
+ from .meta import PKG_ROOT
8
+ from .monitor import Logger
9
+ from .navigation import NavData
10
+ from .page import Page
11
+ from .event import Event, EventKind, SeverityLevel
12
+ from .reference import Reference, ReferenceExtractor
13
+ from .server import Server, Client, ServerObserveKind
14
+ from .summary import Summary
15
+ from .tasks import BuilderTask, ServerTask
16
+ from .watcher import Watcher
@@ -0,0 +1,56 @@
1
+ import pathlib
2
+ import sys
3
+ import threading
4
+ import time
5
+
6
+ # make superpage importable as package
7
+ PKG_ROOT = pathlib.Path(__file__).parent
8
+ sys.path.insert(0, str(PKG_ROOT.parent))
9
+
10
+ for p in sys.path:
11
+ print(' ', p)
12
+
13
+ import superpage
14
+
15
+ if __name__ == '__main__':
16
+ if len(sys.argv) < 3:
17
+ print('first argument must be a command, the second a config file')
18
+ sys.exit(1)
19
+ command = sys.argv[1]
20
+ cfg_file = pathlib.Path(sys.argv[2])
21
+ config = superpage.Config(cfg_file.parent)
22
+ flags = superpage.Flags(auto_dot_pages=True)
23
+
24
+ for o in config.from_file(cfg_file).get_events():
25
+ print(o)
26
+ if o.is_problem():
27
+ sys.exit(1)
28
+
29
+ if command == 'build':
30
+ options = sys.argv[2:]
31
+
32
+ logger = superpage.Logger(
33
+ filter=superpage.SeverityLevel.INFO)
34
+ builder = superpage.Builder(logger, flags=flags)
35
+
36
+ begin = time.time()
37
+ builder.recover(cfg_file)
38
+ if 'clean' in options:
39
+ builder.clean()
40
+ num_pages = builder.build()
41
+ end = time.time()
42
+ print(f'{num_pages} pages built in {end - begin} seconds')
43
+ sys.exit(0)
44
+
45
+ if command == 'serve':
46
+ queue = superpage.monitor.EventQueue()
47
+ logger = superpage.Logger()
48
+ builder = superpage.BuilderTask(cfg_file, queue, logger)
49
+ server = superpage.ServerTask(cfg_file, queue, logger)
50
+ client = superpage.Client(cfg_file, logger)
51
+ client.connect(timeout=5)
52
+ del client
53
+
54
+ server.join()
55
+ builder.join()
56
+ sys.exit(0)
@@ -0,0 +1,445 @@
1
+ import os
2
+ import pathlib
3
+
4
+ from superpage.config import Config, Flags
5
+ from superpage.linter import HtmlAnchorExtractor, SeverityLevel
6
+ from superpage.event import Event, EventKind
7
+ from superpage.monitor import Logger
8
+ from superpage.navigation import DotPages, is_link_loop
9
+ from superpage.page import Page
10
+ from superpage.summary import Summary
11
+
12
+ def all_files(
13
+ path : pathlib.Path
14
+ ) -> list [ pathlib.Path ]:
15
+ return [p for p in sorted(path.rglob('*')) if not is_link_loop(p)]
16
+
17
+ def all_md_files(
18
+ path : pathlib.Path
19
+ ) -> list [ pathlib.Path ]:
20
+ return [p for p in sorted(path.rglob('*.md')) if not is_link_loop(p)]
21
+
22
+ def all_html_files(
23
+ path : pathlib.Path
24
+ ) -> list [ pathlib.Path ]:
25
+ return [p for p in sorted(path.rglob('*.html')) if not is_link_loop(p)]
26
+
27
+ class Book:
28
+ flags : Flags
29
+ root_dir : pathlib.Path
30
+ config : Config
31
+ logger : Logger
32
+ path : pathlib.Path
33
+ title : str
34
+ _pages : list [ Page ] # all book pages. shares objects with dot_pages
35
+ dot_pages : DotPages # subset of all pages known for the book
36
+ site_path : pathlib.Path
37
+
38
+ def __init__(
39
+ self,
40
+ root_dir : pathlib.Path,
41
+ config : Config,
42
+ logger : Logger,
43
+ path : pathlib.Path,
44
+ title : str,
45
+ *,
46
+ flags : Flags
47
+ ):
48
+ self.flags = flags
49
+ assert(root_dir.is_absolute())
50
+ assert(path.is_absolute())
51
+ self.root_dir = root_dir
52
+ self.config = config
53
+ self.logger = logger
54
+ self.path = path
55
+ self._pages = []
56
+ self.title = title
57
+ self.dot_pages = None
58
+
59
+ self.site_path = None
60
+ if self.config.site_dir:
61
+ rel_site_path = self.path.relative_to(self.root_dir)
62
+ self.site_path = self.config.site_dir / rel_site_path
63
+
64
+ def from_dot_pages(
65
+ self
66
+ ):
67
+
68
+ path = self.path / '.pages'
69
+ dp = DotPages(
70
+ self.root_dir,
71
+ self.config,
72
+ self.logger,
73
+ flags=self.flags)
74
+
75
+ if not path.exists():
76
+ # must have the class member instantiated to build at all but not
77
+ # obliged to also save it to disk:
78
+ self.dot_pages = None # if we're in recovery after deleting the file
79
+ if self.flags.auto_dot_pages:
80
+ self.logger.put(Event(
81
+ SeverityLevel.INFO,
82
+ path, None,
83
+ 'automatically creating .pages file',
84
+ EventKind.AUTO_ADD_DOT_PAGES))
85
+ dp.to_file(path)
86
+
87
+ if self.dot_pages:
88
+ self.dot_pages.update_from_file(path, table=self.dot_pages)
89
+ else:
90
+ self.dot_pages = dp
91
+ dp.from_file(path, table=self.dot_pages)
92
+
93
+ def clean(
94
+ self
95
+ ):
96
+ files = all_html_files(self.site_path)
97
+
98
+ for f in files:
99
+ self.logger.put(Event(
100
+ SeverityLevel.INFO,
101
+ pathlib.PurePosixPath(f), None,
102
+ '-',
103
+ EventKind.CLEAN_OUTPUT))
104
+ os.unlink(f)
105
+
106
+ def add_page(
107
+ self,
108
+ page : Page
109
+ ) -> None:
110
+ if page not in self._pages:
111
+ self._pages.append(page)
112
+ return True
113
+ return False
114
+
115
+ def remove_page(
116
+ self,
117
+ page : Page
118
+ ) -> None:
119
+ if page not in self._pages:
120
+ return
121
+ self._pages.remove(page)
122
+
123
+ def patch_dot_pages(
124
+ self,
125
+ path : pathlib.Path,
126
+ *,
127
+ table : DotPages
128
+ ) -> Page:
129
+ self.logger.put(Event(
130
+ SeverityLevel.WARNING,
131
+ path, self.path / '.pages',
132
+ f'source file is not mentioned',
133
+ EventKind.NO_DOT_PAGES_MENTION))
134
+
135
+ entry = None
136
+
137
+ # add orphan .md and .pages files to the book's nav tree:
138
+ if path.suffix == '.md':
139
+ entry = Page(self.root_dir, self.config, self.logger, path)
140
+ title = entry.guess_title()
141
+ self.dot_pages.add_page(title, entry, auto_added=True)
142
+
143
+ elif path.name == '.pages':
144
+ # need an awkward default for the title but can make something from
145
+ # the book's title and the file path:
146
+ rel_dir_path = path.relative_to(self.root_dir).parent
147
+ title = ' / '.join([p for p in rel_dir_path.parts])
148
+ entry = DotPages(
149
+ self.root_dir, self.config, self.logger, flags=self.flags)
150
+ entry.from_file(path, table=table)
151
+ self.dot_pages.add_dot_pages(title, entry)
152
+
153
+ else:
154
+ assert(False and 'unreachable code')
155
+
156
+ if self.flags.auto_dot_pages:
157
+ # also save the root .pages file under the auto flag
158
+ self.logger.put(Event(
159
+ SeverityLevel.INFO,
160
+ path, self.path / '.pages',
161
+ f'source file added automatically',
162
+ EventKind.AUTO_ADD_DOT_PAGES))
163
+ self.dot_pages.to_file(self.dot_pages.path)
164
+
165
+ return entry
166
+
167
+ def build_page(
168
+ self,
169
+ md_path : pathlib.Path,
170
+ *,
171
+ table : DotPages
172
+ ):
173
+ assert(md_path.is_absolute())
174
+
175
+ # the same source file may be mentioned multiple times in a .pages
176
+ # file so we can get more than one page back:
177
+ page_list = self.dot_pages.get_pages(md_path)
178
+
179
+ # the page is not referenced by any .pages that is reachable through
180
+ # the root .pages file, or there is no root .pages file at all:
181
+ if not page_list:
182
+ page_list.append(
183
+ self.patch_dot_pages(md_path, table=table))
184
+
185
+ for page in page_list:
186
+ self.add_page(page) # in case we never saw it before
187
+ # this is not the time or place to worry about build avoidance. just
188
+ # call build() on the same content multiple times.
189
+ page.build()
190
+
191
+ def recover_page(
192
+ self,
193
+ path : pathlib.Path,
194
+ *,
195
+ table : DotPages
196
+ ):
197
+ assert(path.is_absolute())
198
+
199
+ # the same source file may be mentioned multiple times in a .pages
200
+ # file so we can get more than one page back:
201
+ page_list = self.dot_pages.get_pages(path)
202
+
203
+ # the page is not referenced by any .pages that is reachable through
204
+ # the root .pages file, or there is no root .pages file at all:
205
+ if not page_list:
206
+ page_list.append(
207
+ self.patch_dot_pages(path, table=table))
208
+
209
+ for page in page_list:
210
+ self.add_page(page) # in case we never saw it before
211
+ page.recover()
212
+
213
+ def remove_any(
214
+ self,
215
+ path : pathlib.Path
216
+ ):
217
+ assert(path.is_absolute())
218
+
219
+ entry_list = self.dot_pages.remove_any(path)
220
+ do_recovery = False
221
+
222
+ for e in entry_list:
223
+
224
+ if type(e.reference) == Page:
225
+ self.remove_page(e.reference)
226
+ html = path.with_suffix('.html')
227
+ if not html.exists():
228
+ return
229
+ self.logger.put(Event(
230
+ SeverityLevel.INFO,
231
+ pathlib.PurePosixPath(html), None,
232
+ '-',
233
+ EventKind.CLEAN_OUTPUT))
234
+ os.unlink(html)
235
+
236
+ if type(e.reference) == DotPages:
237
+ do_recovery = True
238
+
239
+ if path == self.dot_pages.path:
240
+ do_recovery = True
241
+
242
+ if do_recovery:
243
+ self.dot_pages = None
244
+ self.recover()
245
+
246
+ def move_any(
247
+ self,
248
+ src : pathlib.Path,
249
+ dst : pathlib.Path,
250
+ *,
251
+ table : DotPages
252
+ ) -> None:
253
+ assert(src.is_absolute())
254
+ assert(dst.is_absolute())
255
+
256
+ entry_list = self.dot_pages.remove_any(src)
257
+ do_recovery = False
258
+
259
+ for e in entry_list:
260
+
261
+ if type(e.reference) == Page:
262
+ self.remove_page(e.reference)
263
+ html = src.with_suffix('.html')
264
+ self.logger.put(Event(
265
+ SeverityLevel.INFO,
266
+ pathlib.PurePosixPath(html), None,
267
+ '-',
268
+ EventKind.CLEAN_OUTPUT))
269
+ if html.is_file():
270
+ os.unlink(html)
271
+ self.build_page(dst, table=self.dot_pages)
272
+ do_recovery = True
273
+
274
+ if type(e.reference) == DotPages:
275
+ do_recovery = True
276
+
277
+ if src == self.dot_pages.path or dst == self.dot_pages.path:
278
+ do_recovery = True
279
+
280
+ if do_recovery:
281
+ self.dot_pages = None
282
+ self.recover()
283
+
284
+ def build_nav(
285
+ self,
286
+ dot_pages_path : pathlib.Path
287
+ ):
288
+ # overall strategy:
289
+ # reorder Page and DotPages objects within the existing DotPages object
290
+ # based on the order they are seen in the changed .pages file. reorder
291
+ # Page objects within the existing Page list based on the order they are
292
+ # seen in the updated DotPages tree.
293
+
294
+ assert(self.dot_pages) # books at least always have fake .pages assigned
295
+
296
+ dps = self.dot_pages.get_dot_pages(dot_pages_path)
297
+ if dot_pages_path == self.dot_pages.path:
298
+ # the root .pages file should not have aliases in the navigation
299
+ # data since it is not referenced by any other .pages file and must
300
+ # not reference itself:
301
+ assert(len(dps) == 1)
302
+
303
+ # pass in a table of all existing entries to not recreate entries that
304
+ # have already been built before:
305
+ table = self.dot_pages
306
+ dps[0].update_from_file(dot_pages_path, table)
307
+
308
+ # update the book's flat page list ordering and build reachable pages
309
+ updated : list [ Page ] = []
310
+ rebuilt : list [ Page ] = []
311
+ old_paths = [p.path for p in self._pages]
312
+ for d in dps:
313
+ for p in d.list_pages(recursive=True) or []:
314
+ try:
315
+ i = old_paths.index(p.path)
316
+ updated.append(self._pages[i])
317
+ except ValueError:
318
+ updated.append(p)
319
+ rebuilt.append(p)
320
+ p.build() # do build avoidance in there, not here
321
+
322
+ self._pages = updated
323
+
324
+ def build(
325
+ self
326
+ ) -> int:
327
+ assert(self.path.is_absolute())
328
+
329
+ # dilemma: .md files may not be listed in .pages and .pages may contain
330
+ # entries that do not have any .md backing. need to find both problems.
331
+ # DotPages.from_file() checks existence of files referenced in .pages
332
+ # files. here we build *all* md in the project and log a problem for any
333
+ # file not mentioned in a .pages entry.
334
+
335
+ md_files = all_md_files(self.path)
336
+ for f in md_files:
337
+ self.build_page(self.path / f, table=self.dot_pages)
338
+
339
+ return len(md_files)
340
+
341
+ def recover(
342
+ self
343
+ ) -> int:
344
+ assert(self.path.is_absolute())
345
+
346
+ # this will discover all, none or some of the book's .pages files:
347
+ self.from_dot_pages()
348
+
349
+ md_files = 0
350
+
351
+ for f in all_files(self.path):
352
+ p = self.path / f
353
+ if p.suffix == '.md':
354
+ # sets title=None but the next file could be a .pages that sets
355
+ # the title. order of discovery is awkward...
356
+ self.recover_page(p, table=self.dot_pages)
357
+ md_files += 1
358
+ elif p.name == '.pages':
359
+ # check that this .pages was not discovered in from_dot_pages()
360
+ if not self.dot_pages.get_dot_pages(p):
361
+ self.patch_dot_pages(p, table=self.dot_pages)
362
+
363
+ return md_files
364
+
365
+ def summarize(
366
+ self
367
+ ) -> Summary:
368
+ summary = self.dot_pages.summarize()
369
+ summary.all_dot_pages_count += 1 # not self-counted by the call
370
+ return summary
371
+
372
+ def get_page(
373
+ self,
374
+ path : pathlib.Path
375
+ ) -> Page:
376
+ assert(path.is_absolute())
377
+ for page in self._pages:
378
+ if page.path == path:
379
+ return page
380
+ return None
381
+
382
+ # a .pages file is allowed to reference the same path multiple times, in
383
+ # which case the returned list will hold multiple representations of the
384
+ # same .pages file.
385
+ def get_dot_pages(
386
+ self,
387
+ path : pathlib.Path
388
+ ) -> list [ DotPages ]:
389
+ assert(path.is_absolute())
390
+ if path == self.dot_pages.path:
391
+ return [self.dot_pages]
392
+ return self.dot_pages.get_dot_pages(path)
393
+
394
+ def lint_links(
395
+ self
396
+ ):
397
+ for page in self._pages:
398
+ for hazard in page.link_hazards:
399
+ # there should only be anchorage hazards to look into a
400
+ # second time after all pages have been translated:
401
+ assert hazard.is_anchorage_kind()
402
+ path, anchor = hazard.get_path_and_anchor()
403
+
404
+ if hazard.kind == EventKind.NO_SUCH_ANCHOR:
405
+ # find the page that represents the markdown file
406
+ # targeted by the link. then look inside its metadata
407
+ # to check if the wanted anchor was seen during
408
+ # translation.
409
+ if path:
410
+ assert(path.endswith('.md'))
411
+ resolved = (page.path.parent / path).resolve()
412
+ target = self.get_page(resolved)
413
+ if not target:
414
+ print(f'INTERNAL: {target} not indexed by {self.title}')
415
+ continue
416
+ #assert(target)
417
+ else:
418
+ target = page # in-page reference
419
+
420
+ if anchor not in target.list_anchors():
421
+ page.link_problems.append(hazard)
422
+ self.logger.put(hazard)
423
+ continue
424
+
425
+ if hazard.kind == EventKind.HTML_ANCHORAGE:
426
+ # parse the target file to find anchor definitions
427
+ parser = HtmlAnchorExtractor()
428
+ resolved = (page.path.parent / path).resolve()
429
+ with open(resolved, encoding='utf-8') as f:
430
+ parser.feed(f.read())
431
+ parser.close()
432
+ if anchor not in parser.anchors:
433
+ page.link_problems.append(hazard)
434
+ self.logger.put(hazard)
435
+ continue
436
+
437
+ # not checking anchor links into "too alien" file formats
438
+ # even though at least some of them (PDF files) should be
439
+ # supportable:
440
+ if hazard.kind == EventKind.NO_CLEAR_ANCHORAGE:
441
+ page.link_problems.append(hazard)
442
+ self.logger.put(hazard)
443
+ continue
444
+
445
+ page.link_hazards = [] # reclaim memory