txthighlight 0.1.0a0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Jifeng Wu
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: txthighlight
3
+ Version: 0.1.0a0
4
+ Summary: A tiny local web app for highlighting and commenting on plain text files.
5
+ Author-email: Jifeng Wu <jifengwu2k@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/jifengwu2k/txthighlight
8
+ Project-URL: Bug Tracker, https://github.com/jifengwu2k/txthighlight/issues
9
+ Classifier: Programming Language :: Python :: 2
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=2
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Dynamic: license-file
16
+
17
+ # `txthighlight`
18
+
19
+ A tiny local web app for highlighting and commenting on plain text files.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install txthighlight
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ Run it like this:
30
+
31
+ ```bash
32
+ txthighlight --host 0.0.0.0 --port 8080 somefile.txt
33
+ ```
34
+
35
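+ Then open the app in a browser on the same machine (`0.0.0.0` is only the bind address; from another device, use the host machine's IP address instead of `localhost`):
36
+
37
+ ```text
38
+ http://localhost:8080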
39
+ ```
40
+
41
+ Highlight metadata is stored next to the source file in:
42
+
43
+ ```text
44
+ somefile.txt.json
45
+ ```
46
+
47
+ ### What it does
48
+
49
+ - renders a plain text file in the browser
50
+ - lets you select text and highlight it
51
+ - lets you add comments to highlights
52
+ - lets you remove highlights
53
+ - stores annotation data locally in a JSON sidecar file
54
+ - works with desktop and mobile browsers
55
+
56
+ ### Why this exists
57
+
58
+ There are many tools for annotating PDFs, rich text documents, and web pages.
59
+ There are very few simple tools for annotating a raw local text file.
60
+
61
+ Plain text is still a common working format for:
62
+
63
+ - transcripts
64
+ - logs
65
+ - OCR output
66
+ - legal or policy text
67
+ - prompt corpora
68
+ - research notes
69
+ - interview notes
70
+ - exported chat histories
71
+
72
+ This project fills that narrow gap: plain text in, annotations in a nearby JSON file, no database required.
73
+
74
+ ### The niche
75
+
76
+ This tool lives in an awkward but useful niche.
77
+
78
+ Most annotation tools assume one of these:
79
+
80
+ - HTML pages annotated by a browser extension
81
+ - rich text documents like Word or Google Docs
82
+ - PDFs with built-in annotation support
83
+ - note-taking apps with their own storage format
84
+
85
+ But sometimes you do not want any of that. Sometimes you have a `.txt` file and want to keep working with a `.txt` file.
86
+
87
+ ### Data format
88
+
89
+ Annotations are stored in `<text-file>.json`.
90
+
91
+ Example:
92
+
93
+ ```json
94
+ {
95
+ "source_file": "/path/to/somefile.txt",
96
+ "annotations": [
97
+ {
98
+ "id": "3d7278a2-6d67-4e1c-a0c8-4a0a7d3b0e40",
99
+ "start": 12,
100
+ "end": 42,
101
+ "comment": "Important passage",
102
+ "created_at": 1713350000,
103
+ "updated_at": 1713350123
104
+ }
105
+ ]
106
+ }
107
+ ```
108
+
109
+ Offsets are character offsets into the text file as loaded by the app.
110
+
111
+ ## Contributing
112
+
113
+ Contributions are welcome! Please submit pull requests or open issues on the GitHub repository.
114
+
115
+ ## License
116
+
117
+ This project is licensed under the [MIT License](LICENSE).
@@ -0,0 +1,101 @@
1
+ # `txthighlight`
2
+
3
+ A tiny local web app for highlighting and commenting on plain text files.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pip install txthighlight
9
+ ```
10
+
11
+ ## Usage
12
+
13
+ Run it like this:
14
+
15
+ ```bash
16
+ txthighlight --host 0.0.0.0 --port 8080 somefile.txt
17
+ ```
18
+
19
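+ Then open the app in a browser on the same machine (`0.0.0.0` is only the bind address; from another device, use the host machine's IP address instead of `localhost`):
20
+
21
+ ```text
22
+ http://localhost:8080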
23
+ ```
24
+
25
+ Highlight metadata is stored next to the source file in:
26
+
27
+ ```text
28
+ somefile.txt.json
29
+ ```
30
+
31
+ ### What it does
32
+
33
+ - renders a plain text file in the browser
34
+ - lets you select text and highlight it
35
+ - lets you add comments to highlights
36
+ - lets you remove highlights
37
+ - stores annotation data locally in a JSON sidecar file
38
+ - works with desktop and mobile browsers
39
+
40
+ ### Why this exists
41
+
42
+ There are many tools for annotating PDFs, rich text documents, and web pages.
43
+ There are very few simple tools for annotating a raw local text file.
44
+
45
+ Plain text is still a common working format for:
46
+
47
+ - transcripts
48
+ - logs
49
+ - OCR output
50
+ - legal or policy text
51
+ - prompt corpora
52
+ - research notes
53
+ - interview notes
54
+ - exported chat histories
55
+
56
+ This project fills that narrow gap: plain text in, annotations in a nearby JSON file, no database required.
57
+
58
+ ### The niche
59
+
60
+ This tool lives in an awkward but useful niche.
61
+
62
+ Most annotation tools assume one of these:
63
+
64
+ - HTML pages annotated by a browser extension
65
+ - rich text documents like Word or Google Docs
66
+ - PDFs with built-in annotation support
67
+ - note-taking apps with their own storage format
68
+
69
+ But sometimes you do not want any of that. Sometimes you have a `.txt` file and want to keep working with a `.txt` file.
70
+
71
+ ### Data format
72
+
73
+ Annotations are stored in `<text-file>.json`.
74
+
75
+ Example:
76
+
77
+ ```json
78
+ {
79
+ "source_file": "/path/to/somefile.txt",
80
+ "annotations": [
81
+ {
82
+ "id": "3d7278a2-6d67-4e1c-a0c8-4a0a7d3b0e40",
83
+ "start": 12,
84
+ "end": 42,
85
+ "comment": "Important passage",
86
+ "created_at": 1713350000,
87
+ "updated_at": 1713350123
88
+ }
89
+ ]
90
+ }
91
+ ```
92
+
93
+ Offsets are character offsets into the text file as loaded by the app.
94
+
95
+ ## Contributing
96
+
97
+ Contributions are welcome! Please submit pull requests or open issues on the GitHub repository.
98
+
99
+ ## License
100
+
101
+ This project is licensed under the [MIT License](LICENSE).
@@ -0,0 +1,24 @@
1
+ [build-system]
2
+ requires = ["setuptools"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "txthighlight"
7
+ version = "0.1.0a0"
8
+ description = "A tiny local web app for highlighting and commenting on plain text files."
9
+ readme = "README.md"
10
+ requires-python = ">=2"
11
+ license = "MIT"
12
+ authors = [
13
+ { name="Jifeng Wu", email="jifengwu2k@gmail.com" }
14
+ ]
15
+ classifiers = [
16
+ "Programming Language :: Python :: 2",
17
+ "Programming Language :: Python :: 3",
18
+ "Operating System :: OS Independent",
19
+ ]
20
+ dependencies = []
21
+
22
+ [project.urls]
23
+ "Homepage" = "https://github.com/jifengwu2k/txthighlight"
24
+ "Bug Tracker" = "https://github.com/jifengwu2k/txthighlight/issues"
@@ -0,0 +1,7 @@
1
+ [bdist_wheel]
2
+ universal = 1
3
+
4
+ [egg_info]
5
+ tag_build =
6
+ tag_date = 0
7
+
@@ -0,0 +1,117 @@
1
+ Metadata-Version: 2.4
2
+ Name: txthighlight
3
+ Version: 0.1.0a0
4
+ Summary: A tiny local web app for highlighting and commenting on plain text files.
5
+ Author-email: Jifeng Wu <jifengwu2k@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/jifengwu2k/txthighlight
8
+ Project-URL: Bug Tracker, https://github.com/jifengwu2k/txthighlight/issues
9
+ Classifier: Programming Language :: Python :: 2
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Operating System :: OS Independent
12
+ Requires-Python: >=2
13
+ Description-Content-Type: text/markdown
14
+ License-File: LICENSE
15
+ Dynamic: license-file
16
+
17
+ # `txthighlight`
18
+
19
+ A tiny local web app for highlighting and commenting on plain text files.
20
+
21
+ ## Installation
22
+
23
+ ```bash
24
+ pip install txthighlight
25
+ ```
26
+
27
+ ## Usage
28
+
29
+ Run it like this:
30
+
31
+ ```bash
32
+ txthighlight --host 0.0.0.0 --port 8080 somefile.txt
33
+ ```
34
+
35
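+ Then open the app in a browser on the same machine (`0.0.0.0` is only the bind address; from another device, use the host machine's IP address instead of `localhost`):
36
+
37
+ ```text
38
+ http://localhost:8080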
39
+ ```
40
+
41
+ Highlight metadata is stored next to the source file in:
42
+
43
+ ```text
44
+ somefile.txt.json
45
+ ```
46
+
47
+ ### What it does
48
+
49
+ - renders a plain text file in the browser
50
+ - lets you select text and highlight it
51
+ - lets you add comments to highlights
52
+ - lets you remove highlights
53
+ - stores annotation data locally in a JSON sidecar file
54
+ - works with desktop and mobile browsers
55
+
56
+ ### Why this exists
57
+
58
+ There are many tools for annotating PDFs, rich text documents, and web pages.
59
+ There are very few simple tools for annotating a raw local text file.
60
+
61
+ Plain text is still a common working format for:
62
+
63
+ - transcripts
64
+ - logs
65
+ - OCR output
66
+ - legal or policy text
67
+ - prompt corpora
68
+ - research notes
69
+ - interview notes
70
+ - exported chat histories
71
+
72
+ This project fills that narrow gap: plain text in, annotations in a nearby JSON file, no database required.
73
+
74
+ ### The niche
75
+
76
+ This tool lives in an awkward but useful niche.
77
+
78
+ Most annotation tools assume one of these:
79
+
80
+ - HTML pages annotated by a browser extension
81
+ - rich text documents like Word or Google Docs
82
+ - PDFs with built-in annotation support
83
+ - note-taking apps with their own storage format
84
+
85
+ But sometimes you do not want any of that. Sometimes you have a `.txt` file and want to keep working with a `.txt` file.
86
+
87
+ ### Data format
88
+
89
+ Annotations are stored in `<text-file>.json`.
90
+
91
+ Example:
92
+
93
+ ```json
94
+ {
95
+ "source_file": "/path/to/somefile.txt",
96
+ "annotations": [
97
+ {
98
+ "id": "3d7278a2-6d67-4e1c-a0c8-4a0a7d3b0e40",
99
+ "start": 12,
100
+ "end": 42,
101
+ "comment": "Important passage",
102
+ "created_at": 1713350000,
103
+ "updated_at": 1713350123
104
+ }
105
+ ]
106
+ }
107
+ ```
108
+
109
+ Offsets are character offsets into the text file as loaded by the app.
110
+
111
+ ## Contributing
112
+
113
+ Contributions are welcome! Please submit pull requests or open issues on the GitHub repository.
114
+
115
+ ## License
116
+
117
+ This project is licensed under the [MIT License](LICENSE).
@@ -0,0 +1,9 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ setup.cfg
5
+ txthighlight.py
6
+ txthighlight.egg-info/PKG-INFO
7
+ txthighlight.egg-info/SOURCES.txt
8
+ txthighlight.egg-info/dependency_links.txt
9
+ txthighlight.egg-info/top_level.txt
@@ -0,0 +1 @@
1
+ txthighlight
@@ -0,0 +1,835 @@
1
+ #!/usr/bin/env python
2
+ # coding=utf-8
3
+ from __future__ import print_function
4
+
5
+ import argparse
6
+ import codecs
7
+ import json
8
+ import os
9
+ import sys
10
+ import threading
11
+ import time
12
+ import uuid
13
+
14
+ if sys.version_info >= (3,):
15
+ from http.server import BaseHTTPRequestHandler, HTTPServer
16
+ from socketserver import ThreadingMixIn
17
+ from urllib.parse import urlparse
18
+ text_type = str
19
+ else:
20
+ from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
21
+ from SocketServer import ThreadingMixIn
22
+ from urlparse import urlparse
23
+ text_type = unicode
24
+
25
+
26
class ThreadingHTTPServer(ThreadingMixIn, HTTPServer):
    """HTTPServer combined with ThreadingMixIn so requests run in threads."""

    # Worker threads are daemonic, so they never keep the process alive
    # once the main thread exits.
    daemon_threads = True
28
+
29
+
30
+ HTTP_OK = 200
31
+ HTTP_BAD_REQUEST = 400
32
+ HTTP_NOT_FOUND = 404
33
+ HTTP_INTERNAL_SERVER_ERROR = 500
34
+
35
+
36
+ HTML_PAGE = """<!DOCTYPE html>
37
+ <html lang=\"en\">
38
+ <head>
39
+ <meta charset=\"utf-8\" />
40
+ <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />
41
+ <title>Text Highlighter</title>
42
+ <style>
43
+ .highlight {
44
+ background: yellow;
45
+ }
46
+ #document,
47
+ #commentSnippet {
48
+ white-space: pre-wrap;
49
+ overflow-wrap: break-word;
50
+ }
51
+ </style>
52
+ </head>
53
+ <body>
54
+ <div id=\"document\"></div>
55
+
56
+ <div id=\"selectionToolbar\" hidden>
57
+ <button id=\"highlightBtn\">Highlight</button>
58
+ <button id=\"selectionCommentBtn\">Comment</button>
59
+ </div>
60
+
61
+ <div id=\"highlightToolbar\" hidden>
62
+ <button id=\"highlightCommentBtn\">Comment</button>
63
+ <button id=\"highlightRemoveBtn\">Remove</button>
64
+ </div>
65
+
66
+ <dialog id=\"commentModal\">
67
+ <form method=\"dialog\">
68
+ <p><strong>Add comment</strong></p>
69
+ <div id=\"commentSnippet\"></div>
70
+ <textarea id=\"commentBox\" placeholder=\"Add a note for this highlight…\"></textarea>
71
+ <div>
72
+ <button id=\"cancelComment\" value=\"cancel\">Cancel</button>
73
+ <button id=\"saveComment\" value=\"default\">Save comment</button>
74
+ </div>
75
+ </form>
76
+ </dialog>
77
+
78
+ <script>
79
+ const Mode = {
80
+ IDLE: 'idle',
81
+ SELECTION_ACTIVE: 'selection_active',
82
+ HIGHLIGHT_ACTIVE: 'highlight_active',
83
+ COMMENT_DIALOG: 'comment_dialog',
84
+ };
85
+
86
+ const state = {
87
+ mode: Mode.IDLE,
88
+ text: '',
89
+ annotations: [],
90
+ fileName: '',
91
+ metadataPath: '',
92
+ selection: null,
93
+ highlightId: null,
94
+ commentTargetId: null,
95
+ };
96
+
97
+ const docEl = document.getElementById('document');
98
+ const selectionToolbarEl = document.getElementById('selectionToolbar');
99
+ const highlightToolbarEl = document.getElementById('highlightToolbar');
100
+ const commentModalEl = document.getElementById('commentModal');
101
+ const commentSnippetEl = document.getElementById('commentSnippet');
102
+ const commentBoxEl = document.getElementById('commentBox');
103
+ const saveCommentEl = document.getElementById('saveComment');
104
+ const cancelCommentEl = document.getElementById('cancelComment');
105
+
106
+ function showError(message) {
107
+ if (message) {
108
+ window.alert(message);
109
+ }
110
+ }
111
+
112
+ function escapeHtml(text) {
113
+ return text
114
+ .replaceAll('&', '&amp;')
115
+ .replaceAll('<', '&lt;')
116
+ .replaceAll('>', '&gt;')
117
+ .replaceAll('"', '&quot;')
118
+ .replaceAll("'", '&#39;');
119
+ }
120
+
121
+ function findAnnotationById(id) {
122
+ return state.annotations.find((item) => item.id === id) || null;
123
+ }
124
+
125
+ function findHighlightElById(id) {
126
+ if (!id) {
127
+ return null;
128
+ }
129
+ return docEl.querySelector(`.highlight[data-id="${id}"]`);
130
+ }
131
+
132
function renderDocument() {
  // Paint state.text into the document element, wrapping every annotation
  // in a span with class "highlight". The rendered text nodes must add up to
  // exactly state.text, because getTextOffset() maps a selection back to
  // character offsets by summing text-node lengths.
  const annotations = [...state.annotations].sort((a, b) => a.start - b.start || a.end - b.end);
  let cursor = 0;
  let html = '';
  for (const ann of annotations) {
    // Clip each annotation to the part that has not been rendered yet, so
    // overlapping ranges never emit the same characters twice.
    const start = Math.max(ann.start, cursor);
    if (ann.end <= start) {
      continue;
    }
    if (start > cursor) {
      html += escapeHtml(state.text.slice(cursor, start));
    }
    const text = escapeHtml(state.text.slice(start, ann.end));
    const title = ann.comment ? escapeHtml(ann.comment) : 'Highlight';
    // The id is data from the metadata file as well, so it is escaped like
    // every other value placed inside an attribute.
    const id = escapeHtml(String(ann.id));
    html += `<span class="highlight" data-id="${id}" title="${title}">${text}</span>`;
    cursor = ann.end;
  }
  if (cursor < state.text.length) {
    html += escapeHtml(state.text.slice(cursor));
  }
  docEl.innerHTML = html || '(empty file)';
}
150
+
151
+ function getTextOffset(container, targetNode, targetOffset) {
152
+ let total = 0;
153
+ const walker = document.createTreeWalker(container, NodeFilter.SHOW_TEXT);
154
+ let node;
155
+ while ((node = walker.nextNode())) {
156
+ if (node === targetNode) {
157
+ return total + targetOffset;
158
+ }
159
+ total += node.textContent.length;
160
+ }
161
+ return null;
162
+ }
163
+
164
+ function getSelectionOffsets() {
165
+ const selection = window.getSelection();
166
+ if (!selection || selection.rangeCount === 0) {
167
+ return null;
168
+ }
169
+ const range = selection.getRangeAt(0);
170
+ if (range.collapsed) {
171
+ return null;
172
+ }
173
+ if (!docEl.contains(range.startContainer) || !docEl.contains(range.endContainer)) {
174
+ return null;
175
+ }
176
+ const start = getTextOffset(docEl, range.startContainer, range.startOffset);
177
+ const end = getTextOffset(docEl, range.endContainer, range.endOffset);
178
+ if (start == null || end == null || start === end) {
179
+ return null;
180
+ }
181
+ return start < end ? { start, end } : { start: end, end: start };
182
+ }
183
+
184
+ function getSelectionRect() {
185
+ const selection = window.getSelection();
186
+ if (!selection || selection.rangeCount === 0 || !state.selection) {
187
+ return null;
188
+ }
189
+ const rect = selection.getRangeAt(0).getBoundingClientRect();
190
+ if (!rect || (!rect.width && !rect.height)) {
191
+ return null;
192
+ }
193
+ return rect;
194
+ }
195
+
196
+ function placeToolbar(toolbarEl, rect) {
197
+ toolbarEl.hidden = false;
198
+ toolbarEl.style.position = 'fixed';
199
+ toolbarEl.style.zIndex = '50';
200
+
201
+ const toolbarWidth = toolbarEl.offsetWidth;
202
+ const top = rect.bottom + 8;
203
+ const maxLeft = Math.max(10, window.innerWidth - toolbarWidth - 10);
204
+ const left = Math.min(
205
+ maxLeft,
206
+ Math.max(10, rect.left + rect.width / 2 - toolbarWidth / 2)
207
+ );
208
+
209
+ toolbarEl.style.top = `${top}px`;
210
+ toolbarEl.style.left = `${left}px`;
211
+ }
212
+
213
+ function hideSelectionToolbar() {
214
+ selectionToolbarEl.hidden = true;
215
+ }
216
+
217
+ function hideHighlightToolbar() {
218
+ highlightToolbarEl.hidden = true;
219
+ }
220
+
221
+ function openCommentDialog(id) {
222
+ const ann = findAnnotationById(id);
223
+ if (!ann) {
224
+ return;
225
+ }
226
+ if (commentModalEl.open && commentModalEl.dataset.targetId === id) {
227
+ return;
228
+ }
229
+ commentModalEl.dataset.targetId = id;
230
+ commentSnippetEl.textContent = state.text.slice(ann.start, ann.end);
231
+ commentBoxEl.value = ann.comment || '';
232
+ if (!commentModalEl.open) {
233
+ commentModalEl.showModal();
234
+ }
235
+ window.setTimeout(() => commentBoxEl.focus(), 0);
236
+ }
237
+
238
+ function closeCommentDialog() {
239
+ commentModalEl.dataset.targetId = '';
240
+ if (commentModalEl.open) {
241
+ commentModalEl.close();
242
+ }
243
+ }
244
+
245
+ function renderUI() {
246
+ hideSelectionToolbar();
247
+ hideHighlightToolbar();
248
+
249
+ if (state.mode === Mode.SELECTION_ACTIVE && state.selection) {
250
+ const rect = getSelectionRect();
251
+ if (rect) {
252
+ placeToolbar(selectionToolbarEl, rect);
253
+ }
254
+ }
255
+
256
+ if (state.mode === Mode.HIGHLIGHT_ACTIVE && state.highlightId) {
257
+ const highlightEl = findHighlightElById(state.highlightId);
258
+ if (highlightEl) {
259
+ placeToolbar(highlightToolbarEl, highlightEl.getBoundingClientRect());
260
+ }
261
+ }
262
+
263
+ if (state.mode === Mode.COMMENT_DIALOG && state.commentTargetId) {
264
+ openCommentDialog(state.commentTargetId);
265
+ } else if (commentModalEl.open) {
266
+ commentModalEl.close();
267
+ }
268
+ }
269
+
270
+ function transition(event, payload = {}) {
271
+ switch (event) {
272
+ case 'DOCUMENT_LOADED':
273
+ state.text = payload.text || '';
274
+ state.annotations = payload.annotations || [];
275
+ state.fileName = payload.file_name || '';
276
+ state.metadataPath = payload.metadata_path || '';
277
+ state.mode = Mode.IDLE;
278
+ state.selection = null;
279
+ state.highlightId = null;
280
+ state.commentTargetId = null;
281
+ renderDocument();
282
+ renderUI();
283
+ return;
284
+
285
+ case 'SELECTION_CHANGED':
286
+ if (payload.selection) {
287
+ state.mode = Mode.SELECTION_ACTIVE;
288
+ state.selection = payload.selection;
289
+ state.highlightId = null;
290
+ state.commentTargetId = null;
291
+ } else if (state.mode === Mode.SELECTION_ACTIVE) {
292
+ state.mode = Mode.IDLE;
293
+ state.selection = null;
294
+ }
295
+ renderUI();
296
+ return;
297
+
298
+ case 'HIGHLIGHT_SELECTED':
299
+ if (!payload.id) {
300
+ return;
301
+ }
302
+ state.mode = Mode.HIGHLIGHT_ACTIVE;
303
+ state.highlightId = payload.id;
304
+ state.selection = null;
305
+ state.commentTargetId = null;
306
+ window.getSelection()?.removeAllRanges();
307
+ renderUI();
308
+ return;
309
+
310
+ case 'OPEN_COMMENT_DIALOG':
311
+ if (!payload.id) {
312
+ return;
313
+ }
314
+ state.mode = Mode.COMMENT_DIALOG;
315
+ state.highlightId = payload.id;
316
+ state.selection = null;
317
+ state.commentTargetId = payload.id;
318
+ renderUI();
319
+ return;
320
+
321
+ case 'COMMENT_DIALOG_CLOSED':
322
+ if (state.highlightId && findAnnotationById(state.highlightId)) {
323
+ state.mode = Mode.HIGHLIGHT_ACTIVE;
324
+ } else {
325
+ state.mode = Mode.IDLE;
326
+ state.highlightId = null;
327
+ }
328
+ state.commentTargetId = null;
329
+ renderUI();
330
+ return;
331
+
332
+ case 'CLEAR_ACTIVE':
333
+ state.mode = Mode.IDLE;
334
+ state.selection = null;
335
+ state.highlightId = null;
336
+ state.commentTargetId = null;
337
+ renderUI();
338
+ return;
339
+
340
+ case 'ANNOTATIONS_UPDATED':
341
+ state.annotations = payload.annotations || [];
342
+ renderDocument();
343
+ if (state.highlightId && !findAnnotationById(state.highlightId)) {
344
+ state.highlightId = null;
345
+ if (state.mode !== Mode.SELECTION_ACTIVE) {
346
+ state.mode = Mode.IDLE;
347
+ }
348
+ }
349
+ if (state.commentTargetId && !findAnnotationById(state.commentTargetId)) {
350
+ state.commentTargetId = null;
351
+ if (state.mode === Mode.COMMENT_DIALOG) {
352
+ state.mode = state.highlightId ? Mode.HIGHLIGHT_ACTIVE : Mode.IDLE;
353
+ }
354
+ }
355
+ renderUI();
356
+ return;
357
+ }
358
+ }
359
+
360
+ async function api(path, payload) {
361
+ const response = await fetch(path, {
362
+ method: 'POST',
363
+ headers: { 'Content-Type': 'application/json' },
364
+ body: JSON.stringify(payload),
365
+ });
366
+ const data = await response.json();
367
+ if (!response.ok) {
368
+ throw new Error(data.error || `HTTP ${response.status}`);
369
+ }
370
+ return data;
371
+ }
372
+
373
+ async function mutate(payload) {
374
+ try {
375
+ const data = await api('/api/highlights', payload);
376
+ transition('ANNOTATIONS_UPDATED', { annotations: data.annotations });
377
+ return data.annotations;
378
+ } catch (error) {
379
+ console.error(error);
380
+ showError(error.message);
381
+ return null;
382
+ }
383
+ }
384
+
385
async function loadDocument() {
  // Fetch the text and its annotations, then hand them to the state machine.
  // A non-2xx response is turned into an Error (same convention as api()),
  // so the caller's catch handler reports it instead of an error body being
  // rendered as an empty document.
  const response = await fetch('/api/document');
  const data = await response.json();
  if (!response.ok) {
    throw new Error((data && data.error) || `HTTP ${response.status}`);
  }
  transition('DOCUMENT_LOADED', data);
}
390
+
391
+ function findHighlightForSelection(selection, annotations) {
392
+ const matches = annotations
393
+ .filter((ann) => ann.start <= selection.start && ann.end >= selection.end)
394
+ .sort((a, b) => (a.end - a.start) - (b.end - b.start));
395
+ return matches[0] || null;
396
+ }
397
+
398
+ selectionToolbarEl.addEventListener('pointerdown', (event) => {
399
+ event.preventDefault();
400
+ });
401
+
402
+ saveCommentEl.addEventListener('click', async () => {
403
+ const id = state.commentTargetId || state.highlightId;
404
+ if (!id) return;
405
+ await mutate({ action: 'update_comment', id, comment: commentBoxEl.value });
406
+ transition('COMMENT_DIALOG_CLOSED');
407
+ });
408
+
409
+ cancelCommentEl.addEventListener('click', () => {
410
+ transition('COMMENT_DIALOG_CLOSED');
411
+ });
412
+
413
+ commentModalEl.addEventListener('close', () => {
414
+ if (state.mode === Mode.COMMENT_DIALOG) {
415
+ transition('COMMENT_DIALOG_CLOSED');
416
+ }
417
+ });
418
+
419
+ document.getElementById('highlightBtn').addEventListener('click', async () => {
420
+ const selection = state.selection;
421
+ if (!selection) return;
422
+ window.getSelection()?.removeAllRanges();
423
+ transition('CLEAR_ACTIVE');
424
+ await mutate({ action: 'add', start: selection.start, end: selection.end });
425
+ });
426
+
427
+ document.getElementById('selectionCommentBtn').addEventListener('click', async () => {
428
+ const selection = state.selection;
429
+ if (!selection) return;
430
+ window.getSelection()?.removeAllRanges();
431
+ transition('CLEAR_ACTIVE');
432
+ const annotations = await mutate({ action: 'add', start: selection.start, end: selection.end });
433
+ if (!annotations) return;
434
+ const ann = findHighlightForSelection(selection, annotations);
435
+ if (!ann) return;
436
+ transition('OPEN_COMMENT_DIALOG', { id: ann.id });
437
+ });
438
+
439
+ document.getElementById('highlightCommentBtn').addEventListener('click', async () => {
440
+ const id = state.highlightId;
441
+ if (!id) return;
442
+ transition('OPEN_COMMENT_DIALOG', { id });
443
+ });
444
+
445
+ document.getElementById('highlightRemoveBtn').addEventListener('click', async () => {
446
+ const id = state.highlightId;
447
+ if (!id) return;
448
+ transition('CLEAR_ACTIVE');
449
+ await mutate({ action: 'remove_id', id });
450
+ });
451
+
452
+ docEl.addEventListener('click', (event) => {
453
+ const highlight = event.target.closest('.highlight');
454
+ if (highlight) {
455
+ transition('HIGHLIGHT_SELECTED', { id: highlight.dataset.id });
456
+ return;
457
+ }
458
+ if (!getSelectionOffsets()) {
459
+ transition('CLEAR_ACTIVE');
460
+ }
461
+ });
462
+
463
+ document.addEventListener('selectionchange', () => {
464
+ if (state.mode === Mode.COMMENT_DIALOG || document.activeElement === commentBoxEl) {
465
+ return;
466
+ }
467
+ const selection = getSelectionOffsets();
468
+ if (selection) {
469
+ transition('SELECTION_CHANGED', { selection });
470
+ window.requestAnimationFrame(renderUI);
471
+ } else {
472
+ transition('SELECTION_CHANGED', { selection: null });
473
+ }
474
+ });
475
+
476
+ document.addEventListener('pointerup', () => {
477
+ if (state.mode === Mode.COMMENT_DIALOG || document.activeElement === commentBoxEl) {
478
+ return;
479
+ }
480
+ window.setTimeout(() => {
481
+ const selection = getSelectionOffsets();
482
+ if (selection) {
483
+ transition('SELECTION_CHANGED', { selection });
484
+ }
485
+ }, 0);
486
+ });
487
+
488
+ document.addEventListener('keydown', (event) => {
489
+ if (event.key === 'Escape') {
490
+ window.getSelection()?.removeAllRanges();
491
+ transition('CLEAR_ACTIVE');
492
+ }
493
+ });
494
+
495
+ document.addEventListener('click', (event) => {
496
+ if (!event.target.closest('#highlightToolbar') && !event.target.closest('.highlight')) {
497
+ if (state.mode === Mode.HIGHLIGHT_ACTIVE && !getSelectionOffsets()) {
498
+ transition('CLEAR_ACTIVE');
499
+ }
500
+ }
501
+ if (!event.target.closest('#selectionToolbar') && !getSelectionOffsets() && state.mode === Mode.SELECTION_ACTIVE) {
502
+ transition('CLEAR_ACTIVE');
503
+ }
504
+ });
505
+
506
+ loadDocument().catch((error) => {
507
+ console.error(error);
508
+ showError(error.message);
509
+ docEl.textContent = 'Failed to load file.';
510
+ });
511
+ </script>
512
+ </body>
513
+ </html>
514
+ """
515
+
516
+
517
class AppState(object):
    """In-memory text of one file plus access to its JSON annotation sidecar.

    Annotations are not cached: ``document_payload`` and ``mutate`` re-read
    ``metadata_path`` on every call while holding ``lock``, and ``mutate``
    writes the result back before releasing it.
    """

    def __init__(self, text_path, metadata_path, text, lock):
        # type: (text_type, text_type, text_type, threading.Lock) -> None
        """Store the paths, text and lock, creating the sidecar if missing."""
        self.text_path = text_path
        self.metadata_path = metadata_path
        self.text = text
        self.lock = lock
        self._ensure_metadata_file()

    def _ensure_metadata_file(self):
        # type: () -> None
        """Create an empty metadata file unless one already exists."""
        if not os.path.exists(self.metadata_path):
            # Same on-disk layout as every later save.
            self._save_annotations([])

    def _load_annotations(self):
        # type: () -> list
        """Read, validate and normalize the annotations from the sidecar.

        Annotations whose range does not satisfy
        ``0 <= start < end <= len(self.text)`` are dropped silently.

        Returns:
            A list of annotation dicts sorted by ``(start, end, id)``.

        Raises:
            ValueError: if the file is not valid JSON, its top level is not
                an object, ``annotations`` is not an array, or an entry is
                not an object with integer ``start``/``end`` offsets.
        """
        try:
            with codecs.open(self.metadata_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except ValueError:
            raise ValueError("Invalid JSON in metadata file: %s" % self.metadata_path)
        # Valid JSON may still be a list, number, string or null; report that
        # as malformed metadata instead of failing on ``data.get``.
        if not isinstance(data, dict):
            raise ValueError("Metadata JSON must be an object: %s" % self.metadata_path)
        annotations = data.get("annotations", [])
        if not isinstance(annotations, list):
            raise ValueError("Metadata JSON must contain an 'annotations' array")
        normalized = []
        for item in annotations:
            if not isinstance(item, dict):
                raise ValueError("Every annotation must be a JSON object")
            try:
                start = int(item["start"])
                end = int(item["end"])
            except (KeyError, TypeError, ValueError, OverflowError) as exc:
                raise ValueError("Every annotation must have integer start/end offsets: %s" % exc)
            if not (0 <= start < end <= len(self.text)):
                continue
            normalized.append(
                {
                    # text_type (not str) keeps non-ASCII ids and comments
                    # intact on Python 2, where str() would try to encode
                    # them as ASCII.
                    "id": text_type(item.get("id") or uuid.uuid4()),
                    "start": start,
                    "end": end,
                    "comment": text_type(item.get("comment") or ""),
                    "created_at": int(item.get("created_at") or now_timestamp()),
                    "updated_at": int(item.get("updated_at") or now_timestamp()),
                }
            )
        normalized.sort(key=lambda ann: (ann["start"], ann["end"], ann["id"]))
        return normalized

    def _save_annotations(self, annotations):
        # type: (list) -> None
        """Overwrite the sidecar with ``annotations`` as indented UTF-8 JSON."""
        payload = {
            "source_file": self.text_path,
            "annotations": annotations,
        }
        with codecs.open(self.metadata_path, "w", encoding="utf-8") as f:
            f.write(json.dumps(payload, indent=2, ensure_ascii=False) + "\n")

    def document_payload(self):
        # type: () -> dict
        """Return the file name, paths, text and current annotations.

        Raises:
            ValueError: if the metadata file is malformed.
        """
        with self.lock:
            annotations = self._load_annotations()
        return {
            "file_name": os.path.basename(self.text_path),
            "file_path": self.text_path,
            "metadata_path": self.metadata_path,
            "text": self.text,
            "annotations": annotations,
        }

    def mutate(self, payload):
        # type: (dict) -> list
        """Apply one annotation action and persist the result.

        ``payload["action"]`` selects the operation: ``add``,
        ``remove_range``, ``update_comment`` or ``remove_id``. The load,
        change and save all happen while holding ``lock``.

        Returns:
            The full annotation list after the change, sorted by
            ``(start, end, id)``.

        Raises:
            ValueError: if ``payload`` is not a dict, the action is not
                supported, or the metadata file is malformed.
        """
        if not isinstance(payload, dict):
            raise ValueError("Request payload must be a JSON object")
        action = payload.get("action")
        with self.lock:
            annotations = self._load_annotations()
            if action == "add":
                start = clamp_index(payload.get("start"), len(self.text))
                end = clamp_index(payload.get("end"), len(self.text))
                comment = text_type(payload.get("comment") or "")
                annotations = add_annotation(annotations, start, end, comment)
            elif action == "remove_range":
                start = clamp_index(payload.get("start"), len(self.text))
                end = clamp_index(payload.get("end"), len(self.text))
                annotations = remove_range(annotations, start, end)
            elif action == "update_comment":
                ann_id = text_type(payload.get("id") or "")
                comment = text_type(payload.get("comment") or "")
                annotations = update_comment(annotations, ann_id, comment)
            elif action == "remove_id":
                ann_id = text_type(payload.get("id") or "")
                annotations = [ann for ann in annotations if ann["id"] != ann_id]
            else:
                raise ValueError("Unsupported action: %s" % action)
            annotations.sort(key=lambda ann: (ann["start"], ann["end"], ann["id"]))
            self._save_annotations(annotations)
            return annotations
616
+
617
+
618
def now_timestamp():
    # type: () -> int
    """Return the current Unix time truncated to whole seconds."""
    seconds = time.time()
    return int(seconds)
621
+
622
+
623
def clamp_index(value, maximum):
    # type: (object, int) -> int
    """Convert ``value`` with ``int()`` and clamp it into ``[0, maximum]``.

    Whatever ``int(value)`` raises for an unconvertible value propagates
    to the caller.
    """
    index = int(value)
    # Apply the upper bound first, then the lower bound.
    capped = index if index < maximum else maximum
    return capped if capped > 0 else 0
627
+
628
+
629
def add_annotation(annotations, start, end, comment):
    # type: (list, int, int, text_type) -> list
    """Add a highlight for ``[start, end)``, merging it with any it touches.

    Reversed bounds are swapped. An empty range returns ``annotations``
    unchanged (the same list object). Existing annotations that overlap or
    directly adjoin the range are replaced by a single annotation spanning
    all of them; it keeps the id of the first one, the earliest
    ``created_at``, and the distinct non-empty comments joined by blank
    lines with the new comment first.

    Returns:
        A new list sorted by ``(start, end, id)``, except for the empty-range
        case described above.
    """
    low, high = (start, end) if start <= end else (end, start)
    if low == high:
        return annotations

    comment_parts = []
    new_comment = comment.strip()
    if new_comment:
        comment_parts.append(new_comment)

    # Split into annotations the range touches and those it leaves alone;
    # sharing only an endpoint still counts as touching.
    touching = []
    result = []
    for ann in annotations:
        if ann["end"] >= low and ann["start"] <= high:
            touching.append(ann)
        else:
            result.append(ann)

    if not touching:
        created_at = now_timestamp()
    else:
        low = min([low] + [ann["start"] for ann in touching])
        high = max([high] + [ann["end"] for ann in touching])
        for ann in touching:
            if not ann.get("comment", "").strip():
                continue
            existing = ann["comment"].strip()
            if existing not in comment_parts:
                comment_parts.append(existing)
        created_at = min(int(ann.get("created_at") or now_timestamp()) for ann in touching)

    merged_id = touching[0]["id"] if touching else str(uuid.uuid4())
    merged = {
        "id": merged_id,
        "start": low,
        "end": high,
        "comment": "\n\n".join(comment_parts),
        "created_at": created_at,
        "updated_at": now_timestamp(),
    }
    result.append(merged)
    result.sort(key=lambda ann: (ann["start"], ann["end"], ann["id"]))
    return result
671
+
672
+
673
def remove_range(annotations, start, end):
    # type: (list, int, int) -> list
    """Remove highlighting from ``[start, end)`` and return the new list.

    The bounds may be given in either order; an empty range returns
    ``annotations`` unchanged (the same list object). Annotations outside
    the range are kept as they are. An annotation that intersects the range
    is replaced by the part(s) of it that stick out on the left and/or
    right; each surviving part is a copy with a fresh UUID and a new
    ``updated_at``. Annotations entirely inside the range disappear.

    The result is sorted by ``(start, end, id)``.
    """
    cut_from, cut_to = (end, start) if start > end else (start, end)
    if cut_from == cut_to:
        return annotations

    def fragment(source, frag_start, frag_end):
        # Copy of `source` narrowed to the given span, with a new identity.
        piece = dict(source)
        piece["id"] = str(uuid.uuid4())
        piece["start"] = frag_start
        piece["end"] = frag_end
        piece["updated_at"] = now_timestamp()
        return piece

    survivors = []
    for ann in annotations:
        ann_start, ann_end = ann["start"], ann["end"]
        if ann_end <= cut_from or ann_start >= cut_to:
            survivors.append(ann)
        else:
            if ann_start < cut_from:
                survivors.append(fragment(ann, ann_start, cut_from))
            if ann_end > cut_to:
                survivors.append(fragment(ann, cut_to, ann_end))

    return sorted(survivors, key=lambda ann: (ann["start"], ann["end"], ann["id"]))
703
+
704
+
705
def update_comment(annotations, ann_id, comment):
    # type: (list, text_type, text_type) -> list
    """Return a copy of ``annotations`` with the comment of ``ann_id`` replaced.

    Every annotation whose ``"id"`` equals ``ann_id`` is copied, given the
    new ``comment`` and a fresh ``updated_at``; all others are passed
    through untouched.

    Raises:
        ValueError: if no annotation has the given id.
    """
    matched = False
    result = []
    for ann in annotations:
        if ann["id"] != ann_id:
            result.append(ann)
            continue
        matched = True
        refreshed = dict(ann)
        refreshed["comment"] = comment
        refreshed["updated_at"] = now_timestamp()
        result.append(refreshed)

    if matched:
        return result
    raise ValueError("Unknown highlight id: %s" % ann_id)
721
+
722
+
723
class HighlighterHandler(BaseHTTPRequestHandler):
    """HTTP request handler for the highlighter page and its JSON API.

    Routes:

    * ``GET /`` -- the HTML page (``HTML_PAGE``)
    * ``GET /api/document`` -- ``app_state.document_payload()`` as JSON
    * ``POST /api/highlights`` -- passes the JSON body to
      ``app_state.mutate()`` and returns the resulting annotations

    Any other path is answered with a JSON 404.
    """

    server_version = "TextHighlighter/1.0"

    @property
    def app_state(self):
        # type: () -> AppState
        """The application state object attached to the server instance."""
        return self.server.app_state

    def do_GET(self):
        # type: () -> None
        """Serve the HTML page or the document JSON; 404 for anything else."""
        path = urlparse(self.path).path
        if path == "/":
            self._send_html(HTML_PAGE)
            return
        if path == "/api/document":
            self._send_json(self.app_state.document_payload())
            return
        self._send_json({"error": "Not found"}, status=HTTP_NOT_FOUND)

    def do_POST(self):
        # type: () -> None
        """Handle ``POST /api/highlights``.

        Responds with ``{"ok": true, "annotations": [...]}`` on success,
        400 for malformed requests (bad Content-Length, invalid JSON,
        non-object body, or any ValueError from ``mutate``) and 500 for
        any other failure.
        """
        path = urlparse(self.path).path
        if path != "/api/highlights":
            self._send_json({"error": "Not found"}, status=HTTP_NOT_FOUND)
            return

        try:
            length = int(self.headers.get("Content-Length", "0"))
        except ValueError:
            self._send_json({"error": "Invalid Content-Length"}, status=HTTP_BAD_REQUEST)
            return
        if length < 0:
            # read() with a negative size means "read until EOF", which
            # would leave this handler waiting on the connection.
            self._send_json({"error": "Invalid Content-Length"}, status=HTTP_BAD_REQUEST)
            return

        try:
            if length:
                body = self.rfile.read(length)
                if not isinstance(body, text_type):
                    body = body.decode("utf-8")
            else:
                body = "{}"
            payload = json.loads(body)
            if not isinstance(payload, dict):
                # mutate() expects a mapping; arrays/scalars are a client
                # error, not a server fault.
                raise ValueError("Request body must be a JSON object")
            annotations = self.app_state.mutate(payload)
        except ValueError as exc:
            self._send_json({"error": str(exc)}, status=HTTP_BAD_REQUEST)
            return
        except Exception as exc:
            # Last-resort boundary: report the failure to the client
            # instead of dropping the connection.
            self._send_json({"error": str(exc)}, status=HTTP_INTERNAL_SERVER_ERROR)
            return

        self._send_json({"ok": True, "annotations": annotations})

    def log_message(self, fmt, *args):
        # type: (text_type, *object) -> None
        """Print the access-log line to stdout via ``print``."""
        print("[%s] %s - %s" % (self.log_date_time_string(), self.address_string(), fmt % args))

    def _send_html(self, html, status=HTTP_OK):
        # type: (text_type, int) -> None
        """Send ``html`` as a UTF-8 encoded ``text/html`` response."""
        data = html.encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "text/html; charset=utf-8")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)

    def _send_json(self, payload, status=HTTP_OK):
        # type: (dict, int) -> None
        """Serialise ``payload`` to JSON and send it with ``Cache-Control: no-store``."""
        data = json.dumps(payload, ensure_ascii=False).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json; charset=utf-8")
        self.send_header("Cache-Control", "no-store")
        self.send_header("Content-Length", str(len(data)))
        self.end_headers()
        self.wfile.write(data)
795
+
796
+
797
def parse_args():
    # type: () -> argparse.Namespace
    """Parse the command line: one text file path plus ``--host``/``--port``."""
    cli = argparse.ArgumentParser(
        description="Serve a plain text file in a browser with local highlight/comment storage."
    )
    cli.add_argument("text_file", help="Path to the plain text file to annotate")

    # Optional flags, declared as (flag, keyword arguments) pairs.
    options = (
        ("--host", {"default": "localhost", "help": "Host interface to bind to"}),
        ("--port", {"default": 8000, "type": int, "help": "Port to listen on"}),
    )
    for flag, settings in options:
        cli.add_argument(flag, **settings)

    namespace = cli.parse_args()
    return namespace
806
+
807
+
808
def main():
    # type: () -> None
    """Command-line entry point: load the text file and serve it over HTTP.

    Resolves the file given on the command line, reads it as UTF-8
    (undecodable bytes are replaced), stores annotations next to it in
    ``<file>.json``, and runs the server until interrupted with Ctrl-C.

    Raises:
        SystemExit: if the given path is not an existing file.
    """
    args = parse_args()
    text_path = os.path.abspath(os.path.expanduser(args.text_file))
    if not os.path.isfile(text_path):
        raise SystemExit("Text file not found: %s" % text_path)

    metadata_path = text_path + ".json"
    with codecs.open(text_path, "r", encoding="utf-8", errors="replace") as f:
        text = f.read()
    app_state = AppState(text_path=text_path, metadata_path=metadata_path, text=text, lock=threading.Lock())

    httpd = ThreadingHTTPServer((args.host, args.port), HighlighterHandler)
    httpd.app_state = app_state  # type: ignore[attr-defined]

    # Report the port the socket is actually bound to; with --port 0 the
    # OS picks one and args.port would still say 0.
    bound_port = httpd.server_address[1]

    print("Serving %s" % text_path)
    print("Metadata file: %s" % metadata_path)
    print("Open http://%s:%s" % (args.host, bound_port))
    try:
        httpd.serve_forever()
    except KeyboardInterrupt:
        print("\nShutting down…")
    finally:
        # Release the listening socket on every exit path.
        httpd.server_close()


if __name__ == "__main__":
    main()