wxpath 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wxpath/__init__.py +2 -0
- wxpath/cli.py +6 -0
- wxpath/core/exceptions.py +53 -0
- wxpath/core/models.py +1 -0
- wxpath/core/ops.py +100 -19
- wxpath/core/parser.py +94 -24
- wxpath/core/runtime/engine.py +74 -10
- wxpath/core/runtime/helpers.py +6 -3
- wxpath/http/client/__init__.py +1 -1
- wxpath/http/client/crawler.py +17 -5
- wxpath/http/client/response.py +7 -1
- wxpath/http/policy/retry.py +2 -2
- wxpath/integrations/__init__.py +0 -0
- wxpath/integrations/langchain/__init__.py +0 -0
- wxpath/integrations/langchain/examples/basic_rag.py +85 -0
- wxpath/integrations/langchain/examples/rolling_window_rag.py +218 -0
- wxpath/integrations/langchain/loader.py +60 -0
- wxpath/patches.py +215 -5
- wxpath/settings.py +3 -1
- wxpath/tui.py +1225 -0
- wxpath/tui_settings.py +151 -0
- wxpath/util/cleaners.py +31 -0
- wxpath/util/common_paths.py +22 -0
- wxpath/util/logging.py +3 -7
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/METADATA +73 -9
- wxpath-0.5.1.dist-info/RECORD +45 -0
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/WHEEL +1 -1
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/entry_points.txt +1 -0
- wxpath-0.4.1.dist-info/RECORD +0 -35
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/top_level.txt +0 -0
wxpath/tui.py
ADDED
|
@@ -0,0 +1,1225 @@
|
|
|
1
|
+
"""TUI for interactive wxpath expression testing.
|
|
2
|
+
|
|
3
|
+
A two-panel terminal interface:
|
|
4
|
+
- Top panel: Editor for wxpath DSL expressions
|
|
5
|
+
- Bottom panel: Live output of executed expressions
|
|
6
|
+
|
|
7
|
+
Warning:
|
|
8
|
+
Pre-1.0.0 - APIs and contracts may change
|
|
9
|
+
|
|
10
|
+
Example:
|
|
11
|
+
Launch the TUI from command line::
|
|
12
|
+
|
|
13
|
+
$ wxpath-tui
|
|
14
|
+
|
|
15
|
+
Or run as a module::
|
|
16
|
+
|
|
17
|
+
$ python -m wxpath.tui
|
|
18
|
+
|
|
19
|
+
"""
|
|
20
|
+
import asyncio
|
|
21
|
+
import csv
|
|
22
|
+
import json
|
|
23
|
+
import traceback
|
|
24
|
+
from datetime import datetime
|
|
25
|
+
from pathlib import Path
|
|
26
|
+
from typing import Any, Iterable
|
|
27
|
+
|
|
28
|
+
from elementpath.xpath_tokens import XPathMap
|
|
29
|
+
from lxml.html import HtmlElement, tostring
|
|
30
|
+
from rich.console import RenderableType
|
|
31
|
+
from textual import events, work
|
|
32
|
+
from textual.app import App, ComposeResult
|
|
33
|
+
from textual.containers import Container, Horizontal, Vertical, VerticalScroll
|
|
34
|
+
from textual.reactive import reactive
|
|
35
|
+
from textual.screen import ModalScreen
|
|
36
|
+
from textual.widgets import (
|
|
37
|
+
Button,
|
|
38
|
+
DataTable,
|
|
39
|
+
Footer,
|
|
40
|
+
Header,
|
|
41
|
+
Input,
|
|
42
|
+
Static,
|
|
43
|
+
Switch,
|
|
44
|
+
TextArea,
|
|
45
|
+
)
|
|
46
|
+
|
|
47
|
+
from wxpath.core.runtime.engine import WXPathEngine
|
|
48
|
+
from wxpath.hooks import registry
|
|
49
|
+
from wxpath.hooks.builtin import SerializeXPathMapAndNodeHook
|
|
50
|
+
from wxpath.settings import SETTINGS
|
|
51
|
+
from wxpath.tui_settings import (
|
|
52
|
+
TUISettingsSchema,
|
|
53
|
+
load_tui_settings,
|
|
54
|
+
save_tui_settings,
|
|
55
|
+
validate_tui_settings,
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class HeadersScreen(ModalScreen):
    """Modal dialog in which the user edits custom HTTP headers as JSON.

    The screen is dismissed with a ``dict`` mapping header names to values
    when the user saves (an empty dict when the editor is blank), or with
    ``None`` when the user cancels.
    """

    CSS = """
    HeadersScreen {
        align: center middle;
    }

    #headers-dialog {
        width: 80;
        height: 25;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #headers-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    #headers-editor {
        height: 1fr;
        margin: 1 0;
    }

    #headers-help {
        color: $text-muted;
        margin-bottom: 1;
    }

    #headers-buttons {
        height: auto;
        align: center middle;
    }

    Button {
        margin: 0 1;
    }
    """

    def __init__(self, current_headers: dict):
        """Create the dialog.

        Args:
            current_headers: Headers shown in the editor when it opens.
        """
        super().__init__()
        self.current_headers = current_headers

    def compose(self) -> ComposeResult:
        """Yield the title, help line, JSON editor and the button row."""
        help_text = (
            "Enter headers as JSON (one per line or as object)."
            " Press Ctrl+S to save, Escape to cancel."
        )
        # The editor opens holding the current headers, pretty-printed.
        initial_json = json.dumps(self.current_headers, indent=2)

        with Vertical(id="headers-dialog"):
            yield Static("HTTP Headers Configuration", id="headers-title")
            yield Static(help_text, id="headers-help")
            yield TextArea(initial_json, language="json", id="headers-editor")

            with Container(id="headers-buttons"):
                yield Button("Save (Ctrl+S)", variant="primary", id="save-btn")
                yield Button("Cancel (Esc)", variant="default", id="cancel-btn")

    def on_mount(self) -> None:
        """Give keyboard focus to the JSON editor as soon as the screen mounts."""
        editor = self.query_one("#headers-editor", TextArea)
        editor.focus()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Save on the Save button, dismiss with ``None`` on the Cancel button."""
        pressed = event.button.id
        if pressed == "cancel-btn":
            self.dismiss(None)
        elif pressed == "save-btn":
            self._save_headers()

    def on_key(self, event) -> None:
        """Handle Ctrl+S (save) and Escape (cancel); other keys are ignored."""
        key = event.key
        if key not in ("ctrl+s", "escape"):
            return
        if key == "ctrl+s":
            self._save_headers()
        else:
            self.dismiss(None)
        event.prevent_default()

    def _save_headers(self) -> None:
        """Validate the editor text and dismiss the screen with the headers.

        A blank editor dismisses with ``{}``. Otherwise the text must be a
        JSON object whose keys and values are all strings; any violation is
        reported through ``notify`` and the screen stays open.
        """
        raw_text = self.query_one("#headers-editor", TextArea).text.strip()

        if not raw_text:
            # Empty headers = use defaults
            self.dismiss({})
            return

        try:
            parsed = json.loads(raw_text)
        except json.JSONDecodeError as exc:
            self.notify(f"Invalid JSON: {exc}", severity="error")
            return

        if not isinstance(parsed, dict):
            self.notify("Headers must be a JSON object/dict", severity="error")
            return

        # Every header name and value has to be a plain string.
        for name, content in parsed.items():
            if not isinstance(name, str):
                self.notify(f"Header key must be string: {name}", severity="error")
                return
            if not isinstance(content, str):
                self.notify(f"Header value must be string: {content}", severity="error")
                return

        self.dismiss(parsed)
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
class SettingsScreen(ModalScreen):
    """Modal screen for editing persistent TUI settings (CONCURRENCY, PER_HOST, RESPECT_ROBOTS).

    One input row is built per entry of ``TUISettingsSchema``. On save the
    values are validated with ``validate_tui_settings``, persisted with
    ``save_tui_settings`` and handed back to the caller via ``dismiss``;
    cancelling dismisses with ``None``. (The original notes the settings file
    as ~/.config/wxpath/tui_settings.json - that location is decided by
    ``save_tui_settings``, not by this class.)
    """

    CSS = """
    SettingsScreen {
        align: center middle;
    }

    #settings-dialog {
        width: 60;
        min-height: 18;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #settings-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    .settings-row {
        height: auto;
        padding: 1 0;
    }

    .settings-label {
        width: 18;
        text-style: bold;
    }

    .settings-input {
        width: 1fr;
    }

    #settings-help {
        color: $text-muted;
        margin: 1 0;
    }

    #settings-buttons {
        height: auto;
        align: center middle;
        padding: 1 0;
    }

    #settings-buttons Button {
        margin: 0 1;
    }
    """

    def __init__(self, current: dict[str, Any]):
        """Create the dialog.

        Args:
            current: Current setting values keyed by schema key. A shallow
                copy is stored so the caller's dict is never mutated.
        """
        super().__init__()
        self.current = dict(current)

    def compose(self) -> ComposeResult:
        """Yield the title, help line, one row per schema entry and the buttons."""
        with Vertical(id="settings-dialog"):
            yield Static("Crawler Settings (persistent)", id="settings-title")
            yield Static(
                "Values are saved to config and used for the next run. Ctrl+S save, Esc cancel.",
                id="settings-help",
            )
            for entry in TUISettingsSchema:
                key = entry["key"]
                value = self.current.get(key, entry["default"])
                with Horizontal(classes="settings-row"):
                    yield Static(entry["label"], classes="settings-label")
                    if entry["type"] == "int":
                        yield Input(
                            str(value),
                            type="integer",
                            id=f"setting-{key}",
                            classes="settings-input",
                        )
                    else:
                        # Every non-int schema type is rendered as an on/off switch.
                        yield Switch(
                            value=bool(value),
                            id=f"setting-{key}",
                            classes="settings-input",
                        )
            with Container(id="settings-buttons"):
                yield Button("Save (Ctrl+S)", variant="primary", id="settings-save-btn")
                yield Button("Cancel (Esc)", variant="default", id="settings-cancel-btn")

    def on_mount(self) -> None:
        """Focus the widget belonging to the first schema entry."""
        first_id = f"setting-{TUISettingsSchema[0]['key']}"
        self.query_one(f"#{first_id}").focus()

    def _gather(self) -> dict[str, Any]:
        """Collect the current widget values keyed by schema key.

        Returns:
            Mapping of schema key to value. A blank integer field falls back
            to the schema default.

        Raises:
            ValueError: If an integer field holds text ``int()`` cannot parse.
                An integer-restricted ``Input`` still accepts partial entries
                such as a lone ``-``, so this is reachable from the UI.
        """
        result: dict[str, Any] = {}
        for entry in TUISettingsSchema:
            key = entry["key"]
            node = self.query_one(f"#setting-{key}")
            if not isinstance(node, Input):
                result[key] = node.value
                continue
            raw = node.value.strip()
            if not raw:
                result[key] = entry["default"]
                continue
            try:
                result[key] = int(raw)
            except ValueError:
                raise ValueError(
                    f"{entry['label']} must be a whole number (got {raw!r})"
                ) from None
        return result

    def _validate(self, data: dict[str, Any]) -> str | None:
        """Return the first validation error for ``data``, or ``None`` if valid."""
        errors = validate_tui_settings(data)
        return errors[0] if errors else None

    def _save_settings(self) -> None:
        """Gather, validate, persist and dismiss; notify and stay open on error.

        Shared by the Save button and the Ctrl+S shortcut so both paths behave
        identically.
        """
        try:
            data = self._gather()
        except ValueError as exc:
            # Unparseable integer text: report it instead of crashing the app.
            self.notify(str(exc), severity="error")
            return
        err = self._validate(data)
        if err:
            self.notify(err, severity="error")
            return
        save_tui_settings(data)
        self.dismiss(data)

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Save on the Save button, dismiss with ``None`` on the Cancel button."""
        if event.button.id == "settings-save-btn":
            self._save_settings()
        elif event.button.id == "settings-cancel-btn":
            self.dismiss(None)

    def on_key(self, event) -> None:
        """Handle Ctrl+S (save) and Escape (cancel)."""
        if event.key == "ctrl+s":
            self._save_settings()
            event.prevent_default()
        elif event.key == "escape":
            self.dismiss(None)
            event.prevent_default()
|
|
328
|
+
|
|
329
|
+
|
|
330
|
+
class ExportScreen(ModalScreen):
    """Modal dialog that asks which format to export the output table in.

    The screen itself writes nothing: it is dismissed with ``"csv"``,
    ``"json"`` or ``None`` (cancelled) and the caller performs the export.
    """

    CSS = """
    ExportScreen {
        align: center middle;
    }

    #export-dialog {
        width: 50;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #export-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    #export-buttons {
        height: auto;
        align: center middle;
        padding: 1 0;
    }

    #export-buttons Button {
        margin: 0 1;
    }
    """

    def compose(self) -> ComposeResult:
        """Yield the title, a one-line hint and the three choice buttons."""
        hint = "Choose format. File is saved in the current directory."
        with Vertical(id="export-dialog"):
            yield Static("Export table data", id="export-title")
            yield Static(hint, id="export-help")
            with Container(id="export-buttons"):
                yield Button("Export CSV", variant="primary", id="export-csv-btn")
                yield Button("Export JSON", variant="primary", id="export-json-btn")
                yield Button("Cancel (Esc)", variant="default", id="export-cancel-btn")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with the value that corresponds to the pressed button."""
        outcome_by_button = {
            "export-cancel-btn": None,
            "export-csv-btn": "csv",
            "export-json-btn": "json",
        }
        pressed = event.button.id
        if pressed in outcome_by_button:
            self.dismiss(outcome_by_button[pressed])

    def on_key(self, event) -> None:
        """Treat Escape as cancel; every other key is left alone."""
        if event.key != "escape":
            return
        self.dismiss(None)
        event.prevent_default()
|
|
395
|
+
|
|
396
|
+
|
|
397
|
+
class OutputPanel(Vertical, can_focus=True):
    """Focusable vertical container holding the results of an expression run.

    Each appended renderable is wrapped in its own ``Static`` child; clearing
    the panel removes every child.
    """

    def __init__(self, *args, **kwargs):
        """Create the panel and title its border "Output".

        Args:
            *args: Positional arguments forwarded to ``Vertical``.
            **kwargs: Keyword arguments forwarded to ``Vertical``.
        """
        super().__init__(*args, **kwargs)
        self.border_title = "Output"

    def clear(self) -> None:
        """Remove every child widget from the panel."""
        self.remove_children()

    def append(self, renderable) -> None:
        """Mount ``renderable`` in the panel, wrapped in a new ``Static``.

        Args:
            renderable: Content handed to ``Static`` unchanged.
        """
        entry = Static(renderable)
        self.mount(entry)
|
|
434
|
+
|
|
435
|
+
|
|
436
|
+
class DebugPanel(VerticalScroll, can_focus=False):
    """Non-focusable scroll region that accumulates debug lines.

    Messages are only ever appended (or cleared all at once); after each
    append the view jumps to the bottom so the newest line is visible.
    """

    def __init__(self, *args, **kwargs):
        """Create the panel, forwarding all arguments to ``VerticalScroll``."""
        super().__init__(*args, **kwargs)

    def clear(self) -> None:
        """Drop every debug line currently shown."""
        self.remove_children()

    def append(self, message: str) -> None:
        """Add one debug line and scroll to the end without animation.

        Args:
            message: Text for the new line; it is passed to ``Static`` as-is,
                so Rich markup in the string is the caller's responsibility.
        """
        line = Static(message, classes="debug-line")
        self.mount(line)
        self.scroll_end(animate=False)
|
|
462
|
+
|
|
463
|
+
|
|
464
|
+
class WXPathTUI(App):
|
|
465
|
+
"""Interactive TUI for wxpath expression testing.
|
|
466
|
+
|
|
467
|
+
Top panel: Expression editor
|
|
468
|
+
Bottom panel: Live output display
|
|
469
|
+
"""
|
|
470
|
+
|
|
471
|
+
TITLE = "wxpath TUI - Interactive Expression Testing"
|
|
472
|
+
# SUB_TITLE will be set dynamically based on cache state
|
|
473
|
+
|
|
474
|
+
CSS = """
|
|
475
|
+
Screen {
|
|
476
|
+
layout: vertical;
|
|
477
|
+
background: $surface;
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
#editor-container {
|
|
481
|
+
height: 40%;
|
|
482
|
+
border: heavy $primary;
|
|
483
|
+
background: $panel;
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
#output-container {
|
|
487
|
+
/* height: 60%; */
|
|
488
|
+
height: 60%;
|
|
489
|
+
border: heavy $accent;
|
|
490
|
+
background: $panel;
|
|
491
|
+
}
|
|
492
|
+
|
|
493
|
+
#output-panel {
|
|
494
|
+
height: 3fr;
|
|
495
|
+
}
|
|
496
|
+
|
|
497
|
+
#debug-container {
|
|
498
|
+
layout: vertical;
|
|
499
|
+
height: 1fr;
|
|
500
|
+
min-height: 5;
|
|
501
|
+
border-top: tall $accent-darken-1;
|
|
502
|
+
background: $surface-darken-1;
|
|
503
|
+
}
|
|
504
|
+
|
|
505
|
+
#debug-header {
|
|
506
|
+
background: $accent-darken-1;
|
|
507
|
+
color: $text;
|
|
508
|
+
text-style: bold;
|
|
509
|
+
padding: 0 2;
|
|
510
|
+
dock: top;
|
|
511
|
+
}
|
|
512
|
+
|
|
513
|
+
#debug-panel {
|
|
514
|
+
height: 1fr;
|
|
515
|
+
min-height: 3;
|
|
516
|
+
padding: 0 2;
|
|
517
|
+
overflow-y: auto;
|
|
518
|
+
background: $surface-darken-1;
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
TextArea {
|
|
522
|
+
height: 100%;
|
|
523
|
+
background: $surface;
|
|
524
|
+
}
|
|
525
|
+
|
|
526
|
+
OutputPanel {
|
|
527
|
+
height: 100%;
|
|
528
|
+
padding: 1 2;
|
|
529
|
+
overflow-y: auto;
|
|
530
|
+
background: $surface;
|
|
531
|
+
}
|
|
532
|
+
|
|
533
|
+
DebugPanel {
|
|
534
|
+
height: 100%;
|
|
535
|
+
padding: 1 0;
|
|
536
|
+
overflow-y: auto;
|
|
537
|
+
background: $surface;
|
|
538
|
+
}
|
|
539
|
+
|
|
540
|
+
.panel-header {
|
|
541
|
+
background: $primary;
|
|
542
|
+
color: $text;
|
|
543
|
+
text-style: bold;
|
|
544
|
+
padding: 0 2;
|
|
545
|
+
dock: top;
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
Header {
|
|
549
|
+
background: $primary-darken-2;
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
Footer {
|
|
553
|
+
background: $primary-darken-2;
|
|
554
|
+
}
|
|
555
|
+
"""
|
|
556
|
+
|
|
557
|
+
BINDINGS = [
|
|
558
|
+
("ctrl+q", "quit", "Quit"),
|
|
559
|
+
("ctrl+r", "execute", "Execute"),
|
|
560
|
+
("escape", "cancel_crawl", "Cancel Crawl"),
|
|
561
|
+
("ctrl+c", "clear", "Clear"),
|
|
562
|
+
("ctrl+shift+backspace", "clear_editor", "Clear Editor"),
|
|
563
|
+
("ctrl+d", "clear_debug", "Clear Debug"),
|
|
564
|
+
("ctrl+shift+d", "toggle_debug", "Toggle Debug"),
|
|
565
|
+
("ctrl+e", "export", "Export"),
|
|
566
|
+
("ctrl+l", "toggle_cache", "Cache"),
|
|
567
|
+
("ctrl+h", "edit_headers", "Headers"),
|
|
568
|
+
("ctrl+shift+s", "edit_settings", "Settings"),
|
|
569
|
+
("f5", "execute", "Execute"),
|
|
570
|
+
("tab", "focus_next", "Focus Next"),
|
|
571
|
+
]
|
|
572
|
+
|
|
573
|
+
cache_enabled = reactive(False)
|
|
574
|
+
debug_panel_visible = reactive(True)
|
|
575
|
+
custom_headers = reactive({})
|
|
576
|
+
tui_settings = reactive({})
|
|
577
|
+
|
|
578
|
+
def __init__(self):
    """Set up bookkeeping state and register the serialization hook.

    ``cache_enabled`` is deliberately left alone here; ``on_mount`` assigns
    it from the global settings.
    """
    super().__init__()

    # Crawl state: whether a run is in progress, and the worker running it
    # (kept so the crawl can be cancelled).
    self._executing = False
    self._crawl_worker = None

    # Table sort state: the column last sorted by and the direction used.
    self._last_sort_column: str | None = None
    self._last_sort_reverse = False

    # Serialization hook - per the original author's note it converts
    # XPathMap results to dicts for table output.
    registry.register(SerializeXPathMapAndNodeHook)
|
|
593
|
+
|
|
594
|
+
def compose(self) -> ComposeResult:
    """Yield the header, the editor area, the output/debug area and the footer."""
    editor_caption = "Expression Editor (Ctrl+R to execute)"

    yield Header()

    with Container(id="editor-container"):
        yield Static(editor_caption, classes="panel-header")
        yield TextArea(id="expression-editor", language="python")

    with Container(id="output-container"):
        yield Static("Output", classes="panel-header")
        yield OutputPanel(id="output-panel")

        # NOTE(review): the debug container is assumed to sit inside the
        # output container (its 1fr height pairs with #output-panel's 3fr);
        # confirm against the original indentation.
        with Container(id="debug-container"):
            yield Static("Debug", id="debug-header", classes="panel-header")
            yield DebugPanel(id="debug-panel")

    yield Footer()
|
|
612
|
+
|
|
613
|
+
def on_mount(self) -> None:
    """Load settings, seed the editor with an example and show the welcome text."""
    # Assigning the reactives fires their watchers, which refresh the subtitle.
    self.cache_enabled = bool(SETTINGS.http.client.cache.enabled)
    # Persistent TUI settings (CONCURRENCY, PER_HOST, RESPECT_ROBOTS)
    self.tui_settings = load_tui_settings()

    # Seed the editor with a small runnable example and focus it.
    expression_editor = self.query_one("#expression-editor", TextArea)
    expression_editor.text = "url('https://quotes.toscrape.com')//span[@class='text']/text()"
    expression_editor.focus()

    welcome_parts = (
        "[dim]Welcome to wxpath TUI![/dim]\n\n",
        "[cyan]Quick Start:[/cyan]\n",
        " • Edit the expression above\n",
        " • Press [bold]Ctrl+R[/bold] or [bold]F5[/bold] to execute\n",
        " • Press [bold]Escape[/bold] to cancel a running crawl\n",
        " • Press [bold]Ctrl+E[/bold] to export table (CSV/JSON)\n",
        " • Press [bold]Ctrl+C[/bold] to clear output\n",
        " • Press [bold]Ctrl+Shift+Backspace[/bold] to clear expression editor\n",
        " • Press [bold]Ctrl+Shift+D[/bold] to toggle debug panel\n",
        " • Press [bold]Ctrl+H[/bold] to configure HTTP headers\n",
        " • Press [bold]Ctrl+Shift+S[/bold] to edit persistent settings (concurrency, robots)\n",  # noqa: E501
        " • Press [bold]Ctrl+L[/bold] to toggle HTTP caching\n",
        " • Use [bold]arrow keys[/bold] or [bold]scroll[/bold] to view results\n\n",
        "[cyan]Example expressions:[/cyan]\n",
        " • Extract text: url('...')//div//text()\n",
        " • Extract as dict/table: url('...')//div/map { 'title': .//h1/text() }\n",
        " • Follow links: url('...') ///url(//a/@href) //div/text()\n\n",
        "[green]Expression appears valid - Press Ctrl+R or F5 to execute[/green]",
    )
    self._update_output("".join(welcome_parts))
|
|
646
|
+
|
|
647
|
+
def watch_cache_enabled(self, new_value: bool) -> None:
    """Mirror the new cache flag into the global settings and refresh the subtitle.

    Args:
        new_value: The value just assigned to ``cache_enabled``.
    """
    # The global SETTINGS object is the place the flag is published to; the
    # original comment says the HTTP crawler reads it from there.
    SETTINGS.http.client.cache.enabled = bool(new_value)
    published = SETTINGS.http.client.cache.enabled
    self._debug(f"Cache enabled: {published}")
    self._update_subtitle()
|
|
653
|
+
|
|
654
|
+
def watch_custom_headers(self, new_value: dict) -> None:
    """Refresh the subtitle after ``custom_headers`` is reassigned.

    Args:
        new_value: The newly assigned headers. Not used directly; the
            subtitle is rebuilt by ``_update_subtitle``.
    """
    self._update_subtitle()
|
|
657
|
+
|
|
658
|
+
def watch_tui_settings(self, new_value: dict) -> None:
    """Refresh the subtitle after ``tui_settings`` is reassigned.

    Args:
        new_value: The newly assigned settings. Not used directly; the
            subtitle is rebuilt by ``_update_subtitle``.
    """
    self._update_subtitle()
|
|
661
|
+
|
|
662
|
+
def _update_subtitle(self) -> None:
    """Rebuild ``sub_title`` from the cache flag, headers and persistent settings.

    The cache state is read from the global ``SETTINGS`` object rather than
    from ``self.cache_enabled`` and is shown as ON/OFF, the same wording used
    for the robots flag and by the cache-toggle message.
    """
    settings = self.tui_settings

    cache_label = "ON" if SETTINGS.http.client.cache.enabled else "OFF"

    header_count = len(self.custom_headers)
    headers_info = f"{header_count} custom" if header_count > 0 else "default"

    # Fallbacks used when a key is missing from the loaded settings.
    conc = settings.get("concurrency", 16)
    ph = settings.get("per_host", 8)
    robots = "ON" if settings.get("respect_robots", True) else "OFF"

    self.sub_title = (
        f"Cache: {cache_label} | Headers: {headers_info} | "
        f"Concurrency: {conc} | Per host: {ph} | Robots: {robots} | "
        "Ctrl+R: Run | Ctrl+Shift+S: Settings | Ctrl+Q: Quit"
    )
|
|
676
|
+
|
|
677
|
+
async def action_toggle_cache(self) -> None:
    """Flip ``cache_enabled`` and report the change in the output and debug panels."""
    was_enabled = self.cache_enabled
    self.cache_enabled = not was_enabled
    now_enabled = self.cache_enabled

    before, after = ("ON" if flag else "OFF" for flag in (was_enabled, now_enabled))

    announcement = (
        f"[cyan]HTTP caching toggled: {before} → {after}[/cyan]\n\n"
        "[dim]This setting will apply to the next expression execution.[/dim]"
    )
    self._update_output(announcement)
    self._debug(f"Toggled cache from {before} to {after}")
|
|
691
|
+
|
|
692
|
+
def action_edit_headers(self) -> None:
    """Open the headers dialog and apply its result when it closes.

    A ``None`` result (cancel) changes nothing. Any other result replaces
    ``custom_headers`` and a confirmation is written to the output panel.
    """
    # Local import: only this method needs it, and rich is already a
    # dependency of this module.
    from rich.markup import escape

    def handle_headers_result(result):
        """Store the headers returned by the dialog and confirm to the user."""
        if result is None:
            return
        self.custom_headers = result
        count = len(result)
        if count == 0:
            self._update_output(
                "[cyan]Headers cleared - using defaults[/cyan]\n\n"
                "[dim]This will apply to the next expression execution.[/dim]"
            )
            return
        # Header values are user text and may contain "[...]" sequences;
        # escape them so they are shown literally instead of being parsed as
        # Rich markup (which can misrender or raise a markup error).
        headers_preview = escape(json.dumps(result, indent=2))
        self._update_output(
            f"[cyan]Custom headers saved ({count} headers)[/cyan]\n\n"
            f"[green]{headers_preview}[/green]\n\n"
            "[dim]These will apply to the next expression execution.[/dim]"
        )

    # The dialog gets a copy so edits cannot touch the live headers before save.
    self.push_screen(HeadersScreen(dict(self.custom_headers)), handle_headers_result)
    self._debug("Opened headers configuration screen")
|
|
714
|
+
|
|
715
|
+
def action_edit_settings(self) -> None:
    """Open the persistent-settings dialog (CONCURRENCY, PER_HOST, RESPECT_ROBOTS)."""

    def handle_settings_result(result: dict[str, Any] | None) -> None:
        """Adopt the saved settings and confirm them; do nothing on cancel."""
        if result is None:
            return
        self.tui_settings = result
        concurrency = result.get('concurrency', 16)
        per_host = result.get('per_host', 8)
        respect_robots = result.get('respect_robots', True)
        self._update_output(
            "[cyan]Persistent settings saved[/cyan]\n\n"
            f"CONCURRENCY: {concurrency} | "
            f"PER_HOST: {per_host} | "
            f"RESPECT_ROBOTS: {respect_robots}\n\n"
            "[dim]These apply to the next expression execution.[/dim]"
        )
        self._debug("Persistent settings saved and applied")

    # The dialog works on a copy of the current settings.
    dialog = SettingsScreen(dict(self.tui_settings))
    self.push_screen(dialog, handle_settings_result)
    self._debug("Opened persistent settings screen")
|
|
731
|
+
|
|
732
|
+
def _get_output_data_table(self) -> DataTable | None:
    """Look up the first ``DataTable`` mounted inside the output panel.

    Returns:
        The first matching table, or ``None`` when the panel contains no
        ``DataTable`` at all.
    """
    output_panel = self.query_one("#output-panel", OutputPanel)
    found = output_panel.query(DataTable)
    if not found:
        return None
    return found.first()
|
|
741
|
+
|
|
742
|
+
def _export_table_csv(self, data_table: DataTable, path: Path) -> None:
    """Dump the table to ``path`` as UTF-8 CSV: a header row, then one row per table row.

    Every cell is written as ``str(cell)``. When the table has no columns
    the method returns without creating the file.

    Args:
        data_table: The DataTable to export.
        path: Output file path.
    """
    column_meta = data_table.ordered_columns
    if not column_meta:
        return

    header_row = [str(col.label) for col in column_meta]
    # newline="" lets the csv module control line endings itself.
    with path.open("w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header_row)
        out.writerows(
            [str(cell) for cell in data_table.get_row(row.key)]
            for row in data_table.ordered_rows
        )
|
|
760
|
+
|
|
761
|
+
def _export_table_json(self, data_table: DataTable, path: Path) -> None:
    """Write table data to a JSON file (list of row objects).

    Each row becomes an object mapping column label to ``str(cell)``. The
    file is UTF-8 and non-ASCII text is written as-is (not as ``\\uXXXX``
    escapes), matching what the CSV export produces. When the table has no
    columns the method returns without creating the file.

    Args:
        data_table: The DataTable to export.
        path: Output file path.
    """
    columns = data_table.ordered_columns
    if not columns:
        return
    keys = [str(c.label) for c in columns]
    # NOTE: columns sharing a label collapse into one key (the last wins).
    rows = [
        dict(
            zip(
                keys,
                [str(c) for c in data_table.get_row(row_meta.key)],
                strict=True,
            )
        )
        for row_meta in data_table.ordered_rows
    ]
    with path.open("w", encoding="utf-8") as f:
        # The file is already UTF-8, so keep scraped text human-readable.
        json.dump(rows, f, indent=2, ensure_ascii=False)
|
|
778
|
+
|
|
779
|
+
def action_export(self) -> None:
    """Open the export dialog and save the output table as CSV or JSON.

    The file is written to the current working directory under a
    timestamped name. Nothing is written, and the user is warned, when the
    output panel holds no table or the table has no columns.
    """

    def handle_export_result(fmt: str | None) -> None:
        """Run the export for the chosen format; ``None`` means cancelled."""
        if fmt is None:
            self._debug("Export cancelled")
            return
        table = self._get_output_data_table()
        if table is None:
            self.notify(
                "No table to export. Run an expression that produces a table first.",
                severity="warning",
            )
            self._debug("Export attempted but output panel has no DataTable")
            return
        if not table.ordered_columns:
            # Both exporters return without writing a file in this case, so
            # reporting "Exported to ..." would be wrong.
            self.notify("Table has no columns; nothing to export.", severity="warning")
            self._debug("Export attempted but DataTable has no columns")
            return
        stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        ext = ".csv" if fmt == "csv" else ".json"
        path = Path.cwd() / f"wxpath_export_{stamp}{ext}"
        try:
            if fmt == "csv":
                self._export_table_csv(table, path)
            else:
                self._export_table_json(table, path)
            self.notify(f"Exported to {path}", severity="information")
            self._debug(f"Exported table to {path} ({fmt.upper()}, {table.row_count} rows)")
        except OSError as e:
            self.notify(f"Export failed: {e}", severity="error")
            self._debug(f"Export failed: {e}")

    self.push_screen(ExportScreen(), handle_export_result)
    self._debug("Opened export dialog")
|
|
809
|
+
|
|
810
|
+
def _numeric_sort_key(self, value: Any) -> tuple[int, float | str]:
|
|
811
|
+
"""Key for sorting: numbers by value, then non-numeric by string.
|
|
812
|
+
|
|
813
|
+
Used so numeric columns sort numerically (e.g. 2 < 10) instead of
|
|
814
|
+
lexicographically (e.g. "10" < "2"). Single cell value is passed
|
|
815
|
+
when sorting by one column.
|
|
816
|
+
"""
|
|
817
|
+
s = "" if value is None else str(value).strip()
|
|
818
|
+
if not s:
|
|
819
|
+
return (1, "")
|
|
820
|
+
try:
|
|
821
|
+
return (0, float(s))
|
|
822
|
+
except (ValueError, TypeError):
|
|
823
|
+
return (1, str(value))
|
|
824
|
+
|
|
825
|
+
def _is_numeric_column(self, table: DataTable, column_key: Any) -> bool:
|
|
826
|
+
"""Return True if column appears to be numeric (majority of non-empty parse as float)."""
|
|
827
|
+
numeric = 0
|
|
828
|
+
non_empty = 0
|
|
829
|
+
for cell in table.get_column(column_key):
|
|
830
|
+
if non_empty >= 10:
|
|
831
|
+
break
|
|
832
|
+
s = "" if cell is None else str(cell).strip()
|
|
833
|
+
if not s:
|
|
834
|
+
continue
|
|
835
|
+
non_empty += 1
|
|
836
|
+
try:
|
|
837
|
+
float(s)
|
|
838
|
+
numeric += 1
|
|
839
|
+
except (ValueError, TypeError):
|
|
840
|
+
pass
|
|
841
|
+
return numeric > 0 and numeric >= (non_empty / 2)
|
|
842
|
+
|
|
843
|
+
def on_data_table_header_selected(self, event: DataTable.HeaderSelected) -> None:
    """Sort the table by the clicked column header.

    Clicking the same column again flips the direction; clicking a different
    column starts ascending. Columns detected as numeric are sorted with the
    numeric sort key, others with the table's default ordering.
    """
    table, column_key = event.data_table, event.column_key
    key_str = str(column_key)

    if self._last_sort_column == key_str:
        # Repeat click on the same header: toggle direction.
        self._last_sort_reverse = not self._last_sort_reverse
    else:
        self._last_sort_column = key_str
        self._last_sort_reverse = False

    descending = self._last_sort_reverse
    direction = "desc" if descending else "asc"

    if self._is_numeric_column(table, column_key):
        table.sort(column_key, key=self._numeric_sort_key, reverse=descending)
        self._debug(f"Sorted by column {key_str!r} numerically ({direction})")
        return

    table.sort(column_key, reverse=descending)
    self._debug(f"Sorted by column {key_str!r} ({direction})")
|
|
861
|
+
|
|
862
|
+
def on_button_pressed(self, event: Button.Pressed) -> None:
    """React to button presses; the Export button opens the export dialog."""
    if event.button.id != "export_button":
        return
    self.action_export()
|
|
866
|
+
|
|
867
|
+
def on_text_area_changed(self, event: TextArea.Changed) -> None:
    """Show a live validation hint while the expression editor is edited.

    Changes from any text area other than ``expression-editor`` are ignored.
    """
    area = event.text_area
    if area.id != "expression-editor":
        return

    expression = area.text.strip()

    # Choose one status message, then write it to the output panel once.
    if not expression:
        message = ("[dim]Waiting - Enter an expression and press Ctrl+R "
                   "or F5 to execute[/dim]")
    elif self._validate_expression(expression):
        message = ("[green]Expression appears valid - Press Ctrl+R or F5 to execute"
                   "[/green]")
    else:
        message = ("[yellow]Waiting - Expression incomplete (check parentheses,"
                   " braces, brackets, quotes)[/yellow]")
    self._update_output(message)
|
|
886
|
+
|
|
887
|
+
def _prep_row(self, result: XPathMap | dict, keys: list[str]) -> list[str]:
|
|
888
|
+
"""Prepare a row for table display from a dict-like result.
|
|
889
|
+
|
|
890
|
+
Args:
|
|
891
|
+
result: Dictionary or XPathMap to extract values from
|
|
892
|
+
keys: Ordered list of column keys to extract
|
|
893
|
+
|
|
894
|
+
Returns:
|
|
895
|
+
List of string values in the same order as keys
|
|
896
|
+
"""
|
|
897
|
+
row = []
|
|
898
|
+
# Handle both dict and XPathMap for backward compatibility
|
|
899
|
+
d = result if isinstance(result, dict) else dict(result.items())
|
|
900
|
+
for key in keys: # Use provided order, not sorted
|
|
901
|
+
value = d.get(key, "")
|
|
902
|
+
if isinstance(value, Iterable) and not isinstance(value, str):
|
|
903
|
+
# Limit iterables (except strings) to first 10 items for display
|
|
904
|
+
if isinstance(value, list):
|
|
905
|
+
value = value[:10]
|
|
906
|
+
elif isinstance(value, set):
|
|
907
|
+
value = list(value)[:10]
|
|
908
|
+
else:
|
|
909
|
+
value = list(value)[:10]
|
|
910
|
+
# Convert to string for table display
|
|
911
|
+
row.append("" if value is None else str(value))
|
|
912
|
+
return row
|
|
913
|
+
|
|
914
|
+
@work(exclusive=True)
async def collect_results(self, expression: str) -> None:
    """Run ``expression`` and stream its results into a table in the output panel.

    A crawler is built from the TUI settings and custom headers, the output
    panel is cleared and a fresh ``DataTable`` is mounted. Columns come from
    the first result: the keys of a dict/``XPathMap`` result, or a single
    ``"value"`` column for anything else. Items the engine yields as error
    dicts (``__type__ == "error"``) are logged to the debug panel and skipped.

    Args:
        expression: The wxpath expression to execute.

    Raises:
        asyncio.CancelledError: Re-raised after the cancellation is reported.
    """
    count = 0
    # Bound before the try block so the handlers below never hit an unbound
    # local when the failure happens early (e.g. while building the crawler).
    data_table = None
    columns_initialized = False
    column_keys: list[str] = []
    try:
        # NOTE: no timeout is applied to the iteration here; the TimeoutError
        # handler below only fires if something inside the engine raises it.

        # Import here to avoid circular imports
        from wxpath.http.client.crawler import Crawler

        conc = self.tui_settings.get("concurrency", 16)
        ph = self.tui_settings.get("per_host", 8)
        robots = self.tui_settings.get("respect_robots", True)
        verify_ssl = self.tui_settings.get("verify_ssl", True)
        crawler = Crawler(
            concurrency=conc,
            per_host=ph,
            respect_robots=robots,
            verify_ssl=verify_ssl,
            headers=dict(self.custom_headers) if self.custom_headers else None,
        )
        engine = WXPathEngine(crawler=crawler)

        # Streaming approach: rows are added as results arrive.
        panel = self.query_one("#output-panel", OutputPanel)
        panel.clear()

        data_table = DataTable(show_header=True, zebra_stripes=True)
        panel.mount(data_table)

        async for result in engine.run(expression, max_depth=1, progress=False, yield_errors=True):
            if isinstance(result, dict) and result.get("__type__") == "error":
                self._debug(f"Error: {result.get('reason')}: {result}")
                continue
            count += 1
            if count % 100 == 0:
                self._debug(f"Received result {count} of type {type(result).__name__}")

            if isinstance(result, XPathMap):
                # Unwrap to the underlying mapping for column/row handling.
                result = result._map

            if not columns_initialized:
                self._debug("Initializing table columns")
                if isinstance(result, dict):
                    column_keys = list(result.keys())
                    for key in column_keys:
                        data_table.add_column(str(key), key=key)
                else:
                    data_table.add_column("value", key="value")
                    column_keys = ["value"]
                columns_initialized = True
                self._debug(f"Initializing table columns: {column_keys}")

            # Format row using existing logic
            if isinstance(result, dict):
                row = self._prep_row(result, column_keys)
            else:
                row = [result]
            # Add row with unique key for efficient updates
            data_table.add_row(*row, key=str(count))

    except asyncio.CancelledError:
        # Keep partial results; append status without clearing the panel
        panel = self.query_one("#output-panel", OutputPanel)
        if count > 0:
            panel.append(f"[yellow]Crawl cancelled — {count} partial result(s) shown.[/yellow]")
        else:
            panel.append("[yellow]Crawl cancelled.[/yellow]")
        self._debug("Crawl cancelled by user.")
        raise
    except asyncio.TimeoutError:
        # With partial results already on screen, leave the table untouched.
        if count == 0:
            self._update_output(
                "[yellow]Timeout after 60s - no results returned[/yellow]\n"
                "The site may be slow or unresponsive."
            )
    except Exception as e:
        # Log full stack trace to debug panel
        self._debug(traceback.format_exc())
        err_msg = f"Execution Error: {type(e).__name__}: {e}"
        if data_table is None:
            # Failed before the table existed: report in the panel directly.
            # "[" is escaped so the message is not parsed as markup.
            self._update_output("[red]" + err_msg.replace("[", "\\[") + "[/red]")
        elif columns_initialized and column_keys:
            # Append error as next row of table (do not clear output panel)
            row = [err_msg] + [""] * (len(column_keys) - 1)
            data_table.add_row(*row, key=f"error-{count}")
        else:
            data_table.add_column("error", key="error")
            data_table.add_row(err_msg, key="error-0")
    finally:
        # Single place where the busy flag is cleared, whatever the outcome.
        self._executing = False
        self._debug(f"Processed {count} results.")
|
|
1015
|
+
|
|
1016
|
+
|
|
1017
|
+
async def action_execute(self) -> None:
    """Execute the expression currently in the editor.

    Does nothing while another execution is in progress. An empty or
    unbalanced expression is reported in the output panel; otherwise the
    crawl is started through ``collect_results`` and its return value is
    kept in ``_crawl_worker``.
    """
    if self._executing:
        return

    expression = self.query_one("#expression-editor", TextArea).text.strip()
    if not expression:
        self._update_output("[yellow]Waiting - No expression to execute[/yellow]")
        return

    self._executing = True
    self._update_output("[cyan]Executing...[/cyan]")
    self._debug(f"Executing expression: {expression!r}")

    try:
        # Validate expression first
        if not self._validate_expression(expression):
            self._update_output("[yellow]Waiting - Expression incomplete or invalid[/yellow]")
            self._executing = False
            return

        self._crawl_worker = self.collect_results(expression)
    except Exception as exc:
        # Pick the message by exception type, most specific first.
        if isinstance(exc, SyntaxError):
            message = f"[yellow]Waiting - Syntax Error:[/yellow] {exc}"
        elif isinstance(exc, ValueError):
            message = f"[yellow]Waiting - Validation Error:[/yellow] {exc}"
        else:
            message = f"[red]Error:[/red] {type(exc).__name__}: {exc}"
        self._update_output(message)
        self._executing = False
    # Do not set _executing = False here: execution runs in the collect_results
    # coroutine; only that coroutine's finally block should clear the flag.
|
|
1054
|
+
|
|
1055
|
+
def action_cancel_crawl(self) -> None:
    """Cancel the running crawl, if there is one.

    The current state is always logged; the worker is cancelled only when an
    execution is flagged as active and the stored worker reports it is running.
    """
    worker = self._crawl_worker
    self._debug(f"Cancelling crawl... executing: {self._executing}, "
                f"crawl_worker.name: {getattr(worker, 'name', None)}, "
                f"crawl_worker.is_running: {getattr(worker, 'is_running', False)}")
    if not (self._executing and worker and worker.is_running):
        return
    self._debug("Cancel requested for crawl.")
    worker.cancel()
|
|
1063
|
+
|
|
1064
|
+
def _validate_expression(self, expression: str) -> bool:
|
|
1065
|
+
"""Validate if expression is complete and well-formed.
|
|
1066
|
+
|
|
1067
|
+
Args:
|
|
1068
|
+
expression: Expression string to validate
|
|
1069
|
+
|
|
1070
|
+
Returns:
|
|
1071
|
+
True if expression appears complete, False otherwise
|
|
1072
|
+
"""
|
|
1073
|
+
# Check for balanced parentheses
|
|
1074
|
+
paren_count = expression.count('(') - expression.count(')')
|
|
1075
|
+
if paren_count != 0:
|
|
1076
|
+
return False
|
|
1077
|
+
|
|
1078
|
+
# Check for balanced braces
|
|
1079
|
+
brace_count = expression.count('{') - expression.count('}')
|
|
1080
|
+
if brace_count != 0:
|
|
1081
|
+
return False
|
|
1082
|
+
|
|
1083
|
+
# Check for balanced brackets
|
|
1084
|
+
bracket_count = expression.count('[') - expression.count(']')
|
|
1085
|
+
if bracket_count != 0:
|
|
1086
|
+
return False
|
|
1087
|
+
|
|
1088
|
+
# Check for unclosed quotes
|
|
1089
|
+
# Simple check: even number of unescaped quotes
|
|
1090
|
+
single_quotes = len([c for i, c in enumerate(expression)
|
|
1091
|
+
if c == "'" and (i == 0 or expression[i-1] != '\\')])
|
|
1092
|
+
double_quotes = len([c for i, c in enumerate(expression)
|
|
1093
|
+
if c == '"' and (i == 0 or expression[i-1] != '\\')])
|
|
1094
|
+
|
|
1095
|
+
if single_quotes % 2 != 0 or double_quotes % 2 != 0:
|
|
1096
|
+
return False
|
|
1097
|
+
|
|
1098
|
+
return True
|
|
1099
|
+
|
|
1100
|
+
def action_clear(self) -> None:
    """Reset the output panel to its idle placeholder and log the action."""
    placeholder = "Waiting for expression..."
    self._update_output(placeholder)
    self._debug("Cleared output panel.")
|
|
1104
|
+
|
|
1105
|
+
def action_clear_editor(self) -> None:
    """Clear the expression editor (all text)."""
    # Query with the expected widget type, as action_execute does for the
    # same widget, so a wrong match fails at the query, not at .clear().
    editor = self.query_one("#expression-editor", TextArea)
    editor.clear()
    self._debug("Expression editor cleared.")
|
|
1110
|
+
|
|
1111
|
+
def _update_output(self, content: str | RenderableType) -> None:
    """Replace everything in the output panel with ``content``.

    The panel's current children are removed and ``content`` (a string or a
    renderable) is mounted wrapped in a single ``Static`` widget.
    """
    output = self.query_one("#output-panel", OutputPanel)
    output.remove_children()
    # Strings and renderables are wrapped the same way.
    output.mount(Static(content))
|
|
1126
|
+
|
|
1127
|
+
def action_clear_debug(self) -> None:
    """Remove all messages from the debug panel."""
    self.query_one("#debug-panel", DebugPanel).clear()
|
|
1131
|
+
|
|
1132
|
+
def watch_debug_panel_visible(self, visible: bool) -> None:
    """Apply the new visibility value to the debug container's display."""
    self.query_one("#debug-container", Container).display = visible
|
|
1136
|
+
|
|
1137
|
+
def action_toggle_debug(self) -> None:
    """Flip the debug panel's visibility flag and log the new state."""
    self.debug_panel_visible = not self.debug_panel_visible
    # Re-read the attribute so the log reflects the value actually stored.
    if self.debug_panel_visible:
        state = "shown"
    else:
        state = "hidden"
    self._debug(f"Debug panel {state}")
|
|
1142
|
+
|
|
1143
|
+
def _escape_rich_markup(self, s: str) -> str:
|
|
1144
|
+
"""Escape [ and ] so Rich does not interpret them as markup."""
|
|
1145
|
+
return s.replace("[", "\\[").replace("]", "\\]")
|
|
1146
|
+
|
|
1147
|
+
def _debug(self, message: str) -> None:
    """Write ``message`` to the debug panel, prefixed with the current time.

    The message is passed through the markup escaper first; the ``HH:MM:SS``
    timestamp is rendered dimmed.
    """
    debug_panel = self.query_one("#debug-panel", DebugPanel)
    stamp = datetime.now().strftime("%H:%M:%S")
    safe_text = self._escape_rich_markup(message)
    debug_panel.append(f"[dim]{stamp}[/dim] {safe_text}")
|
|
1152
|
+
|
|
1153
|
+
def _format_stream_item(self, result: Any):
|
|
1154
|
+
"""Helps format stream items for display."""
|
|
1155
|
+
if isinstance(result, dict):
|
|
1156
|
+
return self._format_dict(result)
|
|
1157
|
+
elif isinstance(result, HtmlElement):
|
|
1158
|
+
return self._format_html_element(result)
|
|
1159
|
+
else:
|
|
1160
|
+
return str(result)
|
|
1161
|
+
|
|
1162
|
+
def _format_html_element(self, elem: HtmlElement) -> str:
    """Render an HTML element as a short, markup-safe preview.

    The element is serialized to HTML text, cut to 300 characters (with a
    trailing ``...`` when cut), and ``[`` is escaped so the text is not read
    as Rich markup.

    Args:
        elem: HTML element to format.

    Returns:
        The preview wrapped in green markup, or a yellow placeholder naming
        the element's tag if formatting raised an exception.
    """
    limit = 300
    try:
        serialized = tostring(elem, encoding='unicode', method='html')
        # Truncate before escaping, so the limit applies to the raw HTML.
        preview = serialized if len(serialized) <= limit else serialized[:limit] + "..."
        preview = preview.replace("[", "\\[")
        return f" [green]{preview}[/green]"
    except Exception as e:
        return f" [yellow]<{elem.tag}> (error formatting: {e})[/yellow]"
|
|
1187
|
+
|
|
1188
|
+
def _format_dict(self, d: dict) -> str:
|
|
1189
|
+
"""Format dictionary with indentation.
|
|
1190
|
+
|
|
1191
|
+
Args:
|
|
1192
|
+
d: Dictionary to format
|
|
1193
|
+
|
|
1194
|
+
Returns:
|
|
1195
|
+
Formatted string
|
|
1196
|
+
"""
|
|
1197
|
+
lines = [" {"]
|
|
1198
|
+
for key, value in d.items():
|
|
1199
|
+
if isinstance(value, str) and len(value) > 100:
|
|
1200
|
+
value = value[:100] + "..."
|
|
1201
|
+
lines.append(f" {key!r}: {value!r},")
|
|
1202
|
+
lines.append(" }")
|
|
1203
|
+
return "\n".join(lines)
|
|
1204
|
+
|
|
1205
|
+
def main():
    """Start the wxpath TUI.

    Builds the ``WXPathTUI`` application and runs it. This is the function
    invoked when the module is executed as a script.

    Example:
        Run from command line::

            $ wxpath-tui
    """
    WXPathTUI().run()
|
|
1222
|
+
|
|
1223
|
+
|
|
1224
|
+
# Launch the TUI only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|