wxpath 0.4.1__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wxpath/tui.py ADDED
@@ -0,0 +1,1225 @@
1
+ """TUI for interactive wxpath expression testing.
2
+
3
+ A two-panel terminal interface:
4
+ - Top panel: Editor for wxpath DSL expressions
5
+ - Bottom panel: Live output of executed expressions
6
+
7
+ Warning:
8
+ Pre-1.0.0 - APIs and contracts may change
9
+
10
+ Example:
11
+ Launch the TUI from command line::
12
+
13
+ $ wxpath-tui
14
+
15
+ Or run as a module::
16
+
17
+ $ python -m wxpath.tui
18
+
19
+ """
20
+ import asyncio
21
+ import csv
22
+ import json
23
+ import traceback
24
+ from datetime import datetime
25
+ from pathlib import Path
26
+ from typing import Any, Iterable
27
+
28
+ from elementpath.xpath_tokens import XPathMap
29
+ from lxml.html import HtmlElement, tostring
30
+ from rich.console import RenderableType
31
+ from textual import events, work
32
+ from textual.app import App, ComposeResult
33
+ from textual.containers import Container, Horizontal, Vertical, VerticalScroll
34
+ from textual.reactive import reactive
35
+ from textual.screen import ModalScreen
36
+ from textual.widgets import (
37
+ Button,
38
+ DataTable,
39
+ Footer,
40
+ Header,
41
+ Input,
42
+ Static,
43
+ Switch,
44
+ TextArea,
45
+ )
46
+
47
+ from wxpath.core.runtime.engine import WXPathEngine
48
+ from wxpath.hooks import registry
49
+ from wxpath.hooks.builtin import SerializeXPathMapAndNodeHook
50
+ from wxpath.settings import SETTINGS
51
+ from wxpath.tui_settings import (
52
+ TUISettingsSchema,
53
+ load_tui_settings,
54
+ save_tui_settings,
55
+ validate_tui_settings,
56
+ )
57
+
58
+
59
class HeadersScreen(ModalScreen):
    """Modal dialog in which the user edits custom HTTP headers as JSON.

    The screen is dismissed with the parsed ``dict`` of headers on save, with
    an empty ``dict`` when the editor is left blank, and with ``None`` when
    the user cancels.
    """

    CSS = """
    HeadersScreen {
        align: center middle;
    }

    #headers-dialog {
        width: 80;
        height: 25;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #headers-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    #headers-editor {
        height: 1fr;
        margin: 1 0;
    }

    #headers-help {
        color: $text-muted;
        margin-bottom: 1;
    }

    #headers-buttons {
        height: auto;
        align: center middle;
    }

    Button {
        margin: 0 1;
    }
    """

    def __init__(self, current_headers: dict):
        """Store the headers that pre-populate the editor.

        Args:
            current_headers: Dictionary of current HTTP headers.
        """
        super().__init__()
        self.current_headers = current_headers

    def compose(self) -> ComposeResult:
        """Yield the title, help line, JSON editor and the two buttons."""
        help_text = (
            "Enter headers as JSON (one per line or as object)."
            " Press Ctrl+S to save, Escape to cancel."
        )
        # The editor starts out showing the current headers, pretty-printed.
        initial_json = json.dumps(self.current_headers, indent=2)

        with Vertical(id="headers-dialog"):
            yield Static("HTTP Headers Configuration", id="headers-title")
            yield Static(help_text, id="headers-help")
            yield TextArea(initial_json, language="json", id="headers-editor")

            with Container(id="headers-buttons"):
                yield Button("Save (Ctrl+S)", variant="primary", id="save-btn")
                yield Button("Cancel (Esc)", variant="default", id="cancel-btn")

    def on_mount(self) -> None:
        """Give keyboard focus to the JSON editor."""
        editor = self.query_one("#headers-editor", TextArea)
        editor.focus()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Save on the save button, dismiss with ``None`` on cancel."""
        pressed = event.button.id
        if pressed == "cancel-btn":
            self.dismiss(None)
        elif pressed == "save-btn":
            self._save_headers()

    def on_key(self, event) -> None:
        """Map Ctrl+S to save and Escape to cancel."""
        key = event.key
        if key not in ("ctrl+s", "escape"):
            return
        if key == "ctrl+s":
            self._save_headers()
        else:
            self.dismiss(None)
        event.prevent_default()

    def _save_headers(self) -> None:
        """Parse the editor text and dismiss the screen with the result.

        Blank text dismisses with ``{}``. Text that is not valid JSON, is not
        a JSON object, or contains a non-string key/value produces an error
        notification and leaves the screen open.
        """
        raw = self.query_one("#headers-editor", TextArea).text.strip()
        if not raw:
            # Empty headers = use defaults
            self.dismiss({})
            return

        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as exc:
            self.notify(f"Invalid JSON: {exc}", severity="error")
            return

        if not isinstance(parsed, dict):
            self.notify("Headers must be a JSON object/dict", severity="error")
            return

        # Every header name and value has to be a plain string.
        for name, content in parsed.items():
            if not isinstance(name, str):
                self.notify(f"Header key must be string: {name}", severity="error")
                return
            if not isinstance(content, str):
                self.notify(f"Header value must be string: {content}", severity="error")
                return

        self.dismiss(parsed)
185
+
186
+
187
class SettingsScreen(ModalScreen):
    """Modal screen for editing persistent TUI settings (CONCURRENCY, PER_HOST, RESPECT_ROBOTS).

    Settings are saved to ~/.config/wxpath/tui_settings.json and applied to the
    crawler/engine on the next run.

    One row is rendered per entry of ``TUISettingsSchema``: entries whose
    ``type`` is ``"int"`` get an integer ``Input``, every other entry gets a
    ``Switch``. The screen is dismissed with the saved ``dict`` on success and
    with ``None`` on cancel.
    """

    CSS = """
    SettingsScreen {
        align: center middle;
    }

    #settings-dialog {
        width: 60;
        min-height: 18;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #settings-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    .settings-row {
        height: auto;
        padding: 1 0;
    }

    .settings-label {
        width: 18;
        text-style: bold;
    }

    .settings-input {
        width: 1fr;
    }

    #settings-help {
        color: $text-muted;
        margin: 1 0;
    }

    #settings-buttons {
        height: auto;
        align: center middle;
        padding: 1 0;
    }

    #settings-buttons Button {
        margin: 0 1;
    }
    """

    def __init__(self, current: dict[str, Any]):
        """Initialize the screen with a private copy of the current settings.

        Args:
            current: Mapping of setting key to its current value.
        """
        super().__init__()
        # Copy so edits in the dialog never mutate the caller's dict.
        self.current = dict(current)

    def compose(self) -> ComposeResult:
        """Build the dialog: title, help line, one row per schema entry, buttons."""
        with Vertical(id="settings-dialog"):
            yield Static("Crawler Settings (persistent)", id="settings-title")
            yield Static(
                "Values are saved to config and used for the next run. Ctrl+S save, Esc cancel.",
                id="settings-help",
            )
            for entry in TUISettingsSchema:
                key = entry["key"]
                value = self.current.get(key, entry["default"])
                with Horizontal(classes="settings-row"):
                    yield Static(entry["label"], classes="settings-label")
                    if entry["type"] == "int":
                        yield Input(
                            str(value),
                            type="integer",
                            id=f"setting-{key}",
                            classes="settings-input",
                        )
                    else:
                        yield Switch(
                            value=bool(value),
                            id=f"setting-{key}",
                            classes="settings-input",
                        )
            with Container(id="settings-buttons"):
                yield Button("Save (Ctrl+S)", variant="primary", id="settings-save-btn")
                yield Button("Cancel (Esc)", variant="default", id="settings-cancel-btn")

    def on_mount(self) -> None:
        """Focus the widget belonging to the first schema entry."""
        first_id = f"setting-{TUISettingsSchema[0]['key']}"
        self.query_one(f"#{first_id}").focus()

    def _gather(self) -> dict[str, Any]:
        """Read the current widget values into a settings dict.

        Returns:
            Mapping of setting key to value. A blank integer field falls back
            to the schema default for that entry.

        Raises:
            ValueError: If an integer field holds text that ``int()`` cannot
                parse (for example a lone ``-`` left in the field).
        """
        result = {}
        for entry in TUISettingsSchema:
            key = entry["key"]
            node = self.query_one(f"#setting-{key}")
            if not isinstance(node, Input):
                # Non-Input rows are switches; their value is used as-is.
                result[key] = node.value
                continue
            raw = node.value.strip()
            if not raw:
                result[key] = entry["default"]
                continue
            try:
                result[key] = int(raw)
            except ValueError:
                raise ValueError(f"{entry['label']} must be a whole number") from None
        return result

    def _validate(self, data: dict[str, Any]) -> str | None:
        """Return the first validation error for *data*, or ``None`` if valid."""
        errors = validate_tui_settings(data)
        return errors[0] if errors else None

    def _save_settings(self) -> None:
        """Gather, validate and persist the settings, then dismiss the screen.

        Any failure (unparsable number, validation error, error while writing
        the settings file) is reported through ``notify`` and leaves the
        dialog open so the user can correct the input.
        """
        try:
            data = self._gather()
        except ValueError as exc:
            self.notify(str(exc), severity="error")
            return
        err = self._validate(data)
        if err:
            self.notify(err, severity="error")
            return
        try:
            save_tui_settings(data)
        except OSError as exc:
            self.notify(f"Could not save settings: {exc}", severity="error")
            return
        self.dismiss(data)

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Save on the save button, dismiss with ``None`` on cancel."""
        if event.button.id == "settings-save-btn":
            self._save_settings()
        elif event.button.id == "settings-cancel-btn":
            self.dismiss(None)

    def on_key(self, event) -> None:
        """Map Ctrl+S to save and Escape to cancel."""
        if event.key == "ctrl+s":
            self._save_settings()
            event.prevent_default()
        elif event.key == "escape":
            self.dismiss(None)
            event.prevent_default()
328
+
329
+
330
class ExportScreen(ModalScreen):
    """Modal dialog asking which format to export in.

    The screen is dismissed with ``"csv"`` or ``"json"`` when a format button
    is pressed and with ``None`` when the user cancels.
    """

    CSS = """
    ExportScreen {
        align: center middle;
    }

    #export-dialog {
        width: 50;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #export-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    #export-buttons {
        height: auto;
        align: center middle;
        padding: 1 0;
    }

    #export-buttons Button {
        margin: 0 1;
    }
    """

    def compose(self) -> ComposeResult:
        """Yield the title, a help line and the three buttons."""
        with Vertical(id="export-dialog"):
            yield Static("Export table data", id="export-title")
            yield Static(
                "Choose format. File is saved in the current directory.",
                id="export-help",
            )
            with Container(id="export-buttons"):
                yield Button("Export CSV", variant="primary", id="export-csv-btn")
                yield Button("Export JSON", variant="primary", id="export-json-btn")
                yield Button("Cancel (Esc)", variant="default", id="export-cancel-btn")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with the value associated with the pressed button."""
        outcome_by_button = {
            "export-cancel-btn": None,
            "export-csv-btn": "csv",
            "export-json-btn": "json",
        }
        pressed = event.button.id
        if pressed in outcome_by_button:
            self.dismiss(outcome_by_button[pressed])

    def on_key(self, event) -> None:
        """Treat Escape as cancel; every other key is ignored here."""
        if event.key != "escape":
            return
        self.dismiss(None)
        event.prevent_default()
395
+
396
+
397
class OutputPanel(Vertical, can_focus=True):
    """Focusable vertical container that holds expression output.

    Output is shown by mounting child widgets: ``append`` wraps a renderable
    in a ``Static`` and mounts it, ``clear`` removes every child.
    """

    def __init__(self, *args, **kwargs):
        """Create the panel and title its border "Output".

        Args:
            *args: Positional arguments forwarded to ``Vertical``.
            **kwargs: Keyword arguments forwarded to ``Vertical``.
        """
        super().__init__(*args, **kwargs)
        self.border_title = "Output"

    def clear(self) -> None:
        """Remove every child widget from the panel."""
        self.remove_children()

    def append(self, renderable) -> None:
        """Mount *renderable* as a new ``Static`` child of the panel."""
        line = Static(renderable)
        self.mount(line)
434
+
435
+
436
class DebugPanel(VerticalScroll, can_focus=False):
    """Non-focusable scroll region that accumulates debug lines.

    Each appended message becomes its own ``Static`` child carrying the
    ``debug-line`` class; the view is scrolled to the end after every append.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``VerticalScroll`` unchanged."""
        super().__init__(*args, **kwargs)

    def clear(self) -> None:
        """Drop every message currently shown."""
        self.remove_children()

    def append(self, message: str) -> None:
        """Add one message at the bottom and jump to it.

        Args:
            message: Text of the debug line (passed to ``Static`` as-is).
        """
        entry = Static(message, classes="debug-line")
        self.mount(entry)
        self.scroll_end(animate=False)
462
+
463
+
464
+ class WXPathTUI(App):
465
+ """Interactive TUI for wxpath expression testing.
466
+
467
+ Top panel: Expression editor
468
+ Bottom panel: Live output display
469
+ """
470
+
471
+ TITLE = "wxpath TUI - Interactive Expression Testing"
472
+ # SUB_TITLE will be set dynamically based on cache state
473
+
474
+ CSS = """
475
+ Screen {
476
+ layout: vertical;
477
+ background: $surface;
478
+ }
479
+
480
+ #editor-container {
481
+ height: 40%;
482
+ border: heavy $primary;
483
+ background: $panel;
484
+ }
485
+
486
+ #output-container {
487
+ /* height: 60%; */
488
+ height: 60%;
489
+ border: heavy $accent;
490
+ background: $panel;
491
+ }
492
+
493
+ #output-panel {
494
+ height: 3fr;
495
+ }
496
+
497
+ #debug-container {
498
+ layout: vertical;
499
+ height: 1fr;
500
+ min-height: 5;
501
+ border-top: tall $accent-darken-1;
502
+ background: $surface-darken-1;
503
+ }
504
+
505
+ #debug-header {
506
+ background: $accent-darken-1;
507
+ color: $text;
508
+ text-style: bold;
509
+ padding: 0 2;
510
+ dock: top;
511
+ }
512
+
513
+ #debug-panel {
514
+ height: 1fr;
515
+ min-height: 3;
516
+ padding: 0 2;
517
+ overflow-y: auto;
518
+ background: $surface-darken-1;
519
+ }
520
+
521
+ TextArea {
522
+ height: 100%;
523
+ background: $surface;
524
+ }
525
+
526
+ OutputPanel {
527
+ height: 100%;
528
+ padding: 1 2;
529
+ overflow-y: auto;
530
+ background: $surface;
531
+ }
532
+
533
+ DebugPanel {
534
+ height: 100%;
535
+ padding: 1 0;
536
+ overflow-y: auto;
537
+ background: $surface;
538
+ }
539
+
540
+ .panel-header {
541
+ background: $primary;
542
+ color: $text;
543
+ text-style: bold;
544
+ padding: 0 2;
545
+ dock: top;
546
+ }
547
+
548
+ Header {
549
+ background: $primary-darken-2;
550
+ }
551
+
552
+ Footer {
553
+ background: $primary-darken-2;
554
+ }
555
+ """
556
+
557
+ BINDINGS = [
558
+ ("ctrl+q", "quit", "Quit"),
559
+ ("ctrl+r", "execute", "Execute"),
560
+ ("escape", "cancel_crawl", "Cancel Crawl"),
561
+ ("ctrl+c", "clear", "Clear"),
562
+ ("ctrl+shift+backspace", "clear_editor", "Clear Editor"),
563
+ ("ctrl+d", "clear_debug", "Clear Debug"),
564
+ ("ctrl+shift+d", "toggle_debug", "Toggle Debug"),
565
+ ("ctrl+e", "export", "Export"),
566
+ ("ctrl+l", "toggle_cache", "Cache"),
567
+ ("ctrl+h", "edit_headers", "Headers"),
568
+ ("ctrl+shift+s", "edit_settings", "Settings"),
569
+ ("f5", "execute", "Execute"),
570
+ ("tab", "focus_next", "Focus Next"),
571
+ ]
572
+
573
+ cache_enabled = reactive(False)
574
+ debug_panel_visible = reactive(True)
575
+ custom_headers = reactive({})
576
+ tui_settings = reactive({})
577
+
578
def __init__(self):
    """Set up the application's run state and sort bookkeeping.

    Registers ``SerializeXPathMapAndNodeHook`` with the hooks registry
    (per the original author's note, so XPathMap results arrive as plain
    dicts for the table view) and initializes the private state attributes.
    ``cache_enabled`` is intentionally not touched here; ``on_mount`` sets it.
    """
    super().__init__()
    registry.register(SerializeXPathMapAndNodeHook)

    # Column-sort bookkeeping used when a table header is clicked.
    self._last_sort_reverse = False
    self._last_sort_column: str | None = None

    # Crawl state: the worker handle is kept so a crawl can be cancelled.
    self._crawl_worker = None
    self._executing = False
593
+
594
def compose(self) -> ComposeResult:
    """Build the application layout.

    Yields the header, the editor container (header strip plus the
    expression ``TextArea``), the output container (header strip plus the
    ``OutputPanel``), the debug container (header strip plus the
    ``DebugPanel``) and finally the footer.
    """
    yield Header()

    with Container(id="editor-container"):
        yield Static("Expression Editor (Ctrl+R to execute)", classes="panel-header")
        yield TextArea(id="expression-editor", language="python")

    with Container(id="output-container"):
        yield Static("Output", classes="panel-header")
        yield OutputPanel(id="output-panel")
        # yield Button("Export (Ctrl+E)", id="export_button")

        # NOTE(review): the debug container is assumed to be nested inside the
        # output container, because the stylesheet sizes #output-panel (3fr)
        # and #debug-container (1fr) relative to each other - confirm.
        with Container(id="debug-container"):
            yield Static("Debug", id="debug-header", classes="panel-header")
            yield DebugPanel(id="debug-panel")

    yield Footer()
612
+
613
def on_mount(self) -> None:
    """Seed settings, the sample expression and the welcome text.

    Copies the cache flag from the global ``SETTINGS`` into the reactive
    ``cache_enabled``, loads the persistent TUI settings, puts a sample
    expression into the editor and focuses it, then shows the help text.
    """
    welcome = (
        "[dim]Welcome to wxpath TUI![/dim]\n\n"
        "[cyan]Quick Start:[/cyan]\n"
        "  • Edit the expression above\n"
        "  • Press [bold]Ctrl+R[/bold] or [bold]F5[/bold] to execute\n"
        "  • Press [bold]Escape[/bold] to cancel a running crawl\n"
        "  • Press [bold]Ctrl+E[/bold] to export table (CSV/JSON)\n"
        "  • Press [bold]Ctrl+C[/bold] to clear output\n"
        "  • Press [bold]Ctrl+Shift+Backspace[/bold] to clear expression editor\n"
        "  • Press [bold]Ctrl+Shift+D[/bold] to toggle debug panel\n"
        "  • Press [bold]Ctrl+H[/bold] to configure HTTP headers\n"
        "  • Press [bold]Ctrl+Shift+S[/bold] to edit persistent settings (concurrency, robots)\n"  # noqa: E501
        "  • Press [bold]Ctrl+L[/bold] to toggle HTTP caching\n"
        "  • Use [bold]arrow keys[/bold] or [bold]scroll[/bold] to view results\n\n"
        "[cyan]Example expressions:[/cyan]\n"
        "  • Extract text: url('...')//div//text()\n"
        "  • Extract as dict/table: url('...')//div/map { 'title': .//h1/text() }\n"
        "  • Follow links: url('...') ///url(//a/@href) //div/text()\n\n"
        "[green]Expression appears valid - Press Ctrl+R or F5 to execute[/green]"
    )
    sample_expression = "url('https://quotes.toscrape.com')//span[@class='text']/text()"

    # Assigning the reactive runs its watcher, which refreshes the subtitle.
    self.cache_enabled = bool(SETTINGS.http.client.cache.enabled)
    # Persistent settings: CONCURRENCY, PER_HOST, RESPECT_ROBOTS.
    self.tui_settings = load_tui_settings()

    expression_editor = self.query_one("#expression-editor", TextArea)
    expression_editor.text = sample_expression
    expression_editor.focus()

    self._update_output(welcome)
646
+
647
def watch_cache_enabled(self, new_value: bool) -> None:
    """Update global settings and subtitle when cache setting changes.

    Args:
        new_value: The new value of the ``cache_enabled`` reactive.
    """
    # Update the global settings - this is what the HTTP crawler will read
    SETTINGS.http.client.cache.enabled = bool(new_value)
    # Log the value as read back from SETTINGS rather than the argument.
    self._debug(f"Cache enabled: {SETTINGS.http.client.cache.enabled}")
    self._update_subtitle()
653
+
654
def watch_custom_headers(self, new_value: dict) -> None:
    """Update subtitle when custom headers change.

    Args:
        new_value: The new headers dict; not read here, because the subtitle
            is rebuilt from ``self.custom_headers``.
    """
    self._update_subtitle()
657
+
658
def watch_tui_settings(self, new_value: dict) -> None:
    """Update subtitle when persistent settings change.

    Args:
        new_value: The new settings dict; not read here, because the subtitle
            is rebuilt from ``self.tui_settings``.
    """
    self._update_subtitle()
661
+
662
def _update_subtitle(self) -> None:
    """Rebuild the subtitle from cache state, headers and persistent settings.

    The cache flag is read from the global ``SETTINGS`` (the value the
    crawler uses) and shown as ON/OFF, matching how the robots flag and the
    cache-toggle message are rendered.
    """
    cache_state = "ON" if SETTINGS.http.client.cache.enabled else "OFF"
    headers_count = len(self.custom_headers)
    headers_info = f"{headers_count} custom" if headers_count > 0 else "default"
    conc = self.tui_settings.get("concurrency", 16)
    ph = self.tui_settings.get("per_host", 8)
    robots = "ON" if self.tui_settings.get("respect_robots", True) else "OFF"
    self.sub_title = (
        f"Cache: {cache_state} | Headers: {headers_info} | "
        f"Concurrency: {conc} | Per host: {ph} | Robots: {robots} | "
        "Ctrl+R: Run | Ctrl+Shift+S: Settings | Ctrl+Q: Quit"
    )
676
+
677
async def action_toggle_cache(self) -> None:
    """Flip the ``cache_enabled`` reactive and report the transition."""

    def as_label(flag) -> str:
        return "ON" if flag else "OFF"

    before = self.cache_enabled
    self.cache_enabled = not self.cache_enabled
    after = self.cache_enabled

    old_label = as_label(before)
    new_label = as_label(after)

    message = (
        f"[cyan]HTTP caching toggled: {old_label} → {new_label}[/cyan]\n\n"
        "[dim]This setting will apply to the next expression execution.[/dim]"
    )
    self._update_output(message)
    self._debug(f"Toggled cache from {old_label} to {new_label}")
691
+
692
def action_edit_headers(self) -> None:
    """Push the headers dialog and apply whatever it returns."""

    def handle_headers_result(result):
        """Store the returned headers; ``None`` means the dialog was cancelled."""
        if result is None:
            return
        self.custom_headers = result
        count = len(result)
        if count:
            headers_preview = json.dumps(result, indent=2)
            message = (
                f"[cyan]Custom headers saved ({count} headers)[/cyan]\n\n"
                f"[green]{headers_preview}[/green]\n\n"
                "[dim]These will apply to the next expression execution.[/dim]"
            )
        else:
            message = (
                "[cyan]Headers cleared - using defaults[/cyan]\n\n"
                "[dim]This will apply to the next expression execution.[/dim]"
            )
        self._update_output(message)

    # The dialog gets a copy, so editing never touches the live headers.
    snapshot = dict(self.custom_headers)
    self.push_screen(HeadersScreen(snapshot), handle_headers_result)
    self._debug("Opened headers configuration screen")
714
+
715
def action_edit_settings(self) -> None:
    """Push the persistent-settings dialog and apply whatever it returns."""

    def handle_settings_result(result: dict[str, Any] | None) -> None:
        """Adopt the saved settings; ``None`` means the dialog was cancelled."""
        if result is None:
            return
        self.tui_settings = result
        summary = (
            f"CONCURRENCY: {result.get('concurrency', 16)} | "
            f"PER_HOST: {result.get('per_host', 8)} | "
            f"RESPECT_ROBOTS: {result.get('respect_robots', True)}\n\n"
        )
        self._update_output(
            "[cyan]Persistent settings saved[/cyan]\n\n"
            + summary
            + "[dim]These apply to the next expression execution.[/dim]"
        )
        self._debug("Persistent settings saved and applied")

    # The dialog gets a copy, so editing never touches the live settings.
    snapshot = dict(self.tui_settings)
    self.push_screen(SettingsScreen(snapshot), handle_settings_result)
    self._debug("Opened persistent settings screen")
731
+
732
def _get_output_data_table(self) -> DataTable | None:
    """Look up the first ``DataTable`` mounted inside the output panel.

    Returns:
        The first matching table, or ``None`` when the panel holds no table.
    """
    matches = self.query_one("#output-panel", OutputPanel).query(DataTable)
    if not matches:
        return None
    return matches.first()
741
+
742
def _export_table_csv(self, data_table: DataTable, path: Path) -> None:
    """Dump the table to *path* as UTF-8 CSV, header row first.

    Nothing is written (and no file is created) when the table has no
    columns. Cells are converted with ``str`` and rows follow the table's
    current row order.

    Args:
        data_table: The DataTable to export.
        path: Output file path.
    """
    ordered = data_table.ordered_columns
    if not ordered:
        return

    header_row = [str(column.label) for column in ordered]
    with path.open("w", newline="", encoding="utf-8") as handle:
        out = csv.writer(handle)
        out.writerow(header_row)
        out.writerows(
            [str(cell) for cell in data_table.get_row(entry.key)]
            for entry in data_table.ordered_rows
        )
760
+
761
def _export_table_json(self, data_table: DataTable, path: Path) -> None:
    """Dump the table to *path* as a JSON list with one object per row.

    Nothing is written (and no file is created) when the table has no
    columns. Object keys are the column labels, values are the cells
    converted with ``str``; the strict ``zip`` raises if a row's cell count
    differs from the column count.

    Args:
        data_table: The DataTable to export.
        path: Output file path.
    """
    ordered = data_table.ordered_columns
    if not ordered:
        return

    labels = [str(column.label) for column in ordered]
    records = [
        dict(zip(labels, [str(cell) for cell in data_table.get_row(entry.key)], strict=True))
        for entry in data_table.ordered_rows
    ]
    with path.open("w", encoding="utf-8") as handle:
        json.dump(records, handle, indent=2)
778
+
779
def action_export(self) -> None:
    """Push the export dialog and write the output table in the chosen format."""

    def handle_export_result(fmt: str | None) -> None:
        """Export to a timestamped file in the working directory."""
        if fmt is None:
            self._debug("Export cancelled")
            return

        table = self._get_output_data_table()
        if table is None:
            self.notify(
                "No table to export. Run an expression that produces a table first.",
                severity="warning",
            )
            self._debug("Export attempted but output panel has no DataTable")
            return

        # Anything that is not "csv" is written as JSON.
        as_csv = fmt == "csv"
        ext = ".csv" if as_csv else ".json"
        writer = self._export_table_csv if as_csv else self._export_table_json
        stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        path = Path.cwd() / f"wxpath_export_{stamp}{ext}"

        try:
            writer(table, path)
            self.notify(f"Exported to {path}", severity="information")
            self._debug(f"Exported table to {path} ({fmt.upper()}, {table.row_count} rows)")
        except OSError as e:
            self.notify(f"Export failed: {e}", severity="error")
            self._debug(f"Export failed: {e}")

    self.push_screen(ExportScreen(), handle_export_result)
    self._debug("Opened export dialog")
809
+
810
+ def _numeric_sort_key(self, value: Any) -> tuple[int, float | str]:
811
+ """Key for sorting: numbers by value, then non-numeric by string.
812
+
813
+ Used so numeric columns sort numerically (e.g. 2 < 10) instead of
814
+ lexicographically (e.g. "10" < "2"). Single cell value is passed
815
+ when sorting by one column.
816
+ """
817
+ s = "" if value is None else str(value).strip()
818
+ if not s:
819
+ return (1, "")
820
+ try:
821
+ return (0, float(s))
822
+ except (ValueError, TypeError):
823
+ return (1, str(value))
824
+
825
def _is_numeric_column(self, table: DataTable, column_key: Any) -> bool:
    """Guess whether a column holds numbers by sampling its cells.

    Only the first 10 non-empty cells are examined. The column counts as
    numeric when at least one sampled cell parses as a float and the parsed
    cells make up at least half of the sample.
    """
    sampled = 0
    parsed = 0
    for cell in table.get_column(column_key):
        if sampled >= 10:
            break
        text = "" if cell is None else str(cell).strip()
        if text:
            sampled += 1
            try:
                float(text)
            except (ValueError, TypeError):
                continue
            parsed += 1
    # 2 * parsed >= sampled is the integer form of parsed >= sampled / 2.
    return parsed > 0 and 2 * parsed >= sampled
842
+
843
def on_data_table_header_selected(self, event: DataTable.HeaderSelected) -> None:
    """Sort the table by the clicked column.

    Clicking the same column again flips the direction; clicking a different
    column starts ascending. Columns that look numeric are sorted with
    ``_numeric_sort_key``.
    """
    table = event.data_table
    column_key = event.column_key
    label = str(column_key)

    repeat_click = self._last_sort_column == label
    self._last_sort_reverse = (not self._last_sort_reverse) if repeat_click else False
    self._last_sort_column = label

    descending = self._last_sort_reverse
    if self._is_numeric_column(table, column_key):
        table.sort(column_key, key=self._numeric_sort_key, reverse=descending)
        direction = "desc" if self._last_sort_reverse else "asc"
        self._debug(f"Sorted by column {label!r} numerically ({direction})")
        return

    table.sort(column_key, reverse=descending)
    direction = "desc" if self._last_sort_reverse else "asc"
    self._debug(f"Sorted by column {label!r} ({direction})")
861
+
862
def on_button_pressed(self, event: Button.Pressed) -> None:
    """Open the export dialog when the button with id ``export_button`` is pressed."""
    if event.button.id != "export_button":
        return
    self.action_export()
866
+
867
def on_text_area_changed(self, event: TextArea.Changed) -> None:
    """Show a status line for the expression editor as its text changes.

    Changes from any other ``TextArea`` are ignored. The status is one of:
    waiting (editor empty), incomplete (``_validate_expression`` is falsy)
    or appears valid.
    """
    source = event.text_area
    if source.id != "expression-editor":
        return

    expression = source.text.strip()
    if not expression:
        status = ("[dim]Waiting - Enter an expression and press Ctrl+R "
                  "or F5 to execute[/dim]")
    elif self._validate_expression(expression):
        status = ("[green]Expression appears valid - Press Ctrl+R or F5 to execute"
                  "[/green]")
    else:
        status = ("[yellow]Waiting - Expression incomplete (check parentheses,"
                  " braces, brackets, quotes)[/yellow]")
    self._update_output(status)
886
+
887
def _prep_row(self, result: XPathMap | dict, keys: list[str]) -> list[str]:
    """Prepare a row for table display from a dict-like result.

    Args:
        result: Dictionary or XPathMap to extract values from.
        keys: Ordered list of column keys to extract.

    Returns:
        List of string values in the same order as ``keys``. Missing keys and
        ``None`` values become ``""``. Iterable values are cut to their first
        10 items before being converted with ``str``.
    """
    from itertools import islice

    # Handle both dict and XPathMap for backward compatibility
    mapping = result if isinstance(result, dict) else dict(result.items())
    row = []
    for key in keys:  # Use provided order, not sorted
        value = mapping.get(key, "")
        if isinstance(value, dict):
            # Keep key/value pairs (iterating a dict would drop the values),
            # still limited to 10 entries for display.
            value = dict(islice(value.items(), 10))
        elif isinstance(value, Iterable) and not isinstance(value, (str, bytes)):
            # islice stops after 10 items instead of materializing everything.
            value = list(islice(value, 10))
        row.append("" if value is None else str(value))
    return row
912
+ return row
913
+
914
@work(exclusive=True)
async def collect_results(self, expression: str) -> None:
    """Run *expression* and stream its results into a table in the output panel.

    Declared as an exclusive Textual worker. A ``Crawler`` is built from the
    persistent TUI settings and custom headers, wrapped in a ``WXPathEngine``,
    and every result yielded by ``engine.run`` becomes one table row. Columns
    are derived from the first result: its keys when it is a dict, otherwise
    a single ``value`` column.

    Error handling:
        * Error records yielded by the engine (dicts whose ``__type__`` is
          ``"error"``) are logged to the debug panel and skipped.
        * Cancellation keeps the partial results, appends a status line and
          re-raises ``asyncio.CancelledError``.
        * Any other exception is logged with its traceback and shown as a
          table row, even when it happens before the table was created.

    ``_executing`` is always reset in the ``finally`` clause.

    Args:
        expression: The wxpath expression to execute.
    """
    count = 0
    # Bound before the try block so the generic error handler can rely on
    # them even if the failure happens during crawler/engine setup.
    data_table: DataTable | None = None
    columns_initialized = False
    column_keys: list[str] = []
    try:
        # Import here to avoid circular imports
        from wxpath.http.client.crawler import Crawler

        conc = self.tui_settings.get("concurrency", 16)
        ph = self.tui_settings.get("per_host", 8)
        robots = self.tui_settings.get("respect_robots", True)
        verify_ssl = self.tui_settings.get("verify_ssl", True)
        crawler = Crawler(
            concurrency=conc,
            per_host=ph,
            respect_robots=robots,
            verify_ssl=verify_ssl,
            headers=dict(self.custom_headers) if self.custom_headers else None,
        )
        engine = WXPathEngine(crawler=crawler)

        # Streaming approach: rows are added to the table as results arrive.
        panel = self.query_one("#output-panel", OutputPanel)
        panel.clear()

        data_table = DataTable(show_header=True, zebra_stripes=True)
        panel.mount(data_table)

        async for result in engine.run(expression, max_depth=1, progress=False, yield_errors=True):
            if isinstance(result, dict) and result.get("__type__") == "error":
                self._debug(f"Error: {result.get('reason')}: {result}")
                continue
            count += 1
            if count % 100 == 0:
                self._debug(f"Received result {count} of type {type(result).__name__}")

            if isinstance(result, XPathMap):
                result = result._map

            if not columns_initialized:
                self._debug("Initializing table columns")
                if isinstance(result, dict):
                    column_keys = list(result.keys())
                    for key in column_keys:
                        data_table.add_column(str(key), key=key)
                else:
                    data_table.add_column("value", key="value")
                    column_keys = ["value"]
                columns_initialized = True
                self._debug(f"Initializing table columns: {column_keys}")

            if isinstance(result, dict):
                row = self._prep_row(result, column_keys)
            else:
                row = [result]
            # The running count doubles as a unique row key.
            data_table.add_row(*row, key=str(count))

    except asyncio.CancelledError:
        # Keep partial results; append status without clearing the panel
        panel = self.query_one("#output-panel", OutputPanel)
        if count > 0:
            panel.append(f"[yellow]Crawl cancelled — {count} partial result(s) shown.[/yellow]")
        else:
            panel.append("[yellow]Crawl cancelled.[/yellow]")
        self._debug("Crawl cancelled by user.")
        raise
    except asyncio.TimeoutError:
        # NOTE(review): no timeout is applied in this method, so this only
        # fires if the engine/crawler itself raises TimeoutError.
        if count == 0:
            self._update_output(
                "[yellow]Timeout after 60s - no results returned[/yellow]\n"
                "The site may be slow or unresponsive."
            )
    except Exception as e:
        # Log full stack trace to debug panel
        self._debug(traceback.format_exc())
        # Append error as next row of table (do not clear output panel)
        err_msg = f"Execution Error: {type(e).__name__}: {e}"
        if data_table is None:
            # Setup failed before the table existed; create one for the error.
            data_table = DataTable(show_header=True, zebra_stripes=True)
            self.query_one("#output-panel", OutputPanel).mount(data_table)
        if columns_initialized and column_keys:
            row = [err_msg] + [""] * (len(column_keys) - 1)
            data_table.add_row(*row, key=f"error-{count}")
        else:
            data_table.add_column("error", key="error")
            data_table.add_row(err_msg, key="error-0")
    finally:
        self._executing = False
        self._debug(f"Processed {count} results.")
1015
+
1016
+
1017
async def action_execute(self) -> None:
    """Run the expression currently held in the editor.

    Nothing happens while a previous run is still flagged as executing.
    An empty expression, or one rejected by ``_validate_expression``, only
    puts a "Waiting" notice in the output panel. Otherwise the expression
    is passed to ``collect_results`` and the value it returns is kept in
    ``_crawl_worker`` so the run can be cancelled later.
    """
    if self._executing:
        return

    expression = self.query_one("#expression-editor", TextArea).text.strip()
    if not expression:
        self._update_output("[yellow]Waiting - No expression to execute[/yellow]")
        return

    self._executing = True
    self._update_output("[cyan]Executing...[/cyan]")
    self._debug(f"Executing expression: {expression!r}")

    try:
        # Cheap structural pre-check before starting any crawl.
        if self._validate_expression(expression):
            self._crawl_worker = self.collect_results(expression)
        else:
            self._update_output("[yellow]Waiting - Expression incomplete or invalid[/yellow]")
            self._executing = False
    except SyntaxError as exc:
        self._update_output(f"[yellow]Waiting - Syntax Error:[/yellow] {exc}")
        self._executing = False
    except ValueError as exc:
        self._update_output(f"[yellow]Waiting - Validation Error:[/yellow] {exc}")
        self._executing = False
    except Exception as exc:
        self._update_output(f"[red]Error:[/red] {type(exc).__name__}: {exc}")
        self._executing = False
    # The success path deliberately leaves _executing set: the work happens
    # in collect_results, and that coroutine's finally block clears the flag.
1054
+
1055
def action_cancel_crawl(self) -> None:
    """Ask the running crawl worker, if any, to cancel.

    The current state is always written to the debug panel; the worker is
    only cancelled when a run is flagged as executing and the stored worker
    reports that it is still running.
    """
    worker = self._crawl_worker
    self._debug(
        f"Cancelling crawl... executing: {self._executing}, "
        f"crawl_worker.name: {getattr(worker, 'name', None)}, "
        f"crawl_worker.is_running: {getattr(worker, 'is_running', False)}"
    )
    if not (self._executing and worker and worker.is_running):
        return
    self._debug("Cancel requested for crawl.")
    worker.cancel()
1063
+
1064
+ def _validate_expression(self, expression: str) -> bool:
1065
+ """Validate if expression is complete and well-formed.
1066
+
1067
+ Args:
1068
+ expression: Expression string to validate
1069
+
1070
+ Returns:
1071
+ True if expression appears complete, False otherwise
1072
+ """
1073
+ # Check for balanced parentheses
1074
+ paren_count = expression.count('(') - expression.count(')')
1075
+ if paren_count != 0:
1076
+ return False
1077
+
1078
+ # Check for balanced braces
1079
+ brace_count = expression.count('{') - expression.count('}')
1080
+ if brace_count != 0:
1081
+ return False
1082
+
1083
+ # Check for balanced brackets
1084
+ bracket_count = expression.count('[') - expression.count(']')
1085
+ if bracket_count != 0:
1086
+ return False
1087
+
1088
+ # Check for unclosed quotes
1089
+ # Simple check: even number of unescaped quotes
1090
+ single_quotes = len([c for i, c in enumerate(expression)
1091
+ if c == "'" and (i == 0 or expression[i-1] != '\\')])
1092
+ double_quotes = len([c for i, c in enumerate(expression)
1093
+ if c == '"' and (i == 0 or expression[i-1] != '\\')])
1094
+
1095
+ if single_quotes % 2 != 0 or double_quotes % 2 != 0:
1096
+ return False
1097
+
1098
+ return True
1099
+
1100
def action_clear(self) -> None:
    """Reset the output panel to its idle placeholder and log the reset."""
    placeholder = "Waiting for expression..."
    self._update_output(placeholder)
    self._debug("Cleared output panel.")
1104
+
1105
def action_clear_editor(self) -> None:
    """Clear the expression editor (all text)."""
    # Pass the expected widget type, as the other query_one calls in this
    # file do, so a mismatched widget is reported by the query itself
    # rather than by a failing .clear() call.
    editor = self.query_one("#expression-editor", TextArea)
    editor.clear()
    self._debug("Expression editor cleared.")
1110
+
1111
def _update_output(self, content: str | RenderableType) -> None:
    """Replace the output panel's contents with ``content``.

    All existing children of the panel are removed and a single ``Static``
    widget wrapping ``content`` is mounted in their place.

    Args:
        content: Markup string or Rich renderable to display
    """
    panel = self.query_one("#output-panel", OutputPanel)
    panel.remove_children()
    # Strings and renderables are wrapped the same way, so no type
    # dispatch is needed here.
    panel.mount(Static(content))
1126
+
1127
def action_clear_debug(self) -> None:
    """Empty the debug panel."""
    self.query_one("#debug-panel", DebugPanel).clear()
1131
+
1132
def watch_debug_panel_visible(self, visible: bool) -> None:
    """Apply a visibility change to the debug container.

    Args:
        visible: New value; assigned directly to the container's ``display``
    """
    self.query_one("#debug-container", Container).display = visible
1136
+
1137
def action_toggle_debug(self) -> None:
    """Flip ``debug_panel_visible`` and log the resulting state."""
    self.debug_panel_visible = not self.debug_panel_visible
    # Read the attribute back so the log reflects the value actually stored.
    if self.debug_panel_visible:
        state = "shown"
    else:
        state = "hidden"
    self._debug(f"Debug panel {state}")
1142
+
1143
+ def _escape_rich_markup(self, s: str) -> str:
1144
+ """Escape [ and ] so Rich does not interpret them as markup."""
1145
+ return s.replace("[", "\\[").replace("]", "\\]")
1146
+
1147
def _debug(self, message: str) -> None:
    """Write ``message`` to the debug panel, prefixed with the local time.

    The message has its square brackets escaped via ``_escape_rich_markup``
    before it is appended; the ``HH:MM:SS`` prefix is wrapped in ``[dim]``.
    """
    debug_panel = self.query_one("#debug-panel", DebugPanel)
    stamp = datetime.now().strftime("%H:%M:%S")
    safe_message = self._escape_rich_markup(message)
    debug_panel.append(f"[dim]{stamp}[/dim] {safe_message}")
1152
+
1153
+ def _format_stream_item(self, result: Any):
1154
+ """Helps format stream items for display."""
1155
+ if isinstance(result, dict):
1156
+ return self._format_dict(result)
1157
+ elif isinstance(result, HtmlElement):
1158
+ return self._format_html_element(result)
1159
+ else:
1160
+ return str(result)
1161
+
1162
def _format_html_element(self, elem: HtmlElement) -> str:
    """Render an HTML element as a short, markup-safe snippet.

    The element is serialised with ``tostring``, cut to its first 300
    characters (with ``...`` appended when cut), and every ``[`` is
    backslash-escaped before the text is wrapped in ``[green]`` tags.

    Args:
        elem: HTML element to format

    Returns:
        The green-tagged snippet, or a yellow-tagged message naming the
        element's tag and the error if anything raised along the way
    """
    try:
        serialized = tostring(elem, encoding='unicode', method='html')
        # Keep the snippet short: at most 300 characters plus an ellipsis.
        snippet = serialized if len(serialized) <= 300 else serialized[:300] + "..."
        # Opening brackets would otherwise be read as markup tags.
        snippet = snippet.replace("[", "\\[")
    except Exception as exc:
        return f" [yellow]<{elem.tag}> (error formatting: {exc})[/yellow]"
    return f" [green]{snippet}[/green]"
1187
+
1188
+ def _format_dict(self, d: dict) -> str:
1189
+ """Format dictionary with indentation.
1190
+
1191
+ Args:
1192
+ d: Dictionary to format
1193
+
1194
+ Returns:
1195
+ Formatted string
1196
+ """
1197
+ lines = [" {"]
1198
+ for key, value in d.items():
1199
+ if isinstance(value, str) and len(value) > 100:
1200
+ value = value[:100] + "..."
1201
+ lines.append(f" {key!r}: {value!r},")
1202
+ lines.append(" }")
1203
+ return "\n".join(lines)
1204
+
1205
def main():
    """Start the wxpath TUI.

    This is the entry point behind the ``wxpath-tui`` command: it builds a
    ``WXPathTUI`` application and runs it.

    Example:
        Run from command line::

            $ wxpath-tui

    Note:
        The call does not return until the application exits, i.e. the
        user quits with Ctrl+Q or closes the terminal.
    """
    WXPathTUI().run()


if __name__ == "__main__":
    main()