wxpath 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wxpath/tui.py ADDED
@@ -0,0 +1,1204 @@
1
+ """TUI for interactive wxpath expression testing.
2
+
3
+ A two-panel terminal interface:
4
+ - Top panel: Editor for wxpath DSL expressions
5
+ - Bottom panel: Live output of executed expressions
6
+
7
+ Warning:
8
+ Pre-1.0.0 - APIs and contracts may change
9
+
10
+ Example:
11
+ Launch the TUI from command line::
12
+
13
+ $ wxpath-tui
14
+
15
+ Or run as a module::
16
+
17
+ $ python -m wxpath.tui
18
+
19
+ """
20
+ import asyncio
21
+ import csv
22
+ import json
23
+ from datetime import datetime
24
+ from pathlib import Path
25
+ from typing import Any, Iterable
26
+
27
+ from elementpath.xpath_tokens import XPathMap
28
+ from lxml.html import HtmlElement, tostring
29
+ from rich.console import RenderableType
30
+ from textual import work
31
+ from textual.app import App, ComposeResult
32
+ from textual.containers import Container, Horizontal, Vertical, VerticalScroll
33
+ from textual.reactive import reactive
34
+ from textual.screen import ModalScreen
35
+ from textual.widgets import (
36
+ Button,
37
+ DataTable,
38
+ Footer,
39
+ Header,
40
+ Input,
41
+ Static,
42
+ Switch,
43
+ TextArea,
44
+ )
45
+
46
+ from wxpath.core.runtime.engine import WXPathEngine
47
+ from wxpath.hooks import registry
48
+ from wxpath.hooks.builtin import SerializeXPathMapAndNodeHook
49
+ from wxpath.settings import SETTINGS
50
+ from wxpath.tui_settings import (
51
+ TUISettingsSchema,
52
+ load_tui_settings,
53
+ save_tui_settings,
54
+ validate_tui_settings,
55
+ )
56
+
57
+
58
class HeadersScreen(ModalScreen):
    """Modal dialog in which custom HTTP headers are edited as JSON.

    The screen is dismissed with the parsed header dict when the user saves,
    with an empty dict when the editor is left blank, and with ``None`` when
    the user cancels.
    """

    CSS = """
    HeadersScreen {
        align: center middle;
    }

    #headers-dialog {
        width: 80;
        height: 25;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #headers-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    #headers-editor {
        height: 1fr;
        margin: 1 0;
    }

    #headers-help {
        color: $text-muted;
        margin-bottom: 1;
    }

    #headers-buttons {
        height: auto;
        align: center middle;
    }

    Button {
        margin: 0 1;
    }
    """

    def __init__(self, current_headers: dict):
        """Create the dialog.

        Args:
            current_headers: Headers shown in the editor when it opens.
        """
        super().__init__()
        self.current_headers = current_headers

    def compose(self) -> ComposeResult:
        """Yield the title, help line, JSON editor and button row."""
        help_text = (
            "Enter headers as JSON (one per line or as object)."
            " Press Ctrl+S to save, Escape to cancel."
        )
        with Vertical(id="headers-dialog"):
            yield Static("HTTP Headers Configuration", id="headers-title")
            yield Static(help_text, id="headers-help")

            # The editor starts out showing the current headers, pretty-printed.
            yield TextArea(
                json.dumps(self.current_headers, indent=2),
                language="json",
                id="headers-editor",
            )

            with Container(id="headers-buttons"):
                yield Button("Save (Ctrl+S)", variant="primary", id="save-btn")
                yield Button("Cancel (Esc)", variant="default", id="cancel-btn")

    def on_mount(self) -> None:
        """Give keyboard focus to the JSON editor."""
        editor = self.query_one("#headers-editor", TextArea)
        editor.focus()

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Save on the Save button, dismiss with ``None`` on Cancel."""
        pressed = event.button.id
        if pressed == "save-btn":
            self._save_headers()
            return
        if pressed == "cancel-btn":
            self.dismiss(None)

    def on_key(self, event) -> None:
        """Handle Ctrl+S (save) and Escape (cancel); ignore every other key."""
        pressed = event.key
        if pressed not in ("ctrl+s", "escape"):
            return
        if pressed == "ctrl+s":
            self._save_headers()
        else:
            self.dismiss(None)
        event.prevent_default()

    def _save_headers(self) -> None:
        """Validate the editor content and dismiss the screen with the result.

        Invalid input is reported through a notification and leaves the
        dialog open.
        """
        raw = self.query_one("#headers-editor", TextArea).text.strip()

        # A blank editor means "no custom headers".
        if not raw:
            self.dismiss({})
            return

        try:
            parsed = json.loads(raw)
        except json.JSONDecodeError as exc:
            self.notify(f"Invalid JSON: {exc}", severity="error")
            return

        if not isinstance(parsed, dict):
            self.notify("Headers must be a JSON object/dict", severity="error")
            return

        # Header names and values must both be plain strings.
        for name, content in parsed.items():
            if not isinstance(name, str):
                self.notify(f"Header key must be string: {name}", severity="error")
                return
            if not isinstance(content, str):
                self.notify(f"Header value must be string: {content}", severity="error")
                return

        self.dismiss(parsed)
184
+
185
+
186
class SettingsScreen(ModalScreen):
    """Modal screen for editing persistent TUI settings (CONCURRENCY, PER_HOST, RESPECT_ROBOTS).

    One row is rendered per entry of ``TUISettingsSchema``: an integer
    ``Input`` for entries whose type is ``"int"`` and a ``Switch`` otherwise.
    On save the values are validated with ``validate_tui_settings``, written
    with ``save_tui_settings`` and returned to the caller through
    ``dismiss``; cancelling dismisses with ``None``.
    """

    CSS = """
    SettingsScreen {
        align: center middle;
    }

    #settings-dialog {
        width: 60;
        min-height: 18;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #settings-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    .settings-row {
        height: auto;
        padding: 1 0;
    }

    .settings-label {
        width: 18;
        text-style: bold;
    }

    .settings-input {
        width: 1fr;
    }

    #settings-help {
        color: $text-muted;
        margin: 1 0;
    }

    #settings-buttons {
        height: auto;
        align: center middle;
        padding: 1 0;
    }

    #settings-buttons Button {
        margin: 0 1;
    }
    """

    def __init__(self, current: dict[str, Any]):
        """Create the dialog.

        Args:
            current: Current setting values keyed by schema key. A shallow
                copy is stored so the caller's dict is never mutated.
        """
        super().__init__()
        self.current = dict(current)

    def compose(self) -> ComposeResult:
        """Build one labelled input row per schema entry plus the button row."""
        with Vertical(id="settings-dialog"):
            yield Static("Crawler Settings (persistent)", id="settings-title")
            yield Static(
                "Values are saved to config and used for the next run. Ctrl+S save, Esc cancel.",
                id="settings-help",
            )
            for entry in TUISettingsSchema:
                key = entry["key"]
                value = self.current.get(key, entry["default"])
                with Horizontal(classes="settings-row"):
                    yield Static(entry["label"], classes="settings-label")
                    if entry["type"] == "int":
                        yield Input(
                            str(value),
                            type="integer",
                            id=f"setting-{key}",
                            classes="settings-input",
                        )
                    else:
                        yield Switch(
                            value=bool(value),
                            id=f"setting-{key}",
                            classes="settings-input",
                        )
            with Container(id="settings-buttons"):
                yield Button("Save (Ctrl+S)", variant="primary", id="settings-save-btn")
                yield Button("Cancel (Esc)", variant="default", id="settings-cancel-btn")

    def on_mount(self) -> None:
        """Focus the widget belonging to the first schema entry."""
        first_id = f"setting-{TUISettingsSchema[0]['key']}"
        self.query_one(f"#{first_id}").focus()

    def _gather(self) -> dict[str, Any]:
        """Read the current widget values into a settings dict.

        Returns:
            Mapping of schema key to the entered value. A blank integer
            field falls back to the schema default.

        Raises:
            ValueError: If an integer field holds text that is not a
                complete integer (for example a lone "-").
        """
        result: dict[str, Any] = {}
        for entry in TUISettingsSchema:
            key = entry["key"]
            node = self.query_one(f"#setting-{key}")
            if not isinstance(node, Input):
                result[key] = node.value
                continue
            raw = node.value.strip()
            if not raw:
                result[key] = entry["default"]
                continue
            try:
                result[key] = int(raw)
            except ValueError:
                # Re-raise with a message that can be shown to the user as is.
                raise ValueError(f"{entry['label']} must be a whole number") from None
        return result

    def _validate(self, data: dict[str, Any]) -> str | None:
        """Return the first validation error for *data*, or ``None`` if valid."""
        errors = validate_tui_settings(data)
        return errors[0] if errors else None

    def _save(self) -> None:
        """Gather, validate and persist the settings, then dismiss with them.

        Any problem is reported as an error notification and the dialog
        stays open so the user can correct the input.
        """
        try:
            data = self._gather()
        except ValueError as exc:
            self.notify(str(exc), severity="error")
            return
        err = self._validate(data)
        if err:
            self.notify(err, severity="error")
            return
        save_tui_settings(data)
        self.dismiss(data)

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Save on the Save button, dismiss with ``None`` on Cancel."""
        if event.button.id == "settings-save-btn":
            self._save()
        elif event.button.id == "settings-cancel-btn":
            self.dismiss(None)

    def on_key(self, event) -> None:
        """Handle Ctrl+S (save) and Escape (cancel)."""
        if event.key == "ctrl+s":
            self._save()
            # The key counts as handled even when validation failed.
            event.prevent_default()
        elif event.key == "escape":
            self.dismiss(None)
            event.prevent_default()
327
+
328
+
329
class ExportScreen(ModalScreen):
    """Modal dialog that asks which format to export the output table in.

    The screen is dismissed with ``"csv"`` or ``"json"`` when a format
    button is pressed and with ``None`` on cancel.
    """

    CSS = """
    ExportScreen {
        align: center middle;
    }

    #export-dialog {
        width: 50;
        border: thick $primary;
        background: $surface;
        padding: 1 2;
    }

    #export-title {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    #export-buttons {
        height: auto;
        align: center middle;
        padding: 1 0;
    }

    #export-buttons Button {
        margin: 0 1;
    }
    """

    def compose(self) -> ComposeResult:
        """Yield the title, a help line and the three buttons."""
        with Vertical(id="export-dialog"):
            yield Static("Export table data", id="export-title")
            yield Static(
                "Choose format. File is saved in the current directory.",
                id="export-help",
            )
            with Container(id="export-buttons"):
                yield Button("Export CSV", variant="primary", id="export-csv-btn")
                yield Button("Export JSON", variant="primary", id="export-json-btn")
                yield Button("Cancel (Esc)", variant="default", id="export-cancel-btn")

    def on_button_pressed(self, event: Button.Pressed) -> None:
        """Dismiss with the value that belongs to the pressed button."""
        # Button id -> value handed to the dismiss callback.
        outcomes = {
            "export-cancel-btn": None,
            "export-csv-btn": "csv",
            "export-json-btn": "json",
        }
        pressed = event.button.id
        if pressed in outcomes:
            self.dismiss(outcomes[pressed])

    def on_key(self, event) -> None:
        """Treat Escape as cancel; leave all other keys alone."""
        if event.key != "escape":
            return
        self.dismiss(None)
        event.prevent_default()
394
+
395
+
396
class OutputPanel(Vertical, can_focus=True):
    """Display panel for expression results.

    A focusable vertical container. Content is shown by mounting child
    widgets into it: ``append`` wraps a renderable in a ``Static``, and
    callers may also mount widgets directly.
    """

    def __init__(self, *args, **kwargs):
        """Initialize the output panel.

        Args:
            *args: Positional arguments passed to ``Vertical``
            **kwargs: Keyword arguments passed to ``Vertical``
        """
        super().__init__(*args, **kwargs)
        self.border_title = "Output"

    def clear(self) -> None:
        """Remove every child widget from the panel."""
        self.remove_children()

    def append(self, renderable: RenderableType) -> None:
        """Mount *renderable* at the end of the panel inside a ``Static``.

        Args:
            renderable: Text or Rich renderable to display
        """
        self.mount(Static(renderable))
433
+
434
+
435
class DebugPanel(VerticalScroll, can_focus=False):
    """Non-focusable scroll region that accumulates debug lines.

    Each appended message becomes its own ``Static`` child carrying the
    ``debug-line`` CSS class; the view follows the newest line.
    """

    def __init__(self, *args, **kwargs):
        """Forward all arguments to ``VerticalScroll`` unchanged."""
        super().__init__(*args, **kwargs)

    def clear(self) -> None:
        """Drop every message currently shown."""
        self.remove_children()

    def append(self, message: str) -> None:
        """Add one message at the bottom and scroll it into view.

        Args:
            message: Text of the debug line (Rich markup is allowed)
        """
        line = Static(message, classes="debug-line")
        self.mount(line)
        # Jump straight to the end; no scroll animation.
        self.scroll_end(animate=False)
461
+
462
+
463
class WXPathTUI(App):
    """Interactive TUI for wxpath expression testing.

    Top panel: Expression editor
    Bottom panel: Live output display, with a debug message area beneath it
    """

    TITLE = "wxpath TUI - Interactive Expression Testing"
    # SUB_TITLE will be set dynamically based on cache state

    # Layout: the editor container takes 40% of the screen height and the
    # output container 60%; inside the output container the result panel and
    # the debug container share the space 3fr : 1fr.
    CSS = """
    Screen {
        layout: vertical;
        background: $surface;
    }

    #editor-container {
        height: 40%;
        border: heavy $primary;
        background: $panel;
    }

    #output-container {
        /* height: 60%; */
        height: 60%;
        border: heavy $accent;
        background: $panel;
    }

    #output-panel {
        height: 3fr;
    }

    #debug-container {
        layout: vertical;
        height: 1fr;
        min-height: 5;
        border-top: tall $accent-darken-1;
        background: $surface-darken-1;
    }

    #debug-header {
        background: $accent-darken-1;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    #debug-panel {
        height: 1fr;
        min-height: 3;
        padding: 0 2;
        overflow-y: auto;
        background: $surface-darken-1;
    }

    TextArea {
        height: 100%;
        background: $surface;
    }

    OutputPanel {
        height: 100%;
        padding: 1 2;
        overflow-y: auto;
        background: $surface;
    }

    DebugPanel {
        height: 100%;
        padding: 1 0;
        overflow-y: auto;
        background: $surface;
    }

    .panel-header {
        background: $primary;
        color: $text;
        text-style: bold;
        padding: 0 2;
        dock: top;
    }

    Header {
        background: $primary-darken-2;
    }

    Footer {
        background: $primary-darken-2;
    }
    """

    # (key, action name, footer label). "execute" is reachable through both
    # Ctrl+R and F5.
    BINDINGS = [
        ("ctrl+q", "quit", "Quit"),
        ("ctrl+r", "execute", "Execute"),
        ("escape", "cancel_crawl", "Cancel Crawl"),
        ("ctrl+c", "clear", "Clear"),
        ("ctrl+d", "clear_debug", "Clear Debug"),
        ("ctrl+shift+d", "toggle_debug", "Toggle Debug"),
        ("ctrl+e", "export", "Export"),
        ("ctrl+l", "toggle_cache", "Cache"),
        ("ctrl+h", "edit_headers", "Headers"),
        ("ctrl+shift+s", "edit_settings", "Settings"),
        ("f5", "execute", "Execute"),
        ("tab", "focus_next", "Focus Next"),
    ]

    # Reactive state; the watch_* methods refresh the subtitle on change.
    # NOTE(review): the two ``{}`` defaults are single dict objects shared at
    # class level. The visible code only ever rebinds these attributes and
    # never mutates them in place - confirm that holds for the whole class.
    cache_enabled = reactive(False)
    debug_panel_visible = reactive(True)
    custom_headers = reactive({})
    tui_settings = reactive({})
575
+
576
def __init__(self):
    """Set up application state and the result-serialization hook.

    ``SerializeXPathMapAndNodeHook`` is registered with the hook registry,
    and the execution flag, crawl-worker handle and column-sort bookkeeping
    start out in their idle state. ``cache_enabled`` is deliberately left
    for ``on_mount`` to set.
    """
    super().__init__()

    # Converts XPathMap results to plain dicts for the table view.
    registry.register(SerializeXPathMapAndNodeHook)

    # Execution state: busy flag plus the worker of the running crawl,
    # which is kept so the crawl can be cancelled.
    self._executing = False
    self._crawl_worker = None

    # Last column sorted through a header click, and its direction.
    self._last_sort_column: str | None = None
    self._last_sort_reverse = False
591
+
592
def compose(self) -> ComposeResult:
    """Yield header, editor container, output container and footer.

    The output container holds the result panel and, nested below it, the
    debug container.
    """
    yield Header()

    with Container(id="editor-container"):
        editor_title = Static(
            "Expression Editor (Ctrl+R to execute)", classes="panel-header"
        )
        yield editor_title
        yield TextArea(id="expression-editor", language="python")

    with Container(id="output-container"):
        yield Static("Output", classes="panel-header")
        yield OutputPanel(id="output-panel")

        with Container(id="debug-container"):
            yield Static("Debug", id="debug-header", classes="panel-header")
            yield DebugPanel(id="debug-panel")

    yield Footer()
610
+
611
def on_mount(self) -> None:
    """Load settings, seed the editor with an example and show the help text."""
    # Assigning the reactive runs its watcher, which refreshes the subtitle.
    self.cache_enabled = bool(SETTINGS.http.client.cache.enabled)
    # Persistent settings (CONCURRENCY, PER_HOST, RESPECT_ROBOTS).
    self.tui_settings = load_tui_settings()

    sample = "url('https://quotes.toscrape.com')//span[@class='text']/text()"
    editor = self.query_one("#expression-editor", TextArea)
    editor.text = sample
    editor.focus()

    welcome = (
        "[dim]Welcome to wxpath TUI![/dim]\n\n"
        "[cyan]Quick Start:[/cyan]\n"
        "  • Edit the expression above\n"
        "  • Press [bold]Ctrl+R[/bold] or [bold]F5[/bold] to execute\n"
        "  • Press [bold]Escape[/bold] to cancel a running crawl\n"
        "  • Press [bold]Ctrl+E[/bold] to export table (CSV/JSON)\n"
        "  • Press [bold]Ctrl+C[/bold] to clear output\n"
        "  • Press [bold]Ctrl+Shift+D[/bold] to toggle debug panel\n"
        "  • Press [bold]Ctrl+H[/bold] to configure HTTP headers\n"
        "  • Press [bold]Ctrl+Shift+S[/bold] to edit persistent settings (concurrency, robots)\n"  # noqa: E501
        "  • Press [bold]Ctrl+L[/bold] to toggle HTTP caching\n"
        "  • Use [bold]arrow keys[/bold] or [bold]scroll[/bold] to view results\n\n"
        "[cyan]Example expressions:[/cyan]\n"
        "  • Extract text: url('...')//div//text()\n"
        "  • Extract as dict/table: url('...')//div/map { 'title': .//h1/text() }\n"
        "  • Follow links: url('...') ///url(//a/@href) //div/text()\n\n"
        "[green]Expression appears valid - Press Ctrl+R or F5 to execute[/green]"
    )
    self._update_output(welcome)
643
+
644
def watch_cache_enabled(self, new_value: bool) -> None:
    """Mirror the cache flag into the global settings and refresh the subtitle.

    Args:
        new_value: New value of the ``cache_enabled`` reactive
    """
    enabled = bool(new_value)
    # The HTTP crawler reads this global setting.
    SETTINGS.http.client.cache.enabled = enabled
    print(f"Cache enabled: {SETTINGS.http.client.cache.enabled}")
    self._update_subtitle()
650
+
651
def watch_custom_headers(self, new_value: dict) -> None:
    """Refresh the subtitle whenever ``custom_headers`` is reassigned.

    Args:
        new_value: New header dict (unused; the subtitle reads the attribute)
    """
    self._update_subtitle()
654
+
655
def watch_tui_settings(self, new_value: dict) -> None:
    """Refresh the subtitle whenever ``tui_settings`` is reassigned.

    Args:
        new_value: New settings dict (unused; the subtitle reads the attribute)
    """
    self._update_subtitle()
658
+
659
+ def _update_subtitle(self) -> None:
660
+ """Update subtitle with current cache, headers, and persistent settings."""
661
+ cache_state = "ON" if self.cache_enabled else "OFF"
662
+ headers_count = len(self.custom_headers)
663
+ headers_info = f"{headers_count} custom" if headers_count > 0 else "default"
664
+ conc = self.tui_settings.get("concurrency", 16)
665
+ ph = self.tui_settings.get("per_host", 8)
666
+ robots = "ON" if self.tui_settings.get("respect_robots", True) else "OFF"
667
+ self.sub_title = (
668
+ f"Cache: {cache_state} | Headers: {headers_info} | "
669
+ f"Concurrency: {conc} | Per host: {ph} | Robots: {robots} | "
670
+ f"Ctrl+R: Run | Ctrl+Shift+S: Settings | Ctrl+Q: Quit"
671
+ )
672
+
673
async def action_toggle_cache(self) -> None:
    """Flip ``cache_enabled`` and report the transition to the user."""
    before = "ON" if self.cache_enabled else "OFF"
    self.cache_enabled = not self.cache_enabled
    # Read the attribute back rather than assuming the assignment stuck.
    after = "ON" if self.cache_enabled else "OFF"

    summary = (
        f"[cyan]HTTP caching toggled: {before} → {after}[/cyan]\n\n"
        "[dim]This setting will apply to the next expression execution.[/dim]"
    )
    self._update_output(summary)
    self._debug(f"Toggled cache from {before} to {after}")
687
+
688
def action_edit_headers(self) -> None:
    """Show the headers dialog and apply whatever it returns."""

    def handle_headers_result(result):
        """Store the returned headers and summarize them in the output panel."""
        # None means the dialog was cancelled.
        if result is None:
            return

        self.custom_headers = result
        total = len(result)
        if total:
            rendered = json.dumps(result, indent=2)
            message = (
                f"[cyan]Custom headers saved ({total} headers)[/cyan]\n\n"
                f"[green]{rendered}[/green]\n\n"
                "[dim]These will apply to the next expression execution.[/dim]"
            )
        else:
            message = (
                "[cyan]Headers cleared - using defaults[/cyan]\n\n"
                "[dim]This will apply to the next expression execution.[/dim]"
            )
        self._update_output(message)

    # The dialog gets a copy so editing cannot touch the live headers.
    dialog = HeadersScreen(dict(self.custom_headers))
    self.push_screen(dialog, handle_headers_result)
    self._debug("Opened headers configuration screen")
710
+
711
def action_edit_settings(self) -> None:
    """Show the persistent-settings dialog and apply whatever it returns."""

    def handle_settings_result(result: dict[str, Any] | None) -> None:
        """Store the returned settings and summarize them in the output panel."""
        # None means the dialog was cancelled.
        if result is None:
            return

        self.tui_settings = result
        concurrency = result.get('concurrency', 16)
        per_host = result.get('per_host', 8)
        respect_robots = result.get('respect_robots', True)
        self._update_output(
            "[cyan]Persistent settings saved[/cyan]\n\n"
            f"CONCURRENCY: {concurrency} | "
            f"PER_HOST: {per_host} | "
            f"RESPECT_ROBOTS: {respect_robots}\n\n"
            "[dim]These apply to the next expression execution.[/dim]"
        )
        self._debug("Persistent settings saved and applied")

    # The dialog gets a copy of the current settings.
    dialog = SettingsScreen(dict(self.tui_settings))
    self.push_screen(dialog, handle_settings_result)
    self._debug("Opened persistent settings screen")
727
+
728
def _get_output_data_table(self) -> DataTable | None:
    """Look up the result table inside the output panel.

    Returns:
        The first ``DataTable`` found under ``#output-panel``, or ``None``
        when the panel contains no table.
    """
    matches = self.query_one("#output-panel", OutputPanel).query(DataTable)
    if not matches:
        return None
    return matches.first()
737
+
738
+ def _export_table_csv(self, data_table: DataTable, path: Path) -> None:
739
+ """Write table data to a CSV file.
740
+
741
+ Args:
742
+ data_table: The DataTable to export.
743
+ path: Output file path.
744
+ """
745
+ columns = data_table.ordered_columns
746
+ if not columns:
747
+ return
748
+ headers = [str(c.label) for c in columns]
749
+ with path.open("w", newline="", encoding="utf-8") as f:
750
+ writer = csv.writer(f)
751
+ writer.writerow(headers)
752
+ for row_meta in data_table.ordered_rows:
753
+ row_key = row_meta.key
754
+ cells = data_table.get_row(row_key)
755
+ writer.writerow([str(c) for c in cells])
756
+
757
+ def _export_table_json(self, data_table: DataTable, path: Path) -> None:
758
+ """Write table data to a JSON file (list of row objects).
759
+
760
+ Args:
761
+ data_table: The DataTable to export.
762
+ path: Output file path.
763
+ """
764
+ columns = data_table.ordered_columns
765
+ if not columns:
766
+ return
767
+ keys = [str(c.label) for c in columns]
768
+ rows = []
769
+ for row_meta in data_table.ordered_rows:
770
+ cells = data_table.get_row(row_meta.key)
771
+ rows.append(dict(zip(keys, [str(c) for c in cells], strict=True)))
772
+ with path.open("w", encoding="utf-8") as f:
773
+ json.dump(rows, f, indent=2)
774
+
775
def action_export(self) -> None:
    """Open export dialog to save table as CSV or JSON.

    The file is written to the current working directory as
    ``wxpath_export_<timestamp>.<ext>``. A missing or empty table is
    reported as a warning and no file is created.
    """
    def handle_export_result(fmt: str | None) -> None:
        """Export in the chosen format; ``None`` means the dialog was cancelled."""
        if fmt is None:
            self._debug("Export cancelled")
            return
        table = self._get_output_data_table()
        if table is None:
            self.notify(
                "No table to export. Run an expression that produces a table first.",
                severity="warning",
            )
            self._debug("Export attempted but output panel has no DataTable")
            return
        # A run with zero results leaves a table without rows or columns;
        # the exporters would write nothing, so do not claim success.
        if table.row_count == 0:
            self.notify("The table is empty - nothing to export.", severity="warning")
            self._debug("Export attempted but the DataTable has no rows")
            return
        stamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        ext = ".csv" if fmt == "csv" else ".json"
        path = Path.cwd() / f"wxpath_export_{stamp}{ext}"
        try:
            if fmt == "csv":
                self._export_table_csv(table, path)
            else:
                self._export_table_json(table, path)
        except OSError as e:
            self.notify(f"Export failed: {e}", severity="error")
            self._debug(f"Export failed: {e}")
            return
        self.notify(f"Exported to {path}", severity="information")
        self._debug(f"Exported table to {path} ({fmt.upper()}, {table.row_count} rows)")

    self.push_screen(ExportScreen(), handle_export_result)
    self._debug("Opened export dialog")
805
+
806
+ def _numeric_sort_key(self, value: Any) -> tuple[int, float | str]:
807
+ """Key for sorting: numbers by value, then non-numeric by string.
808
+
809
+ Used so numeric columns sort numerically (e.g. 2 < 10) instead of
810
+ lexicographically (e.g. "10" < "2"). Single cell value is passed
811
+ when sorting by one column.
812
+ """
813
+ s = "" if value is None else str(value).strip()
814
+ if not s:
815
+ return (1, "")
816
+ try:
817
+ return (0, float(s))
818
+ except (ValueError, TypeError):
819
+ return (1, str(value))
820
+
821
+ def _is_numeric_column(self, table: DataTable, column_key: Any) -> bool:
822
+ """Return True if column appears to be numeric (majority of non-empty parse as float)."""
823
+ numeric = 0
824
+ non_empty = 0
825
+ for cell in table.get_column(column_key):
826
+ if non_empty >= 10:
827
+ break
828
+ s = "" if cell is None else str(cell).strip()
829
+ if not s:
830
+ continue
831
+ non_empty += 1
832
+ try:
833
+ float(s)
834
+ numeric += 1
835
+ except (ValueError, TypeError):
836
+ pass
837
+ return numeric > 0 and numeric >= (non_empty / 2)
838
+
839
def on_data_table_header_selected(self, event: DataTable.HeaderSelected) -> None:
    """Sort the table by the clicked column.

    Clicking the same column again reverses the direction; clicking a
    different column starts ascending. Columns judged numeric are sorted
    with the numeric key, all others with the table's default ordering.
    """
    table = event.data_table
    column_key = event.column_key
    label = str(column_key)

    repeated = self._last_sort_column == label
    self._last_sort_reverse = (not self._last_sort_reverse) if repeated else False
    self._last_sort_column = label

    descending = self._last_sort_reverse
    direction = "desc" if descending else "asc"
    if self._is_numeric_column(table, column_key):
        table.sort(column_key, key=self._numeric_sort_key, reverse=descending)
        self._debug(f"Sorted by column {label!r} numerically ({direction})")
    else:
        table.sort(column_key, reverse=descending)
        self._debug(f"Sorted by column {label!r} ({direction})")
857
+
858
def on_button_pressed(self, event: Button.Pressed) -> None:
    """Start an export when the button with id ``export_button`` is pressed."""
    if event.button.id != "export_button":
        return
    self.action_export()
862
+
863
def on_text_area_changed(self, event: TextArea.Changed) -> None:
    """Show a live validation hint for the expression editor.

    Changes coming from any other text area are ignored.
    """
    area = event.text_area
    if area.id != "expression-editor":
        return

    expression = area.text.strip()
    if not expression:
        hint = (
            "[dim]Waiting - Enter an expression and press Ctrl+R "
            "or F5 to execute[/dim]"
        )
    elif self._validate_expression(expression):
        hint = (
            "[green]Expression appears valid - Press Ctrl+R or F5 to execute"
            "[/green]"
        )
    else:
        hint = (
            "[yellow]Waiting - Expression incomplete (check parentheses,"
            " braces, brackets, quotes)[/yellow]"
        )
    self._update_output(hint)
882
+
883
+ def _prep_row(self, result: XPathMap | dict, keys: list[str]) -> list[str]:
884
+ """Prepare a row for table display from a dict-like result.
885
+
886
+ Args:
887
+ result: Dictionary or XPathMap to extract values from
888
+ keys: Ordered list of column keys to extract
889
+
890
+ Returns:
891
+ List of string values in the same order as keys
892
+ """
893
+ row = []
894
+ # Handle both dict and XPathMap for backward compatibility
895
+ d = result if isinstance(result, dict) else dict(result.items())
896
+ for key in keys: # Use provided order, not sorted
897
+ value = d.get(key, "")
898
+ if isinstance(value, Iterable) and not isinstance(value, str):
899
+ # Limit iterables (except strings) to first 10 items for display
900
+ if isinstance(value, list):
901
+ value = value[:10]
902
+ elif isinstance(value, set):
903
+ value = list(value)[:10]
904
+ else:
905
+ value = list(value)[:10]
906
+ # Convert to string for table display
907
+ row.append("" if value is None else str(value))
908
+ return row
909
+
910
@work(exclusive=True)
async def collect_results(self, expression: str) -> None:
    """Run *expression* and stream its results into a table in the output panel.

    A crawler is built from the persistent settings and custom headers, the
    output panel is cleared, and every result yielded by the engine is added
    as a table row. Dict-like results get one column per key (taken from the
    first result); any other result goes into a single "value" column.

    Cancellation keeps the rows received so far and re-raises. Timeouts and
    other errors are reported in the output panel. ``_executing`` is always
    reset when the worker finishes.

    Args:
        expression: The wxpath expression to execute
    """
    count = 0
    try:
        # Import here to avoid circular imports
        from wxpath.http.client.crawler import Crawler

        crawler = Crawler(
            concurrency=self.tui_settings.get("concurrency", 16),
            per_host=self.tui_settings.get("per_host", 8),
            respect_robots=self.tui_settings.get("respect_robots", True),
            verify_ssl=self.tui_settings.get("verify_ssl", True),
            headers=dict(self.custom_headers) if self.custom_headers else None,
        )
        engine = WXPathEngine(crawler=crawler)

        # Streaming approach: mount the table first, then add rows as
        # results arrive.
        panel = self.query_one("#output-panel", OutputPanel)
        panel.clear()

        data_table = DataTable(show_header=True, zebra_stripes=True)
        panel.mount(data_table)
        columns_initialized = False
        column_keys: list[str] = []

        async for result in engine.run(expression, max_depth=1, progress=False):
            count += 1
            if count % 100 == 0:
                self._debug(f"Received result {count} of type {type(result).__name__}")

            if isinstance(result, XPathMap):
                # NOTE(review): reads XPathMap's private ``_map``; presumably
                # the underlying dict - confirm against elementpath.
                result = result._map

            if not columns_initialized:
                # The first result decides the table layout.
                self._debug("Initializing table columns")
                column_keys = list(result.keys()) if isinstance(result, dict) else ["value"]
                for key in column_keys:
                    data_table.add_column(str(key), key=key)
                columns_initialized = True
                self._debug(f"Initializing table columns: {column_keys}")

            if isinstance(result, dict):
                row = self._prep_row(result, column_keys)
            else:
                row = [result]
            # The running count doubles as a unique row key.
            data_table.add_row(*row, key=str(count))

        if count == 0:
            # Without this the user is left looking at a blank table.
            panel.append("[yellow]No results.[/yellow]")

    except asyncio.CancelledError:
        # Keep partial results; append status without clearing the panel
        panel = self.query_one("#output-panel", OutputPanel)
        if count > 0:
            panel.append(f"[yellow]Crawl cancelled — {count} partial result(s) shown.[/yellow]")
        else:
            panel.append("[yellow]Crawl cancelled.[/yellow]")
        self._debug("Crawl cancelled by user.")
        raise
    except asyncio.TimeoutError:
        # NOTE(review): no timeout is applied in this method, so the "60s"
        # in the message presumably refers to a timeout inside the
        # engine/crawler - confirm.
        if count > 0:
            # Keep the rows already shown, but say that they are incomplete.
            self.query_one("#output-panel", OutputPanel).append(
                f"[yellow]Timed out — {count} partial result(s) shown.[/yellow]"
            )
        else:
            self._update_output(
                "[yellow]Timeout after 60s - no results returned[/yellow]\n"
                "The site may be slow or unresponsive."
            )
    except Exception as e:
        # Top-level boundary of the worker: report instead of crashing the app.
        self._update_output(f"[red]Execution Error:[/red] {type(e).__name__}: {e}")
    finally:
        # Runs on every exit path, so the handlers need not reset the flag.
        self._executing = False
        self._debug(f"Processed {count} results.")
1000
+
1001
+
1002
async def action_execute(self) -> None:
    """Run the expression currently held in the editor.

    Returns immediately when a run is already in progress. An empty
    expression, or one rejected by ``_validate_expression``, only puts a
    "Waiting" message in the output panel. Otherwise ``collect_results`` is
    called with the expression and its return value is kept in
    ``_crawl_worker`` so the run can be cancelled later.
    """
    if self._executing:
        return

    expression = self.query_one("#expression-editor", TextArea).text.strip()
    if not expression:
        self._update_output("[yellow]Waiting - No expression to execute[/yellow]")
        return

    self._executing = True
    self._update_output("[cyan]Executing...[/cyan]")
    self._debug(f"Executing expression: {expression!r}")

    try:
        # Cheap structural validation before any crawling is started.
        if not self._validate_expression(expression):
            self._update_output("[yellow]Waiting - Expression incomplete or invalid[/yellow]")
            self._executing = False
            return

        self._crawl_worker = self.collect_results(expression)
    except SyntaxError as exc:
        failure = f"[yellow]Waiting - Syntax Error:[/yellow] {exc}"
    except ValueError as exc:
        failure = f"[yellow]Waiting - Validation Error:[/yellow] {exc}"
    except Exception as exc:
        failure = f"[red]Error:[/red] {type(exc).__name__}: {exc}"
    else:
        # Leave _executing set on success: execution continues inside
        # collect_results, and only that coroutine's finally block should
        # clear the flag.
        return

    # Starting the run failed: report why and release the busy flag.
    self._update_output(failure)
    self._executing = False
1039
+
1040
def action_cancel_crawl(self) -> None:
    """Request cancellation of the crawl that is currently running.

    The current state is always written to the debug panel. ``cancel()`` is
    only called on the stored worker when the app is marked as executing, a
    worker is stored, and that worker reports ``is_running``.
    """
    worker = self._crawl_worker
    worker_name = getattr(worker, 'name', None)
    worker_running = getattr(worker, 'is_running', False)
    self._debug(
        f"Cancelling crawl... executing: {self._executing}, "
        f"crawl_worker.name: {worker_name}, "
        f"crawl_worker.is_running: {worker_running}"
    )

    # Nothing to cancel unless a live worker belongs to an active run.
    if not self._executing or not worker or not worker.is_running:
        return

    self._debug("Cancel requested for crawl.")
    worker.cancel()
1048
+
1049
def _validate_expression(self, expression: str) -> bool:
    """Check that an expression is structurally complete.

    The text is scanned once, left to right, tracking open delimiters and
    the active string literal:

    - ``()``, ``{}`` and ``[]`` must be closed in the reverse order they
      were opened (``")("`` and ``"([)]"`` are rejected).
    - Inside a quoted literal every character is ignored until the same
      quote character closes it, so ``"it's"`` and ``'('`` do not
      unbalance the expression.
    - A quote character immediately preceded by a backslash is never
      treated as a delimiter.

    Args:
        expression: Expression string to validate

    Returns:
        True if expression appears complete, False otherwise
    """
    closer_to_opener = {")": "(", "}": "{", "]": "["}
    openers = frozenset(closer_to_opener.values())

    open_stack: list[str] = []
    active_quote: str | None = None
    previous = ""

    for char in expression:
        escaped = previous == "\\"
        previous = char

        if active_quote is not None:
            # Only the matching, unescaped quote ends the literal.
            if char == active_quote and not escaped:
                active_quote = None
            continue

        if char == "'" or char == '"':
            if not escaped:
                active_quote = char
        elif char in openers:
            open_stack.append(char)
        elif char in closer_to_opener:
            # A closer must match the most recently opened delimiter.
            if not open_stack or open_stack.pop() != closer_to_opener[char]:
                return False

    # Complete only when no literal and no delimiter is left open.
    return active_quote is None and not open_stack
1084
+
1085
def action_clear(self) -> None:
    """Reset the output panel to its idle placeholder and log the reset."""
    idle_placeholder = "Waiting for expression..."
    self._update_output(idle_placeholder)
    self._debug("Cleared output panel.")
1089
+
1090
def _update_output(self, content: str | RenderableType) -> None:
    """Replace everything in the output panel with ``content``.

    Args:
        content: Text or renderable to show in the panel
    """
    output_panel = self.query_one("#output-panel", OutputPanel)
    output_panel.remove_children()
    # Strings and renderables are treated identically: both are wrapped in
    # a single Static widget.
    output_panel.mount(Static(content))
1105
+
1106
def action_clear_debug(self) -> None:
    """Remove all messages from the debug panel."""
    self.query_one("#debug-panel", DebugPanel).clear()
1110
+
1111
def watch_debug_panel_visible(self, visible: bool) -> None:
    """Apply a change of ``debug_panel_visible`` to the debug container.

    Args:
        visible: New value; assigned to the container's ``display``
    """
    self.query_one("#debug-container", Container).display = visible
1115
+
1116
def action_toggle_debug(self) -> None:
    """Flip ``debug_panel_visible`` and log the resulting state."""
    self.debug_panel_visible = not self.debug_panel_visible
    # Re-read the attribute so the log reflects the value actually stored.
    if self.debug_panel_visible:
        state = "shown"
    else:
        state = "hidden"
    self._debug(f"Debug panel {state}")
1121
+
1122
def _escape_rich_markup(self, s: str) -> str:
    """Prefix every ``[`` and ``]`` in ``s`` with a backslash.

    Args:
        s: Text that should not be interpreted as Rich markup

    Returns:
        Copy of ``s`` with both bracket characters backslash-escaped
    """
    bracket_escapes = str.maketrans({"[": "\\[", "]": "\\]"})
    return s.translate(bracket_escapes)
1125
+
1126
def _debug(self, message: str) -> None:
    """Write ``message`` to the debug panel, prefixed with the current time.

    Args:
        message: Plain text to log; its brackets are escaped before display
    """
    debug_panel = self.query_one("#debug-panel", DebugPanel)
    stamp = datetime.now().strftime("%H:%M:%S")
    safe_message = self._escape_rich_markup(message)
    debug_panel.append(f"[dim]{stamp}[/dim] {safe_message}")
1131
+
1132
def _format_stream_item(self, result: Any):
    """Pick the display formatter that matches the type of ``result``.

    Args:
        result: A single streamed result

    Returns:
        Output of ``_format_dict`` for dicts, of ``_format_html_element`` for
        HTML elements, and ``str(result)`` for anything else
    """
    if isinstance(result, dict):
        return self._format_dict(result)
    if isinstance(result, HtmlElement):
        return self._format_html_element(result)
    return str(result)
1140
+
1141
def _format_html_element(self, elem: HtmlElement) -> str:
    """Render an HTML element as a short, markup-safe preview.

    The element is serialized with ``tostring``, cut to 300 characters
    (with ``...`` appended when cut), and every ``[`` is backslash-escaped.

    Args:
        elem: HTML element to format

    Returns:
        The preview wrapped in green markup, or a yellow placeholder naming
        the element's tag and the error if formatting raised
    """
    max_chars = 300
    try:
        markup = tostring(elem, encoding='unicode', method='html')
        preview = markup if len(markup) <= max_chars else markup[:max_chars] + "..."
        # Escape opening brackets so the HTML is not parsed as markup.
        escaped = preview.replace("[", "\\[")
    except Exception as exc:
        return f" [yellow]<{elem.tag}> (error formatting: {exc})[/yellow]"
    return f" [green]{escaped}[/green]"
1166
+
1167
def _format_dict(self, d: dict) -> str:
    """Render a dictionary as a brace-delimited block, one entry per line.

    String values longer than 100 characters are cut to 100 characters and
    suffixed with ``...`` before being shown.

    Args:
        d: Dictionary to format

    Returns:
        Multi-line string with the ``repr`` of each key and value
    """
    def clip(item):
        # Only long strings are shortened; other values pass through as-is.
        if isinstance(item, str) and len(item) > 100:
            return item[:100] + "..."
        return item

    entries = [f" {name!r}: {clip(item)!r}," for name, item in d.items()]
    return "\n".join([" {", *entries, " }"])
1183
+
1184
def main():
    """Start the wxpath TUI.

    Serves as the entry point of the ``wxpath-tui`` command: builds the
    application object and runs it.

    Example:
        Run from command line::

            $ wxpath-tui

    Note:
        The call does not return until the user quits the application with
        Ctrl+Q or closes the terminal.
    """
    WXPathTUI().run()
1201
+
1202
+
1203
# Script entry point: start the TUI only when this module is run directly
# (for example via ``python -m wxpath.tui``), not when it is imported.
if __name__ == "__main__":
    main()