wxpath 0.5.1__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wxpath-0.5.1/src/wxpath.egg-info → wxpath-0.5.2}/PKG-INFO +4 -2
- {wxpath-0.5.1 → wxpath-0.5.2}/README.md +3 -1
- {wxpath-0.5.1 → wxpath-0.5.2}/pyproject.toml +1 -1
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/ops.py +7 -91
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/parser.py +1 -2
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/runtime/engine.py +7 -38
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/tui.py +7 -28
- {wxpath-0.5.1 → wxpath-0.5.2/src/wxpath.egg-info}/PKG-INFO +4 -2
- {wxpath-0.5.1 → wxpath-0.5.2}/LICENSE +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/setup.cfg +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/cli.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/dom.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/exceptions.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/models.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/runtime/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/core/runtime/helpers.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/hooks/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/hooks/builtin.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/hooks/registry.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/client/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/client/cache.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/client/crawler.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/client/request.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/client/response.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/policy/backoff.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/policy/retry.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/policy/robots.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/policy/throttler.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/http/stats.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/integrations/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/integrations/langchain/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/integrations/langchain/examples/basic_rag.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/integrations/langchain/examples/rolling_window_rag.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/integrations/langchain/loader.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/patches.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/settings.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/tui_settings.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/util/__init__.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/util/cleaners.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/util/common_paths.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/util/logging.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/util/serialize.py +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath.egg-info/SOURCES.txt +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath.egg-info/dependency_links.txt +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath.egg-info/entry_points.txt +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath.egg-info/requires.txt +0 -0
- {wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wxpath
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: wxpath - a declarative web crawler and data extractor
|
|
5
5
|
Author-email: Rodrigo Palacios <rodrigopala91@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -65,8 +65,10 @@ Requires Python 3.10+.
|
|
|
65
65
|
|
|
66
66
|
```
|
|
67
67
|
pip install wxpath
|
|
68
|
-
# For TUI support
|
|
68
|
+
# For TUI support:
|
|
69
69
|
pip install "wxpath[tui]"
|
|
70
|
+
# Immediately launch the TUI via uv:
|
|
71
|
+
uvx --from "wxpath[tui]" wxpath-tui
|
|
70
72
|
```
|
|
71
73
|
---
|
|
72
74
|
|
|
@@ -2,25 +2,11 @@ from typing import Callable, Iterable
|
|
|
2
2
|
from urllib.parse import urljoin
|
|
3
3
|
|
|
4
4
|
import elementpath
|
|
5
|
-
from elementpath import (
|
|
6
|
-
ElementPathError,
|
|
7
|
-
ElementPathSyntaxError as EPSyntaxError,
|
|
8
|
-
ElementPathTypeError as EPTypeError,
|
|
9
|
-
ElementPathZeroDivisionError,
|
|
10
|
-
ElementPathRuntimeError as EPRuntimeError,
|
|
11
|
-
MissingContextError,
|
|
12
|
-
)
|
|
13
5
|
from elementpath.datatypes import AnyAtomicType
|
|
14
6
|
from elementpath.xpath3 import XPath3Parser
|
|
15
7
|
from lxml import html
|
|
16
8
|
|
|
17
9
|
from wxpath.core.dom import get_absolute_links_from_elem_and_xpath
|
|
18
|
-
from wxpath.core.exceptions import (
|
|
19
|
-
XPathEvaluationError,
|
|
20
|
-
XPathSyntaxError,
|
|
21
|
-
XPathTypeError,
|
|
22
|
-
XPathRuntimeError,
|
|
23
|
-
)
|
|
24
10
|
from wxpath.core.models import (
|
|
25
11
|
CrawlIntent,
|
|
26
12
|
DataIntent,
|
|
@@ -133,52 +119,7 @@ def _handle_xpath(curr_elem: html.HtmlElement,
|
|
|
133
119
|
raise ValueError("Element must be provided when path_expr does not start with 'url()'.")
|
|
134
120
|
base_url = getattr(curr_elem, 'base_url', None)
|
|
135
121
|
log.debug("base url", extra={"depth": curr_depth, "op": 'xpath', "base_url": base_url})
|
|
136
|
-
|
|
137
|
-
try:
|
|
138
|
-
elems = curr_elem.xpath3(expr)
|
|
139
|
-
except EPSyntaxError as e:
|
|
140
|
-
# Parse the error message to extract line/column if available
|
|
141
|
-
# elementpath format: "... at line 1, column 7: [err:XPST0003] ..."
|
|
142
|
-
raise XPathSyntaxError(
|
|
143
|
-
f"Invalid XPath syntax: {str(e).split(': ', 1)[-1]}",
|
|
144
|
-
xpath=expr,
|
|
145
|
-
base_url=base_url,
|
|
146
|
-
element_tag=curr_elem.tag,
|
|
147
|
-
original_error=e
|
|
148
|
-
) from e
|
|
149
|
-
except EPTypeError as e:
|
|
150
|
-
raise XPathTypeError(
|
|
151
|
-
f"XPath type error: {str(e).split(': ', 1)[-1]}",
|
|
152
|
-
xpath=expr,
|
|
153
|
-
base_url=base_url,
|
|
154
|
-
element_tag=curr_elem.tag,
|
|
155
|
-
original_error=e
|
|
156
|
-
) from e
|
|
157
|
-
except ElementPathZeroDivisionError as e:
|
|
158
|
-
raise XPathRuntimeError(
|
|
159
|
-
f"Division by zero in XPath: {expr}",
|
|
160
|
-
xpath=expr,
|
|
161
|
-
base_url=base_url,
|
|
162
|
-
element_tag=curr_elem.tag,
|
|
163
|
-
original_error=e
|
|
164
|
-
) from e
|
|
165
|
-
except MissingContextError as e:
|
|
166
|
-
raise XPathRuntimeError(
|
|
167
|
-
f"XPath requires context but none provided: {expr}",
|
|
168
|
-
xpath=expr,
|
|
169
|
-
base_url=base_url,
|
|
170
|
-
element_tag=curr_elem.tag,
|
|
171
|
-
original_error=e
|
|
172
|
-
) from e
|
|
173
|
-
except ElementPathError as e:
|
|
174
|
-
# Catch-all for other elementpath errors
|
|
175
|
-
raise XPathEvaluationError(
|
|
176
|
-
f"XPath evaluation failed: {e}",
|
|
177
|
-
xpath=expr,
|
|
178
|
-
base_url=base_url,
|
|
179
|
-
element_tag=curr_elem.tag,
|
|
180
|
-
original_error=e
|
|
181
|
-
) from e
|
|
122
|
+
elems = curr_elem.xpath3(expr)
|
|
182
123
|
|
|
183
124
|
next_segments = curr_segments[1:]
|
|
184
125
|
for elem in elems:
|
|
@@ -315,37 +256,12 @@ def _handle_binary(curr_elem: html.HtmlElement | str,
|
|
|
315
256
|
base_url = getattr(curr_elem, 'base_url', None)
|
|
316
257
|
next_segments = right
|
|
317
258
|
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
)
|
|
325
|
-
except EPSyntaxError as e:
|
|
326
|
-
raise XPathSyntaxError(
|
|
327
|
-
f"Invalid XPath in binary operation: {str(e).split(': ', 1)[-1]}",
|
|
328
|
-
xpath=left.value,
|
|
329
|
-
base_url=base_url,
|
|
330
|
-
element_tag=getattr(curr_elem, 'tag', None),
|
|
331
|
-
original_error=e
|
|
332
|
-
) from e
|
|
333
|
-
except EPTypeError as e:
|
|
334
|
-
raise XPathTypeError(
|
|
335
|
-
f"XPath type error in binary operation: {str(e).split(': ', 1)[-1]}",
|
|
336
|
-
xpath=left.value,
|
|
337
|
-
base_url=base_url,
|
|
338
|
-
element_tag=getattr(curr_elem, 'tag', None),
|
|
339
|
-
original_error=e
|
|
340
|
-
) from e
|
|
341
|
-
except ElementPathError as e:
|
|
342
|
-
raise XPathEvaluationError(
|
|
343
|
-
f"XPath evaluation failed in binary operation: {e}",
|
|
344
|
-
xpath=left.value,
|
|
345
|
-
base_url=base_url,
|
|
346
|
-
element_tag=getattr(curr_elem, 'tag', None),
|
|
347
|
-
original_error=e
|
|
348
|
-
) from e
|
|
259
|
+
results = elementpath.select(
|
|
260
|
+
curr_elem,
|
|
261
|
+
left.value,
|
|
262
|
+
parser=XPath3Parser,
|
|
263
|
+
item='' if curr_elem is None else None
|
|
264
|
+
)
|
|
349
265
|
|
|
350
266
|
if isinstance(results, AnyAtomicType):
|
|
351
267
|
results = [results]
|
|
@@ -13,7 +13,6 @@ except ImportError:
|
|
|
13
13
|
|
|
14
14
|
|
|
15
15
|
TOKEN_SPEC = [
|
|
16
|
-
("WXLOOP", r"wx:loop"),
|
|
17
16
|
("NUMBER", r"\d+\.\d+"),
|
|
18
17
|
("INTEGER", r"\d+"),
|
|
19
18
|
("STRING", r"'([^'\\]|\\.)*'|\"([^\"\\]|\\.)*\""), # TODO: Rename to URL Literal
|
|
@@ -181,7 +180,7 @@ class Parser:
|
|
|
181
180
|
|
|
182
181
|
def parse_binary(self, min_prec: int) -> object:
|
|
183
182
|
"""Parse a binary expression chain honoring operator precedence."""
|
|
184
|
-
if self.token.type == "WXPATH"
|
|
183
|
+
if self.token.type == "WXPATH":
|
|
185
184
|
left = self.parse_segments()
|
|
186
185
|
else:
|
|
187
186
|
left = self.nud()
|
|
@@ -2,14 +2,13 @@ import asyncio
|
|
|
2
2
|
import contextlib
|
|
3
3
|
import inspect
|
|
4
4
|
from collections import deque
|
|
5
|
-
from typing import Any, AsyncGenerator, Iterable, Iterator
|
|
5
|
+
from typing import Any, AsyncGenerator, Iterator
|
|
6
6
|
|
|
7
7
|
from lxml.html import HtmlElement
|
|
8
8
|
from tqdm import tqdm
|
|
9
9
|
|
|
10
10
|
from wxpath import patches # noqa: F401
|
|
11
11
|
from wxpath.core import parser
|
|
12
|
-
from wxpath.core.exceptions import XPathEvaluationError
|
|
13
12
|
from wxpath.core.models import (
|
|
14
13
|
CrawlIntent,
|
|
15
14
|
CrawlTask,
|
|
@@ -146,7 +145,6 @@ class WXPathEngine(HookedEngineBase):
|
|
|
146
145
|
respect_robots: bool = True,
|
|
147
146
|
allowed_response_codes: set[int] = None,
|
|
148
147
|
allow_redirects: bool = True,
|
|
149
|
-
yield_errors: bool = False,
|
|
150
148
|
):
|
|
151
149
|
# NOTE: Will grow unbounded in large crawls. Consider a LRU cache, or bloom filter.
|
|
152
150
|
self.seen_urls: set[str] = set()
|
|
@@ -159,7 +157,6 @@ class WXPathEngine(HookedEngineBase):
|
|
|
159
157
|
self.allow_redirects = allow_redirects
|
|
160
158
|
if allow_redirects:
|
|
161
159
|
self.allowed_response_codes |= {301, 302, 303, 307, 308}
|
|
162
|
-
self.yield_errors = yield_errors
|
|
163
160
|
|
|
164
161
|
def _get_max_depth(self, bin_or_segs: Binary | Segments, max_depth: int) -> int:
|
|
165
162
|
"""Get the maximum crawl depth for a given expression. Will find a Depth
|
|
@@ -185,6 +182,7 @@ class WXPathEngine(HookedEngineBase):
|
|
|
185
182
|
expression: str,
|
|
186
183
|
max_depth: int,
|
|
187
184
|
progress: bool = False,
|
|
185
|
+
yield_errors: bool = False,
|
|
188
186
|
) -> AsyncGenerator[Any, None]:
|
|
189
187
|
"""Execute a wxpath expression concurrently and yield results.
|
|
190
188
|
|
|
@@ -269,10 +267,7 @@ class WXPathEngine(HookedEngineBase):
|
|
|
269
267
|
queue=queue,
|
|
270
268
|
pbar=pbar,
|
|
271
269
|
):
|
|
272
|
-
|
|
273
|
-
yield output
|
|
274
|
-
else:
|
|
275
|
-
yield await self.post_extract_hooks(output)
|
|
270
|
+
yield await self.post_extract_hooks(output)
|
|
276
271
|
|
|
277
272
|
# While looping asynchronous generators, you MUST make sure
|
|
278
273
|
# to check terminal conditions before re-iteration.
|
|
@@ -287,7 +282,7 @@ class WXPathEngine(HookedEngineBase):
|
|
|
287
282
|
if task is None:
|
|
288
283
|
log.warning(f"Got unexpected response from {resp.request.url}")
|
|
289
284
|
|
|
290
|
-
if self.yield_errors:
|
|
285
|
+
if yield_errors:
|
|
291
286
|
yield {
|
|
292
287
|
"__type__": "error",
|
|
293
288
|
"url": resp.request.url,
|
|
@@ -303,7 +298,7 @@ class WXPathEngine(HookedEngineBase):
|
|
|
303
298
|
if resp.error:
|
|
304
299
|
log.warning(f"Got error from {resp.request.url}: {resp.error}")
|
|
305
300
|
|
|
306
|
-
if self.yield_errors:
|
|
301
|
+
if yield_errors:
|
|
307
302
|
yield {
|
|
308
303
|
"__type__": "error",
|
|
309
304
|
"url": resp.request.url,
|
|
@@ -320,7 +315,7 @@ class WXPathEngine(HookedEngineBase):
|
|
|
320
315
|
if resp.status not in self.allowed_response_codes or not resp.body:
|
|
321
316
|
log.warning(f"Got non-200 response from {resp.request.url}")
|
|
322
317
|
|
|
323
|
-
if self.yield_errors:
|
|
318
|
+
if yield_errors:
|
|
324
319
|
yield {
|
|
325
320
|
"__type__": "error",
|
|
326
321
|
"url": resp.request.url,
|
|
@@ -418,11 +413,7 @@ class WXPathEngine(HookedEngineBase):
|
|
|
418
413
|
|
|
419
414
|
binary_or_segment = bin_or_segs if isinstance(bin_or_segs, Binary) else bin_or_segs[0]
|
|
420
415
|
operator = get_operator(binary_or_segment)
|
|
421
|
-
|
|
422
|
-
if self.yield_errors:
|
|
423
|
-
intents = _safe_iterator(operator(elem, bin_or_segs, depth))
|
|
424
|
-
else:
|
|
425
|
-
intents = operator(elem, bin_or_segs, depth)
|
|
416
|
+
intents = operator(elem, bin_or_segs, depth)
|
|
426
417
|
|
|
427
418
|
if not intents:
|
|
428
419
|
return
|
|
@@ -458,28 +449,6 @@ class WXPathEngine(HookedEngineBase):
|
|
|
458
449
|
mini_queue.append((elem, next_segments))
|
|
459
450
|
|
|
460
451
|
|
|
461
|
-
def _safe_iterator(iterable: Iterable[Any]) -> Iterator[Any]:
|
|
462
|
-
"""Wrap an iterable in a try/except block and return an iterator that yields the result or the error."""
|
|
463
|
-
it = iter(iterable)
|
|
464
|
-
while True:
|
|
465
|
-
try:
|
|
466
|
-
yield next(it)
|
|
467
|
-
except StopIteration:
|
|
468
|
-
break
|
|
469
|
-
except XPathEvaluationError as e:
|
|
470
|
-
yield {
|
|
471
|
-
"__type__": "error",
|
|
472
|
-
"reason": "xpath_evaluation_error",
|
|
473
|
-
"exception": str(e),
|
|
474
|
-
}
|
|
475
|
-
except Exception as e:
|
|
476
|
-
yield {
|
|
477
|
-
"__type__": "error",
|
|
478
|
-
"reason": "iterator_error",
|
|
479
|
-
"exception": str(e),
|
|
480
|
-
}
|
|
481
|
-
|
|
482
|
-
|
|
483
452
|
def wxpath_async(path_expr: str,
|
|
484
453
|
max_depth: int,
|
|
485
454
|
progress: bool = False,
|
|
@@ -20,7 +20,6 @@ Example:
|
|
|
20
20
|
import asyncio
|
|
21
21
|
import csv
|
|
22
22
|
import json
|
|
23
|
-
import traceback
|
|
24
23
|
from datetime import datetime
|
|
25
24
|
from pathlib import Path
|
|
26
25
|
from typing import Any, Iterable
|
|
@@ -28,7 +27,7 @@ from typing import Any, Iterable
|
|
|
28
27
|
from elementpath.xpath_tokens import XPathMap
|
|
29
28
|
from lxml.html import HtmlElement, tostring
|
|
30
29
|
from rich.console import RenderableType
|
|
31
|
-
from textual import
|
|
30
|
+
from textual import work
|
|
32
31
|
from textual.app import App, ComposeResult
|
|
33
32
|
from textual.containers import Container, Horizontal, Vertical, VerticalScroll
|
|
34
33
|
from textual.reactive import reactive
|
|
@@ -559,7 +558,6 @@ class WXPathTUI(App):
|
|
|
559
558
|
("ctrl+r", "execute", "Execute"),
|
|
560
559
|
("escape", "cancel_crawl", "Cancel Crawl"),
|
|
561
560
|
("ctrl+c", "clear", "Clear"),
|
|
562
|
-
("ctrl+shift+backspace", "clear_editor", "Clear Editor"),
|
|
563
561
|
("ctrl+d", "clear_debug", "Clear Debug"),
|
|
564
562
|
("ctrl+shift+d", "toggle_debug", "Toggle Debug"),
|
|
565
563
|
("ctrl+e", "export", "Export"),
|
|
@@ -631,7 +629,6 @@ class WXPathTUI(App):
|
|
|
631
629
|
" • Press [bold]Escape[/bold] to cancel a running crawl\n"
|
|
632
630
|
" • Press [bold]Ctrl+E[/bold] to export table (CSV/JSON)\n"
|
|
633
631
|
" • Press [bold]Ctrl+C[/bold] to clear output\n"
|
|
634
|
-
" • Press [bold]Ctrl+Shift+Backspace[/bold] to clear expression editor\n"
|
|
635
632
|
" • Press [bold]Ctrl+Shift+D[/bold] to toggle debug panel\n"
|
|
636
633
|
" • Press [bold]Ctrl+H[/bold] to configure HTTP headers\n"
|
|
637
634
|
" • Press [bold]Ctrl+Shift+S[/bold] to edit persistent settings (concurrency, robots)\n" # noqa: E501
|
|
@@ -648,7 +645,7 @@ class WXPathTUI(App):
|
|
|
648
645
|
"""Update global settings and subtitle when cache setting changes."""
|
|
649
646
|
# Update the global settings - this is what the HTTP crawler will read
|
|
650
647
|
SETTINGS.http.client.cache.enabled = bool(new_value)
|
|
651
|
-
|
|
648
|
+
print(f"Cache enabled: {SETTINGS.http.client.cache.enabled}")
|
|
652
649
|
self._update_subtitle()
|
|
653
650
|
|
|
654
651
|
def watch_custom_headers(self, new_value: dict) -> None:
|
|
@@ -661,8 +658,7 @@ class WXPathTUI(App):
|
|
|
661
658
|
|
|
662
659
|
def _update_subtitle(self) -> None:
|
|
663
660
|
"""Update subtitle with current cache, headers, and persistent settings."""
|
|
664
|
-
|
|
665
|
-
cache_state = SETTINGS.http.client.cache.enabled
|
|
661
|
+
cache_state = "ON" if self.cache_enabled else "OFF"
|
|
666
662
|
headers_count = len(self.custom_headers)
|
|
667
663
|
headers_info = f"{headers_count} custom" if headers_count > 0 else "default"
|
|
668
664
|
conc = self.tui_settings.get("concurrency", 16)
|
|
@@ -944,10 +940,7 @@ class WXPathTUI(App):
|
|
|
944
940
|
columns_initialized = False
|
|
945
941
|
column_keys: list[str] = []
|
|
946
942
|
|
|
947
|
-
async for result in engine.run(expression, max_depth=1, progress=False
|
|
948
|
-
if isinstance(result, dict) and result.get("__type__") == "error":
|
|
949
|
-
self._debug(f"Error: {result.get('reason')}: {result}")
|
|
950
|
-
continue
|
|
943
|
+
async for result in engine.run(expression, max_depth=1, progress=False):
|
|
951
944
|
count += 1
|
|
952
945
|
if count % 100 == 0:
|
|
953
946
|
self._debug(f"Received result {count} of type {type(result).__name__}")
|
|
@@ -997,16 +990,8 @@ class WXPathTUI(App):
|
|
|
997
990
|
self._executing = False
|
|
998
991
|
return
|
|
999
992
|
except Exception as e:
|
|
1000
|
-
#
|
|
1001
|
-
self.
|
|
1002
|
-
# Append error as next row of table (do not clear output panel)
|
|
1003
|
-
err_msg = f"Execution Error: {type(e).__name__}: {e}"
|
|
1004
|
-
if columns_initialized and column_keys:
|
|
1005
|
-
row = [err_msg] + [""] * (len(column_keys) - 1)
|
|
1006
|
-
data_table.add_row(*row, key=f"error-{count}")
|
|
1007
|
-
else:
|
|
1008
|
-
data_table.add_column("error", key="error")
|
|
1009
|
-
data_table.add_row(err_msg, key="error-0")
|
|
993
|
+
# Handle execution errors separately
|
|
994
|
+
self._update_output(f"[red]Execution Error:[/red] {type(e).__name__}: {e}")
|
|
1010
995
|
self._executing = False
|
|
1011
996
|
return
|
|
1012
997
|
finally:
|
|
@@ -1101,13 +1086,7 @@ class WXPathTUI(App):
|
|
|
1101
1086
|
"""Clear the output panel."""
|
|
1102
1087
|
self._update_output("Waiting for expression...")
|
|
1103
1088
|
self._debug("Cleared output panel.")
|
|
1104
|
-
|
|
1105
|
-
def action_clear_editor(self) -> None:
|
|
1106
|
-
"""Clear the expression editor (all text)."""
|
|
1107
|
-
editor = self.query_one("#expression-editor", )
|
|
1108
|
-
editor.clear()
|
|
1109
|
-
self._debug("Expression editor cleared.")
|
|
1110
|
-
|
|
1089
|
+
|
|
1111
1090
|
def _update_output(self, content: str | RenderableType) -> None:
|
|
1112
1091
|
"""Update the output panel with new content."""
|
|
1113
1092
|
# output_panel = self.query_one("#output-panel", OutputPanel)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wxpath
|
|
3
|
-
Version: 0.5.1
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: wxpath - a declarative web crawler and data extractor
|
|
5
5
|
Author-email: Rodrigo Palacios <rodrigopala91@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -65,8 +65,10 @@ Requires Python 3.10+.
|
|
|
65
65
|
|
|
66
66
|
```
|
|
67
67
|
pip install wxpath
|
|
68
|
-
# For TUI support
|
|
68
|
+
# For TUI support:
|
|
69
69
|
pip install "wxpath[tui]"
|
|
70
|
+
# Immediately launch the TUI via uv:
|
|
71
|
+
uvx --from "wxpath[tui]" wxpath-tui
|
|
70
72
|
```
|
|
71
73
|
---
|
|
72
74
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{wxpath-0.5.1 → wxpath-0.5.2}/src/wxpath/integrations/langchain/examples/rolling_window_rag.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|