wxpath 0.4.1__py3-none-any.whl → 0.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wxpath/__init__.py +2 -0
- wxpath/cli.py +6 -0
- wxpath/core/models.py +1 -0
- wxpath/core/ops.py +9 -12
- wxpath/core/parser.py +92 -23
- wxpath/core/runtime/engine.py +36 -3
- wxpath/core/runtime/helpers.py +6 -3
- wxpath/http/client/__init__.py +1 -1
- wxpath/http/client/crawler.py +17 -5
- wxpath/http/client/response.py +7 -1
- wxpath/http/policy/retry.py +2 -2
- wxpath/integrations/__init__.py +0 -0
- wxpath/integrations/langchain/__init__.py +0 -0
- wxpath/integrations/langchain/examples/basic_rag.py +85 -0
- wxpath/integrations/langchain/examples/rolling_window_rag.py +218 -0
- wxpath/integrations/langchain/loader.py +60 -0
- wxpath/patches.py +215 -5
- wxpath/settings.py +3 -1
- wxpath/tui.py +1204 -0
- wxpath/tui_settings.py +151 -0
- wxpath/util/cleaners.py +31 -0
- wxpath/util/common_paths.py +22 -0
- wxpath/util/logging.py +3 -7
- {wxpath-0.4.1.dist-info → wxpath-0.5.0.dist-info}/METADATA +71 -8
- wxpath-0.5.0.dist-info/RECORD +44 -0
- {wxpath-0.4.1.dist-info → wxpath-0.5.0.dist-info}/WHEEL +1 -1
- {wxpath-0.4.1.dist-info → wxpath-0.5.0.dist-info}/entry_points.txt +1 -0
- wxpath-0.4.1.dist-info/RECORD +0 -35
- {wxpath-0.4.1.dist-info → wxpath-0.5.0.dist-info}/licenses/LICENSE +0 -0
- {wxpath-0.4.1.dist-info → wxpath-0.5.0.dist-info}/top_level.txt +0 -0
wxpath/tui_settings.py
ADDED
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
"""Persistent TUI settings: load/save crawler-related options from a config file.
|
|
2
|
+
|
|
3
|
+
pre-1.0.0 - APIs and contracts may change.
|
|
4
|
+
|
|
5
|
+
Settings are stored in a single JSON file (e.g. ~/.config/wxpath/tui_settings.json).
|
|
6
|
+
The schema is defined in TUISettingsSchema; adding a new setting requires one new
|
|
7
|
+
entry in the schema and, if needed, use of that value where the crawler/engine
|
|
8
|
+
is created.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from __future__ import annotations
|
|
12
|
+
|
|
13
|
+
import json
|
|
14
|
+
import os
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
from typing import Any
|
|
17
|
+
|
|
18
|
+
from wxpath.settings import CRAWLER_SETTINGS
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _config_dir() -> Path:
    """Return the wxpath config directory, creating it if needed.

    The base directory is ``$XDG_CONFIG_HOME`` when that variable holds an
    absolute path, otherwise ``~/.config``. The XDG Base Directory
    specification requires relative values to be treated as invalid and
    ignored; honouring them would make the settings location depend on the
    current working directory.

    Returns:
        Path to the ``wxpath`` directory under the chosen base. The directory
        (and any missing parents) is created as a side effect.
    """
    xdg_env = os.environ.get("XDG_CONFIG_HOME")
    base = Path(xdg_env) if xdg_env else None
    if base is None or not base.is_absolute():
        # Unset, empty, or relative: fall back to the conventional default.
        base = Path.home() / ".config"
    path = base / "wxpath"
    path.mkdir(parents=True, exist_ok=True)
    return path
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def get_tui_settings_path() -> Path:
    """Return the location of the JSON file that stores the TUI settings.

    The file is named ``tui_settings.json`` and sits directly inside the
    directory returned by ``_config_dir()``.
    """
    settings_dir = _config_dir()
    return settings_dir.joinpath("tui_settings.json")
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
# Schema: a list holding one dict per setting.
# Every dict carries "key" (file/API name), "label" (display name), "type",
# "default" and "help"; "min"/"max" bounds are optional.
# Defaults are read from CRAWLER_SETTINGS so built-in values have a single
# source of truth; the literal fallback applies only when the attribute is absent.
def _schema_entry(key, label, kind, fallback, help_text, **bounds):
    """Build one schema dict; ``bounds`` holds the optional "min"/"max" limits."""
    return {
        "key": key,
        "label": label,
        "type": kind,
        "default": getattr(CRAWLER_SETTINGS, key, fallback),
        **bounds,
        "help": help_text,
    }


TUISettingsSchema: list[dict[str, Any]] = [
    _schema_entry(
        "concurrency",
        "CONCURRENCY",
        "int",
        16,
        "Maximum number of concurrent HTTP requests.",
        min=1,
        max=256,
    ),
    _schema_entry(
        "per_host",
        "PER_HOST",
        "int",
        8,
        "Maximum concurrent requests per host.",
        min=1,
        max=64,
    ),
    _schema_entry(
        "respect_robots",
        "RESPECT_ROBOTS",
        "bool",
        True,
        "Whether to respect robots.txt directives.",
    ),
    _schema_entry(
        "verify_ssl",
        "VERIFY_SSL",
        "bool",
        True,
        "Verify SSL certificates. Disable for sites with broken certificate chains.",
    ),
]
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def _defaults_from_schema() -> dict[str, Any]:
    """Map every schema key to the default value declared for it."""
    defaults: dict[str, Any] = {}
    for entry in TUISettingsSchema:
        defaults[entry["key"]] = entry["default"]
    return defaults
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
def validate_tui_settings(settings: dict[str, Any]) -> list[str]:
    """Check every entry of *settings* against the schema.

    Each key/value pair is passed to ``_validate_value``; the message of any
    ``ValueError`` it raises (including the one for a key that is not in the
    schema) is collected.

    Returns:
        The collected error messages, in iteration order of *settings*.
        The list is empty when every entry is valid.
    """
    problems: list[str] = []
    for name in settings:
        try:
            _validate_value(name, settings[name], TUISettingsSchema)
        except ValueError as exc:
            problems.append(str(exc))
    return problems
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _validate_value(key: str, value: Any, schema: list[dict[str, Any]]) -> Any:
    """Validate and coerce a single setting value.

    Args:
        key: Setting name; must equal the ``"key"`` of an entry in *schema*.
        value: Raw value, e.g. as decoded from JSON or typed by a user.
        schema: Schema entries. Each is a dict with at least ``"key"`` and
            ``"type"``; ``"int"`` entries may carry ``"min"``/``"max"`` bounds.

    Returns:
        An ``int`` for ``"int"`` entries, a ``bool`` for ``"bool"`` entries,
        and *value* unchanged for any other type.

    Raises:
        ValueError: If *key* is not in *schema*, if the value cannot be
            converted to an integer, or if the integer is out of bounds.
    """
    entry = next((e for e in schema if e["key"] == key), None)
    if entry is None:
        raise ValueError(f"Unknown setting: {key}")
    t = entry["type"]
    if t == "int":
        try:
            v = int(value)
        except (TypeError, ValueError, OverflowError):
            # OverflowError comes from int(float("inf")), which json.loads
            # yields for `Infinity` or an out-of-range literal such as 1e999.
            # Report it like any other non-integer input.
            raise ValueError(f"{key}: expected integer, got {type(value).__name__}") from None
        min_v = entry.get("min")
        max_v = entry.get("max")
        if min_v is not None and v < min_v:
            raise ValueError(f"{key}: must be >= {min_v}")
        if max_v is not None and v > max_v:
            raise ValueError(f"{key}: must be <= {max_v}")
        return v
    if t == "bool":
        if isinstance(value, bool):
            return value
        if isinstance(value, str):
            # Only these spellings count as true; any other string is False.
            return value.strip().lower() in ("1", "true", "yes", "on")
        return bool(value)
    return value
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def load_tui_settings() -> dict[str, Any]:
    """Load TUI settings from the config file.

    Loading is best-effort and never raises for a bad file: if the file is
    missing, unreadable, not valid UTF-8, not valid JSON, or its top-level
    value is not an object, the schema defaults are returned. Individual keys
    that are absent or fail validation also fall back to their defaults.

    Returns:
        A dict with exactly the schema keys, each holding either the
        validated value from the file or the schema default.
    """
    path = get_tui_settings_path()
    defaults = _defaults_from_schema()
    try:
        raw = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing or unreadable file. ValueError: covers both
        # json.JSONDecodeError and UnicodeDecodeError (undecodable bytes).
        return defaults
    if not isinstance(raw, dict):
        return defaults
    result = dict(defaults)
    for key in defaults:
        if key not in raw:
            continue
        try:
            result[key] = _validate_value(key, raw[key], TUISettingsSchema)
        except (ValueError, OverflowError):
            # Invalid stored value (OverflowError: a JSON `Infinity` given to
            # an int setting) - keep the default for this key.
            continue
    return result
|
|
141
|
+
|
|
142
|
+
|
|
143
|
+
def save_tui_settings(settings: dict[str, Any]) -> None:
    """Save TUI settings to the config file.

    Only schema keys are written; keys in *settings* that are not in the
    schema are dropped, and schema keys missing from *settings* are written
    with their schema default. Values are written as given (not validated).

    Args:
        settings: Mapping of setting key to value.
    """
    path = get_tui_settings_path()
    # Build the defaults once and walk them in schema order so the file has
    # a stable key order from run to run.
    defaults = _defaults_from_schema()
    to_write = {key: settings.get(key, default) for key, default in defaults.items()}
    path.write_text(json.dumps(to_write, indent=2), encoding="utf-8")
|
wxpath/util/cleaners.py
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
from collections import Counter
|
|
2
|
+
|
|
3
|
+
from wxpath.util.common_paths import XPATH_PATH_TO_TEXT_NODE_PARENTS
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def main_text_extractor(element):
    """Extract the main body text of a parsed HTML document.

    Inspired by my eatiht implementation:
    https://github.com/rodricios/eatiht

    Elements selected by ``XPATH_PATH_TO_TEXT_NODE_PARENTS`` are grouped by
    the path of their parent. The most frequent parent path is taken as the
    main-content container, and the text of every selected element whose path
    contains that parent path is joined with single spaces.

    Args:
        element: An object exposing ``xpath()`` and either ``getroot()``
            (tree-like) or ``getroottree()`` (element-like). Presumably an
            lxml tree or element - confirm against callers.

    Returns:
        The extracted text, or an empty string when no element matches.
    """
    try:
        xpath_finder = element.getroot().getroottree().getpath
    except AttributeError:
        # Not a tree object; treat it as an element.
        xpath_finder = element.getroottree().getpath

    nodes_with_text = element.xpath(XPATH_PATH_TO_TEXT_NODE_PARENTS)
    if not nodes_with_text:
        # Nothing matched; there is no "most common" parent to pick.
        return ''

    sent_xpath_pairs = [(n, xpath_finder(n)) for n in nodes_with_text]

    parent_paths = [p.rsplit('/', 1)[0] for _, p in sent_xpath_pairs]

    # The parent path that holds the most text-bearing elements wins.
    max_path = Counter(parent_paths).most_common(1)[0][0]

    article_text = ' '.join(
        ''.join(s.xpath('.//text()'))
        for s, x in sent_xpath_pairs
        if max_path in x
    )

    return article_text
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# Boolean XPath tests applied to a link value. "{0}" is left in the text as a
# placeholder (presumably filled with the site's domain via str.format -
# confirm against callers).
XPATH_BOOL_INTERNAL_LINKS = " or ".join(
    [
        " not(starts-with(., 'http'))",  # Relative links
        " contains(., '://{0}')",  # Root domain match
        " contains(., '.{0}')",  # Subdomain match
    ]
)
XPATH_BOOL_EXTERNAL_LINKS = f"not({XPATH_BOOL_INTERNAL_LINKS})"

# Full paths selecting anchor hrefs; the internal test allows for false positives.
XPATH_PATH_TO_INTERNAL_LINKS = f"//a/@href[{XPATH_BOOL_INTERNAL_LINKS}]"
XPATH_PATH_TO_EXTERNAL_LINKS = f"//a/@href[{XPATH_BOOL_EXTERNAL_LINKS}]"
|
|
11
|
+
|
|
12
|
+
# XPath selecting the parent element (`/..`) of every text node under <body>
# whose whitespace-normalized length exceeds 20 characters. Text that sits
# directly inside script, noscript, style, i, b, strong, span or a elements
# is excluded.
# NOTE: this is one string literal joined with backslash-newlines, so any
# whitespace at the start of a continuation line becomes part of the string.
XPATH_PATH_TO_TEXT_NODE_PARENTS = '//body\
//*[not(\
self::script or \
self::noscript or \
self::style or \
self::i or \
self::b or \
self::strong or \
self::span or \
self::a)] \
/text()[string-length(normalize-space()) > 20]/..'
|
wxpath/util/logging.py
CHANGED
|
@@ -59,13 +59,9 @@ def configure_logging(level: str | int = "INFO", **overrides) -> None:
|
|
|
59
59
|
|
|
60
60
|
Call this once in an application entry-point **or** rely on defaults.
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
"DEBUG"|"INFO"|... or `logging.DEBUG`, overrides the root wxpath logger.
|
|
66
|
-
overrides
|
|
67
|
-
Dict that is merged (shallow) into the default dictConfig.
|
|
68
|
-
Lets advanced users swap formatters/handlers.
|
|
62
|
+
Args:
|
|
63
|
+
level (str | int): Logging level to configure. Defaults to "INFO".
|
|
64
|
+
**overrides: Additional logging configuration overrides
|
|
69
65
|
"""
|
|
70
66
|
conf = {**_DEFAULT_LOGGING_CONF, **overrides}
|
|
71
67
|
conf["loggers"]["wxpath"]["level"] = level
|
|
@@ -1,9 +1,14 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: wxpath
|
|
3
|
-
Version: 0.
|
|
3
|
+
Version: 0.5.0
|
|
4
4
|
Summary: wxpath - a declarative web crawler and data extractor
|
|
5
5
|
Author-email: Rodrigo Palacios <rodrigopala91@gmail.com>
|
|
6
6
|
License-Expression: MIT
|
|
7
|
+
Project-URL: Homepage, https://rodricios.github.io/wxpath
|
|
8
|
+
Project-URL: Documentation, https://rodricios.github.io/wxpath
|
|
9
|
+
Project-URL: Repository, https://github.com/rodricios/wxpath
|
|
10
|
+
Project-URL: Issues, https://github.com/rodricios/wxpath/issues
|
|
11
|
+
Project-URL: Changelog, https://github.com/rodricios/wxpath/blob/main/CHANGELOG.md
|
|
7
12
|
Requires-Python: >=3.10
|
|
8
13
|
Description-Content-Type: text/markdown
|
|
9
14
|
License-File: LICENSE
|
|
@@ -17,16 +22,55 @@ Provides-Extra: cache-sqlite
|
|
|
17
22
|
Requires-Dist: aiohttp-client-cache[sqlite]; extra == "cache-sqlite"
|
|
18
23
|
Provides-Extra: cache-redis
|
|
19
24
|
Requires-Dist: aiohttp-client-cache[redis]; extra == "cache-redis"
|
|
25
|
+
Provides-Extra: llm
|
|
26
|
+
Requires-Dist: langchain>=1.0.0; extra == "llm"
|
|
27
|
+
Requires-Dist: langchain-core>=1.0.0; extra == "llm"
|
|
28
|
+
Requires-Dist: langchain-ollama>=1.0.0; extra == "llm"
|
|
29
|
+
Requires-Dist: langchain-community>=0.4.0; extra == "llm"
|
|
30
|
+
Requires-Dist: langchain-chroma>=1.0.0; extra == "llm"
|
|
31
|
+
Requires-Dist: chromadb>=1.0.0; extra == "llm"
|
|
32
|
+
Requires-Dist: langchain-text-splitters>=1.1.0; extra == "llm"
|
|
20
33
|
Provides-Extra: test
|
|
21
34
|
Requires-Dist: pytest>=7.0; extra == "test"
|
|
22
35
|
Requires-Dist: pytest-asyncio>=0.23; extra == "test"
|
|
23
36
|
Provides-Extra: dev
|
|
24
37
|
Requires-Dist: ruff; extra == "dev"
|
|
38
|
+
Provides-Extra: docs
|
|
39
|
+
Requires-Dist: mkdocs>=1.5; extra == "docs"
|
|
40
|
+
Requires-Dist: mkdocs-material>=9.0; extra == "docs"
|
|
41
|
+
Requires-Dist: mkdocstrings[python]>=0.24; extra == "docs"
|
|
42
|
+
Requires-Dist: mkdocs-macros-plugin>=1.0; extra == "docs"
|
|
43
|
+
Requires-Dist: mkdocs-resize-images>=1.0; extra == "docs"
|
|
44
|
+
Requires-Dist: mkdocs-glightbox; extra == "docs"
|
|
45
|
+
Requires-Dist: pyyaml>=6.0; extra == "docs"
|
|
46
|
+
Provides-Extra: tui
|
|
47
|
+
Requires-Dist: textual>=1.0.0; extra == "tui"
|
|
48
|
+
Requires-Dist: aiohttp-client-cache>=0.14.0; extra == "tui"
|
|
49
|
+
Requires-Dist: aiohttp-client-cache[sqlite]; extra == "tui"
|
|
25
50
|
Dynamic: license-file
|
|
26
51
|
|
|
27
|
-
# **wxpath** - declarative web
|
|
52
|
+
# **wxpath** - declarative web graph traversal with XPath
|
|
28
53
|
|
|
29
|
-
[](https://www.python.org/downloads/release/python-3100/)
|
|
54
|
+
[](https://www.python.org/downloads/release/python-3100/) [](https://rodricios.github.io/wxpath)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
> NEW: [TUI](https://rodricios.github.io/wxpath/tui/quickstart.md) - Interactive terminal interface (powered by Textual) for testing wxpath expressions and exporting data.
|
|
58
|
+
|
|
59
|
+

|
|
60
|
+
|
|
61
|
+
## Install
|
|
62
|
+
|
|
63
|
+
Requires Python 3.10+.
|
|
64
|
+
|
|
65
|
+
```
|
|
66
|
+
pip install wxpath
|
|
67
|
+
# For TUI support
|
|
68
|
+
pip install wxpath[tui]
|
|
69
|
+
```
|
|
70
|
+
---
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
## What is wxpath?
|
|
30
74
|
|
|
31
75
|
**wxpath** is a declarative web crawler where traversal is expressed directly in XPath. Instead of writing imperative crawl loops, wxpath lets you describe what to follow and what to extract in a single expression. **wxpath** executes that expression concurrently, breadth-first-*ish*, and streams results as they are discovered.
|
|
32
76
|
|
|
@@ -35,14 +79,14 @@ This expression fetches a page, extracts links, and streams them concurrently -
|
|
|
35
79
|
```python
|
|
36
80
|
import wxpath
|
|
37
81
|
|
|
38
|
-
expr = "url('https://
|
|
82
|
+
expr = "url('https://quotes.toscrape.com')//a/@href"
|
|
39
83
|
|
|
40
84
|
for link in wxpath.wxpath_async_blocking_iter(expr):
|
|
41
85
|
print(link)
|
|
42
86
|
```
|
|
43
87
|
|
|
44
88
|
|
|
45
|
-
By introducing the `url(...)` operator and the `///` syntax, wxpath's engine is able to perform
|
|
89
|
+
By introducing the `url(...)` operator and the `///` syntax, wxpath's engine is able to perform recursive (or paginated) web crawling and extraction:
|
|
46
90
|
|
|
47
91
|
```python
|
|
48
92
|
import wxpath
|
|
@@ -62,15 +106,28 @@ for item in wxpath.wxpath_async_blocking_iter(path_expr, max_depth=1):
|
|
|
62
106
|
|
|
63
107
|
Most web scrapers force you to write crawl control flow first, and extraction second.
|
|
64
108
|
|
|
65
|
-
**wxpath**
|
|
109
|
+
**wxpath** merges those two steps into one:
|
|
66
110
|
- **You describe traversal declaratively**
|
|
67
111
|
- **Extraction is expressed inline**
|
|
68
112
|
- **The engine handles scheduling, concurrency, and deduplication**
|
|
69
113
|
|
|
70
114
|
|
|
115
|
+
### RAG-Ready Output
|
|
116
|
+
|
|
117
|
+
Extract clean, structured JSON hierarchies directly from the graph - feed your LLMs signal, not noise. Refer to [LangChain Integration](https://rodricios.github.io/wxpath/api/integrations/langchain/) for more details.
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
### Deterministic
|
|
121
|
+
|
|
122
|
+
**wxpath** is deterministic (read: not powered by LLMs). While we can't guarantee the network is stable, we can guarantee the traversal is.
|
|
123
|
+
|
|
124
|
+
## Documentation (WIP)
|
|
125
|
+
|
|
126
|
+
Documentation is now available [here](https://rodricios.github.io/wxpath/).
|
|
127
|
+
|
|
71
128
|
## Contents
|
|
72
129
|
|
|
73
|
-
- [Example](#example)
|
|
130
|
+
- [Example: Knowledge Graph](#example)
|
|
74
131
|
- [Language Design](DESIGN.md)
|
|
75
132
|
- [`url(...)` and `///url(...)` Explained](#url-and-url-explained)
|
|
76
133
|
- [General flow](#general-flow)
|
|
@@ -80,6 +137,7 @@ Most web scrapers force you to write crawl control flow first, and extraction se
|
|
|
80
137
|
- [XPath 3.1](#xpath-31-by-default)
|
|
81
138
|
- [Progress Bar](#progress-bar)
|
|
82
139
|
- [CLI](#cli)
|
|
140
|
+
- [TUI](#tui)
|
|
83
141
|
- [Persistence and Caching](#persistence-and-caching)
|
|
84
142
|
- [Settings](#settings)
|
|
85
143
|
- [Hooks (Experimental)](#hooks-experimental)
|
|
@@ -294,12 +352,17 @@ Command line options:
|
|
|
294
352
|
--cache [true|false] (Default: False) Persist crawl results to a local database
|
|
295
353
|
```
|
|
296
354
|
|
|
355
|
+
## TUI
|
|
356
|
+
|
|
357
|
+
**wxpath** provides a terminal interface (TUI) for interactive expression testing and data extraction.
|
|
358
|
+
|
|
359
|
+
See [TUI Quickstart](https://rodricios.github.io/wxpath/tui/quickstart.md) for more details.
|
|
297
360
|
|
|
298
361
|
## Persistence and Caching
|
|
299
362
|
|
|
300
363
|
**wxpath** optionally persists crawl results to a local database. This is especially useful when you're crawling a large number of URLs, and you decide to pause the crawl, change extraction expressions, or otherwise need to restart the crawl.
|
|
301
364
|
|
|
302
|
-
**wxpath** supports two backends: sqlite and redis. SQLite is great for small-scale crawls, with a single worker (i.e., `engine.crawler.concurrency == 1`). Redis is great for large-scale crawls, with multiple workers. You will
|
|
365
|
+
**wxpath** supports two backends: sqlite and redis. SQLite is great for small-scale crawls, with a single worker (i.e., `engine.crawler.concurrency == 1`). Redis is great for large-scale crawls, with multiple workers. You will encounter a warning if `min(engine.crawler.concurrency, engine.crawler.per_host) > 1` when using the sqlite backend.
|
|
303
366
|
|
|
304
367
|
To use, you must install the appropriate optional dependency:
|
|
305
368
|
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
wxpath/__init__.py,sha256=nKo2ggHdlNUhTPzZlPOW-XssyYjVar6XcqaFsZLxsKU,304
|
|
2
|
+
wxpath/cli.py,sha256=P2MU6UzWHiN-5roo-GdEb4OpTlCp0XA3AlFmjL7fI1o,4544
|
|
3
|
+
wxpath/patches.py,sha256=nNFpNuapF30aVMVz6K1iJDX2RWmiyiECx2_UprcwiT4,8417
|
|
4
|
+
wxpath/settings.py,sha256=dBSIxVPlcsP3IYuhuHG0uxZaNXLzcNUdBxEaZsZHcAc,3862
|
|
5
|
+
wxpath/tui.py,sha256=CG8xvGnYNbruD4lw50Agu8RKKUJEpEl0WG0SyLBW4c8,42786
|
|
6
|
+
wxpath/tui_settings.py,sha256=rM2IBeOzQUIzjk2Ds1Jlnvb7IUtdJdKMN2j3GHk7Z9M,5051
|
|
7
|
+
wxpath/core/__init__.py,sha256=U9_In2iRaZrpiIVavIli1M59gCB6Kn1en-1Fza-qIiI,257
|
|
8
|
+
wxpath/core/dom.py,sha256=X0L3n8jRfO5evEypDaJTD-NQ3cLXWvnEUVERAHo3vV0,701
|
|
9
|
+
wxpath/core/models.py,sha256=xsNY9ZmUILB5_O1GHRkn3cLBtPs3-krguU5NlqFe0bM,1664
|
|
10
|
+
wxpath/core/ops.py,sha256=4vzLOqRM_LbXc1cAnWCuKGt2m_pbvyHO0p5ee2Upjog,9569
|
|
11
|
+
wxpath/core/parser.py,sha256=ufUSEfyR6aO10pV_E39-uSiLQfYvngNQnHcs1GJlpbA,21392
|
|
12
|
+
wxpath/core/runtime/__init__.py,sha256=_iCgkIWxXvxzQcenHOsjYGsk74HboTIYWOtgM8GtCyc,86
|
|
13
|
+
wxpath/core/runtime/engine.py,sha256=ocGBTIHdFgOh3LzkgEUKZ59Ozn3nKqvBBAloj4Ln5D4,19229
|
|
14
|
+
wxpath/core/runtime/helpers.py,sha256=RFLonAjRsL_CHFV0biUsgk0lOL8MKvHXdFg7p65xEP8,1554
|
|
15
|
+
wxpath/hooks/__init__.py,sha256=9JG63e4z_8CZLWugFcY786hebaEEPZ5FmZhyDHat-98,294
|
|
16
|
+
wxpath/hooks/builtin.py,sha256=GJ4w1C9djWNzAmAA3U0qI9OoCOeC5R8tEGtWXJVHSYs,4125
|
|
17
|
+
wxpath/hooks/registry.py,sha256=-D11f_mMboeVAH8qsTkbKTQ0aGNaQ7F6zbXDsOIYxN0,4513
|
|
18
|
+
wxpath/http/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
wxpath/http/stats.py,sha256=aqZWuybc5RCv-AmKdNbEX4uw1YvZtFoE6591UfukZns,3319
|
|
20
|
+
wxpath/http/client/__init__.py,sha256=9z_BWpU6gd-eTpoxIygsZZ84LJPZ1SnIlyL9SfzZ1as,203
|
|
21
|
+
wxpath/http/client/cache.py,sha256=cHS4XlfOStoHTG83ypNITk3Oc0lqGoTRqV0_UWBWQFY,1811
|
|
22
|
+
wxpath/http/client/crawler.py,sha256=-uRMhMV3_dYW9oxKEsvaVMDmglBuZC23EhuppWQbDkw,12579
|
|
23
|
+
wxpath/http/client/request.py,sha256=cpqo_ASG_wKz0q6m33lsE0kIIthfANt8fx7ptxlyehY,1057
|
|
24
|
+
wxpath/http/client/response.py,sha256=gvl7_2jITF1EUrUD6HLq7G6R7GLF0DxGeuHTznFK1hc,487
|
|
25
|
+
wxpath/http/policy/backoff.py,sha256=NwdUR6bRe1RtUGSJOktj-p8IyC1l9xu_-Aa_Gj_u5sw,321
|
|
26
|
+
wxpath/http/policy/retry.py,sha256=jx5t2OdnHsDzuYH120N6sc1RDZDfQD1OE7RCgLD9tAo,966
|
|
27
|
+
wxpath/http/policy/robots.py,sha256=vllXX9me78YB6yrDdpH_bwyuR5QoC9uveGEl8PmHM9Q,3134
|
|
28
|
+
wxpath/http/policy/throttler.py,sha256=wydMFV-0mxpHSI5iYkLfE78oY4z_fF8jW9MqCeb8G54,3014
|
|
29
|
+
wxpath/integrations/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
30
|
+
wxpath/integrations/langchain/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
+
wxpath/integrations/langchain/loader.py,sha256=Iio4Fv03dYzNCFmaGl2pzmjR72QBziTcpfX4BtnoiZI,1907
|
|
32
|
+
wxpath/integrations/langchain/examples/basic_rag.py,sha256=10V7-HjyRYqEE4S11S2HpjNwKfwi93rUbyFGco0AM04,2957
|
|
33
|
+
wxpath/integrations/langchain/examples/rolling_window_rag.py,sha256=fKoOQNy4RjtCHj6N5dTd3AjrWutmLmoRSblK0VBD33Q,8174
|
|
34
|
+
wxpath/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
+
wxpath/util/cleaners.py,sha256=JtUwCKjSJV-qw2CBrcB1oYswBDeXiqndGiz3-MlxeG0,946
|
|
36
|
+
wxpath/util/common_paths.py,sha256=Y-0yq6IMjlSl1t4GbmK9TeJFTQ-MVvJOINhglvD4djA,980
|
|
37
|
+
wxpath/util/logging.py,sha256=hgN4OC1y2oZWewtL-O-Ei_1lOaadH9eSyo0Iz2t_s1c,2858
|
|
38
|
+
wxpath/util/serialize.py,sha256=uUs4C9VErpFd97smBM2bRWo2nW25kCgKdsMrVtVxhg8,575
|
|
39
|
+
wxpath-0.5.0.dist-info/licenses/LICENSE,sha256=AVBZLhdWmqxm-f-dy5prVB1E-solHWoP2EXEIV_o-00,1076
|
|
40
|
+
wxpath-0.5.0.dist-info/METADATA,sha256=uBaoDrnCdTvzmt3yZ2ywCImWCeg_zOTQKPIgDMA3LI4,22002
|
|
41
|
+
wxpath-0.5.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
42
|
+
wxpath-0.5.0.dist-info/entry_points.txt,sha256=CSr67nPxU_tZ_XdAdDmvW9b9VRUhFAhGhEC41YNJEfE,72
|
|
43
|
+
wxpath-0.5.0.dist-info/top_level.txt,sha256=uFCcveG78mnefxRGvYsR2OexDlKR_Z1UD4vZijUcex8,7
|
|
44
|
+
wxpath-0.5.0.dist-info/RECORD,,
|
wxpath-0.4.1.dist-info/RECORD
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
wxpath/__init__.py,sha256=w1hFE_VSIYq_TSFLoPfp6MJbG1sA6BeChX6PYsXIK4o,265
|
|
2
|
-
wxpath/cli.py,sha256=e0-mHkpuC1B_WyJw7wH43UBmtuF8oL8phQ4GEzUX0Ns,4332
|
|
3
|
-
wxpath/patches.py,sha256=u0dOL-K-gvdO9SJvzGrqR9Zou6XduWjl6R7mzIcZtJg,2130
|
|
4
|
-
wxpath/settings.py,sha256=a4TlCAOvmO03oOXiiYQzIDBMZU0XpTqntwnjVsumnas,3809
|
|
5
|
-
wxpath/core/__init__.py,sha256=U9_In2iRaZrpiIVavIli1M59gCB6Kn1en-1Fza-qIiI,257
|
|
6
|
-
wxpath/core/dom.py,sha256=X0L3n8jRfO5evEypDaJTD-NQ3cLXWvnEUVERAHo3vV0,701
|
|
7
|
-
wxpath/core/models.py,sha256=3KYt-UwfLY2FlSRUHeA_getnYaNUMPW9wRrl2CRbPso,1611
|
|
8
|
-
wxpath/core/ops.py,sha256=PTjX6c4QvCqGaByYYqaK4dte5iWO3lZzgqGrMXp6f6g,9727
|
|
9
|
-
wxpath/core/parser.py,sha256=WfjQNixBz7nWtX2O0t19MOhUJmzGMg8Qol40P6oC8zc,18827
|
|
10
|
-
wxpath/core/runtime/__init__.py,sha256=_iCgkIWxXvxzQcenHOsjYGsk74HboTIYWOtgM8GtCyc,86
|
|
11
|
-
wxpath/core/runtime/engine.py,sha256=UQ8wSr49TJibRRtXzIgXVSBvuB1VttYicKEwV4xcG6Q,17345
|
|
12
|
-
wxpath/core/runtime/helpers.py,sha256=M1i4BryCktAxeboa4LOXMTNiKVCJLDBD-KpWCQXadpw,1434
|
|
13
|
-
wxpath/hooks/__init__.py,sha256=9JG63e4z_8CZLWugFcY786hebaEEPZ5FmZhyDHat-98,294
|
|
14
|
-
wxpath/hooks/builtin.py,sha256=GJ4w1C9djWNzAmAA3U0qI9OoCOeC5R8tEGtWXJVHSYs,4125
|
|
15
|
-
wxpath/hooks/registry.py,sha256=-D11f_mMboeVAH8qsTkbKTQ0aGNaQ7F6zbXDsOIYxN0,4513
|
|
16
|
-
wxpath/http/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
-
wxpath/http/stats.py,sha256=aqZWuybc5RCv-AmKdNbEX4uw1YvZtFoE6591UfukZns,3319
|
|
18
|
-
wxpath/http/client/__init__.py,sha256=QpdmqzcznUeuFvT3IIo-LmBUUHEa2BDq9sHGAHJnDLI,202
|
|
19
|
-
wxpath/http/client/cache.py,sha256=cHS4XlfOStoHTG83ypNITk3Oc0lqGoTRqV0_UWBWQFY,1811
|
|
20
|
-
wxpath/http/client/crawler.py,sha256=UiKtc5K2KBc0bBw2fTdRHLNTa2OFoE1tZsDjR7J4Xeo,12126
|
|
21
|
-
wxpath/http/client/request.py,sha256=cpqo_ASG_wKz0q6m33lsE0kIIthfANt8fx7ptxlyehY,1057
|
|
22
|
-
wxpath/http/client/response.py,sha256=z9LQPnDN-NZRnQpIKozaWCqgpRejc6nixCr_XaPyqUQ,334
|
|
23
|
-
wxpath/http/policy/backoff.py,sha256=NwdUR6bRe1RtUGSJOktj-p8IyC1l9xu_-Aa_Gj_u5sw,321
|
|
24
|
-
wxpath/http/policy/retry.py,sha256=WSrQfCy1F7IcXFpVGDi4HTphNhFq12p4DaMO0_4dgrw,982
|
|
25
|
-
wxpath/http/policy/robots.py,sha256=vllXX9me78YB6yrDdpH_bwyuR5QoC9uveGEl8PmHM9Q,3134
|
|
26
|
-
wxpath/http/policy/throttler.py,sha256=wydMFV-0mxpHSI5iYkLfE78oY4z_fF8jW9MqCeb8G54,3014
|
|
27
|
-
wxpath/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
28
|
-
wxpath/util/logging.py,sha256=oQi8sp7yKWgXkkcJ4U4WHp7TyBCQiK4VhSXOSb8pGw0,2965
|
|
29
|
-
wxpath/util/serialize.py,sha256=uUs4C9VErpFd97smBM2bRWo2nW25kCgKdsMrVtVxhg8,575
|
|
30
|
-
wxpath-0.4.1.dist-info/licenses/LICENSE,sha256=AVBZLhdWmqxm-f-dy5prVB1E-solHWoP2EXEIV_o-00,1076
|
|
31
|
-
wxpath-0.4.1.dist-info/METADATA,sha256=LxmOTsWpspYFedvP02fDL1Wy5t1ygZKuIg2cHVQU_aY,19445
|
|
32
|
-
wxpath-0.4.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
|
|
33
|
-
wxpath-0.4.1.dist-info/entry_points.txt,sha256=FwoIOnUTl-DjPqVw-eb9EHHiiXCyRZy_mEQKFu2eb5Y,43
|
|
34
|
-
wxpath-0.4.1.dist-info/top_level.txt,sha256=uFCcveG78mnefxRGvYsR2OexDlKR_Z1UD4vZijUcex8,7
|
|
35
|
-
wxpath-0.4.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|