wxpath 0.3.0__tar.gz → 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {wxpath-0.3.0 → wxpath-0.4.0}/PKG-INFO +86 -10
- wxpath-0.3.0/src/wxpath.egg-info/PKG-INFO → wxpath-0.4.0/README.md +78 -28
- {wxpath-0.3.0 → wxpath-0.4.0}/pyproject.toml +7 -2
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/cli.py +57 -12
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/runtime/engine.py +48 -10
- wxpath-0.4.0/src/wxpath/http/client/cache.py +43 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/client/crawler.py +106 -22
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/stats.py +6 -0
- wxpath-0.4.0/src/wxpath/settings.py +108 -0
- wxpath-0.3.0/README.md → wxpath-0.4.0/src/wxpath.egg-info/PKG-INFO +104 -9
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath.egg-info/SOURCES.txt +2 -0
- wxpath-0.4.0/src/wxpath.egg-info/requires.txt +20 -0
- wxpath-0.3.0/src/wxpath.egg-info/requires.txt +0 -10
- {wxpath-0.3.0 → wxpath-0.4.0}/LICENSE +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/setup.cfg +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/__init__.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/__init__.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/dom.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/models.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/ops.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/parser.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/runtime/__init__.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/runtime/helpers.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/hooks/__init__.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/hooks/builtin.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/hooks/registry.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/__init__.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/client/__init__.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/client/request.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/client/response.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/policy/backoff.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/policy/retry.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/policy/robots.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/http/policy/throttler.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/patches.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/util/__init__.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/util/logging.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/util/serialize.py +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath.egg-info/dependency_links.txt +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath.egg-info/entry_points.txt +0 -0
- {wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath.egg-info/top_level.txt +0 -0

{wxpath-0.3.0 → wxpath-0.4.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wxpath
-Version: 0.3.0
+Version: 0.4.0
 Summary: wxpath - a declarative web crawler and data extractor
 Author-email: Rodrigo Palacios <rodrigopala91@gmail.com>
 License-Expression: MIT
@@ -10,6 +10,13 @@ License-File: LICENSE
 Requires-Dist: lxml>=4.0
 Requires-Dist: elementpath<=5.0.3,>=5.0.0
 Requires-Dist: aiohttp<=3.12.15,>=3.8.0
+Requires-Dist: tqdm>=4.0.0
+Provides-Extra: cache
+Requires-Dist: aiohttp-client-cache>=0.14.0; extra == "cache"
+Provides-Extra: cache-sqlite
+Requires-Dist: aiohttp-client-cache[sqlite]; extra == "cache-sqlite"
+Provides-Extra: cache-redis
+Requires-Dist: aiohttp-client-cache[redis]; extra == "cache-redis"
 Provides-Extra: test
 Requires-Dist: pytest>=7.0; extra == "test"
 Requires-Dist: pytest-asyncio>=0.23; extra == "test"
@@ -38,7 +45,10 @@ NOTE: This project is in early development. Core concepts are stable, but the AP
 - [Polite Crawling](#polite-crawling)
 - [Output types](#output-types)
 - [XPath 3.1](#xpath-31-by-default)
+- [Progress Bar](#progress-bar)
 - [CLI](#cli)
+- [Persistence and Caching](#persistence-and-caching)
+- [Settings](#settings)
 - [Hooks (Experimental)](#hooks-experimental)
 - [Install](#install)
 - [More Examples](EXAMPLES.md)
@@ -47,6 +57,7 @@ NOTE: This project is in early development. Core concepts are stable, but the AP
 - [Project Philosophy](#project-philosophy)
 - [Warnings](#warnings)
 - [Commercial support / consulting](#commercial-support--consulting)
+- [Versioning](#versioning)
 - [License](#license)


@@ -54,17 +65,21 @@ NOTE: This project is in early development. Core concepts are stable, but the AP

 ```python
 import wxpath
+from wxpath.settings import CRAWLER_SETTINGS
+
+# Custom headers for politeness; necessary for some sites (e.g., Wikipedia)
+CRAWLER_SETTINGS.headers = {'User-Agent': 'my-app/0.4.0 (contact: you@example.com)'}

 # Crawl, extract fields, build a knowledge graph
 path_expr = """
 url('https://en.wikipedia.org/wiki/Expression_language')
-
-
-
-
-
-
-
+///url(//main//a/@href[starts-with(., '/wiki/') and not(contains(., ':'))])
+/map{
+'title': (//span[contains(@class, "mw-page-title-main")]/text())[1] ! string(.),
+'url': string(base-uri(.)),
+'short_description': //div[contains(@class, 'shortdescription')]/text() ! string(.),
+'forward_links': //div[@id="mw-content-text"]//a/@href ! string(.)
+}
 """

 for item in wxpath.wxpath_async_blocking_iter(path_expr, max_depth=1):
@@ -195,6 +210,17 @@ path_expr = """
 # ...]
 ```

+## Progress Bar
+
+**wxpath** provides a progress bar (via `tqdm`) to track crawl progress. This is especially useful for long-running crawls.
+
+Enable by setting `engine.run(..., progress=True)`, or pass `progress=True` to any of the `wxpath_async*(...)` functions.
+
+```python
+items = wxpath.wxpath_async_blocking("...", progress=True)
+> 100%|██████████████████████████████████████████████████████████▎| 469/471 [00:05<00:00, 72.00it/s, depth=2, yielded=457]
+```
+

 ## CLI

@@ -237,9 +263,46 @@ Command line options:
 --concurrency-per-host <concurrency> Number of concurrent fetches per host
 --header "Key:Value" Add a custom header (e.g., 'Key:Value'). Can be used multiple times.
 --respect-robots [true|false] (Default: True) Respects robots.txt
+--cache [true|false] (Default: False) Persist crawl results to a local database
+```
+
+
+## Persistence and Caching
+
+**wxpath** optionally persists crawl results to a local database. This is especially useful when you're crawling a large number of URLs, and you decide to pause the crawl, change extraction expressions, or otherwise need to restart the crawl.
+
+**wxpath** supports two backends: sqlite and redis. SQLite is great for small-scale crawls, with a single worker (i.e., `engine.crawler.concurrency == 1`). Redis is great for large-scale crawls, with multiple workers. You will encounter a warning if `min(engine.crawler.concurrency, engine.crawler.per_host) > 1` when using the sqlite backend.
+
+To use, you must install the appropriate optional dependency:
+
+```bash
+pip install wxpath[cache-sqlite]
+pip install wxpath[cache-redis]
+```
+
+Once the dependency is installed, you must enable the cache:
+
+```python
+from wxpath.settings import SETTINGS
+
+# To enable caching; sqlite is the default
+SETTINGS.http.client.cache.enabled = True
+
+# For redis backend
+SETTINGS.http.client.cache.enabled = True
+SETTINGS.http.client.cache.backend = "redis"
+SETTINGS.http.client.cache.redis.address = "redis://localhost:6379/0"
+
+# Run wxpath as usual
+items = list(wxpath_async_blocking_iter('...', max_depth=1, engine=engine))
 ```


+## Settings
+
+See [settings.py](src/wxpath/settings.py) for details of the settings.
+
+
 ## Hooks (Experimental)

 **wxpath** supports a pluggable hook system that allows you to modify the crawling and extraction behavior. You can register hooks to preprocess URLs, post-process HTML, filter extracted values, and more. Hooks will be executed in the order they are registered. Hooks may impact performance.
@@ -290,6 +353,13 @@ Requires Python 3.10+.
 pip install wxpath
 ```

+For persistence/caching, wxpath supports the following backends:
+
+```
+pip install wxpath[cache-sqlite]
+pip install wxpath[cache-redis]
+```
+

 ## More Examples

@@ -345,7 +415,7 @@ items = list(wxpath_async_blocking_iter(path_expr, max_depth=1, engine=engine))
 - Stay lightweight and composable
 - Asynchronous support for high-performance crawls

-###
+### Goals

 - URLs are deduplicated on a best-effort, per-crawl basis.
 - Crawls are intended to terminate once the frontier is exhausted or `max_depth` is reached.
@@ -356,7 +426,6 @@ items = list(wxpath_async_blocking_iter(path_expr, max_depth=1, engine=engine))

 The following features are not yet supported:

-- Persistent scheduling or crawl resumption
 - Automatic proxy rotation
 - Browser-based rendering (JavaScript execution)
 - Strict result ordering
@@ -379,6 +448,13 @@ If you want help building or operating crawlers/data feeds with wxpath (extracti

 If you like wxpath and want to support its development, please consider [donating](https://www.paypal.com/donate/?business=WDNDK6J6PJEXY&no_recurring=0&item_name=Thanks+for+using+wxpath%21+Donations+fund+development%2C+docs%2C+and+bug+fixes.+If+wxpath+saved+you+time%2C+a+small+contribution+helps%21&currency_code=USD).

+
+## Versioning
+
+**wxpath** follows [semver](https://semver.org): `<MAJOR>.<MINOR>.<PATCH>`.
+
+However, pre-1.0.0 follows `0.<MAJOR>.<MINOR|PATCH>`.
+
 ## License

 MIT

wxpath-0.3.0/src/wxpath.egg-info/PKG-INFO → wxpath-0.4.0/README.md

@@ -1,22 +1,3 @@
-Metadata-Version: 2.4
-Name: wxpath
-Version: 0.3.0
-Summary: wxpath - a declarative web crawler and data extractor
-Author-email: Rodrigo Palacios <rodrigopala91@gmail.com>
-License-Expression: MIT
-Requires-Python: >=3.10
-Description-Content-Type: text/markdown
-License-File: LICENSE
-Requires-Dist: lxml>=4.0
-Requires-Dist: elementpath<=5.0.3,>=5.0.0
-Requires-Dist: aiohttp<=3.12.15,>=3.8.0
-Provides-Extra: test
-Requires-Dist: pytest>=7.0; extra == "test"
-Requires-Dist: pytest-asyncio>=0.23; extra == "test"
-Provides-Extra: dev
-Requires-Dist: ruff; extra == "dev"
-Dynamic: license-file
-
 # **wxpath** - declarative web crawling with XPath

 [](https://www.python.org/downloads/release/python-3100/)
@@ -38,7 +19,10 @@ NOTE: This project is in early development. Core concepts are stable, but the AP
 - [Polite Crawling](#polite-crawling)
 - [Output types](#output-types)
 - [XPath 3.1](#xpath-31-by-default)
+- [Progress Bar](#progress-bar)
 - [CLI](#cli)
+- [Persistence and Caching](#persistence-and-caching)
+- [Settings](#settings)
 - [Hooks (Experimental)](#hooks-experimental)
 - [Install](#install)
 - [More Examples](EXAMPLES.md)
@@ -47,6 +31,7 @@ NOTE: This project is in early development. Core concepts are stable, but the AP
 - [Project Philosophy](#project-philosophy)
 - [Warnings](#warnings)
 - [Commercial support / consulting](#commercial-support--consulting)
+- [Versioning](#versioning)
 - [License](#license)


@@ -54,17 +39,21 @@ NOTE: This project is in early development. Core concepts are stable, but the AP

 ```python
 import wxpath
+from wxpath.settings import CRAWLER_SETTINGS
+
+# Custom headers for politeness; necessary for some sites (e.g., Wikipedia)
+CRAWLER_SETTINGS.headers = {'User-Agent': 'my-app/0.4.0 (contact: you@example.com)'}

 # Crawl, extract fields, build a knowledge graph
 path_expr = """
 url('https://en.wikipedia.org/wiki/Expression_language')
-
-
-
-
-
-
-
+///url(//main//a/@href[starts-with(., '/wiki/') and not(contains(., ':'))])
+/map{
+'title': (//span[contains(@class, "mw-page-title-main")]/text())[1] ! string(.),
+'url': string(base-uri(.)),
+'short_description': //div[contains(@class, 'shortdescription')]/text() ! string(.),
+'forward_links': //div[@id="mw-content-text"]//a/@href ! string(.)
+}
 """

 for item in wxpath.wxpath_async_blocking_iter(path_expr, max_depth=1):
@@ -195,6 +184,17 @@ path_expr = """
 # ...]
 ```

+## Progress Bar
+
+**wxpath** provides a progress bar (via `tqdm`) to track crawl progress. This is especially useful for long-running crawls.
+
+Enable by setting `engine.run(..., progress=True)`, or pass `progress=True` to any of the `wxpath_async*(...)` functions.
+
+```python
+items = wxpath.wxpath_async_blocking("...", progress=True)
+> 100%|██████████████████████████████████████████████████████████▎| 469/471 [00:05<00:00, 72.00it/s, depth=2, yielded=457]
+```
+

 ## CLI

@@ -237,9 +237,46 @@ Command line options:
 --concurrency-per-host <concurrency> Number of concurrent fetches per host
 --header "Key:Value" Add a custom header (e.g., 'Key:Value'). Can be used multiple times.
 --respect-robots [true|false] (Default: True) Respects robots.txt
+--cache [true|false] (Default: False) Persist crawl results to a local database
+```
+
+
+## Persistence and Caching
+
+**wxpath** optionally persists crawl results to a local database. This is especially useful when you're crawling a large number of URLs, and you decide to pause the crawl, change extraction expressions, or otherwise need to restart the crawl.
+
+**wxpath** supports two backends: sqlite and redis. SQLite is great for small-scale crawls, with a single worker (i.e., `engine.crawler.concurrency == 1`). Redis is great for large-scale crawls, with multiple workers. You will encounter a warning if `min(engine.crawler.concurrency, engine.crawler.per_host) > 1` when using the sqlite backend.
+
+To use, you must install the appropriate optional dependency:
+
+```bash
+pip install wxpath[cache-sqlite]
+pip install wxpath[cache-redis]
+```
+
+Once the dependency is installed, you must enable the cache:
+
+```python
+from wxpath.settings import SETTINGS
+
+# To enable caching; sqlite is the default
+SETTINGS.http.client.cache.enabled = True
+
+# For redis backend
+SETTINGS.http.client.cache.enabled = True
+SETTINGS.http.client.cache.backend = "redis"
+SETTINGS.http.client.cache.redis.address = "redis://localhost:6379/0"
+
+# Run wxpath as usual
+items = list(wxpath_async_blocking_iter('...', max_depth=1, engine=engine))
 ```


+## Settings
+
+See [settings.py](src/wxpath/settings.py) for details of the settings.
+
+
 ## Hooks (Experimental)

 **wxpath** supports a pluggable hook system that allows you to modify the crawling and extraction behavior. You can register hooks to preprocess URLs, post-process HTML, filter extracted values, and more. Hooks will be executed in the order they are registered. Hooks may impact performance.
@@ -290,6 +327,13 @@ Requires Python 3.10+.
 pip install wxpath
 ```

+For persistence/caching, wxpath supports the following backends:
+
+```
+pip install wxpath[cache-sqlite]
+pip install wxpath[cache-redis]
+```
+

 ## More Examples

@@ -345,7 +389,7 @@ items = list(wxpath_async_blocking_iter(path_expr, max_depth=1, engine=engine))
 - Stay lightweight and composable
 - Asynchronous support for high-performance crawls

-###
+### Goals

 - URLs are deduplicated on a best-effort, per-crawl basis.
 - Crawls are intended to terminate once the frontier is exhausted or `max_depth` is reached.
@@ -356,7 +400,6 @@ items = list(wxpath_async_blocking_iter(path_expr, max_depth=1, engine=engine))

 The following features are not yet supported:

-- Persistent scheduling or crawl resumption
 - Automatic proxy rotation
 - Browser-based rendering (JavaScript execution)
 - Strict result ordering
@@ -379,6 +422,13 @@ If you want help building or operating crawlers/data feeds with wxpath (extracti

 If you like wxpath and want to support its development, please consider [donating](https://www.paypal.com/donate/?business=WDNDK6J6PJEXY&no_recurring=0&item_name=Thanks+for+using+wxpath%21+Donations+fund+development%2C+docs%2C+and+bug+fixes.+If+wxpath+saved+you+time%2C+a+small+contribution+helps%21&currency_code=USD).

+
+## Versioning
+
+**wxpath** follows [semver](https://semver.org): `<MAJOR>.<MINOR>.<PATCH>`.
+
+However, pre-1.0.0 follows `0.<MAJOR>.<MINOR|PATCH>`.
+
 ## License

 MIT
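
Taken together, the README changes above introduce three user-facing knobs in 0.4.0: custom request headers via `CRAWLER_SETTINGS`, the optional response cache via `SETTINGS.http.client.cache`, and the `progress` flag. The following is a minimal sketch combining them, using only the attribute and function names shown in the snippets above, and assuming the `wxpath[cache-sqlite]` extra is installed; it is an illustration, not part of the packaged documentation.

```python
import wxpath
from wxpath.settings import CRAWLER_SETTINGS, SETTINGS

# Identify the crawler politely, as recommended in the quick-start snippet.
CRAWLER_SETTINGS.headers = {'User-Agent': 'my-app/0.4.0 (contact: you@example.com)'}

# Persist responses locally so an interrupted crawl can be resumed cheaply;
# sqlite is the default backend per the README above.
SETTINGS.http.client.cache.enabled = True

# A trimmed version of the documented quick-start expression.
path_expr = """
url('https://en.wikipedia.org/wiki/Expression_language')
/map{ 'title': (//span[contains(@class, "mw-page-title-main")]/text())[1] ! string(.) }
"""

# progress=True draws the tqdm progress bar added in this release.
for item in wxpath.wxpath_async_blocking_iter(path_expr, max_depth=1, progress=True):
    print(item)
```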

{wxpath-0.3.0 → wxpath-0.4.0}/pyproject.toml

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

 [project]
 name = "wxpath"
-version = "0.3.0"
+version = "0.4.0"
 description = "wxpath - a declarative web crawler and data extractor"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -16,10 +16,15 @@ license-files = ["LICENSE"]
 dependencies = [
 "lxml>=4.0",
 "elementpath>=5.0.0,<=5.0.3",
-"aiohttp>=3.8.0,<=3.12.15"
+"aiohttp>=3.8.0,<=3.12.15",
+"tqdm>=4.0.0"
 ]

 [project.optional-dependencies]
+cache = ["aiohttp-client-cache>=0.14.0"]
+cache-sqlite = ["aiohttp-client-cache[sqlite]"]
+cache-redis = ["aiohttp-client-cache[redis]"]
+
 test = ["pytest>=7.0", "pytest-asyncio>=0.23"]
 dev = ["ruff"]

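
Because the cache backends land as optional-dependency groups rather than hard dependencies, the cache machinery is not importable in a default install. A small defensive sketch, assuming the distribution's import name is `aiohttp_client_cache` (the import name is not stated in this diff):

```python
import importlib.util

from wxpath.settings import SETTINGS

# Only switch the cache on when the optional extra is actually installed,
# e.g. via `pip install "wxpath[cache-sqlite]"`.
if importlib.util.find_spec("aiohttp_client_cache") is not None:
    SETTINGS.http.client.cache.enabled = True
else:
    print("aiohttp-client-cache not installed; crawling without persistence")
```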

{wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/cli.py

@@ -6,6 +6,7 @@ from wxpath.core import parser as wxpath_parser
 from wxpath.core.runtime.engine import WXPathEngine, wxpath_async_blocking_iter
 from wxpath.hooks import builtin, registry
 from wxpath.http.client.crawler import Crawler
+from wxpath.settings import SETTINGS
 from wxpath.util.serialize import simplify


@@ -15,9 +16,11 @@ def main():
 arg_parser.add_argument("expression", help="The wxpath expression")
 arg_parser.add_argument("--depth", type=int, default=1, help="Recursion depth")
 # debug
-arg_parser.add_argument("--debug", action="store_true",
+arg_parser.add_argument("--debug", action="store_true",
+help="Debug mode. Provides verbose runtime output and information")
 # verbose
-arg_parser.add_argument("--verbose", action="store_true",
+arg_parser.add_argument("--verbose", action="store_true",
+help="Verbose mode. Prints CLI level information")

 arg_parser.add_argument(
 "--concurrency",
@@ -44,17 +47,27 @@ def main():
 help="Respect robots.txt",
 default=True
 )
+arg_parser.add_argument(
+"--cache",
+action="store_true",
+help="Use cache",
+default=False
+)
+arg_parser.add_argument(
+"--cache-backend",
+type=str,
+help="Cache backend. Possible values: redis, sqlite",
+default="sqlite"
+)
+arg_parser.add_argument(
+"--cache-db-path-or-url",
+type=str,
+help="Path to cache database",
+default="cache.db"
+)

 args = arg_parser.parse_args()

-if args.verbose:
-segments = wxpath_parser.parse(args.expression)
-print("parsed expression:\n\nSegments([")
-for s in segments:
-print(f"\t{s},")
-print("])")
-print()
-
 if args.debug:
 from wxpath import configure_logging
 configure_logging('DEBUG')
@@ -72,6 +85,29 @@ def main():
 print(f"Using custom headers: {custom_headers}")
 print()

+if args.cache:
+SETTINGS.http.client.cache.enabled = True
+if args.cache_backend == "redis":
+SETTINGS.http.client.cache.backend = "redis"
+SETTINGS.http.client.cache.redis.address = args.cache_db_path_or_url
+elif args.cache_backend == "sqlite":
+SETTINGS.http.client.cache.backend = "sqlite"
+SETTINGS.http.client.cache.sqlite.cache_name = args.cache_db_path_or_url
+
+if args.verbose:
+print(f"Using concurrency: {args.concurrency}")
+print(f"Using concurrency per host: {args.concurrency_per_host}")
+print(f"Using respect robots: {args.respect_robots}")
+print(f"Using cache: {args.cache}")
+
+segments = wxpath_parser.parse(args.expression)
+print("parsed expression:\n\nSegments([")
+for s in segments:
+print(f"\t{s},")
+print("])")
+print()
+print()
+
 crawler = Crawler(
 concurrency=args.concurrency,
 per_host=args.concurrency_per_host,
@@ -81,11 +117,20 @@ def main():
 engine = WXPathEngine(crawler=crawler)

 try:
-for r in wxpath_async_blocking_iter(
+for r in wxpath_async_blocking_iter(
+path_expr=args.expression,
+max_depth=args.depth,
+engine=engine):
 clean = simplify(r)
 print(json.dumps(clean, ensure_ascii=False), flush=True)
 except BrokenPipeError:
-
+if args.verbose:
+print("Pipe broken.")
+
+if args.verbose:
+print("Done. Printing crawl stats")
+print(crawler._stats)
+sys.exit(0)


 if __name__ == "__main__":

{wxpath-0.3.0 → wxpath-0.4.0}/src/wxpath/core/runtime/engine.py

@@ -5,6 +5,7 @@ from collections import deque
 from typing import Any, AsyncGenerator, Iterator

 from lxml.html import HtmlElement
+from tqdm import tqdm

 from wxpath import patches  # noqa: F401
 from wxpath.core import parser
@@ -157,7 +158,12 @@ class WXPathEngine(HookedEngineBase):
 if allow_redirects:
 self.allowed_response_codes |= {301, 302, 303, 307, 308}

-async def run(
+async def run(
+self,
+expression: str,
+max_depth: int,
+progress: bool = False
+) -> AsyncGenerator[Any, None]:
 """Execute a wxpath expression concurrently and yield results.

 Builds and drives a BFS-like crawl pipeline that honors robots rules,
@@ -166,6 +172,7 @@ class WXPathEngine(HookedEngineBase):
 Args:
 expression: WXPath expression string to evaluate.
 max_depth: Maximum crawl depth to follow for url hops.
+progress: Whether to display a progress bar.

 Yields:
 Extracted values produced by the expression (HTML elements or
@@ -182,6 +189,12 @@ class WXPathEngine(HookedEngineBase):
 # the current state of the engine.
 return queue.empty() and pending_tasks <= 0

+total_yielded = 0
+if progress:
+pbar = tqdm(total=0)
+else:
+pbar = None
+
 async with self.crawler as crawler:
 async def submitter():
 nonlocal pending_tasks
@@ -219,12 +232,17 @@ class WXPathEngine(HookedEngineBase):
 depth=seed_task.depth,
 max_depth=max_depth,
 queue=queue,
+pbar=pbar,
 ):
 yield await self.post_extract_hooks(output)

 # While looping asynchronous generators, you MUST make sure
 # to check terminal conditions before re-iteration.
 async for resp in crawler:
+if pbar is not None:
+pbar.update(1)
+pbar.refresh()
+
 task = inflight.pop(resp.request.url, None)
 pending_tasks -= 1

@@ -273,10 +291,18 @@ class WXPathEngine(HookedEngineBase):
 depth=task.depth,
 max_depth=max_depth,
 queue=queue,
-
+pbar=pbar
+):
+total_yielded += 1
+if pbar is not None:
+pbar.set_postfix(yielded=total_yielded, depth=task.depth,)

 yield await self.post_extract_hooks(output)
 else:
+total_yielded += 1
+if pbar is not None:
+pbar.set_postfix(yielded=total_yielded, depth=task.depth,)
+
 yield await self.post_extract_hooks(elem)

 # Termination condition
@@ -287,6 +313,9 @@ class WXPathEngine(HookedEngineBase):
 with contextlib.suppress(asyncio.CancelledError):
 await submit_task

+if pbar is not None:
+pbar.close()
+
 async def _process_pipeline(
 self,
 task: CrawlTask,
@@ -294,6 +323,7 @@ class WXPathEngine(HookedEngineBase):
 depth: int,
 max_depth: int,
 queue: asyncio.Queue[CrawlTask],
+pbar: tqdm = None
 ) -> AsyncGenerator[Any, None]:
 """Process a queue of intents for a single crawl branch.

@@ -331,9 +361,10 @@ class WXPathEngine(HookedEngineBase):
 elif isinstance(intent, CrawlIntent):
 next_depth = task.depth + 1
 # if intent.url not in self.seen_urls and next_depth <= max_depth:
-if next_depth <= max_depth:
+if next_depth <= max_depth and intent.url not in self.seen_urls:
 # self.seen_urls.add(intent.url)
 log.debug(f"Depth: {next_depth}; Enqueuing {intent.url}")
+
 queue.put_nowait(
 CrawlTask(
 elem=None,
@@ -343,6 +374,9 @@ class WXPathEngine(HookedEngineBase):
 backlink=task.url,
 )
 )
+if pbar is not None:
+pbar.total += 1
+pbar.refresh()

 elif isinstance(intent, (ExtractIntent, ProcessIntent, InfiniteCrawlIntent)):
 # immediately traverse the extraction
@@ -351,18 +385,20 @@ class WXPathEngine(HookedEngineBase):
 mini_queue.append((elem, next_segments))


-def wxpath_async(path_expr: str,
-max_depth: int,
+def wxpath_async(path_expr: str,
+max_depth: int,
+progress: bool = False,
 engine: WXPathEngine | None = None) -> AsyncGenerator[Any, None]:
 if engine is None:
 engine = WXPathEngine()
-return engine.run(path_expr, max_depth)
+return engine.run(path_expr, max_depth, progress=progress)


 ##### ASYNC IN SYNC #####
 def wxpath_async_blocking_iter(
 path_expr: str,
 max_depth: int = 1,
+progress: bool = False,
 engine: WXPathEngine | None = None,
 ) -> Iterator[Any]:
 """Evaluate a wxpath expression using concurrent breadth-first traversal.
@@ -383,7 +419,7 @@ def wxpath_async_blocking_iter(
 """
 loop = asyncio.new_event_loop()
 asyncio.set_event_loop(loop)
-agen = wxpath_async(path_expr, max_depth=max_depth, engine=engine)
+agen = wxpath_async(path_expr, max_depth=max_depth, progress=progress, engine=engine)

 try:
 while True:
@@ -399,8 +435,10 @@ def wxpath_async_blocking_iter(
 def wxpath_async_blocking(
 path_expr: str,
 max_depth: int = 1,
+progress: bool = False,
 engine: WXPathEngine | None = None,
 ) -> list[Any]:
-return list(
-
-
+return list(wxpath_async_blocking_iter(path_expr,
+max_depth=max_depth,
+progress=progress,
+engine=engine))