wxpath 0.3.0__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
wxpath/cli.py CHANGED
@@ -6,6 +6,7 @@ from wxpath.core import parser as wxpath_parser
6
6
  from wxpath.core.runtime.engine import WXPathEngine, wxpath_async_blocking_iter
7
7
  from wxpath.hooks import builtin, registry
8
8
  from wxpath.http.client.crawler import Crawler
9
+ from wxpath.settings import SETTINGS
9
10
  from wxpath.util.serialize import simplify
10
11
 
11
12
 
@@ -15,9 +16,11 @@ def main():
15
16
  arg_parser.add_argument("expression", help="The wxpath expression")
16
17
  arg_parser.add_argument("--depth", type=int, default=1, help="Recursion depth")
17
18
  # debug
18
- arg_parser.add_argument("--debug", action="store_true", help="Debug mode")
19
+ arg_parser.add_argument("--debug", action="store_true",
20
+ help="Debug mode. Provides verbose runtime output and information")
19
21
  # verbose
20
- arg_parser.add_argument("--verbose", action="store_true", help="Verbose mode")
22
+ arg_parser.add_argument("--verbose", action="store_true",
23
+ help="Verbose mode. Prints CLI level information")
21
24
 
22
25
  arg_parser.add_argument(
23
26
  "--concurrency",
@@ -44,17 +47,27 @@ def main():
44
47
  help="Respect robots.txt",
45
48
  default=True
46
49
  )
50
+ arg_parser.add_argument(
51
+ "--cache",
52
+ action="store_true",
53
+ help="Use cache",
54
+ default=False
55
+ )
56
+ arg_parser.add_argument(
57
+ "--cache-backend",
58
+ type=str,
59
+ help="Cache backend. Possible values: redis, sqlite",
60
+ default="sqlite"
61
+ )
62
+ arg_parser.add_argument(
63
+ "--cache-db-path-or-url",
64
+ type=str,
65
+ help="Path to cache database",
66
+ default="cache.db"
67
+ )
47
68
 
48
69
  args = arg_parser.parse_args()
49
70
 
50
- if args.verbose:
51
- segments = wxpath_parser.parse(args.expression)
52
- print("parsed expression:\n\nSegments([")
53
- for s in segments:
54
- print(f"\t{s},")
55
- print("])")
56
- print()
57
-
58
71
  if args.debug:
59
72
  from wxpath import configure_logging
60
73
  configure_logging('DEBUG')
@@ -72,6 +85,29 @@ def main():
72
85
  print(f"Using custom headers: {custom_headers}")
73
86
  print()
74
87
 
88
+ if args.cache:
89
+ SETTINGS.http.client.cache.enabled = True
90
+ if args.cache_backend == "redis":
91
+ SETTINGS.http.client.cache.backend = "redis"
92
+ SETTINGS.http.client.cache.redis.address = args.cache_db_path_or_url
93
+ elif args.cache_backend == "sqlite":
94
+ SETTINGS.http.client.cache.backend = "sqlite"
95
+ SETTINGS.http.client.cache.sqlite.cache_name = args.cache_db_path_or_url
96
+
97
+ if args.verbose:
98
+ print(f"Using concurrency: {args.concurrency}")
99
+ print(f"Using concurrency per host: {args.concurrency_per_host}")
100
+ print(f"Using respect robots: {args.respect_robots}")
101
+ print(f"Using cache: {args.cache}")
102
+
103
+ segments = wxpath_parser.parse(args.expression)
104
+ print("parsed expression:\n\nSegments([")
105
+ for s in segments:
106
+ print(f"\t{s},")
107
+ print("])")
108
+ print()
109
+ print()
110
+
75
111
  crawler = Crawler(
76
112
  concurrency=args.concurrency,
77
113
  per_host=args.concurrency_per_host,
@@ -81,11 +117,20 @@ def main():
81
117
  engine = WXPathEngine(crawler=crawler)
82
118
 
83
119
  try:
84
- for r in wxpath_async_blocking_iter(args.expression, args.depth, engine):
120
+ for r in wxpath_async_blocking_iter(
121
+ path_expr=args.expression,
122
+ max_depth=args.depth,
123
+ engine=engine):
85
124
  clean = simplify(r)
86
125
  print(json.dumps(clean, ensure_ascii=False), flush=True)
87
126
  except BrokenPipeError:
88
- sys.exit(0)
127
+ if args.verbose:
128
+ print("Pipe broken.")
129
+
130
+ if args.verbose:
131
+ print("Done. Printing crawl stats")
132
+ print(crawler._stats)
133
+ sys.exit(0)
89
134
 
90
135
 
91
136
  if __name__ == "__main__":
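
The new `--cache*` flags are thin wrappers around the global settings object introduced in this release (see `wxpath/settings.py` later in this diff). A minimal sketch of the equivalent programmatic configuration, mirroring the `if args.cache:` branch above (the Redis URL is a placeholder):

```python
from wxpath.settings import SETTINGS

# Equivalent of: wxpath "<expr>" --cache --cache-backend sqlite --cache-db-path-or-url cache.db
SETTINGS.http.client.cache.enabled = True
SETTINGS.http.client.cache.backend = "sqlite"
SETTINGS.http.client.cache.sqlite.cache_name = "cache.db"

# Equivalent of: --cache --cache-backend redis --cache-db-path-or-url redis://localhost:6379/0
# SETTINGS.http.client.cache.backend = "redis"
# SETTINGS.http.client.cache.redis.address = "redis://localhost:6379/0"
```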
wxpath/core/runtime/engine.py CHANGED
@@ -5,6 +5,7 @@ from collections import deque
5
5
  from typing import Any, AsyncGenerator, Iterator
6
6
 
7
7
  from lxml.html import HtmlElement
8
+ from tqdm import tqdm
8
9
 
9
10
  from wxpath import patches # noqa: F401
10
11
  from wxpath.core import parser
@@ -157,7 +158,13 @@ class WXPathEngine(HookedEngineBase):
157
158
  if allow_redirects:
158
159
  self.allowed_response_codes |= {301, 302, 303, 307, 308}
159
160
 
160
- async def run(self, expression: str, max_depth: int) -> AsyncGenerator[Any, None]:
161
+ async def run(
162
+ self,
163
+ expression: str,
164
+ max_depth: int,
165
+ progress: bool = False,
166
+ yield_errors: bool = False,
167
+ ) -> AsyncGenerator[Any, None]:
161
168
  """Execute a wxpath expression concurrently and yield results.
162
169
 
163
170
  Builds and drives a BFS-like crawl pipeline that honors robots rules,
@@ -166,6 +173,7 @@ class WXPathEngine(HookedEngineBase):
166
173
  Args:
167
174
  expression: WXPath expression string to evaluate.
168
175
  max_depth: Maximum crawl depth to follow for url hops.
176
+ progress: Whether to display a progress bar.
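+ yield_errors: Whether to also yield structured error dicts for failed or unexpected responses.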
169
177
 
170
178
  Yields:
171
179
  Extracted values produced by the expression (HTML elements or
@@ -182,6 +190,12 @@ class WXPathEngine(HookedEngineBase):
182
190
  # the current state of the engine.
183
191
  return queue.empty() and pending_tasks <= 0
184
192
 
193
+ total_yielded = 0
194
+ if progress:
195
+ pbar = tqdm(total=0)
196
+ else:
197
+ pbar = None
198
+
185
199
  async with self.crawler as crawler:
186
200
  async def submitter():
187
201
  nonlocal pending_tasks
@@ -219,23 +233,48 @@ class WXPathEngine(HookedEngineBase):
219
233
  depth=seed_task.depth,
220
234
  max_depth=max_depth,
221
235
  queue=queue,
236
+ pbar=pbar,
222
237
  ):
223
238
  yield await self.post_extract_hooks(output)
224
239
 
225
240
  # While looping asynchronous generators, you MUST make sure
226
241
  # to check terminal conditions before re-iteration.
227
242
  async for resp in crawler:
243
+ if pbar is not None:
244
+ pbar.update(1)
245
+ pbar.refresh()
246
+
228
247
  task = inflight.pop(resp.request.url, None)
229
248
  pending_tasks -= 1
230
249
 
231
250
  if task is None:
232
251
  log.warning(f"Got unexpected response from {resp.request.url}")
252
+
253
+ if yield_errors:
254
+ yield {
255
+ "__type__": "error",
256
+ "url": resp.request.url,
257
+ "reason": "unexpected_response",
258
+ "status": resp.body,
259
+ "body": resp.body
260
+ }
261
+
233
262
  if is_terminal():
234
263
  break
235
264
  continue
236
265
 
237
266
  if resp.error:
238
267
  log.warning(f"Got error from {resp.request.url}: {resp.error}")
268
+
269
+ if yield_errors:
270
+ yield {
271
+ "__type__": "error",
272
+ "url": resp.request.url,
273
+ "reason": "network_error",
274
+ "exception": str(resp.error),
275
+ "status": resp.status,
276
+ "body": resp.body
277
+ }
239
278
  if is_terminal():
240
279
  break
241
280
  continue
@@ -243,6 +282,16 @@ class WXPathEngine(HookedEngineBase):
243
282
  # NOTE: Consider allowing redirects
244
283
  if resp.status not in self.allowed_response_codes or not resp.body:
245
284
  log.warning(f"Got non-200 response from {resp.request.url}")
285
+
286
+ if yield_errors:
287
+ yield {
288
+ "__type__": "error",
289
+ "url": resp.request.url,
290
+ "reason": "bad_status",
291
+ "status": resp.status,
292
+ "body": resp.body
293
+ }
294
+
246
295
  if is_terminal():
247
296
  break
248
297
  continue
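
When `yield_errors` is enabled, these error dicts are interleaved with normal results and can be filtered on the `__type__` key. A minimal consumer-side sketch (the expression and depth are placeholders):

```python
import wxpath

expr = "url('https://example.com')//a/@href"

for item in wxpath.wxpath_async_blocking_iter(expr, max_depth=1, yield_errors=True):
    # Error dicts carry "__type__": "error" plus url/reason/status/body fields.
    if isinstance(item, dict) and item.get("__type__") == "error":
        print("fetch failed:", item["url"], item["reason"])
    else:
        print("result:", item)
```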
@@ -273,10 +322,18 @@ class WXPathEngine(HookedEngineBase):
273
322
  depth=task.depth,
274
323
  max_depth=max_depth,
275
324
  queue=queue,
276
- ):
325
+ pbar=pbar
326
+ ):
327
+ total_yielded += 1
328
+ if pbar is not None:
329
+ pbar.set_postfix(yielded=total_yielded, depth=task.depth,)
277
330
 
278
331
  yield await self.post_extract_hooks(output)
279
332
  else:
333
+ total_yielded += 1
334
+ if pbar is not None:
335
+ pbar.set_postfix(yielded=total_yielded, depth=task.depth,)
336
+
280
337
  yield await self.post_extract_hooks(elem)
281
338
 
282
339
  # Termination condition
@@ -287,6 +344,9 @@ class WXPathEngine(HookedEngineBase):
287
344
  with contextlib.suppress(asyncio.CancelledError):
288
345
  await submit_task
289
346
 
347
+ if pbar is not None:
348
+ pbar.close()
349
+
290
350
  async def _process_pipeline(
291
351
  self,
292
352
  task: CrawlTask,
@@ -294,6 +354,7 @@ class WXPathEngine(HookedEngineBase):
294
354
  depth: int,
295
355
  max_depth: int,
296
356
  queue: asyncio.Queue[CrawlTask],
357
+ pbar: tqdm = None
297
358
  ) -> AsyncGenerator[Any, None]:
298
359
  """Process a queue of intents for a single crawl branch.
299
360
 
@@ -331,9 +392,10 @@ class WXPathEngine(HookedEngineBase):
331
392
  elif isinstance(intent, CrawlIntent):
332
393
  next_depth = task.depth + 1
333
394
  # if intent.url not in self.seen_urls and next_depth <= max_depth:
334
- if next_depth <= max_depth:
395
+ if next_depth <= max_depth and intent.url not in self.seen_urls:
335
396
  # self.seen_urls.add(intent.url)
336
397
  log.debug(f"Depth: {next_depth}; Enqueuing {intent.url}")
398
+
337
399
  queue.put_nowait(
338
400
  CrawlTask(
339
401
  elem=None,
@@ -343,6 +405,9 @@ class WXPathEngine(HookedEngineBase):
343
405
  backlink=task.url,
344
406
  )
345
407
  )
408
+ if pbar is not None:
409
+ pbar.total += 1
410
+ pbar.refresh()
346
411
 
347
412
  elif isinstance(intent, (ExtractIntent, ProcessIntent, InfiniteCrawlIntent)):
348
413
  # immediately traverse the extraction
@@ -351,19 +416,24 @@ class WXPathEngine(HookedEngineBase):
351
416
  mini_queue.append((elem, next_segments))
352
417
 
353
418
 
354
- def wxpath_async(path_expr: str,
355
- max_depth: int,
356
- engine: WXPathEngine | None = None) -> AsyncGenerator[Any, None]:
419
+ def wxpath_async(path_expr: str,
420
+ max_depth: int,
421
+ progress: bool = False,
422
+ engine: WXPathEngine | None = None,
423
+ yield_errors: bool = False
424
+ ) -> AsyncGenerator[Any, None]:
357
425
  if engine is None:
358
426
  engine = WXPathEngine()
359
- return engine.run(path_expr, max_depth)
427
+ return engine.run(path_expr, max_depth, progress=progress, yield_errors=yield_errors)
360
428
 
361
429
 
362
430
  ##### ASYNC IN SYNC #####
363
431
  def wxpath_async_blocking_iter(
364
432
  path_expr: str,
365
433
  max_depth: int = 1,
434
+ progress: bool = False,
366
435
  engine: WXPathEngine | None = None,
436
+ yield_errors: bool = False
367
437
  ) -> Iterator[Any]:
368
438
  """Evaluate a wxpath expression using concurrent breadth-first traversal.
369
439
 
@@ -383,7 +453,8 @@ def wxpath_async_blocking_iter(
383
453
  """
384
454
  loop = asyncio.new_event_loop()
385
455
  asyncio.set_event_loop(loop)
386
- agen = wxpath_async(path_expr, max_depth=max_depth, engine=engine)
456
+ agen = wxpath_async(path_expr, max_depth=max_depth, progress=progress,
457
+ engine=engine, yield_errors=yield_errors)
387
458
 
388
459
  try:
389
460
  while True:
@@ -399,8 +470,13 @@ def wxpath_async_blocking_iter(
399
470
  def wxpath_async_blocking(
400
471
  path_expr: str,
401
472
  max_depth: int = 1,
473
+ progress: bool = False,
402
474
  engine: WXPathEngine | None = None,
475
+ yield_errors: bool = False
403
476
  ) -> list[Any]:
404
- return list(
405
- wxpath_async_blocking_iter(path_expr, max_depth=max_depth, engine=engine)
406
- )
477
+ return list(wxpath_async_blocking_iter(path_expr,
478
+ max_depth=max_depth,
479
+ progress=progress,
480
+ engine=engine,
481
+ yield_errors=yield_errors,
482
+ ))
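
For reference, a short sketch of the updated blocking helper with the new keyword arguments, assuming the top-level re-export used in the README (the URL is a placeholder):

```python
import wxpath

items = wxpath.wxpath_async_blocking(
    "url('https://quotes.toscrape.com')//a/@href",
    max_depth=1,
    progress=True,      # tqdm progress bar
    yield_errors=True,  # include error dicts in the returned list
)
print(len(items))
```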
wxpath/http/client/cache.py ADDED
@@ -0,0 +1,43 @@
1
+ try:
2
+ from aiohttp_client_cache import SQLiteBackend
3
+ except ImportError:
4
+ SQLiteBackend = None
5
+
6
+ from wxpath.settings import SETTINGS
7
+ from wxpath.util.logging import get_logger
8
+
9
+ log = get_logger(__name__)
10
+
11
+ CACHE_SETTINGS = SETTINGS.http.client.cache
12
+
13
+ def get_cache_backend():
14
+ log.info("cache backend", extra={"backend": CACHE_SETTINGS.backend})
15
+ if CACHE_SETTINGS.backend == "redis":
16
+ from aiohttp_client_cache.backends.redis import RedisBackend
17
+ return RedisBackend(
18
+ expire_after=CACHE_SETTINGS.expire_after,
19
+ urls_expire_after=CACHE_SETTINGS.urls_expire_after or None,
20
+ allowed_methods=CACHE_SETTINGS.allowed_methods,
21
+ allowed_codes=CACHE_SETTINGS.allowed_codes,
22
+ include_headers=CACHE_SETTINGS.include_headers,
23
+ ignored_parameters=CACHE_SETTINGS.ignored_parameters,
24
+ **CACHE_SETTINGS.redis
25
+ # cache_name=CACHE_SETTINGS.redis.cache_name,
26
+ # host=CACHE_SETTINGS.redis.host,
27
+ # port=CACHE_SETTINGS.redis.port,
28
+ # db=CACHE_SETTINGS.redis.db,
29
+ # cache_control=CACHE_SETTINGS.cache_control,
30
+ )
31
+ elif CACHE_SETTINGS.backend == "sqlite":
32
+ return SQLiteBackend(
33
+ cache_name=CACHE_SETTINGS.sqlite.cache_name,
34
+ expire_after=CACHE_SETTINGS.expire_after,
35
+ urls_expire_after=CACHE_SETTINGS.urls_expire_after or None,
36
+ allowed_methods=CACHE_SETTINGS.allowed_methods,
37
+ allowed_codes=CACHE_SETTINGS.allowed_codes,
38
+ include_headers=CACHE_SETTINGS.include_headers,
39
+ ignored_parameters=CACHE_SETTINGS.ignored_parameters,
40
+ # cache_control=CACHE_SETTINGS.cache_control,
41
+ )
42
+ else:
43
+ raise ValueError(f"Unknown cache backend: {CACHE_SETTINGS.backend}")
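
A minimal sketch of how this backend factory is consumed, mirroring `get_async_session` in `wxpath/http/client/crawler.py` below (requires the `cache-sqlite` extra; the URL is a placeholder):

```python
import asyncio

from aiohttp_client_cache import CachedSession

from wxpath.http.client.cache import get_cache_backend

async def demo():
    # Defaults resolve to the SQLite backend ("cache.db") configured in SETTINGS.
    async with CachedSession(cache=get_cache_backend()) as session:
        async with session.get("https://example.com") as resp:
            print(resp.status, getattr(resp, "from_cache", False))

asyncio.run(demo())
```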
wxpath/http/client/crawler.py CHANGED
@@ -1,3 +1,10 @@
1
+ import aiohttp
2
+
3
+ try:
4
+ from aiohttp_client_cache import CachedSession
5
+ except ImportError:
6
+ CachedSession = None
7
+
1
8
  import asyncio
2
9
  import time
3
10
  import urllib.parse
@@ -5,21 +12,52 @@ from collections import defaultdict
5
12
  from socket import gaierror
6
13
  from typing import AsyncIterator
7
14
 
8
- import aiohttp
9
-
15
+ from wxpath.http.client.cache import get_cache_backend
10
16
  from wxpath.http.client.request import Request
11
17
  from wxpath.http.client.response import Response
12
18
  from wxpath.http.policy.retry import RetryPolicy
13
19
  from wxpath.http.policy.robots import RobotsTxtPolicy
14
20
  from wxpath.http.policy.throttler import AbstractThrottler, AutoThrottler
15
21
  from wxpath.http.stats import CrawlerStats, build_trace_config
22
+ from wxpath.settings import SETTINGS
16
23
  from wxpath.util.logging import get_logger
17
24
 
18
25
  log = get_logger(__name__)
19
26
 
20
- HEADERS = {"User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
21
- "AppleWebKit/537.36 (KHTML, like Gecko) "
22
- "Chrome/142.0.0.0 Safari/537.36")}
27
+ CACHE_SETTINGS = SETTINGS.http.client.cache
28
+ CRAWLER_SETTINGS = SETTINGS.http.client.crawler
29
+
30
+ def get_async_session(
31
+ headers: dict | None = None,
32
+ timeout: aiohttp.ClientTimeout | None = None,
33
+ connector: aiohttp.TCPConnector | None = None,
34
+ trace_config: aiohttp.TraceConfig | None = None
35
+ ) -> aiohttp.ClientSession:
36
+ """
37
+ Create and return a new aiohttp session. If aiohttp-client-cache is available
38
+ and caching is enabled, return a new CachedSession bound to the configured cache backend.
39
+ The caller is responsible for closing the session.
40
+ """
41
+
42
+ if timeout is None:
43
+ timeout = aiohttp.ClientTimeout(total=CRAWLER_SETTINGS.timeout)
44
+
45
+ if CACHE_SETTINGS.enabled and CachedSession:
46
+ log.info("using aiohttp-client-cache")
47
+ return CachedSession(
48
+ cache=get_cache_backend(),
49
+ headers=headers,
50
+ timeout=timeout,
51
+ connector=connector,
52
+ trace_configs=[trace_config] if trace_config is not None else None
53
+ )
54
+
55
+ return aiohttp.ClientSession(
56
+ headers=headers,
57
+ timeout=timeout,
58
+ connector=connector,
59
+ trace_configs=[trace_config] if trace_config is not None else None
60
+ )
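
A short usage sketch for the new session factory; as the docstring notes, the caller owns the session lifecycle (the header value and URL are placeholders):

```python
import asyncio

from wxpath.http.client.crawler import get_async_session

async def demo():
    session = get_async_session(headers={"User-Agent": "my-app/0.1 (contact: you@example.com)"})
    try:
        async with session.get("https://example.com") as resp:
            print(resp.status)
    finally:
        await session.close()

asyncio.run(demo())
```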
23
61
 
24
62
 
25
63
  class Crawler:
@@ -27,33 +65,55 @@ class Crawler:
27
65
 
28
66
  def __init__(
29
67
  self,
30
- concurrency: int = 16,
31
- per_host: int = 8,
32
- timeout: int = 15,
68
+ concurrency: int = None,
69
+ per_host: int = None,
70
+ timeout: int = None,
33
71
  *,
34
72
  headers: dict | None = None,
35
73
  proxies: dict | None = None,
36
74
  retry_policy: RetryPolicy | None = None,
37
75
  throttler: AbstractThrottler | None = None,
38
76
  auto_throttle_target_concurrency: float = None,
39
- auto_throttle_start_delay: float = 0.25,
40
- auto_throttle_max_delay: float = 10.0,
77
+ auto_throttle_start_delay: float = None,
78
+ auto_throttle_max_delay: float = None,
41
79
  respect_robots: bool = True,
42
80
  ):
43
- self.concurrency = concurrency
81
+ cfg = CRAWLER_SETTINGS
82
+
83
+ self.concurrency = concurrency if concurrency is not None else cfg.concurrency
84
+ self.per_host = per_host if per_host is not None else cfg.per_host
85
+
86
+ timeout = timeout if timeout is not None else cfg.timeout
44
87
  self._timeout = aiohttp.ClientTimeout(total=timeout)
45
- self._headers = HEADERS | (headers or {}) # merge headers
46
- self._proxies = proxies if (isinstance(proxies, defaultdict) or proxies) else {}
47
- self.respect_robots = respect_robots
48
88
 
89
+ self._headers = cfg.headers | (headers or {}) # merge headers
90
+
91
+ _proxies = proxies if proxies is not None else cfg.proxies
92
+ self._proxies = _proxies if (isinstance(_proxies, defaultdict) or _proxies) else {}
93
+
49
94
  self.retry_policy = retry_policy or RetryPolicy()
95
+
96
+ # auto-throttle defaults
97
+ auto_throttle_target_concurrency = auto_throttle_target_concurrency \
98
+ if auto_throttle_target_concurrency is not None \
99
+ else cfg.auto_throttle_target_concurrency
100
+
101
+ auto_throttle_start_delay = auto_throttle_start_delay \
102
+ if auto_throttle_start_delay is not None \
103
+ else cfg.auto_throttle_start_delay
104
+
105
+ auto_throttle_max_delay = auto_throttle_max_delay \
106
+ if auto_throttle_max_delay is not None \
107
+ else cfg.auto_throttle_max_delay
108
+
50
109
  self.throttler = throttler or AutoThrottler(
51
- target_concurrency=auto_throttle_target_concurrency or concurrency/4.0,
110
+ target_concurrency=auto_throttle_target_concurrency or self.concurrency/4.0,
52
111
  start_delay=auto_throttle_start_delay,
53
112
  max_delay=auto_throttle_max_delay,
54
113
  )
55
- self._sem_global = asyncio.Semaphore(concurrency)
56
- self._sem_host = defaultdict(lambda: asyncio.Semaphore(per_host))
114
+
115
+ self._sem_global = asyncio.Semaphore(self.concurrency)
116
+ self._sem_host = defaultdict(lambda: asyncio.Semaphore(self.per_host))
57
117
 
58
118
  self._pending: asyncio.Queue[Request] = asyncio.Queue()
59
119
  self._results: asyncio.Queue[Response] = asyncio.Queue()
@@ -62,18 +122,31 @@ class Crawler:
62
122
  self._workers: list[asyncio.Task] = []
63
123
  self._closed = False
64
124
  self._stats = CrawlerStats()
125
+
126
+ self.respect_robots = respect_robots if respect_robots is not None else cfg.respect_robots
65
127
  self._robots_policy: RobotsTxtPolicy | None = None
66
128
 
129
+ # WARN: If SQLiteBackend caching is enabled and min(concurrency, per_host) > 1,
130
+ # write-contention is likely to occur.
131
+ if (CACHE_SETTINGS.enabled
132
+ and CACHE_SETTINGS.backend == "sqlite"
133
+ and min(self.concurrency, self.per_host) > 1
134
+ ):
135
+ log.warning(
136
+ "SQLiteBackend caching is enabled and min(concurrency, per_host) > 1. "
137
+ "Write-contention is likely to occur. Consider using RedisBackend."
138
+ )
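
Since the constructor now falls back to `CRAWLER_SETTINGS` for any argument left as `None`, global overrides and per-instance arguments can be mixed. A minimal sketch of that behavior:

```python
from wxpath.http.client.crawler import Crawler
from wxpath.settings import CRAWLER_SETTINGS

# Global default, picked up by every Crawler constructed afterwards.
CRAWLER_SETTINGS.timeout = 30

crawler = Crawler(concurrency=4)              # explicit argument wins over cfg.concurrency (16)
print(crawler.concurrency, crawler.per_host)  # 4 8  (per_host falls back to the settings default)
```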
139
+
67
140
  def build_session(self) -> aiohttp.ClientSession:
68
141
  """Construct an `aiohttp.ClientSession` with tracing and pooling."""
69
142
  trace_config = build_trace_config(self._stats)
70
143
  # Need to build the connector as late as possible as it requires the loop
71
144
  connector = aiohttp.TCPConnector(limit=self.concurrency*2, ttl_dns_cache=300)
72
- return aiohttp.ClientSession(
73
- headers=self._headers,
74
- timeout=self._timeout,
75
- connector=connector,
76
- trace_configs=[trace_config]
145
+ return get_async_session(
146
+ headers=self._headers,
147
+ timeout=self._timeout,
148
+ connector=connector,
149
+ trace_config=trace_config
77
150
  )
78
151
 
79
152
  async def __aenter__(self) -> "Crawler":
@@ -82,6 +155,7 @@ class Crawler:
82
155
  # self._session = aiohttp.ClientSession(timeout=self._timeout)
83
156
  self._session = self.build_session()
84
157
 
158
+ # Note: Set robots policy after session is created
85
159
  if self.respect_robots:
86
160
  self._robots_policy = RobotsTxtPolicy(self._session)
87
161
 
@@ -184,12 +258,22 @@ class Crawler:
184
258
 
185
259
  start = time.monotonic()
186
260
  try:
261
+ log.info("fetching", extra={"url": req.url})
187
262
  async with self._session.get(
188
263
  req.url,
189
264
  headers=self._headers | req.headers,
190
265
  proxy=self._proxy_for(req.url),
191
266
  timeout=req.timeout or self._timeout,
192
267
  ) as resp:
268
+ from_cache = getattr(resp, "from_cache", False)
269
+ if from_cache:
270
+ # NOTE: This is a bit of a hack, but it works. aiohttp-client-cache does not
271
+ # interface with TraceConfigs on cache hit, so we have to do it here.
272
+ self._stats.requests_cache_hit += 1
273
+ log.info("[CACHE HIT]", extra={"req.url": req.url, "resp.url": resp.url})
274
+ else:
275
+ log.info("[CACHE MISS]", extra={"req.url": req.url, "resp.url": resp.url})
276
+
193
277
  body = await resp.read()
194
278
 
195
279
  latency = time.monotonic() - start
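
The cache-hit counter lands on the crawler's stats object, which the CLI prints in verbose mode. A rough sketch of inspecting it after a crawl (`_stats` is an internal attribute, so treat this as debugging only; the URL is a placeholder and the counter stays at 0 unless caching is enabled):

```python
import wxpath
from wxpath.core.runtime.engine import WXPathEngine
from wxpath.http.client.crawler import Crawler

crawler = Crawler(concurrency=1, per_host=1)
engine = WXPathEngine(crawler=crawler)

list(wxpath.wxpath_async_blocking_iter("url('https://example.com')//title/text()",
                                       max_depth=1, engine=engine))
print(crawler._stats.requests_cache_hit)
```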
wxpath/http/client/request.py CHANGED
@@ -9,7 +9,7 @@ class Request:
9
9
  url: str
10
10
  method: str = "GET"
11
11
  headers: dict[str, str] = field(default_factory=dict)
12
- timeout: float = 15.0
12
+ timeout: float | None = None
13
13
 
14
14
  retries: int = 0
15
15
  max_retries: int | None = None
wxpath/http/stats.py CHANGED
@@ -16,6 +16,7 @@ class CrawlerStats:
16
16
  requests_enqueued: int = 0
17
17
  requests_started: int = 0
18
18
  requests_completed: int = 0
19
+ requests_cache_hit: int = 0
19
20
 
20
21
  # ---- Concurrency ----
21
22
  in_flight_global: int = 0
@@ -57,6 +58,9 @@ def build_trace_config(stats: CrawlerStats) -> TraceConfig:
57
58
  context._start_time = time.monotonic()
58
59
 
59
60
  async def on_request_end(session, context, params):
61
+ """
62
+ Update stats on request completion.
63
+ """
60
64
  host = params.url.host
61
65
  stats.in_flight_global -= 1
62
66
  stats.in_flight_per_host[host] -= 1
@@ -82,6 +86,8 @@ def build_trace_config(stats: CrawlerStats) -> TraceConfig:
82
86
  if not hasattr(stats, "bytes_received"):
83
87
  stats.bytes_received = 0
84
88
  stats.bytes_received += content_length
89
+
90
+ stats.requests_completed += 1
85
91
 
86
92
  async def on_request_exception(session, context, params):
87
93
  host = params.url.host
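
For context, the trace config is attached to the session in `Crawler.build_session`; a minimal sketch of the same wiring in isolation (the URL is a placeholder):

```python
import asyncio

import aiohttp

from wxpath.http.stats import CrawlerStats, build_trace_config

async def demo():
    stats = CrawlerStats()
    # The trace hooks update `stats` as requests start and finish,
    # including the new requests_completed counter.
    async with aiohttp.ClientSession(trace_configs=[build_trace_config(stats)]) as session:
        async with session.get("https://example.com") as resp:
            await resp.read()
    print(stats.requests_completed)

asyncio.run(demo())
```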
wxpath/settings.py ADDED
@@ -0,0 +1,108 @@
1
+ """
2
+ Settings for wxpath.
3
+
4
+ These settings are global and can be accessed from any module in the wxpath package.
5
+
6
+ They are typically used by various modules to configure Class initializers.
7
+
8
+ The SETTINGS dict structure follows the structure of wxpath submodules.
9
+
10
+ Expected usage behavior:
11
+
12
+ ```python
13
+ from wxpath.settings import SETTINGS
14
+
15
+ CACHE_SETTINGS = SETTINGS.http.client.cache
16
+ ```
17
+
18
+ Once initialized, the settings are expected to be immutable (not enforced).
19
+ """
20
+
21
+ from datetime import timedelta
22
+
23
+ # Settings mirror the structure of the wxpath submodules.
24
+ SETTINGS = {
25
+ 'http': {
26
+ 'client': {
27
+ 'cache': {
28
+ 'enabled': False,
29
+ # 'db_path': 'cache.db',
30
+ 'expire_after': timedelta(days=7),
31
+ 'urls_expire_after': None,
32
+ 'allowed_methods': ("GET", "HEAD"),
33
+ 'allowed_codes': (200, 203, 301, 302, 307, 308),
34
+ 'ignored_parameters': ["utm_*", "fbclid"],
35
+ 'include_headers': False, # don’t vary cache keys on headers by default
36
+ 'cache_control': False, # honor Cache-Control/Expires if present
37
+ # # TODO: size hedges (soft, enforced by wxpath)
38
+ # 'max_entries': None, # e.g. 1_000_000
39
+ # 'max_response_size': None, # bytes, e.g. 2_000_000
40
+ # 'max_db_size': None, # bytes, e.g. 5 * 1024**3
41
+ 'backend': "sqlite",
42
+ 'sqlite': {
43
+ 'cache_name': "cache.db",
44
+ },
45
+ 'redis': {
46
+ # 'host': "localhost",
47
+ # 'port': 6379,
48
+ # 'db': 0,
49
+ 'address': 'redis://localhost:6379/0',
50
+ 'cache_name': "wxpath:",
51
+ }
52
+ },
53
+ 'crawler': {
54
+ 'concurrency': 16,
55
+ 'per_host': 8,
56
+ 'timeout': 15,
57
+ 'headers': {
58
+ "User-Agent": ("Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7)"
59
+ "AppleWebKit/537.36 (KHTML, like Gecko) "
60
+ "Chrome/142.0.0.0 Safari/537.36")},
61
+ 'proxies': None,
62
+ 'auto_throttle_target_concurrency': None,
63
+ 'auto_throttle_start_delay': 0.25,
64
+ 'auto_throttle_max_delay': 10.0,
65
+ 'respect_robots': True,
66
+ },
67
+ },
68
+ },
69
+ }
70
+
71
+
72
+ class AttrDict(dict):
73
+ """
74
+ A dictionary subclass that allows dot-notation access while
75
+ recursively converting nested dictionaries.
76
+ """
77
+ def __init__(self, *args, **kwargs):
78
+ super().__init__(*args, **kwargs)
79
+ # Point the instance __dict__ to itself to allow attribute access
80
+ self.__dict__ = self
81
+ # Recursively convert any dicts passed during initialization
82
+ for key, value in self.items():
83
+ self[key] = self._convert(value)
84
+
85
+ @classmethod
86
+ def _convert(cls, value):
87
+ """Recursively converts dicts to AttrDicts, leaving other types alone."""
88
+ if isinstance(value, dict):
89
+ return cls(value)
90
+ elif isinstance(value, list):
91
+ # Optional: converts dicts inside lists while keeping the list container
92
+ return [cls._convert(item) for item in value]
93
+ return value
94
+
95
+ def __setitem__(self, key, value):
96
+ # Ensure that new items added via dict-syntax are also converted
97
+ super().__setitem__(key, self._convert(value))
98
+
99
+ def __getattr__(self, key):
100
+ try:
101
+ return self[key]
102
+ except KeyError as exc:
103
+ raise AttributeError(f"AttrDict object has no attribute '{key}'") from exc
104
+
105
+
106
+ SETTINGS = AttrDict(SETTINGS)
107
+ CACHE_SETTINGS = SETTINGS.http.client.cache
108
+ CRAWLER_SETTINGS = SETTINGS.http.client.crawler
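
A quick sketch of the resulting dot-notation access (printed values are the defaults defined above; the `extra` key is purely illustrative and not read by wxpath):

```python
from wxpath.settings import CACHE_SETTINGS, SETTINGS

print(SETTINGS.http.client.crawler.concurrency)  # 16
print(CACHE_SETTINGS.backend)                    # sqlite

# Dot-notation writes behave like plain key assignment.
CACHE_SETTINGS.enabled = True

# New nested dicts added via dict syntax are converted to AttrDict recursively.
CACHE_SETTINGS["extra"] = {"note": "illustrative only"}
print(CACHE_SETTINGS.extra.note)
```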
wxpath-0.3.0.dist-info/METADATA → wxpath-0.4.1.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: wxpath
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: wxpath - a declarative web crawler and data extractor
5
5
  Author-email: Rodrigo Palacios <rodrigopala91@gmail.com>
6
6
  License-Expression: MIT
@@ -10,6 +10,13 @@ License-File: LICENSE
10
10
  Requires-Dist: lxml>=4.0
11
11
  Requires-Dist: elementpath<=5.0.3,>=5.0.0
12
12
  Requires-Dist: aiohttp<=3.12.15,>=3.8.0
13
+ Requires-Dist: tqdm>=4.0.0
14
+ Provides-Extra: cache
15
+ Requires-Dist: aiohttp-client-cache>=0.14.0; extra == "cache"
16
+ Provides-Extra: cache-sqlite
17
+ Requires-Dist: aiohttp-client-cache[sqlite]; extra == "cache-sqlite"
18
+ Provides-Extra: cache-redis
19
+ Requires-Dist: aiohttp-client-cache[redis]; extra == "cache-redis"
13
20
  Provides-Extra: test
14
21
  Requires-Dist: pytest>=7.0; extra == "test"
15
22
  Requires-Dist: pytest-asyncio>=0.23; extra == "test"
@@ -23,9 +30,42 @@ Dynamic: license-file
23
30
 
24
31
  **wxpath** is a declarative web crawler where traversal is expressed directly in XPath. Instead of writing imperative crawl loops, wxpath lets you describe what to follow and what to extract in a single expression. **wxpath** executes that expression concurrently, breadth-first-*ish*, and streams results as they are discovered.
25
32
 
26
- By introducing the `url(...)` operator and the `///` syntax, wxpath's engine is able to perform deep (or paginated) web crawling and extraction.
33
+ This expression fetches a page, extracts links, and streams them concurrently - no crawl loop required:
27
34
 
28
- NOTE: This project is in early development. Core concepts are stable, but the API and features may change. Please report issues - in particular, deadlocked crawls or unexpected behavior - and any features you'd like to see (no guarantee they'll be implemented).
35
+ ```python
36
+ import wxpath
37
+
38
+ expr = "url('https://example.com')//a/@href"
39
+
40
+ for link in wxpath.wxpath_async_blocking_iter(expr):
41
+ print(link)
42
+ ```
43
+
44
+
45
+ By introducing the `url(...)` operator and the `///` syntax, wxpath's engine is able to perform deep (or paginated) web crawling and extraction:
46
+
47
+ ```python
48
+ import wxpath
49
+
50
+ path_expr = """
51
+ url('https://quotes.toscrape.com')
52
+ ///url(//a/@href)
53
+ //a/@href
54
+ """
55
+
56
+ for item in wxpath.wxpath_async_blocking_iter(path_expr, max_depth=1):
57
+ print(item)
58
+ ```
59
+
60
+
61
+ ## Why wxpath?
62
+
63
+ Most web scrapers force you to write crawl control flow first, and extraction second.
64
+
65
+ **wxpath** inverts that:
66
+ - **You describe traversal declaratively**
67
+ - **Extraction is expressed inline**
68
+ - **The engine handles scheduling, concurrency, and deduplication**
29
69
 
30
70
 
31
71
  ## Contents
@@ -38,7 +78,10 @@ NOTE: This project is in early development. Core concepts are stable, but the AP
38
78
  - [Polite Crawling](#polite-crawling)
39
79
  - [Output types](#output-types)
40
80
  - [XPath 3.1](#xpath-31-by-default)
81
+ - [Progress Bar](#progress-bar)
41
82
  - [CLI](#cli)
83
+ - [Persistence and Caching](#persistence-and-caching)
84
+ - [Settings](#settings)
42
85
  - [Hooks (Experimental)](#hooks-experimental)
43
86
  - [Install](#install)
44
87
  - [More Examples](EXAMPLES.md)
@@ -46,7 +89,8 @@ NOTE: This project is in early development. Core concepts are stable, but the AP
46
89
  - [Advanced: Engine & Crawler Configuration](#advanced-engine--crawler-configuration)
47
90
  - [Project Philosophy](#project-philosophy)
48
91
  - [Warnings](#warnings)
49
- - [Commercial support / consulting](#commercial-support--consulting)
92
+ - [Commercial support/consulting](#commercial-supportconsulting)
93
+ - [Versioning](#versioning)
50
94
  - [License](#license)
51
95
 
52
96
 
@@ -54,32 +98,31 @@ NOTE: This project is in early development. Core concepts are stable, but the AP
54
98
 
55
99
  ```python
56
100
  import wxpath
101
+ from wxpath.settings import CRAWLER_SETTINGS
102
+
103
+ # Custom headers for politeness; necessary for some sites (e.g., Wikipedia)
104
+ CRAWLER_SETTINGS.headers = {'User-Agent': 'my-app/0.4.0 (contact: you@example.com)'}
57
105
 
58
106
  # Crawl, extract fields, build a knowledge graph
59
107
  path_expr = """
60
108
  url('https://en.wikipedia.org/wiki/Expression_language')
61
- ///url(//main//a/@href[starts-with(., '/wiki/') and not(contains(., ':'))])
62
- /map{
63
- 'title': (//span[contains(@class, "mw-page-title-main")]/text())[1] ! string(.),
64
- 'url': string(base-uri(.)),
65
- 'short_description': //div[contains(@class, 'shortdescription')]/text() ! string(.),
66
- 'forward_links': //div[@id="mw-content-text"]//a/@href ! string(.)
67
- }
109
+ ///url(
110
+ //main//a/@href[
111
+ starts-with(., '/wiki/') and not(contains(., ':'))
112
+ ]
113
+ )
114
+ /map{
115
+ 'title': (//span[contains(@class, "mw-page-title-main")]/text())[1] ! string(.),
116
+ 'url': string(base-uri(.)),
117
+ 'short_description': //div[contains(@class, 'shortdescription')]/text() ! string(.),
118
+ 'forward_links': //div[@id="mw-content-text"]//a/@href ! string(.)
119
+ }
68
120
  """
69
121
 
70
122
  for item in wxpath.wxpath_async_blocking_iter(path_expr, max_depth=1):
71
123
  print(item)
72
124
  ```
73
125
 
74
- Output:
75
-
76
- ```python
77
- map{'title': 'Computer language', 'url': 'https://en.wikipedia.org/wiki/Computer_language', 'short_description': 'Formal language for communicating with a computer', 'forward_links': ['/wiki/Formal_language', '/wiki/Communication', ...]}
78
- map{'title': 'Advanced Boolean Expression Language', 'url': 'https://en.wikipedia.org/wiki/Advanced_Boolean_Expression_Language', 'short_description': 'Hardware description language and software', 'forward_links': ['/wiki/File:ABEL_HDL_example_SN74162.png', '/wiki/Hardware_description_language', ...]}
79
- map{'title': 'Machine-readable medium and data', 'url': 'https://en.wikipedia.org/wiki/Machine_readable', 'short_description': 'Medium capable of storing data in a format readable by a machine', 'forward_links': ['/wiki/File:EAN-13-ISBN-13.svg', '/wiki/ISBN', ...]}
80
- ...
81
- ```
82
-
83
126
  **Note:** Some sites (including Wikipedia) may block requests without proper headers.
84
127
  See [Advanced: Engine & Crawler Configuration](#advanced-engine--crawler-configuration) to set a custom `User-Agent`.
85
128
 
@@ -195,6 +238,17 @@ path_expr = """
195
238
  # ...]
196
239
  ```
197
240
 
241
+ ## Progress Bar
242
+
243
+ **wxpath** provides a progress bar (via `tqdm`) to track crawl progress. This is especially useful for long-running crawls.
244
+
245
+ Enable it by passing `progress=True` to `engine.run(...)` or to any of the `wxpath_async*(...)` functions.
246
+
247
+ ```python
248
+ items = wxpath.wxpath_async_blocking("...", progress=True)
249
+ > 100%|██████████████████████████████████████████████████████████▎| 469/471 [00:05<00:00, 72.00it/s, depth=2, yielded=457]
250
+ ```
251
+
198
252
 
199
253
  ## CLI
200
254
 
@@ -237,9 +291,46 @@ Command line options:
237
291
  --concurrency-per-host <concurrency> Number of concurrent fetches per host
238
292
  --header "Key:Value" Add a custom header (e.g., 'Key:Value'). Can be used multiple times.
239
293
  --respect-robots [true|false] (Default: True) Respects robots.txt
294
+ --cache [true|false] (Default: False) Cache fetched responses in a local database
295
+ ```
296
+
297
+
298
+ ## Persistence and Caching
299
+
300
+ **wxpath** can optionally cache fetched responses in a local database. This is especially useful when you're crawling a large number of URLs and need to pause the crawl, change extraction expressions, or otherwise restart the crawl without re-fetching everything.
301
+
302
+ **wxpath** supports two backends: sqlite and redis. SQLite works well for small-scale crawls with a single worker (i.e., `engine.crawler.concurrency == 1`). Redis is better suited to large-scale crawls with multiple workers. You will see a warning if `min(engine.crawler.concurrency, engine.crawler.per_host) > 1` when using the sqlite backend.
303
+
304
+ To use, you must install the appropriate optional dependency:
305
+
306
+ ```bash
307
+ pip install wxpath[cache-sqlite]
308
+ pip install wxpath[cache-redis]
309
+ ```
310
+
311
+ Once the dependency is installed, you must enable the cache:
312
+
313
+ ```python
314
+ import wxpath
+ from wxpath.settings import SETTINGS
315
+
316
+ # To enable caching; sqlite is the default
317
+ SETTINGS.http.client.cache.enabled = True
318
+
319
+ # For redis backend
320
+ SETTINGS.http.client.cache.enabled = True
321
+ SETTINGS.http.client.cache.backend = "redis"
322
+ SETTINGS.http.client.cache.redis.address = "redis://localhost:6379/0"
323
+
324
+ # Run wxpath as usual
325
+ items = list(wxpath.wxpath_async_blocking_iter('...', max_depth=1))
240
326
  ```
241
327
 
242
328
 
329
+ ## Settings
330
+
331
+ See [settings.py](src/wxpath/settings.py) for the full list of available settings.
332
+
333
+
243
334
  ## Hooks (Experimental)
244
335
 
245
336
  **wxpath** supports a pluggable hook system that allows you to modify the crawling and extraction behavior. You can register hooks to preprocess URLs, post-process HTML, filter extracted values, and more. Hooks will be executed in the order they are registered. Hooks may impact performance.
@@ -290,6 +381,13 @@ Requires Python 3.10+.
290
381
  pip install wxpath
291
382
  ```
292
383
 
384
+ For persistence/caching, wxpath supports the following optional extras:
385
+
386
+ ```bash
387
+ pip install wxpath[cache-sqlite]
388
+ pip install wxpath[cache-redis]
389
+ ```
390
+
293
391
 
294
392
  ## More Examples
295
393
 
@@ -336,6 +434,17 @@ path_expr = "url('https://en.wikipedia.org/wiki/Expression_language')//url(//mai
336
434
  items = list(wxpath_async_blocking_iter(path_expr, max_depth=1, engine=engine))
337
435
  ```
338
436
 
437
+ ### Runtime API (`wxpath_async*`) options
438
+
439
+ - `max_depth`: int = 1
440
+ - `progress`: bool = False
441
+ - `engine`: WXPathEngine | None = None
442
+ - `yield_errors`: bool = False
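
A sketch combining these options with the lower-level async generator; `wxpath_async` is defined in `wxpath.core.runtime.engine` in this release, and the URL is a placeholder:

```python
import asyncio

from wxpath.core.runtime.engine import WXPathEngine, wxpath_async

async def main():
    engine = WXPathEngine()  # default Crawler
    async for item in wxpath_async(
        "url('https://quotes.toscrape.com')//a/@href",
        max_depth=1,
        progress=True,
        engine=engine,
        yield_errors=False,
    ):
        print(item)

asyncio.run(main())
```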
443
+
444
+
445
+ ### Settings
446
+ You can also use [settings.py](src/wxpath/settings.py) to configure caching, throttling, concurrency, and more.
447
+
339
448
 
340
449
  ## Project Philosophy
341
450
 
@@ -345,7 +454,7 @@ items = list(wxpath_async_blocking_iter(path_expr, max_depth=1, engine=engine))
345
454
  - Stay lightweight and composable
346
455
  - Asynchronous support for high-performance crawls
347
456
 
348
- ### Guarantees/Goals
457
+ ### Goals
349
458
 
350
459
  - URLs are deduplicated on a best-effort, per-crawl basis.
351
460
  - Crawls are intended to terminate once the frontier is exhausted or `max_depth` is reached.
@@ -356,7 +465,6 @@ items = list(wxpath_async_blocking_iter(path_expr, max_depth=1, engine=engine))
356
465
 
357
466
  The following features are not yet supported:
358
467
 
359
- - Persistent scheduling or crawl resumption
360
468
  - Automatic proxy rotation
361
469
  - Browser-based rendering (JavaScript execution)
362
470
  - Strict result ordering
@@ -364,13 +472,15 @@ The following features are not yet supported:
364
472
 
365
473
  ## WARNINGS!!!
366
474
 
475
+ This project is in early development. Core concepts are stable, but the API and features may change. Please report issues - in particular, deadlocked crawls or unexpected behavior - and any features you'd like to see (no guarantee they'll be implemented).
476
+
367
477
  - Be respectful when crawling websites. A scrapy-inspired throttler is enabled by default.
368
478
  - Deep crawls (`///`) require user discipline to avoid unbounded expansion (traversal explosion).
369
479
  - Deadlocks and hangs are possible in certain situations (e.g., all tasks waiting on blocked requests). Please report issues if you encounter such behavior.
370
480
  - Consider using timeouts, `max_depth`, and XPath predicates and filters to limit crawl scope.
371
481
 
372
482
 
373
- ## Commercial support / consulting
483
+ ## Commercial support/consulting
374
484
 
375
485
  If you want help building or operating crawlers/data feeds with wxpath (extraction, scheduling, monitoring, breakage fixes) or other web-scraping needs, please contact me at: rodrigopala91@gmail.com.
376
486
 
@@ -379,6 +489,13 @@ If you want help building or operating crawlers/data feeds with wxpath (extracti
379
489
 
380
490
  If you like wxpath and want to support its development, please consider [donating](https://www.paypal.com/donate/?business=WDNDK6J6PJEXY&no_recurring=0&item_name=Thanks+for+using+wxpath%21+Donations+fund+development%2C+docs%2C+and+bug+fixes.+If+wxpath+saved+you+time%2C+a+small+contribution+helps%21&currency_code=USD).
381
491
 
492
+
493
+ ## Versioning
494
+
495
+ **wxpath** follows [semver](https://semver.org): `<MAJOR>.<MINOR>.<PATCH>`.
496
+
497
+ However, pre-1.0.0 releases follow `0.<MAJOR>.<MINOR|PATCH>`.
498
+
382
499
  ## License
383
500
 
384
501
  MIT
wxpath-0.3.0.dist-info/RECORD → wxpath-0.4.1.dist-info/RECORD CHANGED
@@ -1,22 +1,24 @@
1
1
  wxpath/__init__.py,sha256=w1hFE_VSIYq_TSFLoPfp6MJbG1sA6BeChX6PYsXIK4o,265
2
- wxpath/cli.py,sha256=GJ4vAax5DlpxczZ_eLetlfRwa177VFKo2LHv09X-0eo,2799
2
+ wxpath/cli.py,sha256=e0-mHkpuC1B_WyJw7wH43UBmtuF8oL8phQ4GEzUX0Ns,4332
3
3
  wxpath/patches.py,sha256=u0dOL-K-gvdO9SJvzGrqR9Zou6XduWjl6R7mzIcZtJg,2130
4
+ wxpath/settings.py,sha256=a4TlCAOvmO03oOXiiYQzIDBMZU0XpTqntwnjVsumnas,3809
4
5
  wxpath/core/__init__.py,sha256=U9_In2iRaZrpiIVavIli1M59gCB6Kn1en-1Fza-qIiI,257
5
6
  wxpath/core/dom.py,sha256=X0L3n8jRfO5evEypDaJTD-NQ3cLXWvnEUVERAHo3vV0,701
6
7
  wxpath/core/models.py,sha256=3KYt-UwfLY2FlSRUHeA_getnYaNUMPW9wRrl2CRbPso,1611
7
8
  wxpath/core/ops.py,sha256=PTjX6c4QvCqGaByYYqaK4dte5iWO3lZzgqGrMXp6f6g,9727
8
9
  wxpath/core/parser.py,sha256=WfjQNixBz7nWtX2O0t19MOhUJmzGMg8Qol40P6oC8zc,18827
9
10
  wxpath/core/runtime/__init__.py,sha256=_iCgkIWxXvxzQcenHOsjYGsk74HboTIYWOtgM8GtCyc,86
10
- wxpath/core/runtime/engine.py,sha256=069ITKDXcHss__AwaYf0VSfliCNB49yZbnW2v3xEZO0,14512
11
+ wxpath/core/runtime/engine.py,sha256=UQ8wSr49TJibRRtXzIgXVSBvuB1VttYicKEwV4xcG6Q,17345
11
12
  wxpath/core/runtime/helpers.py,sha256=M1i4BryCktAxeboa4LOXMTNiKVCJLDBD-KpWCQXadpw,1434
12
13
  wxpath/hooks/__init__.py,sha256=9JG63e4z_8CZLWugFcY786hebaEEPZ5FmZhyDHat-98,294
13
14
  wxpath/hooks/builtin.py,sha256=GJ4w1C9djWNzAmAA3U0qI9OoCOeC5R8tEGtWXJVHSYs,4125
14
15
  wxpath/hooks/registry.py,sha256=-D11f_mMboeVAH8qsTkbKTQ0aGNaQ7F6zbXDsOIYxN0,4513
15
16
  wxpath/http/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
- wxpath/http/stats.py,sha256=FrXbFrnms113Gapf-Z5WiD5qaNiJ0XuOqjSQhwXfuEo,3172
17
+ wxpath/http/stats.py,sha256=aqZWuybc5RCv-AmKdNbEX4uw1YvZtFoE6591UfukZns,3319
17
18
  wxpath/http/client/__init__.py,sha256=QpdmqzcznUeuFvT3IIo-LmBUUHEa2BDq9sHGAHJnDLI,202
18
- wxpath/http/client/crawler.py,sha256=YlE469UqMck0wqRd6J9kNxm5G9BCbE_x5O6MROwmcaE,8742
19
- wxpath/http/client/request.py,sha256=LF_OIXetfouyE5GwEqp0cya0oMAZouKRPNFRFGscQS8,1050
19
+ wxpath/http/client/cache.py,sha256=cHS4XlfOStoHTG83ypNITk3Oc0lqGoTRqV0_UWBWQFY,1811
20
+ wxpath/http/client/crawler.py,sha256=UiKtc5K2KBc0bBw2fTdRHLNTa2OFoE1tZsDjR7J4Xeo,12126
21
+ wxpath/http/client/request.py,sha256=cpqo_ASG_wKz0q6m33lsE0kIIthfANt8fx7ptxlyehY,1057
20
22
  wxpath/http/client/response.py,sha256=z9LQPnDN-NZRnQpIKozaWCqgpRejc6nixCr_XaPyqUQ,334
21
23
  wxpath/http/policy/backoff.py,sha256=NwdUR6bRe1RtUGSJOktj-p8IyC1l9xu_-Aa_Gj_u5sw,321
22
24
  wxpath/http/policy/retry.py,sha256=WSrQfCy1F7IcXFpVGDi4HTphNhFq12p4DaMO0_4dgrw,982
@@ -25,9 +27,9 @@ wxpath/http/policy/throttler.py,sha256=wydMFV-0mxpHSI5iYkLfE78oY4z_fF8jW9MqCeb8G
25
27
  wxpath/util/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
28
  wxpath/util/logging.py,sha256=oQi8sp7yKWgXkkcJ4U4WHp7TyBCQiK4VhSXOSb8pGw0,2965
27
29
  wxpath/util/serialize.py,sha256=uUs4C9VErpFd97smBM2bRWo2nW25kCgKdsMrVtVxhg8,575
28
- wxpath-0.3.0.dist-info/licenses/LICENSE,sha256=AVBZLhdWmqxm-f-dy5prVB1E-solHWoP2EXEIV_o-00,1076
29
- wxpath-0.3.0.dist-info/METADATA,sha256=9Y0V7Up2efXCRtKZ7Cceawz9LHvNcfH0olmEGK2mVk0,16326
30
- wxpath-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
31
- wxpath-0.3.0.dist-info/entry_points.txt,sha256=FwoIOnUTl-DjPqVw-eb9EHHiiXCyRZy_mEQKFu2eb5Y,43
32
- wxpath-0.3.0.dist-info/top_level.txt,sha256=uFCcveG78mnefxRGvYsR2OexDlKR_Z1UD4vZijUcex8,7
33
- wxpath-0.3.0.dist-info/RECORD,,
30
+ wxpath-0.4.1.dist-info/licenses/LICENSE,sha256=AVBZLhdWmqxm-f-dy5prVB1E-solHWoP2EXEIV_o-00,1076
31
+ wxpath-0.4.1.dist-info/METADATA,sha256=LxmOTsWpspYFedvP02fDL1Wy5t1ygZKuIg2cHVQU_aY,19445
32
+ wxpath-0.4.1.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
33
+ wxpath-0.4.1.dist-info/entry_points.txt,sha256=FwoIOnUTl-DjPqVw-eb9EHHiiXCyRZy_mEQKFu2eb5Y,43
34
+ wxpath-0.4.1.dist-info/top_level.txt,sha256=uFCcveG78mnefxRGvYsR2OexDlKR_Z1UD4vZijUcex8,7
35
+ wxpath-0.4.1.dist-info/RECORD,,
wxpath-0.3.0.dist-info/WHEEL → wxpath-0.4.1.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.9.0)
2
+ Generator: setuptools (80.10.1)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5