wxpath 0.4.1-py3-none-any.whl → 0.5.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wxpath/__init__.py +2 -0
- wxpath/cli.py +6 -0
- wxpath/core/exceptions.py +53 -0
- wxpath/core/models.py +1 -0
- wxpath/core/ops.py +100 -19
- wxpath/core/parser.py +94 -24
- wxpath/core/runtime/engine.py +74 -10
- wxpath/core/runtime/helpers.py +6 -3
- wxpath/http/client/__init__.py +1 -1
- wxpath/http/client/crawler.py +17 -5
- wxpath/http/client/response.py +7 -1
- wxpath/http/policy/retry.py +2 -2
- wxpath/integrations/__init__.py +0 -0
- wxpath/integrations/langchain/__init__.py +0 -0
- wxpath/integrations/langchain/examples/basic_rag.py +85 -0
- wxpath/integrations/langchain/examples/rolling_window_rag.py +218 -0
- wxpath/integrations/langchain/loader.py +60 -0
- wxpath/patches.py +215 -5
- wxpath/settings.py +3 -1
- wxpath/tui.py +1225 -0
- wxpath/tui_settings.py +151 -0
- wxpath/util/cleaners.py +31 -0
- wxpath/util/common_paths.py +22 -0
- wxpath/util/logging.py +3 -7
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/METADATA +73 -9
- wxpath-0.5.1.dist-info/RECORD +45 -0
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/WHEEL +1 -1
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/entry_points.txt +1 -0
- wxpath-0.4.1.dist-info/RECORD +0 -35
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/licenses/LICENSE +0 -0
- {wxpath-0.4.1.dist-info → wxpath-0.5.1.dist-info}/top_level.txt +0 -0
wxpath/core/runtime/engine.py
CHANGED
@@ -2,13 +2,14 @@ import asyncio
 import contextlib
 import inspect
 from collections import deque
-from typing import Any, AsyncGenerator, Iterator
+from typing import Any, AsyncGenerator, Iterator, Iterable
 
 from lxml.html import HtmlElement
 from tqdm import tqdm
 
 from wxpath import patches  # noqa: F401
 from wxpath.core import parser
+from wxpath.core.exceptions import XPathEvaluationError
 from wxpath.core.models import (
     CrawlIntent,
     CrawlTask,
@@ -18,7 +19,7 @@ from wxpath.core.models import (
     ProcessIntent,
 )
 from wxpath.core.ops import get_operator
-from wxpath.core.parser import Binary, Segment, Segments
+from wxpath.core.parser import Binary, Depth, Segment, Segments
 from wxpath.core.runtime.helpers import parse_html
 from wxpath.hooks.registry import FetchContext, get_hooks
 from wxpath.http.client.crawler import Crawler
@@ -145,6 +146,7 @@ class WXPathEngine(HookedEngineBase):
         respect_robots: bool = True,
         allowed_response_codes: set[int] = None,
         allow_redirects: bool = True,
+        yield_errors: bool = False,
     ):
         # NOTE: Will grow unbounded in large crawls. Consider a LRU cache, or bloom filter.
         self.seen_urls: set[str] = set()
@@ -157,19 +159,49 @@ class WXPathEngine(HookedEngineBase):
         self.allow_redirects = allow_redirects
         if allow_redirects:
             self.allowed_response_codes |= {301, 302, 303, 307, 308}
+        self.yield_errors = yield_errors
+
+    def _get_max_depth(self, bin_or_segs: Binary | Segments, max_depth: int) -> int:
+        """Get the maximum crawl depth for a given expression. Will find a Depth
+        argument at the beginning of the expression and return its value. Otherwise, returns the
+        max_depth value provided.
+        TODO: There has to be a better way to do this.
+        """
+        if isinstance(bin_or_segs, Binary):
+            if hasattr(bin_or_segs.left, 'func') == 'url':
+                depth_arg = [arg for arg in bin_or_segs.left.args if isinstance(arg, Depth)][0]
+                return int(depth_arg.value)
+            elif hasattr(bin_or_segs.right, 'func') == 'url':
+                depth_arg = [arg for arg in bin_or_segs.right.args if isinstance(arg, Depth)][0]
+                return int(depth_arg.value)
+        elif isinstance(bin_or_segs, Segments):
+            depth_arg = [arg for arg in bin_or_segs[0].args if isinstance(arg, Depth)]
+            if depth_arg:
+                return int(depth_arg[0].value)
+        return max_depth
 
     async def run(
         self,
         expression: str,
         max_depth: int,
         progress: bool = False,
-        yield_errors: bool = False,
     ) -> AsyncGenerator[Any, None]:
         """Execute a wxpath expression concurrently and yield results.
 
         Builds and drives a BFS-like crawl pipeline that honors robots rules,
         throttling, and hook callbacks while walking the web graph.
 
+        NOTES ON max_depth:
+        If depth is provided in the expression, it will be used to limit the depth of the
+        crawl. If depth is provided in the expression and max_depth is provided as an argument
+        to `run`, the inline depth in the expression will take precedence.
+
+        Currently, max_depth control flow logic is detected and executed in the
+        engine. In the future, the operation handlers (ops.py) could be responsible for
+        detecting max_depth, and sending a terminal intent to the engine. It's also possible
+        that the depth terminals are relative to the current depth (i.e. `url(//xpath, depth=2)`
+        implies crawling only the next 2 levels). This is not yet supported.
+
         Args:
             expression: WXPath expression string to evaluate.
             max_depth: Maximum crawl depth to follow for url hops.
@@ -179,7 +211,9 @@ class WXPathEngine(HookedEngineBase):
             Extracted values produced by the expression (HTML elements or
             wxpath-specific value types).
         """
-
+        bin_or_segs = parser.parse(expression)
+
+        max_depth = self._get_max_depth(bin_or_segs, max_depth)
 
         queue: asyncio.Queue[CrawlTask] = asyncio.Queue()
         inflight: dict[str, CrawlTask] = {}
@@ -223,7 +257,7 @@ class WXPathEngine(HookedEngineBase):
         seed_task = CrawlTask(
             elem=None,
             url=None,
-            segments=
+            segments=bin_or_segs,
             depth=-1,
             backlink=None,
         )
@@ -235,7 +269,10 @@ class WXPathEngine(HookedEngineBase):
             queue=queue,
             pbar=pbar,
         ):
-
+            if isinstance(output, dict) and output.get("__type__") == "error":
+                yield output
+            else:
+                yield await self.post_extract_hooks(output)
 
             # While looping asynchronous generators, you MUST make sure
             # to check terminal conditions before re-iteration.
@@ -250,7 +287,7 @@ class WXPathEngine(HookedEngineBase):
             if task is None:
                 log.warning(f"Got unexpected response from {resp.request.url}")
 
-                if yield_errors:
+                if self.yield_errors:
                     yield {
                         "__type__": "error",
                         "url": resp.request.url,
@@ -266,7 +303,7 @@ class WXPathEngine(HookedEngineBase):
            if resp.error:
                log.warning(f"Got error from {resp.request.url}: {resp.error}")
 
-               if yield_errors:
+               if self.yield_errors:
                    yield {
                        "__type__": "error",
                        "url": resp.request.url,
@@ -283,7 +320,7 @@ class WXPathEngine(HookedEngineBase):
            if resp.status not in self.allowed_response_codes or not resp.body:
                log.warning(f"Got non-200 response from {resp.request.url}")
 
-               if yield_errors:
+               if self.yield_errors:
                    yield {
                        "__type__": "error",
                        "url": resp.request.url,
@@ -307,6 +344,7 @@ class WXPathEngine(HookedEngineBase):
                base_url=task.url,
                backlink=task.backlink,
                depth=task.depth,
+               response=resp
            )
 
            elem = await self.post_parse_hooks(elem, task)
@@ -380,7 +418,11 @@ class WXPathEngine(HookedEngineBase):
 
        binary_or_segment = bin_or_segs if isinstance(bin_or_segs, Binary) else bin_or_segs[0]
        operator = get_operator(binary_or_segment)
-
+
+       if self.yield_errors:
+           intents = _safe_iterator(operator(elem, bin_or_segs, depth))
+       else:
+           intents = operator(elem, bin_or_segs, depth)
 
        if not intents:
            return
@@ -416,6 +458,28 @@ class WXPathEngine(HookedEngineBase):
                    mini_queue.append((elem, next_segments))
 
 
+def _safe_iterator(iterable: Iterable[Any]) -> Iterator[Any]:
+    """Wrap an iterable in a try/except block and return an iterator that yields the result or the error."""
+    it = iter(iterable)
+    while True:
+        try:
+            yield next(it)
+        except StopIteration:
+            break
+        except XPathEvaluationError as e:
+            yield {
+                "__type__": "error",
+                "reason": "xpath_evaluation_error",
+                "exception": str(e),
+            }
+        except Exception as e:
+            yield {
+                "__type__": "error",
+                "reason": "iterator_error",
+                "exception": str(e),
+            }
+
+
 def wxpath_async(path_expr: str,
                  max_depth: int,
                  progress: bool = False,
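Taken together, these engine changes move yield_errors onto the constructor, surface failures as {"__type__": "error", ...} dicts, and let a depth declared inside the expression override the max_depth argument to run(). A minimal sketch of how a caller might use this, assuming an inline url(..., depth=N) form and that the engine's remaining constructor arguments can be left at their defaults:

import asyncio

from wxpath.core.runtime.engine import WXPathEngine

async def main():
    # yield_errors now lives on the engine itself rather than on run()
    # (assumption: all other constructor arguments default sensibly)
    engine = WXPathEngine(yield_errors=True)

    # hypothetical expression: the inline depth=1, if present, wins over max_depth=5
    expr = "url('https://example.com', depth=1)/map{'title': //title/text()[1] ! string(.)}"

    async for item in engine.run(expr, max_depth=5):
        if isinstance(item, dict) and item.get("__type__") == "error":
            print("error:", item.get("reason"), item.get("url"))
        else:
            print(item)

asyncio.run(main())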
wxpath/core/runtime/helpers.py
CHANGED
@@ -6,7 +6,7 @@ from wxpath.util.logging import get_logger
 log = get_logger(__name__)
 
 
-def parse_html(content, base_url=None, **elem_kv_pairs) -> html.HtmlElement:
+def parse_html(content, base_url=None, response=None, **elem_kv_pairs) -> html.HtmlElement:
     elem = etree.HTML(content, parser=patches.html_parser_with_xpath3, base_url=base_url)
     if base_url:
         elem.getroottree().docinfo.URL = base_url  # make base-uri() work
@@ -14,12 +14,15 @@ def parse_html(content, base_url=None, **elem_kv_pairs) -> html.HtmlElement:
         elem.set("{http://www.w3.org/XML/1998/namespace}base", base_url)
         elem.base_url = base_url  # sets both attribute and doc-level URL
 
-
+    if response:
+        elem.response = response
+        elem.getroottree().getroot().response = response
+    # NOTE: some pages may have multiple root elements, i.e.
     # len(elem.itersiblings()) > 0 AND elem.getparent() is None.
     # This breaks elementpath. If elem has siblings, recreate the
     # root element and only the root element.
     if len(list(elem.itersiblings())) > 0:
-        elem = detach_html_root(elem, base_url)
+        elem = detach_html_root(elem, base_url)
 
     for k, v in elem_kv_pairs.items():
         elem.set(k, str(v))
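Because parse_html now threads the HTTP response onto the parsed root, downstream hooks and operators can reach status codes or headers straight from an element. A small illustrative sketch, using a stand-in object in place of a real Response:

from wxpath.core.runtime.helpers import parse_html

class FakeResponse:  # stand-in for the real wxpath Response, just for illustration
    status = 200

elem = parse_html(
    "<html><body><p>hi</p></body></html>",
    base_url="https://example.com",
    response=FakeResponse(),
)
print(elem.response.status)                   # 200
print(elem.getroottree().getroot().response)  # same object, reachable from the tree root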
wxpath/http/client/__init__.py
CHANGED
wxpath/http/client/crawler.py
CHANGED
@@ -71,6 +71,7 @@ class Crawler:
         *,
         headers: dict | None = None,
         proxies: dict | None = None,
+        verify_ssl: bool | None = None,
         retry_policy: RetryPolicy | None = None,
         throttler: AbstractThrottler | None = None,
         auto_throttle_target_concurrency: float = None,
@@ -82,6 +83,9 @@ class Crawler:
 
         self.concurrency = concurrency if concurrency is not None else cfg.concurrency
         self.per_host = per_host if per_host is not None else cfg.per_host
+        self._verify_ssl = verify_ssl if verify_ssl is not None else getattr(
+            cfg, "verify_ssl", True
+        )
 
         timeout = timeout if timeout is not None else cfg.timeout
         self._timeout = aiohttp.ClientTimeout(total=timeout)
@@ -141,7 +145,11 @@
         """Construct an `aiohttp.ClientSession` with tracing and pooling."""
         trace_config = build_trace_config(self._stats)
         # Need to build the connector as late as possible as it requires the loop
-        connector = aiohttp.TCPConnector(
+        connector = aiohttp.TCPConnector(
+            limit=self.concurrency * 2,
+            ttl_dns_cache=300,
+            ssl=self._verify_ssl,
+        )
         return get_async_session(
             headers=self._headers,
             timeout=self._timeout,
@@ -274,22 +282,26 @@
            else:
                log.info("[CACHE MISS]", extra={"req.url": req.url, "resp.url": resp.url})
 
+                _start = time.monotonic()
                body = await resp.read()
 
-
+                end = time.monotonic()
+                latency = end - _start
                self.throttler.record_latency(host, latency)
 
                if self.retry_policy.should_retry(req, response=resp):
                    await self._retry(req)
                    return None
 
-                return Response(req, resp.status, body, dict(resp.headers)
+                return Response(req, resp.status, body, dict(resp.headers),
+                                request_start=_start, response_end=end)
        except asyncio.CancelledError:
            # Normal during shutdown / timeout propagation
            log.debug("cancelled error", extra={"url": req.url})
            raise
        except Exception as exc:
-
+            end = time.monotonic()
+            latency = end - start
            self.throttler.record_latency(host, latency)
 
            if self.retry_policy.should_retry(req, exception=exc):
@@ -297,7 +309,7 @@
                return None
 
            log.error("request failed", extra={"url": req.url}, exc_info=exc)
-            return Response(req, 0, b"", error=exc)
+            return Response(req, 0, b"", error=exc, request_start=start, response_end=end)
 
    async def _retry(self, req: Request) -> None:
        """Reschedule a request according to the retry policy."""
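The new verify_ssl flag flows into aiohttp.TCPConnector(ssl=...), falling back to a verify_ssl attribute on the config (defaulting to True). A minimal sketch of disabling certificate verification, assuming the Crawler's other constructor arguments can stay at their defaults:

from wxpath.http.client.crawler import Crawler

# Assumption: every other Crawler argument is optional and defaults from its config.
crawler = Crawler(verify_ssl=False)  # ends up as aiohttp.TCPConnector(ssl=False)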
wxpath/http/client/response.py
CHANGED
@@ -1,4 +1,3 @@
-# wxpath/http/response.py
 from dataclasses import dataclass, field
 from typing import Optional
 
@@ -12,3 +11,10 @@
     body: bytes
     headers: dict[str, str] | None = None
     error: Optional[Exception] = field(default=None, kw_only=True)
+
+    request_start: float | None = None
+    response_end: float | None = None
+
+    @property
+    def latency(self) -> float:
+        return self.response_end - self.request_start
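Response now carries monotonic timestamps captured around the body read, and latency is just their difference (reading it raises a TypeError while either timestamp is still None). A quick illustration with made-up values, following the positional order the crawler uses (request, status, body, headers):

from wxpath.http.client.response import Response

resp = Response(None, 200, b"<html/>", {"content-type": "text/html"},
                request_start=100.00, response_end=100.25)
print(resp.latency)  # 0.25 (seconds)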
wxpath/http/policy/retry.py
CHANGED
@@ -19,13 +19,13 @@ class RetryPolicy:
 
        if request.max_retries is not None and request.retries >= request.max_retries:
            return False
-
+
        if request.retries >= self.max_retries:
            return False
 
        if response is not None and response.status in self.retry_statuses:
            return True
-
+
        if exception is not None:
            return True
 
wxpath/integrations/__init__.py
File without changes
wxpath/integrations/langchain/__init__.py
File without changes
wxpath/integrations/langchain/examples/basic_rag.py
ADDED
@@ -0,0 +1,85 @@
+
+# pip install langchain langchain-ollama langchain-chroma chromadb
+from langchain_chroma import Chroma
+from langchain_core.output_parsers import StrOutputParser
+from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnablePassthrough
+from langchain_ollama import ChatOllama, OllamaEmbeddings
+from langchain_text_splitters import RecursiveCharacterTextSplitter
+
+from wxpath.integrations.langchain.loader import WXPathLoader
+
+# ------------------------------------------------------------------
+# STEP 1: Load & Embed (Same as before)
+# ------------------------------------------------------------------
+print("🕷️ Crawling with wxpath...")
+loader = WXPathLoader(
+    expression="""
+        url('https://docs.python.org/3/library/argparse.html',
+            follow=//a/@href[contains(., 'argparse')])
+        /map{
+            'text': string-join(//div[@role='main']//text()),
+            'source': string(base-uri(.))
+        }
+    """,
+    max_depth=1
+)
+docs = loader.load()
+
+print("🔪 Splitting and Embedding...")
+text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
+splits = text_splitter.split_documents(docs)
+
+vectorstore = Chroma.from_documents(
+    documents=splits,
+    # Must use model that support embeddings (`ollama pull nomic-embed-text`)
+    embedding=OllamaEmbeddings(model="nomic-embed-text"),
+    collection_name="wxpath"
+)
+retriever = vectorstore.as_retriever()
+
+# ------------------------------------------------------------------
+# STEP 2: Define Components
+# ------------------------------------------------------------------
+
+# A helper to join retrieved documents into a single string
+def format_docs(docs):
+    return "\n\n".join(doc.page_content for doc in docs)
+
+# The Prompt (Standard RAG template)
+template = """You are an assistant for question-answering tasks.
+Use the following pieces of retrieved context to answer the question.
+If you don't know the answer, just say that you don't know.
+Use three sentences maximum and keep the answer concise.
+
+Context: {context}
+
+Question: {question}
+
+Answer:"""
+prompt = ChatPromptTemplate.from_template(template)
+
+# The Model
+llm = ChatOllama(model="gemma3")
+
+# ------------------------------------------------------------------
+# STEP 3: Build the Chain with LCEL
+# ------------------------------------------------------------------
+# The pipe operator (|) passes output from one component to the next.
+rag_chain = (
+    {"context": retriever | format_docs, "question": RunnablePassthrough()}
+    | prompt
+    | llm
+    | StrOutputParser()
+)
+
+# ------------------------------------------------------------------
+# STEP 4: Invoke
+# ------------------------------------------------------------------
+query = "How do I add arguments in argparse?"
+print(f"\n❓ Question: {query}")
+
+# The chain returns a string directly because of StrOutputParser
+response = rag_chain.invoke(query)
+
+print(f"\n🤖 Ollama Answer:\n{response}")
wxpath/integrations/langchain/examples/rolling_window_rag.py
ADDED
@@ -0,0 +1,218 @@
+"""
+Rolling Window RAG Example
+
+This examples demonstrates how to use a rolling window of news articles as context.
+
+More importantly, it demonstrates complex string cleanup, metadata extraction, and other
+real-world challenges of building a RAG application.
+
+This script assumes you have gemma3 installed and your machine is capable of running a 32k
+token model.
+"""
+import asyncio
+import datetime
+import threading
+from collections import deque
+from operator import itemgetter
+from typing import List
+
+from langchain_core.callbacks import CallbackManagerForRetrieverRun
+from langchain_core.documents import Document
+from langchain_core.retrievers import BaseRetriever
+
+from wxpath import wxpath_async
+
+# If you have the cache dependency installed, you can enable it:
+# wxpath.settings.CACHE_SETTINGS.enabled = True
+
+# ------------------------------------------------------------------
+# 1. The Rolling Buffer (The "Context Window")
+# ------------------------------------------------------------------
+class RollingNewsBuffer(BaseRetriever):
+    capacity: int = 100
+
+    # Define as PrivateAttrs so Pydantic ignores them for validation
+    _buffer: deque
+    _seen_urls: set
+    _lock: threading.Lock
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+        self._buffer = deque(maxlen=self.capacity)
+        self._seen_urls = set()
+        self._lock = threading.Lock()
+
+    def add_document(self, doc: Document):
+        """Thread-safe add with url cleanup on eviction."""
+        with self._lock:
+            # Check if we are about to evict an item (buffer full)
+            if len(self._buffer) == self._buffer.maxlen:
+                # We must manually find what is being removed to clean up seen_urls
+                # Note: deque[0] is the one about to be popped when appending
+                oldest_doc = self._buffer[0]
+                oldest_url = oldest_doc.metadata.get("url")
+                if oldest_url in self._seen_urls:
+                    self._seen_urls.remove(oldest_url)
+
+            self._buffer.append(doc)
+            self._seen_urls.add(doc.metadata["url"])
+
+    def is_seen(self, url: str) -> bool:
+        """Thread-safe check."""
+        with self._lock:
+            return url in self._seen_urls
+
+    def _get_relevant_documents(
+        self, query: str, *, run_manager: CallbackManagerForRetrieverRun = None
+    ) -> List[Document]:
+        """
+        Thread-safe read.
+        """
+        with self._lock:
+            # Create a snapshot list while locked to prevent iteration crash
+            snapshot = list(self._buffer)
+
+        print(f"📰 Context Retrieval: Returning {len(snapshot)} docs for query: {query}")
+        return snapshot
+
+# ------------------------------------------------------------------
+# 2. The Background Crawler (The Producer)
+# ------------------------------------------------------------------
+async def continuous_crawl(buffer: RollingNewsBuffer):
+    """
+    Constantly crawls Newsweek and feeds the buffer.
+    """
+    print("🕷️ Crawler started...")
+
+    # Example Expression: deep crawl of newsweek
+    expression = """
+        url('https://www.newsweek.com/')
+        ///url(
+            //a/@href[starts-with(., '/') or starts-with(., './') or contains(., 'newsweek.com')]
+        )
+        /map{
+            'title': //h1/text()[1] ! string(.),
+            'text': string-join(//article//p/text()),
+            'url': string(base-uri(.)),
+            'pubDate': //meta[@name='article:modified_time']/@content[1] ! string(.)
+        }
+    """
+
+    # Infinite loop to restart crawl if it finishes, or run continuously
+    while True:
+        try:
+            # We use the async generator to stream results as they are found
+            async for item in wxpath_async(expression, max_depth=1):
+                item = item._map
+                url = item.get('url')
+                # Check seen status safely before doing processing work
+                if not url or buffer.is_seen(url):
+                    continue
+
+                # Convert wxpath dict to LangChain Document
+                text_content = item.get('text', '')
+                # Basic cleaning (optional)
+                if isinstance(text_content, list):
+                    text_content = " ".join(text_content)
+
+                if not text_content:
+                    continue
+
+                title = item.get('title')
+                if not title:
+                    title = ''
+
+                if isinstance(title, list):
+                    title = " ".join(title)
+
+                pub_date = item.get('pubDate')
+                if not pub_date:
+                    pub_date = str(datetime.date.today())
+
+                text_content = ("Title: " + title +
+                                "\nPublished: " + pub_date + "\n" +
+                                text_content)
+
+                doc = Document(
+                    page_content=text_content,
+                    metadata={"title": item.get('title'),
+                              "url": item.get('url'),
+                              "pubDate": item.get('pubDate')}
+                )
+
+                # PUSH TO BUFFER (Oldest gets evicted automatically if full)
+                buffer.add_document(doc)
+                print(f"📰 Added: {title[:30]}... (Buffer size: {len(buffer._buffer)})")
+                print(f"\tArticle text: {doc.page_content[:100]}...")
+                print()
+            # Rate limit slightly to be polite
+            await asyncio.sleep(60)
+
+        except Exception as e:
+            print(f"⚠️ Crawler error: {e}. Restarting in 10s...")
+            await asyncio.sleep(10)
+
+
+def debug_print_prompt(prompt_value):
+    print("\n" + "="*40)
+    print("📢 FULL PROMPT SENT TO LLM:")
+    print("="*40)
+    print(prompt_value.to_string())  # This prints the exact text
+    print("="*40 + "\n")
+    return prompt_value
+
+if __name__ == "__main__":
+    # Initialize the Rolling Buffer
+    retriever = RollingNewsBuffer(capacity=100)
+
+    # Start Crawler in a background thread so it doesn't block the Chat
+    def start_background_loop(loop):
+        asyncio.set_event_loop(loop)
+        loop.run_until_complete(continuous_crawl(retriever))
+
+    crawler_loop = asyncio.new_event_loop()
+    t = threading.Thread(target=start_background_loop, args=(crawler_loop,), daemon=True)
+    t.start()
+
+    import time
+
+    from langchain_core.prompts import ChatPromptTemplate
+    from langchain_ollama import ChatOllama
+
+    # Setup standard RAG chain
+    llm = ChatOllama(model="gemma3", num_ctx=32768)
+    prompt = ChatPromptTemplate.from_template(
+        "Answer based ONLY on the following news:\n\n{context}\n\nQuestion: {question}\n\n"
+        "DO NOT include generic Newsweek-administrative articles like 'Corrections', "
+        "'Company Info', 'Subscribe', Opinions', 'Press Releases', 'Editorials', etc. in your "
+        "analysis or answers. Answer the question using the non-Newsweek-related news provided. "
+        "You will be penalized for including old or undated news in your answer. If asked for "
+        "overviews or summaries, split news items into paragraphs and provide a summary of each "
+        "news item."
+    )
+
+    def format_docs(docs):
+        slice_of_news = "\n\n".join([d.page_content[:1000] for d in docs])  # Truncate for demo
+        print(f"📰 Latest news char length: {len(slice_of_news)}")
+        return slice_of_news
+
+    chain = (
+        {
+            # FIX: Use itemgetter so retriever gets a string, not a dict
+            "context": itemgetter("question") | retriever | format_docs,
+            "question": itemgetter("question")
+        }
+        | prompt
+        | debug_print_prompt
+        | llm
+    )
+
+    # Simulate querying constantly while buffer fills in background
+    print("⏳ Waiting for crawler to gather some data...")
+    time.sleep(10)
+
+    while True:
+        query = input("Press Enter to ask about current news (or Ctrl+C to quit)...")
+        print(f"\nQuery: {query}\nThinking... 🤔")
+        response = chain.invoke({"question": query})
+        print(response.content)