zrb 1.13.1__py3-none-any.whl → 1.21.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105)
  1. zrb/__init__.py +2 -6
  2. zrb/attr/type.py +8 -8
  3. zrb/builtin/__init__.py +2 -0
  4. zrb/builtin/group.py +31 -15
  5. zrb/builtin/http.py +7 -8
  6. zrb/builtin/llm/attachment.py +40 -0
  7. zrb/builtin/llm/chat_session.py +130 -144
  8. zrb/builtin/llm/chat_session_cmd.py +226 -0
  9. zrb/builtin/llm/chat_trigger.py +73 -0
  10. zrb/builtin/llm/history.py +4 -4
  11. zrb/builtin/llm/llm_ask.py +218 -110
  12. zrb/builtin/llm/tool/api.py +74 -62
  13. zrb/builtin/llm/tool/cli.py +35 -16
  14. zrb/builtin/llm/tool/code.py +49 -47
  15. zrb/builtin/llm/tool/file.py +262 -251
  16. zrb/builtin/llm/tool/note.py +84 -0
  17. zrb/builtin/llm/tool/rag.py +25 -18
  18. zrb/builtin/llm/tool/sub_agent.py +29 -22
  19. zrb/builtin/llm/tool/web.py +135 -143
  20. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/entity/add_entity_util.py +7 -7
  21. zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/module/add_module_util.py +5 -5
  22. zrb/builtin/project/add/fastapp/fastapp_util.py +1 -1
  23. zrb/builtin/searxng/config/settings.yml +5671 -0
  24. zrb/builtin/searxng/start.py +21 -0
  25. zrb/builtin/setup/latex/ubuntu.py +1 -0
  26. zrb/builtin/setup/ubuntu.py +1 -1
  27. zrb/builtin/shell/autocomplete/bash.py +4 -3
  28. zrb/builtin/shell/autocomplete/zsh.py +4 -3
  29. zrb/config/config.py +255 -78
  30. zrb/config/default_prompt/file_extractor_system_prompt.md +109 -9
  31. zrb/config/default_prompt/interactive_system_prompt.md +24 -30
  32. zrb/config/default_prompt/persona.md +1 -1
  33. zrb/config/default_prompt/repo_extractor_system_prompt.md +31 -31
  34. zrb/config/default_prompt/repo_summarizer_system_prompt.md +27 -8
  35. zrb/config/default_prompt/summarization_prompt.md +8 -13
  36. zrb/config/default_prompt/system_prompt.md +36 -30
  37. zrb/config/llm_config.py +129 -24
  38. zrb/config/llm_context/config.py +127 -90
  39. zrb/config/llm_context/config_parser.py +1 -7
  40. zrb/config/llm_context/workflow.py +81 -0
  41. zrb/config/llm_rate_limitter.py +89 -45
  42. zrb/context/any_shared_context.py +7 -1
  43. zrb/context/context.py +8 -2
  44. zrb/context/shared_context.py +6 -8
  45. zrb/group/any_group.py +12 -5
  46. zrb/group/group.py +67 -3
  47. zrb/input/any_input.py +5 -1
  48. zrb/input/base_input.py +18 -6
  49. zrb/input/text_input.py +7 -24
  50. zrb/runner/cli.py +21 -20
  51. zrb/runner/common_util.py +24 -19
  52. zrb/runner/web_route/task_input_api_route.py +5 -5
  53. zrb/runner/web_route/task_session_api_route.py +1 -4
  54. zrb/runner/web_util/user.py +7 -3
  55. zrb/session/any_session.py +12 -6
  56. zrb/session/session.py +39 -18
  57. zrb/task/any_task.py +24 -3
  58. zrb/task/base/context.py +17 -9
  59. zrb/task/base/execution.py +15 -8
  60. zrb/task/base/lifecycle.py +8 -4
  61. zrb/task/base/monitoring.py +12 -7
  62. zrb/task/base_task.py +69 -5
  63. zrb/task/base_trigger.py +12 -5
  64. zrb/task/llm/agent.py +138 -52
  65. zrb/task/llm/config.py +45 -13
  66. zrb/task/llm/conversation_history.py +76 -6
  67. zrb/task/llm/conversation_history_model.py +0 -168
  68. zrb/task/llm/default_workflow/coding/workflow.md +41 -0
  69. zrb/task/llm/default_workflow/copywriting/workflow.md +68 -0
  70. zrb/task/llm/default_workflow/git/workflow.md +118 -0
  71. zrb/task/llm/default_workflow/golang/workflow.md +128 -0
  72. zrb/task/llm/default_workflow/html-css/workflow.md +135 -0
  73. zrb/task/llm/default_workflow/java/workflow.md +146 -0
  74. zrb/task/llm/default_workflow/javascript/workflow.md +158 -0
  75. zrb/task/llm/default_workflow/python/workflow.md +160 -0
  76. zrb/task/llm/default_workflow/researching/workflow.md +153 -0
  77. zrb/task/llm/default_workflow/rust/workflow.md +162 -0
  78. zrb/task/llm/default_workflow/shell/workflow.md +299 -0
  79. zrb/task/llm/file_replacement.py +206 -0
  80. zrb/task/llm/file_tool_model.py +57 -0
  81. zrb/task/llm/history_summarization.py +22 -35
  82. zrb/task/llm/history_summarization_tool.py +24 -0
  83. zrb/task/llm/print_node.py +182 -63
  84. zrb/task/llm/prompt.py +213 -153
  85. zrb/task/llm/tool_wrapper.py +210 -53
  86. zrb/task/llm/workflow.py +76 -0
  87. zrb/task/llm_task.py +98 -47
  88. zrb/task/make_task.py +2 -3
  89. zrb/task/rsync_task.py +25 -10
  90. zrb/task/scheduler.py +4 -4
  91. zrb/util/attr.py +50 -40
  92. zrb/util/cli/markdown.py +12 -0
  93. zrb/util/cli/text.py +30 -0
  94. zrb/util/file.py +27 -11
  95. zrb/util/{llm/prompt.py → markdown.py} +2 -3
  96. zrb/util/string/conversion.py +1 -1
  97. zrb/util/truncate.py +23 -0
  98. zrb/util/yaml.py +204 -0
  99. {zrb-1.13.1.dist-info → zrb-1.21.17.dist-info}/METADATA +40 -20
  100. {zrb-1.13.1.dist-info → zrb-1.21.17.dist-info}/RECORD +102 -79
  101. {zrb-1.13.1.dist-info → zrb-1.21.17.dist-info}/WHEEL +1 -1
  102. zrb/task/llm/default_workflow/coding.md +0 -24
  103. zrb/task/llm/default_workflow/copywriting.md +0 -17
  104. zrb/task/llm/default_workflow/researching.md +0 -18
  105. {zrb-1.13.1.dist-info → zrb-1.21.17.dist-info}/entry_points.txt +0 -0
--- /dev/null
+++ b/zrb/builtin/llm/tool/note.py
@@ -0,0 +1,84 @@
+ import os
+
+ from zrb.config.llm_context.config import llm_context_config
+
+
+ def read_long_term_note() -> str:
+     """
+     Retrieves the GLOBAL long-term memory shared across ALL sessions and projects.
+
+     CRITICAL: Consult this first for user preferences, facts, and cross-project context.
+
+     Returns:
+         str: The current global note content.
+     """
+     contexts = llm_context_config.get_notes()
+     return contexts.get("/", "")
+
+
+ def write_long_term_note(content: str) -> str:
+     """
+     Persists CRITICAL facts to the GLOBAL long-term memory.
+
+     USE EAGERLY to save:
+     - User preferences (e.g., "I prefer Python", "No unit tests").
+     - User information (e.g., user name, user email address).
+     - Important facts (e.g., "My API key is in .env").
+     - Cross-project goals.
+     - Anything that will be useful for future interaction across projects.
+
+     WARNING: This OVERWRITES the entire global note. Always read first.
+
+     Args:
+         content (str): The text to strictly memorize.
+
+     Returns:
+         str: Confirmation message.
+     """
+     llm_context_config.write_note(content, "/")
+     return "Global long-term note saved."
+
+
+ def read_contextual_note(path: str | None = None) -> str:
+     """
+     Retrieves LOCAL memory specific to a file or directory path.
+
+     Use to recall project-specific architecture, code summaries, or past decisions
+     relevant to the current working location.
+
+     Args:
+         path (str | None): Target file/dir. Defaults to current working directory (CWD).
+
+     Returns:
+         str: The local note content for the path.
+     """
+     if path is None:
+         path = os.getcwd()
+     abs_path = os.path.abspath(path)
+     contexts = llm_context_config.get_notes(cwd=abs_path)
+     return contexts.get(abs_path, "")
+
+
+ def write_contextual_note(content: str, path: str | None = None) -> str:
+     """
+     Persists LOCAL facts specific to a file or directory.
+
+     USE EAGERLY to save:
+     - Architectural patterns for this project/directory.
+     - Summaries of large files or directories.
+     - Specific guidelines for this project.
+     - Anything related to this directory that will be useful for future interaction.
+
+     WARNING: This OVERWRITES the note for the specific path. Always read first.
+
+     Args:
+         content (str): The text to memorize for this location.
+         path (str | None): Target file/dir. Defaults to CWD.
+
+     Returns:
+         str: Confirmation message.
+     """
+     if path is None:
+         path = os.getcwd()
+     llm_context_config.write_note(content, path)
+     return f"Contextual note saved for: {path}"
--- a/zrb/builtin/llm/tool/rag.py
+++ b/zrb/builtin/llm/tool/rag.py
@@ -5,6 +5,7 @@ import os
  import sys
  from collections.abc import Callable
  from textwrap import dedent
+ from typing import Any

  import ulid

@@ -44,35 +45,40 @@ def create_rag_from_directory(
      openai_embedding_model: str | None = None,
  ):
      """
-     Creates a powerful Retrieval-Augmented Generation (RAG) tool for querying a local knowledge base.
+     Create a powerful RAG (Retrieval-Augmented Generation) tool for querying a local
+     knowledge base.

-     This factory function generates a tool that can perform semantic searches over a directory of documents. It automatically indexes the documents into a vector database, keeping it updated as files change. The generated tool is ideal for answering questions based on a specific set of documents, such as project documentation, research papers, or internal wikis.
+     This factory function generates a tool that performs semantic search over a directory of
+     documents. It automatically indexes the documents into a vector database (ChromaDB) and
+     keeps it updated as files change.

-     The created tool will:
-     1. Monitor a specified directory for file changes.
-     2. Automatically update a vector database (ChromaDB) with the latest content.
-     3. Accept a user query, embed it, and perform a similarity search against the document vectors.
-     4. Return the most relevant document chunks that match the query.
+     The generated tool is ideal for answering questions based on a specific set of documents,
+     such as project documentation or internal wikis.

      Args:
          tool_name (str): The name for the generated RAG tool (e.g., "search_project_docs").
-         tool_description (str): A clear description of what the generated tool does and when to use it (e.g., "Searches the project's technical documentation to answer questions.").
-         document_dir_path (str, optional): The path to the directory containing the documents to be indexed.
-         vector_db_path (str, optional): The path to store the ChromaDB vector database.
-         vector_db_collection (str, optional): The name of the collection within the vector database.
+         tool_description (str): A clear description of what the tool does and when to use it.
+             This is what the LLM will see.
+         document_dir_path (str, optional): The path to the directory containing the documents
+             to be indexed.
+         vector_db_path (str, optional): The path where the ChromaDB vector database will be
+             stored.
+         vector_db_collection (str, optional): The name of the collection within the vector
+             database.
          chunk_size (int, optional): The size of text chunks for embedding.
          overlap (int, optional): The overlap between text chunks.
          max_result_count (int, optional): The maximum number of search results to return.
-         file_reader (list[RAGFileReader], optional): Custom file readers for specific file types.
-         openai_api_key (str, optional): OpenAI API key for embeddings.
-         openai_base_url (str, optional): OpenAI base URL for embeddings.
+         file_reader (list[RAGFileReader], optional): A list of custom file readers for
+             specific file types.
+         openai_api_key (str, optional): Your OpenAI API key for generating embeddings.
+         openai_base_url (str, optional): An optional base URL for the OpenAI API.
          openai_embedding_model (str, optional): The embedding model to use.

      Returns:
-         Callable: An asynchronous function that serves as the RAG tool.
+         An asynchronous function that serves as the RAG tool.
      """

-     async def retrieve(query: str) -> str:
+     async def retrieve(query: str) -> dict[str, Any]:
          # Docstring will be set dynamically below
          from chromadb import PersistentClient
          from chromadb.config import Settings
@@ -187,7 +193,7 @@ def create_rag_from_directory(
              query_embeddings=query_vector,
              n_results=max_result_count_val,
          )
-         return json.dumps(results)
+         return dict(results)

      retrieve.__name__ = tool_name
      retrieve.__doc__ = dedent(
@@ -196,7 +202,8 @@
          Args:
              query (str): The user query to search for in documents.
          Returns:
-             str: JSON string with search results: {{"ids": [...], "documents": [...], ...}}
+             dict[str, Any]: dictionary with search results:
+                 {{"ids": [...], "documents": [...], ...}}
          """
      ).strip()
      return retrieve
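
The practical change for callers here is the return type: `retrieve` now returns the ChromaDB query result as a `dict[str, Any]` instead of a `json.dumps` string. A hedged sketch of the new calling convention (the tool name, description, directory, and query are placeholders; only the factory signature comes from the hunks above):

```python
import asyncio

from zrb.builtin.llm.tool.rag import create_rag_from_directory

# Placeholder argument values; the parameter names come from the docstring above.
search_docs = create_rag_from_directory(
    tool_name="search_project_docs",
    tool_description="Searches the project documentation to answer questions.",
    document_dir_path="./docs",
)


async def main() -> None:
    # 1.13.1 returned a JSON string that had to be json.loads()-ed;
    # 1.21.17 returns the ChromaDB result dict directly.
    results = await search_docs("how do I configure the scheduler?")
    print(results["documents"])


asyncio.run(main())
```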
--- a/zrb/builtin/llm/tool/sub_agent.py
+++ b/zrb/builtin/llm/tool/sub_agent.py
@@ -1,4 +1,3 @@
- import json
  from collections.abc import Callable
  from textwrap import dedent
  from typing import TYPE_CHECKING, Any, Coroutine
@@ -9,14 +8,12 @@ from zrb.task.llm.config import get_model, get_model_settings
  from zrb.task.llm.prompt import get_system_and_user_prompt

  if TYPE_CHECKING:
-     from pydantic_ai import Agent, Tool
+     from pydantic_ai import Tool
      from pydantic_ai.models import Model
      from pydantic_ai.settings import ModelSettings
      from pydantic_ai.toolsets import AbstractToolset

      ToolOrCallable = Tool | Callable
- else:
-     ToolOrCallable = Any


  def create_sub_agent_tool(
@@ -25,30 +22,38 @@ def create_sub_agent_tool(
      system_prompt: str | None = None,
      model: "str | Model | None" = None,
      model_settings: "ModelSettings | None" = None,
-     tools: list[ToolOrCallable] = [],
-     toolsets: list["AbstractToolset[Agent]"] = [],
- ) -> Callable[[AnyContext, str], Coroutine[Any, Any, str]]:
+     tools: "list[ToolOrCallable]" = [],
+     toolsets: list["AbstractToolset[None]"] = [],
+     yolo_mode: bool | list[str] | None = None,
+     log_indent_level: int = 2,
+ ) -> Callable[[AnyContext, str], Coroutine[Any, Any, dict[str, Any]]]:
      """
-     Creates a "tool that is another AI agent," capable of handling complex, multi-step sub-tasks.
+     Create a tool that is another AI agent, capable of handling complex, multi-step sub-tasks.

-     This powerful factory function generates a tool that, when used, spins up a temporary, specialized AI agent. This "sub-agent" has its own system prompt, tools, and context, allowing it to focus exclusively on accomplishing the task it's given without being distracted by the main conversation.
+     This factory function generates a tool that, when used, spins up a temporary, specialized
+     AI agent. This "sub-agent" has its own system prompt, tools, and context, allowing it to
+     focus on accomplishing a specific task without being distracted by the main conversation.

      This is ideal for delegating complex tasks like analyzing a file or a repository.

      Args:
          tool_name (str): The name for the generated sub-agent tool.
-         tool_description (str): A clear description of the sub-agent's purpose and when to use it.
-         system_prompt (str, optional): The system prompt that will guide the sub-agent's behavior.
+         tool_description (str): A clear description of the sub-agent's purpose and when to
+             use it. This is what the LLM will see.
+         system_prompt (str, optional): The system prompt that will guide the sub-agent's
+             behavior.
          model (str | Model, optional): The language model the sub-agent will use.
          model_settings (ModelSettings, optional): Specific settings for the sub-agent's model.
-         tools (list, optional): A list of tools that will be exclusively available to the sub-agent.
-         toolsets (list, optional): A list of Toolset for the sub-agent.
+         tools (list, optional): A list of tools that will be exclusively available to the
+             sub-agent.
+         toolsets (list, optional): A list of Toolsets for the sub-agent.

      Returns:
-         Callable: An asynchronous function that serves as the sub-agent tool. When called, it runs the sub-agent with a given query and returns its final result.
+         An asynchronous function that serves as the sub-agent tool. When called, it runs the
+         sub-agent with a given query and returns its final result.
      """

-     async def run_sub_agent(ctx: AnyContext, query: str) -> str:
+     async def run_sub_agent(ctx: AnyContext, query: str) -> dict[str, Any]:
          """
          Runs the sub-agent with the given query.
          """
@@ -86,6 +91,7 @@ def create_sub_agent_tool(
              model_settings=resolved_model_settings,
              tools=tools,
              toolsets=toolsets,
+             yolo_mode=yolo_mode,
          )

          sub_agent_run = None
@@ -95,16 +101,17 @@
              ctx=ctx,
              agent=sub_agent_agent,
              user_prompt=query,
-             history_list=[],  # Start with empty history for the sub-agent
+             attachments=[],
+             history_list=[],
+             log_indent_level=log_indent_level,
          )

          # Return the sub-agent's final message content
          if sub_agent_run and sub_agent_run.result:
-             # Return the final message content as a string
-             return json.dumps({"result": sub_agent_run.result.output})
-         else:
-             ctx.log_warning("Sub-agent run did not produce a result.")
-             return "Sub-agent failed to produce a result."
+             # Return the final message content
+             return {"result": sub_agent_run.result.output}
+         ctx.log_warning("Sub-agent run did not produce a result.")
+         raise ValueError(f"{tool_name} not returning any result")

      # Set the name and docstring for the callable function
      run_sub_agent.__name__ = tool_name
@@ -116,7 +123,7 @@
              query (str): The query or task for the sub-agent.

          Returns:
-             str: The final response or result from the sub-agent.
+             dict[str, Any]: The final response or result from the sub-agent.
          """
      ).strip()

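Beyond the docstring reflow, three behavioral changes stand out in these hunks: the new `yolo_mode` and `log_indent_level` parameters are forwarded into the sub-agent, the tool now returns `{"result": ...}` as a dict rather than a JSON string, and a missing result raises `ValueError` instead of returning an error string. A sketch of the new call shape (all argument values are illustrative, and the reading that a `list[str]` for `yolo_mode` auto-approves specific tools is my assumption, not something the diff states):

```python
from zrb.builtin.llm.tool.sub_agent import create_sub_agent_tool

# Illustrative configuration; parameter names come from the new signature above.
analyze_repo = create_sub_agent_tool(
    tool_name="analyze_repo",
    tool_description="Analyzes a repository and reports on its structure.",
    system_prompt="You are a code analysis assistant.",
    yolo_mode=["read_from_file"],  # new parameter; list semantics assumed
    log_indent_level=2,            # new parameter; controls log nesting
)

# Callers that previously did json.loads(await analyze_repo(ctx, query))
# now receive {"result": ...} directly, and must catch ValueError instead
# of checking for a "Sub-agent failed to produce a result." string.
```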
--- a/zrb/builtin/llm/tool/web.py
+++ b/zrb/builtin/llm/tool/web.py
@@ -1,179 +1,171 @@
- import json
  from collections.abc import Callable
+ from typing import Any
+ from urllib.parse import urljoin

+ from zrb.config.config import CFG
+ from zrb.config.llm_config import llm_config

- async def open_web_page(url: str) -> str:
-     """
-     Fetches and parses the textual content of a given web page URL.
-
-     Use this tool to "read" a web page. It strips away HTML tags, scripts, and other non-textual elements to provide the clean text content. It also extracts any hyperlinks found on the page. This is useful when you need to understand the content of a specific URL that you have discovered through a search or from another source.
+ _DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"  # noqa

-     Args:
-         url (str): The full URL of the web page to open (e.g., "https://example.com/article").

-     Returns:
-         str: A JSON object containing the cleaned text `content` of the page and a list of `links_on_page`.
+ async def open_web_page(url: str) -> dict[str, Any]:
      """
+     Fetches, parses, and converts a web page to readable Markdown.
+     Preserves semantic structure, removes non-essentials, and extracts all absolute links.

-     async def get_page_content(page_url: str):
-         user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"  # noqa
-         try:
-             from playwright.async_api import async_playwright
-
-             async with async_playwright() as p:
-                 browser = await p.chromium.launch(headless=True)
-                 page = await browser.new_page()
-                 await page.set_extra_http_headers({"User-Agent": user_agent})
-                 try:
-                     # Navigate to the URL with a timeout of 30 seconds
-                     await page.goto(page_url, wait_until="networkidle", timeout=30000)
-                     # Wait for the content to load
-                     await page.wait_for_load_state("domcontentloaded")
-                     # Get the page content
-                     content = await page.content()
-                     # Extract all links from the page
-                     links = await page.eval_on_selector_all(
-                         "a[href]",
-                         """
-                         (elements) => elements.map(el => {
-                             const href = el.getAttribute('href');
-                             if (href && !href.startsWith('#') && !href.startsWith('/')) {
-                                 return href;
-                             }
-                             return null;
-                         }).filter(href => href !== null)
-                         """,
-                     )
-                     return {"content": content, "links_on_page": links}
-                 finally:
-                     await browser.close()
-         except BaseException:
-             import requests
-
-             response = requests.get(url, headers={"User-Agent": user_agent})
-             if response.status_code != 200:
-                 msg = f"Unable to retrieve search results. Status code: {response.status_code}"
-                 raise Exception(msg)
-             return {"content": response.text, "links_on_page": []}
-
-     result = await get_page_content(url)
-     # Parse the HTML content
-     return json.dumps(parse_html_text(result["content"]))
-
-
- def create_search_internet_tool(serp_api_key: str) -> Callable[[str, int], str]:
-     """
-     Creates a tool that searches the internet using the SerpAPI Google Search API.
-
-     This factory returns a function that can be used to find information on the web. The generated tool is the primary way to answer general knowledge questions or to find information on topics you are unfamiliar with.
+     Example:
+         open_web_page(url='https://www.example.com/article')

      Args:
-         serp_api_key (str): The API key for SerpAPI.
+         url (str): The full URL of the web page.

      Returns:
-         Callable: A function that takes a search query and returns a list of search results.
+         dict: Markdown content and a list of absolute links.
      """
+     html_content, links = await _fetch_page_content(url)
+     markdown_content = _convert_html_to_markdown(html_content)
+     return {"content": markdown_content, "links_on_page": links}
+
+
+ def create_search_internet_tool() -> Callable:
+     if llm_config.default_search_internet_tool is not None:
+         return llm_config.default_search_internet_tool

-     def search_internet(query: str, num_results: int = 10) -> str:
+     def search_internet(query: str, page: int = 1) -> dict[str, Any]:
          """
-         Performs an internet search using Google and returns a summary of the results.
+         Performs an internet search using a search engine.
+         Use to find information, answer general knowledge, or research topics.

-         Use this tool to find information on the web, answer general knowledge questions, or research topics.
+         Example:
+             search_internet(query='latest AI advancements', page=1)

          Args:
              query (str): The search query.
-             num_results (int, optional): The desired number of search results. Defaults to 10.
+             page (int, optional): Search result page number. Defaults to 1.

          Returns:
-             str: A JSON object containing the parsed text content from the search results page.
+             dict: Summary of search results (titles, links, snippets).
          """
          import requests

-         response = requests.get(
-             "https://serpapi.com/search",
-             headers={
-                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"  # noqa
-             },
-             params={
-                 "q": query,
-                 "num": num_results,
-                 "hl": "en",
-                 "safe": "off",
-                 "api_key": serp_api_key,
-             },
-         )
+         if (
+             CFG.SEARCH_INTERNET_METHOD.strip().lower() == "serpapi"
+             and CFG.SERPAPI_KEY != ""
+         ):
+             response = requests.get(
+                 "https://serpapi.com/search",
+                 headers={"User-Agent": _DEFAULT_USER_AGENT},
+                 params={
+                     "q": query,
+                     "start": (page - 1) * 10,
+                     "hl": CFG.SERPAPI_LANG,
+                     "safe": CFG.SERPAPI_SAFE,
+                     "api_key": CFG.SERPAPI_KEY,
+                 },
+             )
+         elif (
+             CFG.SEARCH_INTERNET_METHOD.strip().lower() == "brave"
+             and CFG.BRAVE_API_KEY != ""
+         ):
+             response = requests.get(
+                 "https://api.search.brave.com/res/v1/web/search",
+                 headers={
+                     "User-Agent": _DEFAULT_USER_AGENT,
+                     "Accept": "application/json",
+                     "x-subscription-token": CFG.BRAVE_API_KEY,
+                 },
+                 params={
+                     "q": query,
+                     "count": "10",
+                     "offset": (page - 1) * 10,
+                     "safesearch": CFG.BRAVE_API_SAFE,
+                     "search_lang": CFG.BRAVE_API_LANG,
+                     "summary": "true",
+                 },
+             )
+         else:
+             response = requests.get(
+                 url=f"{CFG.SEARXNG_BASE_URL}/search",
+                 headers={"User-Agent": _DEFAULT_USER_AGENT},
+                 params={
+                     "q": query,
+                     "format": "json",
+                     "pageno": page,
+                     "safesearch": CFG.SEARXNG_SAFE,
+                     "language": CFG.SEARXNG_LANG,
+                 },
+             )
          if response.status_code != 200:
              raise Exception(
                  f"Error: Unable to retrieve search results (status code: {response.status_code})"  # noqa
              )
-         return json.dumps(parse_html_text(response.text))
+         return response.json()

      return search_internet


- def search_wikipedia(query: str) -> str:
-     """
-     Searches for articles on Wikipedia.
-
-     This is a specialized search tool for querying Wikipedia. It's best for when the user is asking for definitions, historical information, or biographical details that are likely to be found on an encyclopedia.
-
-     Args:
-         query (str): The search term or question.
-
-     Returns:
-         str: The raw JSON response from the Wikipedia API, containing a list of search results.
-     """
-     import requests
-
-     params = {"action": "query", "list": "search", "srsearch": query, "format": "json"}
-     response = requests.get("https://en.wikipedia.org/w/api.php", params=params)
-     return response.json()
-
-
- def search_arxiv(query: str, num_results: int = 10) -> str:
-     """
-     Searches for academic papers and preprints on ArXiv.
-
-     Use this tool when the user's query is scientific or technical in nature and they are likely looking for research papers, articles, or academic publications.
-
-     Args:
-         query (str): The search query, which can include keywords, author names, or titles.
-         num_results (int, optional): The maximum number of results to return. Defaults to 10.
-
-     Returns:
-         str: The raw XML response from the ArXiv API, containing a list of matching papers.
-     """
-     import requests
-
-     params = {"search_query": f"all:{query}", "start": 0, "max_results": num_results}
-     response = requests.get("http://export.arxiv.org/api/query", params=params)
-     return response.content
-
+ async def _fetch_page_content(url: str) -> tuple[str, list[str]]:
+     """Fetches the HTML content and all absolute links from a URL."""
+     try:
+         from playwright.async_api import async_playwright
+
+         async with async_playwright() as p:
+             browser = await p.chromium.launch(headless=True)
+             page = await browser.new_page()
+             await page.set_extra_http_headers({"User-Agent": _DEFAULT_USER_AGENT})
+             try:
+                 await page.goto(url, wait_until="networkidle", timeout=30000)
+                 await page.wait_for_load_state("domcontentloaded")
+                 content = await page.content()
+                 links = await page.eval_on_selector_all(
+                     "a[href]",
+                     """
+                     (elements, baseUrl) => elements.map(el => {
+                         const href = el.getAttribute('href');
+                         if (!href || href.startsWith('#')) return null;
+                         try {
+                             return new URL(href, baseUrl).href;
+                         } catch (e) {
+                             return null;
+                         }
+                     }).filter(href => href !== null)
+                     """,
+                     url,
+                 )
+                 return content, links
+                 # return json.dumps({"content": content, "links": links})
+             finally:
+                 await browser.close()
+     except Exception:
+         import requests
+         from bs4 import BeautifulSoup

- def parse_html_text(html_text: str) -> dict[str, str]:
+         response = requests.get(url, headers={"User-Agent": _DEFAULT_USER_AGENT})
+         if response.status_code != 200:
+             raise Exception(
+                 f"Unable to retrieve page content. Status code: {response.status_code}"
+             )
+         content = response.text
+         soup = BeautifulSoup(content, "html.parser")
+         links = [
+             urljoin(url, a["href"])
+             for a in soup.find_all("a", href=True)
+             if not a["href"].startswith("#")
+         ]
+         return content, links
+         # return json.dumps({"content": content, "links": links})
+
+
+ def _convert_html_to_markdown(html_text: str) -> str:
+     """Converts HTML content to a clean Markdown representation."""
      from bs4 import BeautifulSoup
+     from markdownify import markdownify as md

-     ignored_tags = [
-         "script",
-         "link",
-         "meta",
-         "style",
-         "code",
-         "footer",
-         "nav",
-         "header",
-         "aside",
-     ]
      soup = BeautifulSoup(html_text, "html.parser")
-     links = []
-     for anchor in soup.find_all("a"):
-         if not anchor or "href" not in anchor.attrs:
-             continue
-         link: str = anchor["href"]
-         if link.startswith("#") or link.startswith("/"):
-             continue
-         links.append(link)
-     for tag in soup(ignored_tags):
+     # Remove non-content tags
+     for tag in soup(
+         ["script", "link", "meta", "style", "header", "footer", "nav", "aside"]
+     ):
          tag.decompose()
-     html_text = soup.get_text(separator=" ", strip=True)
-     return {"content": html_text, "links_on_page": links}
+     # Convert the cleaned HTML to Markdown
+     return md(str(soup))
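
This rewrite replaces the SerpAPI-only factory (and the removed `search_wikipedia`/`search_arxiv` helpers) with a backend-agnostic `create_search_internet_tool` that picks SerpAPI, Brave, or a SearXNG instance based on `CFG.SEARCH_INTERNET_METHOD`, while `open_web_page` now returns Markdown plus absolute links. A hedged usage sketch (the query and URL are placeholders, and it assumes a reachable search backend is configured):

```python
import asyncio

from zrb.builtin.llm.tool.web import create_search_internet_tool, open_web_page

# The factory no longer takes a serp_api_key; backend selection now happens
# via CFG at call time (SerpAPI, Brave, or the SearXNG fallback).
search_internet = create_search_internet_tool()

# Pagination replaces num_results: each backend maps page=2 to its own
# offset/pageno parameter, and the raw JSON response is returned as a dict.
results = search_internet(query="zrb task automation", page=2)


async def main() -> None:
    page = await open_web_page("https://example.com/article")
    # "content" is now Markdown rather than whitespace-joined text, and
    # "links_on_page" holds absolute URLs resolved against the page URL.
    print(page["content"][:200])
    print(page["links_on_page"][:5])


asyncio.run(main())
```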
--- a/zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/entity/add_entity_util.py
+++ b/zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/entity/add_entity_util.py
@@ -204,7 +204,7 @@ def update_migration_metadata_file(ctx: AnyContext, migration_metadata_file_path
      app_name = os.path.basename(APP_DIR)
      existing_migration_metadata_code = read_file(migration_metadata_file_path)
      write_file(
-         file_path=migration_metadata_file_path,
+         abs_file_path=migration_metadata_file_path,
          content=[
              _get_migration_import_schema_code(
                  existing_migration_metadata_code, app_name, ctx.input.entity
@@ -251,7 +251,7 @@ def update_client_file(ctx: AnyContext, client_file_path: str):
      snake_plural_entity_name = to_snake_case(ctx.input.plural)
      pascal_entity_name = to_pascal_case(ctx.input.entity)
      write_file(
-         file_path=client_file_path,
+         abs_file_path=client_file_path,
          content=[
              _get_import_schema_for_client_code(
                  existing_code=existing_client_code, entity_name=ctx.input.entity
@@ -305,7 +305,7 @@ def update_api_client_file(ctx: AnyContext, api_client_file_path: str):
      snake_module_name = to_snake_case(ctx.input.module)
      pascal_module_name = to_pascal_case(ctx.input.module)
      write_file(
-         file_path=api_client_file_path,
+         abs_file_path=api_client_file_path,
          content=[
              f"from {app_name}.module.{snake_module_name}.service.{snake_entity_name}.{snake_entity_name}_service_factory import {snake_entity_name}_service",  # noqa
              prepend_code_to_module(
@@ -327,7 +327,7 @@ def update_direct_client_file(ctx: AnyContext, direct_client_file_path: str):
      snake_module_name = to_snake_case(ctx.input.module)
      pascal_module_name = to_pascal_case(ctx.input.module)
      write_file(
-         file_path=direct_client_file_path,
+         abs_file_path=direct_client_file_path,
          content=[
              f"from {app_name}.module.{snake_module_name}.service.{snake_entity_name}.{snake_entity_name}_service_factory import {snake_entity_name}_service",  # noqa
              prepend_code_to_module(
@@ -348,7 +348,7 @@ def update_route_file(ctx: AnyContext, route_file_path: str):
      app_name = os.path.basename(APP_DIR)
      module_name = to_snake_case(ctx.input.module)
      write_file(
-         file_path=route_file_path,
+         abs_file_path=route_file_path,
          content=[
              f"from {app_name}.module.{module_name}.service.{entity_name}.{entity_name}_service_factory import {entity_name}_service",  # noqa
              append_code_to_function(
@@ -370,7 +370,7 @@ def update_gateway_subroute_file(ctx: AnyContext, module_gateway_subroute_path:
      pascal_entity_name = to_pascal_case(ctx.input.entity)
      existing_gateway_subroute_code = read_file(module_gateway_subroute_path)
      write_file(
-         file_path=module_gateway_subroute_path,
+         abs_file_path=module_gateway_subroute_path,
          content=[
              _get_import_client_for_gateway_subroute_code(
                  existing_gateway_subroute_code, module_name=ctx.input.module
@@ -456,7 +456,7 @@ def update_gateway_navigation_config_file(
          },
      ).strip()
      write_file(
-         file_path=gateway_navigation_config_file_path,
+         abs_file_path=gateway_navigation_config_file_path,
          content=[
              existing_gateway_navigation_config_code,
              new_navigation_config_code,
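
All seven hunks in this template file are the same mechanical change: the `write_file` keyword argument was renamed from `file_path` to `abs_file_path` (matching the `zrb/util/file.py` change listed as entry 94 above). Any external caller passing the old keyword needs the same one-line fix; a sketch (the path and content values are placeholders):

```python
from zrb.util.file import write_file

# 1.13.1 keyword (no longer accepted):
# write_file(file_path="/tmp/demo/main.py", content=["print('hi')"])

# 1.21.17 keyword, as used throughout the hunks above:
write_file(abs_file_path="/tmp/demo/main.py", content=["print('hi')"])
```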