zrb 1.15.3__py3-none-any.whl → 1.21.29__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of zrb might be problematic. Click here for more details.
- zrb/__init__.py +2 -6
- zrb/attr/type.py +10 -7
- zrb/builtin/__init__.py +2 -0
- zrb/builtin/git.py +12 -1
- zrb/builtin/group.py +31 -15
- zrb/builtin/llm/attachment.py +40 -0
- zrb/builtin/llm/chat_completion.py +274 -0
- zrb/builtin/llm/chat_session.py +126 -167
- zrb/builtin/llm/chat_session_cmd.py +288 -0
- zrb/builtin/llm/chat_trigger.py +79 -0
- zrb/builtin/llm/history.py +4 -4
- zrb/builtin/llm/llm_ask.py +217 -135
- zrb/builtin/llm/tool/api.py +74 -70
- zrb/builtin/llm/tool/cli.py +35 -21
- zrb/builtin/llm/tool/code.py +55 -73
- zrb/builtin/llm/tool/file.py +278 -344
- zrb/builtin/llm/tool/note.py +84 -0
- zrb/builtin/llm/tool/rag.py +27 -34
- zrb/builtin/llm/tool/sub_agent.py +54 -41
- zrb/builtin/llm/tool/web.py +74 -98
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/entity/add_entity_util.py +7 -7
- zrb/builtin/project/add/fastapp/fastapp_template/my_app_name/_zrb/module/add_module_util.py +5 -5
- zrb/builtin/project/add/fastapp/fastapp_util.py +1 -1
- zrb/builtin/searxng/config/settings.yml +5671 -0
- zrb/builtin/searxng/start.py +21 -0
- zrb/builtin/shell/autocomplete/bash.py +4 -3
- zrb/builtin/shell/autocomplete/zsh.py +4 -3
- zrb/config/config.py +202 -27
- zrb/config/default_prompt/file_extractor_system_prompt.md +109 -9
- zrb/config/default_prompt/interactive_system_prompt.md +24 -30
- zrb/config/default_prompt/persona.md +1 -1
- zrb/config/default_prompt/repo_extractor_system_prompt.md +31 -31
- zrb/config/default_prompt/repo_summarizer_system_prompt.md +27 -8
- zrb/config/default_prompt/summarization_prompt.md +57 -16
- zrb/config/default_prompt/system_prompt.md +36 -30
- zrb/config/llm_config.py +119 -23
- zrb/config/llm_context/config.py +127 -90
- zrb/config/llm_context/config_parser.py +1 -7
- zrb/config/llm_context/workflow.py +81 -0
- zrb/config/llm_rate_limitter.py +100 -47
- zrb/context/any_shared_context.py +7 -1
- zrb/context/context.py +8 -2
- zrb/context/shared_context.py +3 -7
- zrb/group/any_group.py +3 -3
- zrb/group/group.py +3 -3
- zrb/input/any_input.py +5 -1
- zrb/input/base_input.py +18 -6
- zrb/input/option_input.py +13 -1
- zrb/input/text_input.py +7 -24
- zrb/runner/cli.py +21 -20
- zrb/runner/common_util.py +24 -19
- zrb/runner/web_route/task_input_api_route.py +5 -5
- zrb/runner/web_util/user.py +7 -3
- zrb/session/any_session.py +12 -6
- zrb/session/session.py +39 -18
- zrb/task/any_task.py +24 -3
- zrb/task/base/context.py +17 -9
- zrb/task/base/execution.py +15 -8
- zrb/task/base/lifecycle.py +8 -4
- zrb/task/base/monitoring.py +12 -7
- zrb/task/base_task.py +69 -5
- zrb/task/base_trigger.py +12 -5
- zrb/task/llm/agent.py +128 -167
- zrb/task/llm/agent_runner.py +152 -0
- zrb/task/llm/config.py +39 -20
- zrb/task/llm/conversation_history.py +110 -29
- zrb/task/llm/conversation_history_model.py +4 -179
- zrb/task/llm/default_workflow/coding/workflow.md +41 -0
- zrb/task/llm/default_workflow/copywriting/workflow.md +68 -0
- zrb/task/llm/default_workflow/git/workflow.md +118 -0
- zrb/task/llm/default_workflow/golang/workflow.md +128 -0
- zrb/task/llm/default_workflow/html-css/workflow.md +135 -0
- zrb/task/llm/default_workflow/java/workflow.md +146 -0
- zrb/task/llm/default_workflow/javascript/workflow.md +158 -0
- zrb/task/llm/default_workflow/python/workflow.md +160 -0
- zrb/task/llm/default_workflow/researching/workflow.md +153 -0
- zrb/task/llm/default_workflow/rust/workflow.md +162 -0
- zrb/task/llm/default_workflow/shell/workflow.md +299 -0
- zrb/task/llm/file_replacement.py +206 -0
- zrb/task/llm/file_tool_model.py +57 -0
- zrb/task/llm/history_processor.py +206 -0
- zrb/task/llm/history_summarization.py +2 -193
- zrb/task/llm/print_node.py +184 -64
- zrb/task/llm/prompt.py +175 -179
- zrb/task/llm/subagent_conversation_history.py +41 -0
- zrb/task/llm/tool_wrapper.py +226 -85
- zrb/task/llm/workflow.py +76 -0
- zrb/task/llm_task.py +109 -71
- zrb/task/make_task.py +2 -3
- zrb/task/rsync_task.py +25 -10
- zrb/task/scheduler.py +4 -4
- zrb/util/attr.py +54 -39
- zrb/util/cli/markdown.py +12 -0
- zrb/util/cli/text.py +30 -0
- zrb/util/file.py +12 -3
- zrb/util/git.py +2 -2
- zrb/util/{llm/prompt.py → markdown.py} +2 -3
- zrb/util/string/conversion.py +1 -1
- zrb/util/truncate.py +23 -0
- zrb/util/yaml.py +204 -0
- zrb/xcom/xcom.py +10 -0
- {zrb-1.15.3.dist-info → zrb-1.21.29.dist-info}/METADATA +38 -18
- {zrb-1.15.3.dist-info → zrb-1.21.29.dist-info}/RECORD +105 -79
- {zrb-1.15.3.dist-info → zrb-1.21.29.dist-info}/WHEEL +1 -1
- zrb/task/llm/default_workflow/coding.md +0 -24
- zrb/task/llm/default_workflow/copywriting.md +0 -17
- zrb/task/llm/default_workflow/researching.md +0 -18
- {zrb-1.15.3.dist-info → zrb-1.21.29.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from zrb.config.llm_context.config import llm_context_config
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def read_long_term_note() -> str:
|
|
7
|
+
"""
|
|
8
|
+
Retrieves the GLOBAL long-term memory shared across ALL sessions and projects.
|
|
9
|
+
|
|
10
|
+
CRITICAL: Consult this first for user preferences, facts, and cross-project context.
|
|
11
|
+
|
|
12
|
+
Returns:
|
|
13
|
+
str: The current global note content.
|
|
14
|
+
"""
|
|
15
|
+
contexts = llm_context_config.get_notes()
|
|
16
|
+
return contexts.get("/", "")
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def write_long_term_note(content: str) -> str:
|
|
20
|
+
"""
|
|
21
|
+
Persists CRITICAL facts to the GLOBAL long-term memory.
|
|
22
|
+
|
|
23
|
+
USE EAGERLY to save or update:
|
|
24
|
+
- User preferences (e.g., "I prefer Python", "No unit tests").
|
|
25
|
+
- User information (e.g., user name, user email address).
|
|
26
|
+
- Important facts (e.g., "My API key is in .env").
|
|
27
|
+
- Cross-project goals.
|
|
28
|
+
- Anything that will be useful for future interaction across projects.
|
|
29
|
+
|
|
30
|
+
WARNING: This OVERWRITES the entire global note.
|
|
31
|
+
|
|
32
|
+
Args:
|
|
33
|
+
content (str): The text to strictly memorize.
|
|
34
|
+
|
|
35
|
+
Returns:
|
|
36
|
+
str: Confirmation message.
|
|
37
|
+
"""
|
|
38
|
+
llm_context_config.write_note(content, "/")
|
|
39
|
+
return "Global long-term note saved."
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def read_contextual_note(path: str | None = None) -> str:
|
|
43
|
+
"""
|
|
44
|
+
Retrieves LOCAL memory specific to a file or directory path.
|
|
45
|
+
|
|
46
|
+
Use to recall project-specific architecture, code summaries, or past decisions
|
|
47
|
+
relevant to the current working location.
|
|
48
|
+
|
|
49
|
+
Args:
|
|
50
|
+
path (str | None): Target file/dir. Defaults to current working directory (CWD).
|
|
51
|
+
|
|
52
|
+
Returns:
|
|
53
|
+
str: The local note content for the path.
|
|
54
|
+
"""
|
|
55
|
+
if path is None:
|
|
56
|
+
path = os.getcwd()
|
|
57
|
+
abs_path = os.path.abspath(path)
|
|
58
|
+
contexts = llm_context_config.get_notes(cwd=abs_path)
|
|
59
|
+
return contexts.get(abs_path, "")
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def write_contextual_note(content: str, path: str | None = None) -> str:
|
|
63
|
+
"""
|
|
64
|
+
Persists LOCAL facts specific to a file or directory.
|
|
65
|
+
|
|
66
|
+
USE EAGERLY to save or update:
|
|
67
|
+
- Architectural patterns for this project/directory.
|
|
68
|
+
- Summaries of large files or directories.
|
|
69
|
+
- Specific guidelines for this project.
|
|
70
|
+
- Anything related to this directory that will be useful for future interaction.
|
|
71
|
+
|
|
72
|
+
WARNING: This OVERWRITES the note for the specific path.
|
|
73
|
+
|
|
74
|
+
Args:
|
|
75
|
+
content (str): The text to memorize for this location.
|
|
76
|
+
path (str | None): Target file/dir. Defaults to CWD.
|
|
77
|
+
|
|
78
|
+
Returns:
|
|
79
|
+
str: Confirmation message.
|
|
80
|
+
"""
|
|
81
|
+
if path is None:
|
|
82
|
+
path = os.getcwd()
|
|
83
|
+
llm_context_config.write_note(content, path)
|
|
84
|
+
return f"Contextual note saved for: {path}"
|
zrb/builtin/llm/tool/rag.py
CHANGED
|
@@ -5,6 +5,7 @@ import os
|
|
|
5
5
|
import sys
|
|
6
6
|
from collections.abc import Callable
|
|
7
7
|
from textwrap import dedent
|
|
8
|
+
from typing import Any
|
|
8
9
|
|
|
9
10
|
import ulid
|
|
10
11
|
|
|
@@ -44,49 +45,40 @@ def create_rag_from_directory(
|
|
|
44
45
|
openai_embedding_model: str | None = None,
|
|
45
46
|
):
|
|
46
47
|
"""
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
1. Monitor a specified directory for file changes.
|
|
57
|
-
2. Automatically update a vector database (ChromaDB) with the latest
|
|
58
|
-
content.
|
|
59
|
-
3. Accept a user query, embed it, and perform a similarity search against
|
|
60
|
-
the document vectors.
|
|
61
|
-
4. Return the most relevant document chunks that match the query.
|
|
48
|
+
Create a powerful RAG (Retrieval-Augmented Generation) tool for querying a local
|
|
49
|
+
knowledge base.
|
|
50
|
+
|
|
51
|
+
This factory function generates a tool that performs semantic search over a directory of
|
|
52
|
+
documents. It automatically indexes the documents into a vector database (ChromaDB) and
|
|
53
|
+
keeps it updated as files change.
|
|
54
|
+
|
|
55
|
+
The generated tool is ideal for answering questions based on a specific set of documents,
|
|
56
|
+
such as project documentation or internal wikis.
|
|
62
57
|
|
|
63
58
|
Args:
|
|
64
|
-
tool_name (str): The name for the generated RAG tool (e.g.,
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
59
|
+
tool_name (str): The name for the generated RAG tool (e.g., "search_project_docs").
|
|
60
|
+
tool_description (str): A clear description of what the tool does and when to use it.
|
|
61
|
+
This is what the LLM will see.
|
|
62
|
+
document_dir_path (str, optional): The path to the directory containing the documents
|
|
63
|
+
to be indexed.
|
|
64
|
+
vector_db_path (str, optional): The path where the ChromaDB vector database will be
|
|
65
|
+
stored.
|
|
66
|
+
vector_db_collection (str, optional): The name of the collection within the vector
|
|
72
67
|
database.
|
|
73
|
-
vector_db_collection (str, optional): The name of the collection within
|
|
74
|
-
the vector database.
|
|
75
68
|
chunk_size (int, optional): The size of text chunks for embedding.
|
|
76
69
|
overlap (int, optional): The overlap between text chunks.
|
|
77
|
-
max_result_count (int, optional): The maximum number of search results
|
|
78
|
-
|
|
79
|
-
file_reader (list[RAGFileReader], optional): Custom file readers for
|
|
70
|
+
max_result_count (int, optional): The maximum number of search results to return.
|
|
71
|
+
file_reader (list[RAGFileReader], optional): A list of custom file readers for
|
|
80
72
|
specific file types.
|
|
81
|
-
openai_api_key (str, optional): OpenAI API key for embeddings.
|
|
82
|
-
openai_base_url (str, optional):
|
|
73
|
+
openai_api_key (str, optional): Your OpenAI API key for generating embeddings.
|
|
74
|
+
openai_base_url (str, optional): An optional base URL for the OpenAI API.
|
|
83
75
|
openai_embedding_model (str, optional): The embedding model to use.
|
|
84
76
|
|
|
85
77
|
Returns:
|
|
86
|
-
|
|
78
|
+
An asynchronous function that serves as the RAG tool.
|
|
87
79
|
"""
|
|
88
80
|
|
|
89
|
-
async def retrieve(query: str) -> str:
|
|
81
|
+
async def retrieve(query: str) -> dict[str, Any]:
|
|
90
82
|
# Docstring will be set dynamically below
|
|
91
83
|
from chromadb import PersistentClient
|
|
92
84
|
from chromadb.config import Settings
|
|
@@ -201,7 +193,7 @@ def create_rag_from_directory(
|
|
|
201
193
|
query_embeddings=query_vector,
|
|
202
194
|
n_results=max_result_count_val,
|
|
203
195
|
)
|
|
204
|
-
return
|
|
196
|
+
return dict(results)
|
|
205
197
|
|
|
206
198
|
retrieve.__name__ = tool_name
|
|
207
199
|
retrieve.__doc__ = dedent(
|
|
@@ -210,7 +202,8 @@ def create_rag_from_directory(
|
|
|
210
202
|
Args:
|
|
211
203
|
query (str): The user query to search for in documents.
|
|
212
204
|
Returns:
|
|
213
|
-
str:
|
|
205
|
+
dict[str, Any]: dictionary with search results:
|
|
206
|
+
{{"ids": [...], "documents": [...], ...}}
|
|
214
207
|
"""
|
|
215
208
|
).strip()
|
|
216
209
|
return retrieve
|
|
@@ -4,12 +4,18 @@ from textwrap import dedent
|
|
|
4
4
|
from typing import TYPE_CHECKING, Any, Coroutine
|
|
5
5
|
|
|
6
6
|
from zrb.context.any_context import AnyContext
|
|
7
|
-
from zrb.task.llm.agent import create_agent_instance
|
|
7
|
+
from zrb.task.llm.agent import create_agent_instance
|
|
8
|
+
from zrb.task.llm.agent_runner import run_agent_iteration
|
|
8
9
|
from zrb.task.llm.config import get_model, get_model_settings
|
|
9
10
|
from zrb.task.llm.prompt import get_system_and_user_prompt
|
|
11
|
+
from zrb.task.llm.subagent_conversation_history import (
|
|
12
|
+
get_ctx_subagent_history,
|
|
13
|
+
set_ctx_subagent_history,
|
|
14
|
+
)
|
|
10
15
|
|
|
11
16
|
if TYPE_CHECKING:
|
|
12
|
-
from pydantic_ai import
|
|
17
|
+
from pydantic_ai import Tool
|
|
18
|
+
from pydantic_ai._agent_graph import HistoryProcessor
|
|
13
19
|
from pydantic_ai.models import Model
|
|
14
20
|
from pydantic_ai.settings import ModelSettings
|
|
15
21
|
from pydantic_ai.toolsets import AbstractToolset
|
|
@@ -24,43 +30,43 @@ def create_sub_agent_tool(
|
|
|
24
30
|
model: "str | Model | None" = None,
|
|
25
31
|
model_settings: "ModelSettings | None" = None,
|
|
26
32
|
tools: "list[ToolOrCallable]" = [],
|
|
27
|
-
toolsets: list["AbstractToolset[
|
|
28
|
-
|
|
29
|
-
|
|
33
|
+
toolsets: list["AbstractToolset[None]"] = [],
|
|
34
|
+
yolo_mode: bool | list[str] | None = None,
|
|
35
|
+
history_processors: list["HistoryProcessor"] | None = None,
|
|
36
|
+
log_indent_level: int = 2,
|
|
37
|
+
agent_name: str | None = None,
|
|
38
|
+
auto_summarize: bool = True,
|
|
39
|
+
remember_history: bool = True,
|
|
40
|
+
) -> Callable[[AnyContext, str], Coroutine[Any, Any, Any]]:
|
|
30
41
|
"""
|
|
31
|
-
|
|
32
|
-
multi-step sub-tasks.
|
|
42
|
+
Create a tool that is another AI agent, capable of handling complex, multi-step sub-tasks.
|
|
33
43
|
|
|
34
|
-
This
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
accomplishing the task it's given without being distracted by the main
|
|
38
|
-
conversation.
|
|
44
|
+
This factory function generates a tool that, when used, spins up a temporary, specialized
|
|
45
|
+
AI agent. This "sub-agent" has its own system prompt, tools, and context, allowing it to
|
|
46
|
+
focus on accomplishing a specific task without being distracted by the main conversation.
|
|
39
47
|
|
|
40
|
-
This is ideal for delegating complex tasks like analyzing a file or a
|
|
41
|
-
repository.
|
|
48
|
+
This is ideal for delegating complex tasks like analyzing a file or a repository.
|
|
42
49
|
|
|
43
50
|
Args:
|
|
44
51
|
tool_name (str): The name for the generated sub-agent tool.
|
|
45
|
-
tool_description (str): A clear description of the sub-agent's purpose
|
|
46
|
-
|
|
47
|
-
system_prompt (str, optional): The system prompt that will guide the
|
|
48
|
-
|
|
49
|
-
model (str | Model, optional): The language model the sub-agent will
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
sub-agent
|
|
53
|
-
|
|
54
|
-
available to the sub-agent.
|
|
55
|
-
toolsets (list, optional): A list of Toolset for the sub-agent.
|
|
52
|
+
tool_description (str): A clear description of the sub-agent's purpose and when to
|
|
53
|
+
use it. This is what the LLM will see.
|
|
54
|
+
system_prompt (str, optional): The system prompt that will guide the sub-agent's
|
|
55
|
+
behavior.
|
|
56
|
+
model (str | Model, optional): The language model the sub-agent will use.
|
|
57
|
+
model_settings (ModelSettings, optional): Specific settings for the sub-agent's model.
|
|
58
|
+
tools (list, optional): A list of tools that will be exclusively available to the
|
|
59
|
+
sub-agent.
|
|
60
|
+
toolsets (list, optional): A list of Toolsets for the sub-agent.
|
|
56
61
|
|
|
57
62
|
Returns:
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
its final result.
|
|
63
|
+
An asynchronous function that serves as the sub-agent tool. When called, it runs the
|
|
64
|
+
sub-agent with a given query and returns its final result.
|
|
61
65
|
"""
|
|
66
|
+
if agent_name is None:
|
|
67
|
+
agent_name = f"{tool_name}_agent"
|
|
62
68
|
|
|
63
|
-
async def run_sub_agent(ctx: AnyContext, query: str) ->
|
|
69
|
+
async def run_sub_agent(ctx: AnyContext, query: str) -> Any:
|
|
64
70
|
"""
|
|
65
71
|
Runs the sub-agent with the given query.
|
|
66
72
|
"""
|
|
@@ -79,7 +85,6 @@ def create_sub_agent_tool(
|
|
|
79
85
|
ctx=ctx,
|
|
80
86
|
model_settings_attr=model_settings,
|
|
81
87
|
)
|
|
82
|
-
|
|
83
88
|
if system_prompt is None:
|
|
84
89
|
resolved_system_prompt, query = get_system_and_user_prompt(
|
|
85
90
|
ctx=ctx,
|
|
@@ -98,27 +103,35 @@ def create_sub_agent_tool(
|
|
|
98
103
|
model_settings=resolved_model_settings,
|
|
99
104
|
tools=tools,
|
|
100
105
|
toolsets=toolsets,
|
|
101
|
-
|
|
106
|
+
yolo_mode=yolo_mode,
|
|
107
|
+
history_processors=history_processors,
|
|
108
|
+
auto_summarize=auto_summarize,
|
|
102
109
|
)
|
|
103
|
-
|
|
104
110
|
sub_agent_run = None
|
|
105
111
|
# Run the sub-agent iteration
|
|
106
|
-
|
|
112
|
+
history_list = (
|
|
113
|
+
get_ctx_subagent_history(ctx, agent_name) if remember_history else []
|
|
114
|
+
)
|
|
107
115
|
sub_agent_run = await run_agent_iteration(
|
|
108
116
|
ctx=ctx,
|
|
109
117
|
agent=sub_agent_agent,
|
|
110
118
|
user_prompt=query,
|
|
111
119
|
attachments=[],
|
|
112
|
-
history_list=
|
|
120
|
+
history_list=history_list,
|
|
121
|
+
log_indent_level=log_indent_level,
|
|
113
122
|
)
|
|
114
|
-
|
|
115
123
|
# Return the sub-agent's final message content
|
|
116
124
|
if sub_agent_run and sub_agent_run.result:
|
|
117
|
-
# Return the final message content
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
125
|
+
# Return the final message content
|
|
126
|
+
if remember_history:
|
|
127
|
+
set_ctx_subagent_history(
|
|
128
|
+
ctx,
|
|
129
|
+
agent_name,
|
|
130
|
+
json.loads(sub_agent_run.result.all_messages_json()),
|
|
131
|
+
)
|
|
132
|
+
return sub_agent_run.result.output
|
|
133
|
+
ctx.log_warning("Sub-agent run did not produce a result.")
|
|
134
|
+
raise ValueError(f"{tool_name} not returning any result")
|
|
122
135
|
|
|
123
136
|
# Set the name and docstring for the callable function
|
|
124
137
|
run_sub_agent.__name__ = tool_name
|
|
@@ -130,7 +143,7 @@ def create_sub_agent_tool(
|
|
|
130
143
|
query (str): The query or task for the sub-agent.
|
|
131
144
|
|
|
132
145
|
Returns:
|
|
133
|
-
|
|
146
|
+
Any: The final response or result from the sub-agent.
|
|
134
147
|
"""
|
|
135
148
|
).strip()
|
|
136
149
|
|
zrb/builtin/llm/tool/web.py
CHANGED
|
@@ -1,142 +1,118 @@
|
|
|
1
|
-
import json
|
|
2
1
|
from collections.abc import Callable
|
|
2
|
+
from typing import Any
|
|
3
3
|
from urllib.parse import urljoin
|
|
4
4
|
|
|
5
|
+
from zrb.config.config import CFG
|
|
6
|
+
from zrb.config.llm_config import llm_config
|
|
5
7
|
|
|
6
|
-
|
|
8
|
+
_DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" # noqa
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
async def open_web_page(url: str) -> dict[str, Any]:
|
|
7
12
|
"""
|
|
8
|
-
Fetches, parses, and converts
|
|
13
|
+
Fetches, parses, and converts a web page to readable Markdown.
|
|
14
|
+
Preserves semantic structure, removes non-essentials, and extracts all absolute links.
|
|
9
15
|
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
cleaned HTML into Markdown format. This preserves the semantic structure
|
|
13
|
-
of the content (headings, lists, etc.) while removing clutter. It also
|
|
14
|
-
extracts all hyperlinks and resolves them to absolute URLs.
|
|
16
|
+
Example:
|
|
17
|
+
open_web_page(url='https://www.example.com/article')
|
|
15
18
|
|
|
16
19
|
Args:
|
|
17
|
-
url (str): The full URL of the web page
|
|
18
|
-
"https://example.com/article").
|
|
20
|
+
url (str): The full URL of the web page.
|
|
19
21
|
|
|
20
22
|
Returns:
|
|
21
|
-
|
|
22
|
-
and a list of all absolute links found on the page.
|
|
23
|
+
dict: Markdown content and a list of absolute links.
|
|
23
24
|
"""
|
|
24
25
|
html_content, links = await _fetch_page_content(url)
|
|
25
26
|
markdown_content = _convert_html_to_markdown(html_content)
|
|
26
|
-
return
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
def create_search_internet_tool(serp_api_key: str) -> Callable[[str, int], str]:
|
|
30
|
-
"""
|
|
31
|
-
Creates a tool that searches the internet using the SerpAPI Google Search
|
|
32
|
-
API.
|
|
33
|
-
|
|
34
|
-
This factory returns a function that can be used to find information on the
|
|
35
|
-
web. The generated tool is the primary way to answer general knowledge
|
|
36
|
-
questions or to find information on topics you are unfamiliar with.
|
|
27
|
+
return {"content": markdown_content, "links_on_page": links}
|
|
37
28
|
|
|
38
|
-
Args:
|
|
39
|
-
serp_api_key (str): The API key for SerpAPI.
|
|
40
29
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
"""
|
|
30
|
+
def create_search_internet_tool() -> Callable:
|
|
31
|
+
if llm_config.default_search_internet_tool is not None:
|
|
32
|
+
return llm_config.default_search_internet_tool
|
|
45
33
|
|
|
46
|
-
def search_internet(query: str,
|
|
34
|
+
def search_internet(query: str, page: int = 1) -> dict[str, Any]:
|
|
47
35
|
"""
|
|
48
|
-
Performs an internet search using
|
|
36
|
+
Performs an internet search using a search engine.
|
|
37
|
+
Use to find information, answer general knowledge, or research topics.
|
|
49
38
|
|
|
50
|
-
|
|
39
|
+
Example:
|
|
40
|
+
search_internet(query='latest AI advancements', page=1)
|
|
51
41
|
|
|
52
42
|
Args:
|
|
53
43
|
query (str): The search query.
|
|
54
|
-
|
|
44
|
+
page (int, optional): Search result page number. Defaults to 1.
|
|
55
45
|
|
|
56
46
|
Returns:
|
|
57
|
-
|
|
47
|
+
dict: Summary of search results (titles, links, snippets).
|
|
58
48
|
"""
|
|
59
49
|
import requests
|
|
60
50
|
|
|
61
|
-
|
|
62
|
-
"
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
"
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
51
|
+
if (
|
|
52
|
+
CFG.SEARCH_INTERNET_METHOD.strip().lower() == "serpapi"
|
|
53
|
+
and CFG.SERPAPI_KEY != ""
|
|
54
|
+
):
|
|
55
|
+
response = requests.get(
|
|
56
|
+
"https://serpapi.com/search",
|
|
57
|
+
headers={"User-Agent": _DEFAULT_USER_AGENT},
|
|
58
|
+
params={
|
|
59
|
+
"q": query,
|
|
60
|
+
"start": (page - 1) * 10,
|
|
61
|
+
"hl": CFG.SERPAPI_LANG,
|
|
62
|
+
"safe": CFG.SERPAPI_SAFE,
|
|
63
|
+
"api_key": CFG.SERPAPI_KEY,
|
|
64
|
+
},
|
|
65
|
+
)
|
|
66
|
+
elif (
|
|
67
|
+
CFG.SEARCH_INTERNET_METHOD.strip().lower() == "brave"
|
|
68
|
+
and CFG.BRAVE_API_KEY != ""
|
|
69
|
+
):
|
|
70
|
+
response = requests.get(
|
|
71
|
+
"https://api.search.brave.com/res/v1/web/search",
|
|
72
|
+
headers={
|
|
73
|
+
"User-Agent": _DEFAULT_USER_AGENT,
|
|
74
|
+
"Accept": "application/json",
|
|
75
|
+
"x-subscription-token": CFG.BRAVE_API_KEY,
|
|
76
|
+
},
|
|
77
|
+
params={
|
|
78
|
+
"q": query,
|
|
79
|
+
"count": "10",
|
|
80
|
+
"offset": (page - 1) * 10,
|
|
81
|
+
"safesearch": CFG.BRAVE_API_SAFE,
|
|
82
|
+
"search_lang": CFG.BRAVE_API_LANG,
|
|
83
|
+
"summary": "true",
|
|
84
|
+
},
|
|
85
|
+
)
|
|
86
|
+
else:
|
|
87
|
+
response = requests.get(
|
|
88
|
+
url=f"{CFG.SEARXNG_BASE_URL}/search",
|
|
89
|
+
headers={"User-Agent": _DEFAULT_USER_AGENT},
|
|
90
|
+
params={
|
|
91
|
+
"q": query,
|
|
92
|
+
"format": "json",
|
|
93
|
+
"pageno": page,
|
|
94
|
+
"safesearch": CFG.SEARXNG_SAFE,
|
|
95
|
+
"language": CFG.SEARXNG_LANG,
|
|
96
|
+
},
|
|
97
|
+
)
|
|
74
98
|
if response.status_code != 200:
|
|
75
99
|
raise Exception(
|
|
76
|
-
f"Error: Unable to retrieve search results (status code: {response.status_code})"
|
|
100
|
+
f"Error: Unable to retrieve search results (status code: {response.status_code})" # noqa
|
|
77
101
|
)
|
|
78
102
|
return response.json()
|
|
79
103
|
|
|
80
104
|
return search_internet
|
|
81
105
|
|
|
82
106
|
|
|
83
|
-
def search_wikipedia(query: str) -> str:
|
|
84
|
-
"""
|
|
85
|
-
Searches for articles on Wikipedia.
|
|
86
|
-
|
|
87
|
-
This is a specialized search tool for querying Wikipedia. It's best for
|
|
88
|
-
when the user is asking for definitions, historical information, or
|
|
89
|
-
biographical details that are likely to be found on an encyclopedia.
|
|
90
|
-
|
|
91
|
-
Args:
|
|
92
|
-
query (str): The search term or question.
|
|
93
|
-
|
|
94
|
-
Returns:
|
|
95
|
-
str: The raw JSON response from the Wikipedia API, containing a list of
|
|
96
|
-
search results.
|
|
97
|
-
"""
|
|
98
|
-
import requests
|
|
99
|
-
|
|
100
|
-
params = {"action": "query", "list": "search", "srsearch": query, "format": "json"}
|
|
101
|
-
response = requests.get("https://en.wikipedia.org/w/api.php", params=params)
|
|
102
|
-
return response.json()
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def search_arxiv(query: str, num_results: int = 10) -> str:
|
|
106
|
-
"""
|
|
107
|
-
Searches for academic papers and preprints on ArXiv.
|
|
108
|
-
|
|
109
|
-
Use this tool when the user's query is scientific or technical in nature
|
|
110
|
-
and they are likely looking for research papers, articles, or academic
|
|
111
|
-
publications.
|
|
112
|
-
|
|
113
|
-
Args:
|
|
114
|
-
query (str): The search query, which can include keywords, author
|
|
115
|
-
names, or titles.
|
|
116
|
-
num_results (int, optional): The maximum number of results to return.
|
|
117
|
-
Defaults to 10.
|
|
118
|
-
|
|
119
|
-
Returns:
|
|
120
|
-
str: The raw XML response from the ArXiv API, containing a list of
|
|
121
|
-
matching papers.
|
|
122
|
-
"""
|
|
123
|
-
import requests
|
|
124
|
-
|
|
125
|
-
params = {"search_query": f"all:{query}", "start": 0, "max_results": num_results}
|
|
126
|
-
response = requests.get("http://export.arxiv.org/api/query", params=params)
|
|
127
|
-
return response.content
|
|
128
|
-
|
|
129
|
-
|
|
130
107
|
async def _fetch_page_content(url: str) -> tuple[str, list[str]]:
|
|
131
108
|
"""Fetches the HTML content and all absolute links from a URL."""
|
|
132
|
-
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
|
133
109
|
try:
|
|
134
110
|
from playwright.async_api import async_playwright
|
|
135
111
|
|
|
136
112
|
async with async_playwright() as p:
|
|
137
113
|
browser = await p.chromium.launch(headless=True)
|
|
138
114
|
page = await browser.new_page()
|
|
139
|
-
await page.set_extra_http_headers({"User-Agent":
|
|
115
|
+
await page.set_extra_http_headers({"User-Agent": _DEFAULT_USER_AGENT})
|
|
140
116
|
try:
|
|
141
117
|
await page.goto(url, wait_until="networkidle", timeout=30000)
|
|
142
118
|
await page.wait_for_load_state("domcontentloaded")
|
|
@@ -164,7 +140,7 @@ async def _fetch_page_content(url: str) -> tuple[str, list[str]]:
|
|
|
164
140
|
import requests
|
|
165
141
|
from bs4 import BeautifulSoup
|
|
166
142
|
|
|
167
|
-
response = requests.get(url, headers={"User-Agent":
|
|
143
|
+
response = requests.get(url, headers={"User-Agent": _DEFAULT_USER_AGENT})
|
|
168
144
|
if response.status_code != 200:
|
|
169
145
|
raise Exception(
|
|
170
146
|
f"Unable to retrieve page content. Status code: {response.status_code}"
|
|
@@ -204,7 +204,7 @@ def update_migration_metadata_file(ctx: AnyContext, migration_metadata_file_path
|
|
|
204
204
|
app_name = os.path.basename(APP_DIR)
|
|
205
205
|
existing_migration_metadata_code = read_file(migration_metadata_file_path)
|
|
206
206
|
write_file(
|
|
207
|
-
|
|
207
|
+
abs_file_path=migration_metadata_file_path,
|
|
208
208
|
content=[
|
|
209
209
|
_get_migration_import_schema_code(
|
|
210
210
|
existing_migration_metadata_code, app_name, ctx.input.entity
|
|
@@ -251,7 +251,7 @@ def update_client_file(ctx: AnyContext, client_file_path: str):
|
|
|
251
251
|
snake_plural_entity_name = to_snake_case(ctx.input.plural)
|
|
252
252
|
pascal_entity_name = to_pascal_case(ctx.input.entity)
|
|
253
253
|
write_file(
|
|
254
|
-
|
|
254
|
+
abs_file_path=client_file_path,
|
|
255
255
|
content=[
|
|
256
256
|
_get_import_schema_for_client_code(
|
|
257
257
|
existing_code=existing_client_code, entity_name=ctx.input.entity
|
|
@@ -305,7 +305,7 @@ def update_api_client_file(ctx: AnyContext, api_client_file_path: str):
|
|
|
305
305
|
snake_module_name = to_snake_case(ctx.input.module)
|
|
306
306
|
pascal_module_name = to_pascal_case(ctx.input.module)
|
|
307
307
|
write_file(
|
|
308
|
-
|
|
308
|
+
abs_file_path=api_client_file_path,
|
|
309
309
|
content=[
|
|
310
310
|
f"from {app_name}.module.{snake_module_name}.service.{snake_entity_name}.{snake_entity_name}_service_factory import {snake_entity_name}_service", # noqa
|
|
311
311
|
prepend_code_to_module(
|
|
@@ -327,7 +327,7 @@ def update_direct_client_file(ctx: AnyContext, direct_client_file_path: str):
|
|
|
327
327
|
snake_module_name = to_snake_case(ctx.input.module)
|
|
328
328
|
pascal_module_name = to_pascal_case(ctx.input.module)
|
|
329
329
|
write_file(
|
|
330
|
-
|
|
330
|
+
abs_file_path=direct_client_file_path,
|
|
331
331
|
content=[
|
|
332
332
|
f"from {app_name}.module.{snake_module_name}.service.{snake_entity_name}.{snake_entity_name}_service_factory import {snake_entity_name}_service", # noqa
|
|
333
333
|
prepend_code_to_module(
|
|
@@ -348,7 +348,7 @@ def update_route_file(ctx: AnyContext, route_file_path: str):
|
|
|
348
348
|
app_name = os.path.basename(APP_DIR)
|
|
349
349
|
module_name = to_snake_case(ctx.input.module)
|
|
350
350
|
write_file(
|
|
351
|
-
|
|
351
|
+
abs_file_path=route_file_path,
|
|
352
352
|
content=[
|
|
353
353
|
f"from {app_name}.module.{module_name}.service.{entity_name}.{entity_name}_service_factory import {entity_name}_service", # noqa
|
|
354
354
|
append_code_to_function(
|
|
@@ -370,7 +370,7 @@ def update_gateway_subroute_file(ctx: AnyContext, module_gateway_subroute_path:
|
|
|
370
370
|
pascal_entity_name = to_pascal_case(ctx.input.entity)
|
|
371
371
|
existing_gateway_subroute_code = read_file(module_gateway_subroute_path)
|
|
372
372
|
write_file(
|
|
373
|
-
|
|
373
|
+
abs_file_path=module_gateway_subroute_path,
|
|
374
374
|
content=[
|
|
375
375
|
_get_import_client_for_gateway_subroute_code(
|
|
376
376
|
existing_gateway_subroute_code, module_name=ctx.input.module
|
|
@@ -456,7 +456,7 @@ def update_gateway_navigation_config_file(
|
|
|
456
456
|
},
|
|
457
457
|
).strip()
|
|
458
458
|
write_file(
|
|
459
|
-
|
|
459
|
+
abs_file_path=gateway_navigation_config_file_path,
|
|
460
460
|
content=[
|
|
461
461
|
existing_gateway_navigation_config_code,
|
|
462
462
|
new_navigation_config_code,
|