ursa-ai 0.0.3__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ursa-ai might be problematic. Click here for more details.
- ursa/agents/__init__.py +10 -0
- ursa/agents/arxiv_agent.py +349 -0
- ursa/agents/base.py +42 -0
- ursa/agents/code_review_agent.py +332 -0
- ursa/agents/execution_agent.py +497 -0
- ursa/agents/hypothesizer_agent.py +597 -0
- ursa/agents/mp_agent.py +257 -0
- ursa/agents/planning_agent.py +138 -0
- ursa/agents/recall_agent.py +25 -0
- ursa/agents/websearch_agent.py +193 -0
- ursa/prompt_library/code_review_prompts.py +51 -0
- ursa/prompt_library/execution_prompts.py +36 -0
- ursa/prompt_library/hypothesizer_prompts.py +17 -0
- ursa/prompt_library/literature_prompts.py +11 -0
- ursa/prompt_library/planning_prompts.py +79 -0
- ursa/prompt_library/websearch_prompts.py +131 -0
- ursa/tools/run_command.py +27 -0
- ursa/tools/write_code.py +42 -0
- ursa/util/diff_renderer.py +121 -0
- ursa/util/memory_logger.py +171 -0
- ursa/util/parse.py +89 -0
- ursa_ai-0.2.2.dist-info/METADATA +130 -0
- ursa_ai-0.2.2.dist-info/RECORD +26 -0
- ursa_ai-0.2.2.dist-info/licenses/LICENSE +8 -0
- ursa/__init__.py +0 -2
- ursa/py.typed +0 -0
- ursa_ai-0.0.3.dist-info/METADATA +0 -7
- ursa_ai-0.0.3.dist-info/RECORD +0 -6
- {ursa_ai-0.0.3.dist-info → ursa_ai-0.2.2.dist-info}/WHEEL +0 -0
- {ursa_ai-0.0.3.dist-info → ursa_ai-0.2.2.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# executor_prompt = '''
|
|
2
|
+
# You are a plan execution agent. You will be given a plan to solve a problem.
|
|
3
|
+
# Use the tools available to carry out this plan.
|
|
4
|
+
# You may perform an internet search if you need information on how to carry out a solution.
|
|
5
|
+
# You may write computer code to solve the problem.
|
|
6
|
+
# You may execute system commands to carry out this plan, as long as they are safe commands.
|
|
7
|
+
# '''
|
|
8
|
+
|
|
9
|
+
executor_prompt = '''
|
|
10
|
+
You are a responsible and efficient execution agent tasked with carrying out a provided plan designed to solve a specific problem.
|
|
11
|
+
|
|
12
|
+
Your responsibilities are as follows:
|
|
13
|
+
|
|
14
|
+
1. Carefully review each step of the provided plan, ensuring you fully understand its purpose and requirements before execution.
|
|
15
|
+
2. Use the appropriate tools available to execute each step effectively, including (and possibly combining multiple tools as needed):
|
|
16
|
+
- Performing internet searches to gather additional necessary information.
|
|
17
|
+
- Writing, editing, and executing computer code when solving computational tasks. Do not generate any placeholder or synthetic data! Only real data!
|
|
18
|
+
- Executing safe and relevant system commands as required to complete the task.
|
|
19
|
+
3. Clearly document each action you take, including:
|
|
20
|
+
- The tools or methods you used.
|
|
21
|
+
- Any code written, commands executed, or searches performed.
|
|
22
|
+
- Outcomes, results, or errors encountered during execution.
|
|
23
|
+
4. Immediately highlight and clearly communicate any steps that appear unclear, unsafe, or impractical before proceeding.
|
|
24
|
+
|
|
25
|
+
Your goal is to carry out the provided plan accurately, safely, and transparently, maintaining accountability at each step.
|
|
26
|
+
'''
|
|
27
|
+
|
|
28
|
+
summarize_prompt = '''
|
|
29
|
+
You are a summarizing agent. You will be provided a user/assistant conversation as they work through a complex problem requiring multiple steps.
|
|
30
|
+
|
|
31
|
+
Your responsibility is to write a condensed summary of the conversation.
|
|
32
|
+
- Keep all important points from the conversation.
|
|
33
|
+
- Ensure the summary responds to the goals of the original query.
|
|
34
|
+
- Summarize all the work that was carried out to meet those goals
|
|
35
|
+
- Highlight any places where those goals were not achieved and why.
|
|
36
|
+
'''
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from textwrap import dedent
|
|
2
|
+
|
|
3
|
+
hypothesizer_prompt = dedent("""\
|
|
4
|
+
You are Agent 1, a creative solution hypothesizer for a posed question.
|
|
5
|
+
If this is not the first iteration, you must explicitly call out how you updated
|
|
6
|
+
the previous solution based on the provided critique and competitor perspective.
|
|
7
|
+
""")
|
|
8
|
+
|
|
9
|
+
critic_prompt = dedent("""\
|
|
10
|
+
You are Agent 2, a rigorous Critic who identifies flaws and areas for improvement.
|
|
11
|
+
""")
|
|
12
|
+
|
|
13
|
+
# System prompt for the "competitor" role (Agent 3).
# This is a plain string on purpose: nothing is interpolated here, so an
# f-string prefix would only make any future literal "{" / "}" in the prompt
# be parsed as a replacement field.
competitor_prompt = dedent("""\
    You are Agent 3, taking on the role of a direct competitor to Agent 1 in this hypothetical situation.
    Acting as that competitor, and taking into account potential critiques from the critic, provide an honest
    assessment how you might *REALLY* counter the approach of Agent 1.
    """)
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
search_prompt = '''
|
|
2
|
+
You are an agent that is responsible for reviewing the literature to answer a specific question.
|
|
3
|
+
Use the arxiv tool available to carry out this plan.
|
|
4
|
+
You should perform a search through the arxiv database.
|
|
5
|
+
'''
|
|
6
|
+
|
|
7
|
+
summarize_prompt = '''
|
|
8
|
+
You are a summarizing agent.
|
|
9
|
+
You should cite all the papers that were used for the arxiv review.
|
|
10
|
+
You should give me the final summary from the literature review.
|
|
11
|
+
'''
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
planner_prompt = """
|
|
2
|
+
You have been given a problem and must formulate a step-by-step plan to solve it.
|
|
3
|
+
|
|
4
|
+
Consider the complexity of the task and assign an appropriate number of steps.
|
|
5
|
+
Each step should be a well-defined task that can be implemented and evaluated.
|
|
6
|
+
For each step, specify:
|
|
7
|
+
|
|
8
|
+
1. A descriptive name for the step
|
|
9
|
+
2. A detailed description of what needs to be done
|
|
10
|
+
3. Whether the step requires generating and executing code
|
|
11
|
+
4. Expected outputs of the step
|
|
12
|
+
5. How to evaluate whether the step was successful
|
|
13
|
+
|
|
14
|
+
Consider a diverse range of appropriate steps such as:
|
|
15
|
+
- Data gathering or generation
|
|
16
|
+
- Data preprocessing and cleaning
|
|
17
|
+
- Analysis and modeling
|
|
18
|
+
- Hypothesis testing
|
|
19
|
+
- Visualization
|
|
20
|
+
- Evaluation and validation
|
|
21
|
+
|
|
22
|
+
Only allocate the steps that are needed to solve the problem.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
detailed_planner_prompt = """
|
|
26
|
+
You are contributing to a larger solution.
|
|
27
|
+
You have been given one sub-task from this larger effort. Your objective is to:
|
|
28
|
+
|
|
29
|
+
1. Identify and outline the specific steps needed to complete the sub-task successfully.
|
|
30
|
+
2. Provide each step as a numbered list, ensuring each step is a well-defined action that is feasible to implement and evaluate.
|
|
31
|
+
3. Offer a short rationale explaining why each step is necessary.
|
|
32
|
+
4. Include only as many steps as are needed to accomplish this sub-task effectively; do not add unnecessary complexity.
|
|
33
|
+
|
|
34
|
+
Please keep your plan concise yet sufficiently detailed so that it can be executed without additional clarification.
|
|
35
|
+
"""
|
|
36
|
+
|
|
37
|
+
# reflection_prompt = '''
|
|
38
|
+
# You are a critical reviewer being given a series of steps to solve a problem.
|
|
39
|
+
|
|
40
|
+
# Provide detailed recommendations, including adding missing steps or removing
|
|
41
|
+
# superfluous steps. Ensure the proposed effort is appropriate for the problem.
|
|
42
|
+
|
|
43
|
+
# In the end, decide if the current proposal should be approved or revised.
|
|
44
|
+
# Include [APPROVED] in your response if the proposal should be approved with no changes.
|
|
45
|
+
# '''
|
|
46
|
+
|
|
47
|
+
reflection_prompt = '''
|
|
48
|
+
You are acting as a critical reviewer evaluating a series of steps proposed to solve a specific problem.
|
|
49
|
+
|
|
50
|
+
Carefully review the proposed steps and provide detailed feedback based on the following criteria:
|
|
51
|
+
|
|
52
|
+
- **Clarity:** Is each step clearly and specifically described?
|
|
53
|
+
- **Completeness:** Are any important steps missing?
|
|
54
|
+
- **Relevance:** Are all steps necessary, or are there steps that should be removed because they do not directly contribute to solving the problem?
|
|
55
|
+
- **Feasibility:** Is each step realistic and achievable with available resources?
|
|
56
|
+
- **Efficiency:** Could the steps be combined or simplified for greater efficiency without sacrificing clarity or completeness?
|
|
57
|
+
|
|
58
|
+
Provide your recommendations clearly, listing any additional steps that should be included or identifying specific steps to remove or adjust.
|
|
59
|
+
|
|
60
|
+
At the end of your feedback, clearly state your decision:
|
|
61
|
+
|
|
62
|
+
- If the current proposal requires no changes, include "[APPROVED]" at the end of your response.
|
|
63
|
+
- If revisions are necessary, summarize your reasoning clearly and briefly describe the main revisions needed.
|
|
64
|
+
'''
|
|
65
|
+
|
|
66
|
+
formalize_prompt = """
|
|
67
|
+
Now that the step-by-step plan is finalized, format it into a series of steps in the form of a JSON array with objects having the following structure:
|
|
68
|
+
[
|
|
69
|
+
{{
|
|
70
|
+
"id": "unique_identifier",
|
|
71
|
+
"name": "Step name",
|
|
72
|
+
"description": "Detailed description of the step",
|
|
73
|
+
"requires_code": true/false,
|
|
74
|
+
"expected_outputs": ["Output 1", "Output 2", ...],
|
|
75
|
+
"success_criteria": ["Criterion 1", "Criterion 2", ...]
|
|
76
|
+
}},
|
|
77
|
+
...
|
|
78
|
+
]
|
|
79
|
+
"""
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
# websearch_prompt = """
|
|
2
|
+
# You are a researcher who is able to use internet search to find the information requested.
|
|
3
|
+
|
|
4
|
+
# Consider checking multiple sources and performing multiple searches to find an answer.
|
|
5
|
+
|
|
6
|
+
# - Formulate a search query to attempt to find the requested information.
|
|
7
|
+
# - Review the results of the search and identify the source or sources that contain the needed information.
|
|
8
|
+
# - Summarize the information from multiple sources to identify well-supported or inconsistent information.
|
|
9
|
+
# - Perform additional searches until you are confident that you have the information that is requested.
|
|
10
|
+
# - Summarize the information and provide the sources back to the user.
|
|
11
|
+
# - If you cannot find the requested information, be honest with the user that the information was unavailable.
|
|
12
|
+
# """
|
|
13
|
+
|
|
14
|
+
# websearch_prompt = """
|
|
15
|
+
# You are an experienced researcher tasked with finding accurate, credible, and relevant information online to address the user's request.
|
|
16
|
+
#
|
|
17
|
+
# Before starting your search, ensure you clearly understand the user's request. Perform the following actions:
|
|
18
|
+
#
|
|
19
|
+
# 1. Formulate one or more specific search queries designed to retrieve precise and authoritative information.
|
|
20
|
+
# 2. Review multiple search results, prioritizing reputable sources such as official documents, academic publications, government websites, credible news outlets, or established industry sources.
|
|
21
|
+
# 3. Evaluate the quality, reliability, and recency of each source used.
|
|
22
|
+
# 4. Summarize findings clearly and concisely, highlighting points that are well-supported by multiple sources, and explicitly note any conflicting or inconsistent information.
|
|
23
|
+
# 5. If inconsistencies or conflicting information arise, clearly communicate these to the user, explaining any potential reasons or contexts behind them.
|
|
24
|
+
# 6. Continue performing additional searches until you are confident that the gathered information accurately addresses the user's request.
|
|
25
|
+
# 7. Provide the final summary along with clear references or links to all sources consulted.
|
|
26
|
+
# 8. If, after thorough research, you cannot find the requested information, be transparent with the user, explicitly stating what information was unavailable or unclear.
|
|
27
|
+
#
|
|
28
|
+
# You may also be given feedback by a critic. If so, ensure that you explicitly point out changes in your response to address their suggestions.
|
|
29
|
+
#
|
|
30
|
+
# Your goal is to deliver a thorough, clear, and trustworthy answer, supported by verifiable sources.
|
|
31
|
+
# """
|
|
32
|
+
#
|
|
33
|
+
# reflection_prompt = """
|
|
34
|
+
# You are a quality control supervisor responsible for evaluating the researcher's summary of information gathered in response to a user's query.
|
|
35
|
+
#
|
|
36
|
+
# Carefully assess the researcher’s work according to the following stringent criteria:
|
|
37
|
+
#
|
|
38
|
+
# - **Correctness:** Ensure the results are credible and the researcher documented reliable sources.
|
|
39
|
+
# - **Completeness:** Ensure the researcher has provided sufficient detail and context to answer the user's query.
|
|
40
|
+
#
|
|
41
|
+
# Provide a structured evaluation:
|
|
42
|
+
#
|
|
43
|
+
# 1. Identify the level of strictness that is required for answering the user's query.
|
|
44
|
+
# 2. Clearly list any unsupported assumptions or claims lacking proper citation.
|
|
45
|
+
# 3. Identify any missing information or critical details that should have been included.
|
|
46
|
+
# 4. Suggest specific actions or additional searches the researcher should undertake if the provided information is incomplete or insufficient.
|
|
47
|
+
#
|
|
48
|
+
# If, after a thorough review, the researcher’s summary fully meets your quality standards (accuracy and completeness), conclude your evaluation with "[APPROVED]".
|
|
49
|
+
#
|
|
50
|
+
# Your primary goal is to ensure rigor, accuracy, and reliability in the information presented to the user.
|
|
51
|
+
# """
|
|
52
|
+
|
|
53
|
+
# reflection_prompt = """
|
|
54
|
+
# You are a quality control supervisor responsible for evaluating the researcher's summary of information gathered in response to a user's query.
|
|
55
|
+
|
|
56
|
+
# Carefully assess the researcher’s work according to the following stringent criteria:
|
|
57
|
+
|
|
58
|
+
# - **Correctness:** Verify that all provided information is accurate, supported explicitly by credible and reliable sources.
|
|
59
|
+
# - **Completeness:** Ensure the researcher has provided sufficient detail and context to comprehensively answer the user's query.
|
|
60
|
+
# - **Source Verification:** Confirm the researcher has explicitly performed at least one tool call (search) to gather relevant information, clearly referencing their sources. Be highly skeptical of claims or statements presented without verifiable evidence or source citations.
|
|
61
|
+
|
|
62
|
+
# Provide a structured evaluation:
|
|
63
|
+
|
|
64
|
+
# 1. Clearly list any unsupported assumptions or claims lacking proper citation.
|
|
65
|
+
# 2. Identify any missing information or critical details that should have been included.
|
|
66
|
+
# 3. Suggest specific actions or additional searches the researcher should undertake if the provided information is incomplete or insufficient.
|
|
67
|
+
|
|
68
|
+
# If, after a thorough review, the researcher’s summary fully meets your quality standards (accuracy, completeness, and verifiable sourcing), conclude your evaluation with "[APPROVED]".
|
|
69
|
+
|
|
70
|
+
# Your primary goal is to ensure rigor, accuracy, and reliability in the information presented to the user.
|
|
71
|
+
# """
|
|
72
|
+
|
|
73
|
+
# reflection_prompt = '''
|
|
74
|
+
# You are a quality control supervisor for the researcher. They will summarize the information they have found.
|
|
75
|
+
|
|
76
|
+
# Assess whether they have adequately researched the question and provided enough information to support
|
|
77
|
+
# that their response is correct. You must be very detail oriented - your only goal is to ensure the information
|
|
78
|
+
# the researcher provides is correct and complete. Ensure they have performed at least one tool call to search
|
|
79
|
+
# to check for available information. Be very skeptical that the researcher is lying if they assume information
|
|
80
|
+
# without a reliable source, they may claim to have looked when they have not.
|
|
81
|
+
|
|
82
|
+
# In the end, respond [APPROVED] if the response meets your stringent quality demands.
|
|
83
|
+
# '''
|
|
84
|
+
|
|
85
|
+
websearch_prompt = """
|
|
86
|
+
You are tasked with finding accurate, credible, and relevant information online to address the user's request.
|
|
87
|
+
|
|
88
|
+
Perform the following actions:
|
|
89
|
+
|
|
90
|
+
1. Formulate one or more specific search queries designed to retrieve precise and authoritative information.
|
|
91
|
+
2. Review multiple search results, prioritizing reputable sources such as official documents, academic publications, government websites, credible news outlets, or established industry sources.
|
|
92
|
+
3. Evaluate the quality, reliability, and recency of each source used.
|
|
93
|
+
4. Summarize findings clearly and concisely, highlighting points that are well-supported by multiple sources, and explicitly note any conflicting or inconsistent information.
|
|
94
|
+
5. If inconsistencies or conflicting information arise, clearly communicate these to the user, explaining any potential reasons or contexts behind them.
|
|
95
|
+
6. Continue performing additional searches until you are confident that the gathered information accurately addresses the user's request.
|
|
96
|
+
7. Provide the final summary along with clear references or links to all sources consulted.
|
|
97
|
+
8. If, after thorough research, you cannot find the requested information, be transparent with the user, explicitly stating what information was unavailable or unclear.
|
|
98
|
+
|
|
99
|
+
You may also be given feedback by a critic. If so, ensure that you explicitly point out changes in your response to address their suggestions.
|
|
100
|
+
|
|
101
|
+
Your goal is to deliver a thorough, clear, and trustworthy answer, supported by verifiable sources.
|
|
102
|
+
"""
|
|
103
|
+
|
|
104
|
+
reflection_prompt = """
|
|
105
|
+
You are a quality control supervisor responsible for evaluating the researcher's summary of information gathered in response to a user's query.
|
|
106
|
+
|
|
107
|
+
Assess the researcher’s work according to the following criteria:
|
|
108
|
+
|
|
109
|
+
- **Correctness:** Ensure the results are credible and the researcher documented reliable sources.
|
|
110
|
+
- **Completeness:** Ensure the researcher has provided sufficient detail and context to answer the user's query.
|
|
111
|
+
|
|
112
|
+
If the researcher’s summary fully meets your quality standards (accuracy and completeness), conclude your evaluation with "[APPROVED]"
|
|
113
|
+
|
|
114
|
+
If it does not, provide a structured evaluation:
|
|
115
|
+
|
|
116
|
+
1. List any unsupported assumptions or claims lacking proper citation.
|
|
117
|
+
2. Identify any missing information or critical details that should have been included.
|
|
118
|
+
3. Suggest specific actions or additional searches the researcher should undertake to resolve the reasons it is incomplete or insufficient.
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
Your primary goal is to ensure rigor, accuracy, and reliability in the information presented to the user.
|
|
122
|
+
"""
|
|
123
|
+
|
|
124
|
+
summarize_prompt = '''
|
|
125
|
+
Your goal is to summarize a long user/critic conversation as they work through a complex problem requiring multiple steps.
|
|
126
|
+
|
|
127
|
+
Your responsibility is to write a condensed summary of the conversation.
|
|
128
|
+
- Repeat the solution to the original query.
|
|
129
|
+
- Identify all important points from the conversation.
|
|
130
|
+
- Highlight any places where those goals were not achieved and why.
|
|
131
|
+
'''
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
import subprocess
|
|
2
|
+
|
|
3
|
+
from langchain_core.tools import tool
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@tool
def run_cmd(query: str, workspace_dir: str) -> str:
    """Run command from commandline in the directory workspace_dir"""
    # NOTE: the docstring above is left untouched on purpose. The `@tool`
    # decorator presumably exposes it as the tool description shown to the
    # model (confirm against langchain_core), so documentation lives here.
    #
    # query:         command line to execute (no shell is involved).
    # workspace_dir: working directory of the child process.
    # returns:       "STDOUT: <out> and STDERR: <err>".
    # raises:        subprocess.TimeoutExpired if the command runs longer
    #                than 600 seconds; OSError subclasses if the executable
    #                cannot be started.
    import shlex

    print("RUNNING: ", query)
    print(
        "DANGER DANGER DANGER - THERE IS NO GUARDRAIL FOR SAFETY IN THIS IMPLEMENTATION - DANGER DANGER DANGER"
    )

    # Tokenise like a shell would, so quoted arguments stay together and
    # repeated spaces do not turn into empty argv entries. If the quoting is
    # unbalanced (or nothing is left), fall back to the plain split.
    try:
        argv = shlex.split(query)
    except ValueError:
        argv = []
    if not argv:
        argv = query.split(" ")

    # subprocess.run kills and reaps the child before re-raising
    # TimeoutExpired, so a hung command does not leave a stray process.
    completed = subprocess.run(
        argv,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        cwd=workspace_dir,
        timeout=600,
    )
    stdout, stderr = completed.stdout, completed.stderr

    print("STDOUT: ", stdout)
    print("STDERR: ", stderr)

    return f"STDOUT: {stdout} and STDERR: {stderr}"
|
ursa/tools/write_code.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
from langchain_core.tools import tool
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
@tool
def write_python(code: str, filename: str, workspace_dir: str):
    """
    Writes code to a file in the given workspace.

    Args:
        code: The code to write
        filename: the filename to write

    Returns:
        File writing status: string
    """
    # NOTE: the docstring is kept verbatim because `@tool` presumably uses it
    # as the tool description (confirm against langchain_core).
    # workspace_dir is the directory the file is created in; the target path
    # is os.path.join(workspace_dir, filename).
    print("Writing filename ", filename)
    try:
        # Extract code if wrapped in markdown code blocks
        if "```" in code:
            code_parts = code.split("```")
            if len(code_parts) >= 3:
                fenced = code_parts[1]
                if "\n" in fenced:
                    # Everything up to the first newline is the fence's info
                    # string (e.g. "python", or empty for a bare fence). Drop
                    # exactly that line; stripping before splitting would
                    # throw away the first real code line of an untagged fence.
                    _info, _sep, body = fenced.partition("\n")
                    code = body.rstrip()
                else:
                    code = code_parts[2].strip()

        # Write code to a file
        code_file = os.path.join(workspace_dir, filename)

        # Explicit encoding so the result does not depend on the locale.
        with open(code_file, "w", encoding="utf-8") as f:
            f.write(code)
        print(f"Written code to file: {code_file}")

        return f"File {filename} written successfully."

    except Exception as e:
        # Tool boundary: report the failure as a status string instead of
        # letting the exception propagate to the caller.
        print(f"Error generating code: {str(e)}")
        return f"Failed to write {filename} successfully."
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import re
|
|
2
|
+
import difflib
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from rich.console import Console, ConsoleOptions, RenderResult
|
|
6
|
+
from rich.syntax import Syntax
|
|
7
|
+
from rich.text import Text
|
|
8
|
+
|
|
9
|
+
# Unified-diff hunk header, e.g. "@@ -12,7 +12,9 @@". Group 1 is the first
# line number on the old side, group 2 the first line number on the new side.
_HUNK_RE = re.compile(r"^@@ -(\d+)(?:,\d+)? \+(\d+)(?:,\d+)? @@")


@dataclass
class _LineStyle:
    """How one kind of diff row (added / deleted / context) is drawn."""

    # marker printed between the line-number gutter and the code
    prefix: str
    # rich style string applied as the row background
    bg: str


# Every prefix is two cells wide: the hunk-header row in DiffRenderer is
# indented by 2 * num_width + 3 cells (gutter + one space + two-cell marker),
# so a narrower marker would shift context rows out of alignment.
_STYLE = {
    "add": _LineStyle("+ ", "on #003000"),
    "del": _LineStyle("- ", "on #300000"),
    "ctx": _LineStyle("  ", "on grey15"),
}
|
|
22
|
+
|
|
23
|
+
class DiffRenderer:
    """Renderable diff—`console.print(DiffRenderer(...))`

    Computes a unified diff between two versions of a text and yields one
    rich ``Text`` row per diff line: old/new line numbers, a +/- marker and
    the syntax-highlighted line, padded to the console width.

    Parameters
    ----------
    content : str
        Original text.
    updated : str
        Modified text.
    filename : str
        Used for the diff's from/to labels and to guess the syntax lexer.
    """

    def __init__(self, content: str, updated: str, filename: str):
        old_lines = content.splitlines()
        new_lines = updated.splitlines()

        # total lines in each version
        self._old_total = len(old_lines)
        self._new_total = len(new_lines)

        # width of one line-number column: digits of the largest count + 2
        self._num_width = len(str(max(self._old_total, self._new_total))) + 2

        # get the diff
        self._diff_lines = list(
            difflib.unified_diff(
                old_lines,
                new_lines,
                fromfile=f"{filename} (original)",
                tofile=f"{filename} (modified)",
                lineterm="",
            )
        )

        # get syntax style; fall back to plain text if guessing fails
        try:
            self._lexer_name = Syntax.guess_lexer(filename, updated)
        except Exception:
            self._lexer_name = "text"

    def __rich_console__(
        self, console: Console, opts: ConsoleOptions
    ) -> RenderResult:
        """Yield the rendered rows; called by rich when the object is printed."""
        old_line = new_line = None
        width = console.width

        # Indent for hunk-header rows: dotted placeholders where the two
        # line-number columns would be, padded to where the code starts.
        n = self._num_width
        tick_col = "." * (n - 1)
        hunk_indent = f" {tick_col} {tick_col}".ljust(2 * n + 3)

        for raw in self._diff_lines:
            # grab line numbers from hunk header
            if m := _HUNK_RE.match(raw):
                old_line, new_line = map(int, m.groups())
                yield Text(f"{hunk_indent}{raw}".ljust(width), style="white on grey30")
                continue

            # Skip the "---"/"+++" file headers. They only occur before the
            # first hunk, so test for that instead of the text prefix: a
            # deleted line whose content starts with "--" (or an added one
            # starting with "++") also begins with "---"/"+++" and must be
            # rendered and counted.
            if old_line is None:
                continue

            # classify the row, strip the one-character diff marker and work
            # out which line-number columns are filled
            if raw.startswith("+"):
                style = _STYLE["add"]
                code = raw[1:]
                old_num, new_num = None, new_line
                new_line += 1
            elif raw.startswith("-"):
                style = _STYLE["del"]
                code = raw[1:]
                old_num, new_num = old_line, None
                old_line += 1
            else:
                style = _STYLE["ctx"]
                code = raw[1:] if raw.startswith(" ") else raw
                old_num, new_num = old_line, new_line
                old_line += 1
                new_line += 1

            old_str = str(old_num) if old_num is not None else " "
            new_str = str(new_num) if new_num is not None else " "

            # Syntax-highlight the code part
            syntax = Syntax(code, self._lexer_name, line_numbers=False, word_wrap=False)
            text_code: Text = syntax.highlight(code)
            if text_code.plain.endswith("\n"):
                text_code = text_code[:-1]
            # apply background
            text_code.stylize(style.bg)

            # line numbers + code
            nums = Text(
                f"{old_str:>{self._num_width}}{new_str:>{self._num_width}} ",
                style=f"white {style.bg}",
            )
            diff_mark = Text(style.prefix, style=f"bright_white {style.bg}")
            line_text = nums + diff_mark + text_code

            # pad to console width so the background fills the whole row
            pad_len = width - line_text.cell_len
            if pad_len > 0:
                line_text.append(" " * pad_len, style=style.bg)

            yield line_text
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import shutil
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Any, Dict, List, Optional, Sequence
|
|
5
|
+
|
|
6
|
+
from langchain_chroma import Chroma
|
|
7
|
+
from langchain_core.documents import Document
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class AgentMemory:
|
|
11
|
+
"""
|
|
12
|
+
Simple wrapper around a persistent Chroma vector-store for agent-conversation memory.
|
|
13
|
+
|
|
14
|
+
Parameters
|
|
15
|
+
----------
|
|
16
|
+
path : str | Path | None
|
|
17
|
+
Where to keep the on-disk Chroma DB. If *None*, a folder called
|
|
18
|
+
``agent_memory_db`` is created in the package’s base directory.
|
|
19
|
+
collection_name : str
|
|
20
|
+
Name of the Chroma collection.
|
|
21
|
+
embedding_model : <TODO> | None
|
|
22
|
+
the embedding model
|
|
23
|
+
|
|
24
|
+
Notes
|
|
25
|
+
-----
|
|
26
|
+
* Requires `langchain-chroma`, and `chromadb`.
|
|
27
|
+
"""
|
|
28
|
+
|
|
29
|
+
def __init__(
|
|
30
|
+
self,
|
|
31
|
+
embedding_model,
|
|
32
|
+
path: Optional[str | Path] = None,
|
|
33
|
+
collection_name: str = "agent_memory",
|
|
34
|
+
) -> None:
|
|
35
|
+
self.path = (
|
|
36
|
+
Path(path) if path else Path(__file__).resolve().parent / "agent_memory_db"
|
|
37
|
+
)
|
|
38
|
+
self.collection_name = collection_name
|
|
39
|
+
self.path.mkdir(parents=True, exist_ok=True)
|
|
40
|
+
|
|
41
|
+
self.embeddings = embedding_model
|
|
42
|
+
|
|
43
|
+
# If a DB already exists, load it; otherwise defer creation until `build_index`.
|
|
44
|
+
self.vectorstore: Optional[Chroma] = None
|
|
45
|
+
if any(self.path.iterdir()):
|
|
46
|
+
self.vectorstore = Chroma(
|
|
47
|
+
collection_name=self.collection_name,
|
|
48
|
+
embedding_function=self.embeddings,
|
|
49
|
+
persist_directory=str(self.path),
|
|
50
|
+
)
|
|
51
|
+
|
|
52
|
+
# --------------------------------------------------------------------- #
|
|
53
|
+
# ❶ Build & index a brand-new database #
|
|
54
|
+
# --------------------------------------------------------------------- #
|
|
55
|
+
def build_index(
|
|
56
|
+
self,
|
|
57
|
+
chunks: Sequence[str],
|
|
58
|
+
metadatas: Optional[Sequence[Dict[str, Any]]] = None,
|
|
59
|
+
) -> None:
|
|
60
|
+
"""
|
|
61
|
+
Create a fresh vector store from ``chunks``. Existing data (if any)
|
|
62
|
+
are overwritten.
|
|
63
|
+
|
|
64
|
+
Parameters
|
|
65
|
+
----------
|
|
66
|
+
chunks : Sequence[str]
|
|
67
|
+
Text snippets (already chunked) to embed.
|
|
68
|
+
metadatas : Sequence[dict] | None
|
|
69
|
+
Optional metadata dict for each chunk, same length as ``chunks``.
|
|
70
|
+
"""
|
|
71
|
+
docs = [
|
|
72
|
+
Document(page_content=text, metadata=metadatas[i] if metadatas else {})
|
|
73
|
+
for i, text in enumerate(chunks)
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
# Create (or overwrite) the collection
|
|
77
|
+
self.vectorstore = Chroma.from_documents(
|
|
78
|
+
documents=docs,
|
|
79
|
+
embedding=self.embeddings,
|
|
80
|
+
collection_name=self.collection_name,
|
|
81
|
+
persist_directory=str(self.path),
|
|
82
|
+
)
|
|
83
|
+
|
|
84
|
+
# --------------------------------------------------------------------- #
|
|
85
|
+
# ❷ Add new chunks and re-index #
|
|
86
|
+
# --------------------------------------------------------------------- #
|
|
87
|
+
def add_memories(
|
|
88
|
+
self,
|
|
89
|
+
new_chunks: Sequence[str],
|
|
90
|
+
metadatas: Optional[Sequence[Dict[str, Any]]] = None,
|
|
91
|
+
) -> None:
|
|
92
|
+
"""
|
|
93
|
+
Append new text chunks to the existing store (must call `build_index`
|
|
94
|
+
first if the DB is empty).
|
|
95
|
+
|
|
96
|
+
Raises
|
|
97
|
+
------
|
|
98
|
+
RuntimeError
|
|
99
|
+
If the vector store is not yet initialised.
|
|
100
|
+
"""
|
|
101
|
+
if self.vectorstore is None:
|
|
102
|
+
self.build_index(new_chunks, metadatas)
|
|
103
|
+
print("----- Vector store initialised -----")
|
|
104
|
+
|
|
105
|
+
docs = []
|
|
106
|
+
for i, text in enumerate(new_chunks):
|
|
107
|
+
if len(text) > 0: # only add non-empty documents
|
|
108
|
+
docs.append(
|
|
109
|
+
Document(
|
|
110
|
+
page_content=text, metadata=metadatas[i] if metadatas else {}
|
|
111
|
+
)
|
|
112
|
+
)
|
|
113
|
+
self.vectorstore.add_documents(docs)
|
|
114
|
+
|
|
115
|
+
# --------------------------------------------------------------------- #
|
|
116
|
+
# ❸ Retrieve relevant chunks (RAG query) #
|
|
117
|
+
# --------------------------------------------------------------------- #
|
|
118
|
+
def retrieve(
|
|
119
|
+
self,
|
|
120
|
+
query: str,
|
|
121
|
+
k: int = 4,
|
|
122
|
+
with_scores: bool = False,
|
|
123
|
+
**search_kwargs,
|
|
124
|
+
):
|
|
125
|
+
"""
|
|
126
|
+
Return the *k* most similar chunks for `query`.
|
|
127
|
+
|
|
128
|
+
Parameters
|
|
129
|
+
----------
|
|
130
|
+
query : str
|
|
131
|
+
Natural-language question or statement.
|
|
132
|
+
k : int
|
|
133
|
+
How many results to return.
|
|
134
|
+
with_scores : bool
|
|
135
|
+
If True, also return similarity scores.
|
|
136
|
+
**search_kwargs
|
|
137
|
+
Extra kwargs forwarded to Chroma’s ``similarity_search*`` helpers.
|
|
138
|
+
|
|
139
|
+
Returns
|
|
140
|
+
-------
|
|
141
|
+
list[Document] | list[tuple[Document, float]]
|
|
142
|
+
"""
|
|
143
|
+
if self.vectorstore is None:
|
|
144
|
+
return ["None"]
|
|
145
|
+
|
|
146
|
+
if with_scores:
|
|
147
|
+
return self.vectorstore.similarity_search_with_score(
|
|
148
|
+
query, k=k, **search_kwargs
|
|
149
|
+
)
|
|
150
|
+
return self.vectorstore.similarity_search(query, k=k, **search_kwargs)
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def delete_database(path: Optional[str | Path] = None):
|
|
154
|
+
"""
|
|
155
|
+
Simple wrapper around a persistent Chroma vector-store for agent-conversation memory.
|
|
156
|
+
|
|
157
|
+
Parameters
|
|
158
|
+
----------
|
|
159
|
+
path : str | Path | None
|
|
160
|
+
Where the on-disk Chroma DB is for deleting. If *None*, a folder called
|
|
161
|
+
``agent_memory_db`` is created in the package’s base directory.
|
|
162
|
+
"""
|
|
163
|
+
|
|
164
|
+
db_path = (
|
|
165
|
+
Path(path) if path else Path("~/.cache/ursa/rag/db/")
|
|
166
|
+
)
|
|
167
|
+
if os.path.exists(db_path):
|
|
168
|
+
shutil.rmtree(db_path)
|
|
169
|
+
print(f"Database: {db_path} has been deleted.")
|
|
170
|
+
else:
|
|
171
|
+
print("No database found to delete.")
|