vision-agent 0.2.56__py3-none-any.whl → 0.2.57__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,475 +0,0 @@
1
- import json
2
- import logging
3
- from pathlib import Path
4
- from typing import Any, Callable, Dict, List, Mapping, Optional, Tuple, Union
5
-
6
- import pandas as pd
7
- from langsmith import traceable
8
- from rich.console import Console
9
- from rich.syntax import Syntax
10
- from tabulate import tabulate
11
-
12
- from vision_agent.agent import Agent
13
- from vision_agent.agent.data_interpreter_prompts import (
14
- CODE,
15
- CODE_SYS_MSG,
16
- DEBUG,
17
- DEBUG_EXAMPLE,
18
- DEBUG_SYS_MSG,
19
- PLAN,
20
- PREV_CODE_CONTEXT,
21
- PREV_CODE_CONTEXT_WITH_REFLECTION,
22
- TEST,
23
- USER_REQ_CONTEXT,
24
- USER_REQ_SUBTASK_CONTEXT,
25
- USER_REQ_SUBTASK_WM_CONTEXT,
26
- )
27
- from vision_agent.llm import LLM, OpenAILLM
28
- from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF
29
- from vision_agent.utils import CodeInterpreter, CodeInterpreterFactory, Execution, Sim
30
-
31
# Configure logging when this module is imported and create the module logger.
logging.basicConfig(level=logging.INFO)
_LOGGER = logging.getLogger(__name__)
# Maximum column width handed to tabulate when plans/tasks are rendered in logs.
_MAX_TABULATE_COL_WIDTH = 80
# Default code interpreter instance; DataInterpreter stores it as `self.exec`.
_EXECUTE = CodeInterpreterFactory.get_default_instance()
# Rich console used to print syntax-highlighted generated code.
_CONSOLE = Console()
36
-
37
-
38
def build_working_memory(working_memory: Mapping[str, List[str]]) -> Sim:
    """Build a ``Sim`` index over the working memory.

    Every key of ``working_memory`` becomes one row: the key itself is stored
    in the ``desc`` column and its list of strings, joined with newlines, is
    stored in the ``doc`` column. The index is keyed on ``desc``.
    """
    descriptions = list(working_memory)
    documents = ["\n".join(entries) for entries in working_memory.values()]
    frame = pd.DataFrame({"desc": descriptions, "doc": documents})
    return Sim(frame, sim_key="desc")
45
-
46
-
47
def extract_code(code: str) -> str:
    """Return the Python source embedded in an LLM response.

    If the text contains a ```` ```python ```` fence, everything after the
    first such fence and before the next ```` ``` ```` is returned. When the
    closing fence is missing, the remainder of the text is returned in full.
    A leading ``python\\n`` marker is stripped as well. Text without any
    fence is returned unchanged.

    Args:
        code: Raw model output that may wrap code in a markdown fence.

    Returns:
        The extracted code, including any whitespace next to the fences.
    """
    fence = "```python"
    start = code.find(fence)
    if start != -1:
        code = code[start + len(fence) :]
        end = code.find("```")
        # `find` returns -1 when the fence is never closed; slicing with -1
        # would silently chop the last character off the code.
        if end != -1:
            code = code[:end]
    if code.startswith("python\n"):
        code = code[len("python\n") :]
    return code
54
-
55
-
56
def extract_json(json_str: str) -> Dict[str, Any]:
    """Parse JSON from an LLM response, tolerating markdown code fences.

    The text is first parsed as-is. If that fails, the content of a
    ```` ```json ```` fence (up to the next fence) or, failing that, of a plain
    ```` ``` ```` fence (up to the last fence in the text) is parsed instead.

    Args:
        json_str: Raw model output, either bare JSON or JSON inside a fence.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If no parsable JSON can be recovered.
    """
    try:
        return json.loads(json_str)  # type: ignore
    except json.JSONDecodeError:
        pass

    if "```json" in json_str:
        json_str = json_str[json_str.find("```json") + len("```json") :]
        closing = json_str.find("```")
    elif "```" in json_str:
        json_str = json_str[json_str.find("```") + len("```") :]
        # Use the last fence, not one that appears inside an intermediate
        # string, and keep the closing brace that precedes it.
        closing = json_str.rfind("```")
    else:
        closing = -1

    # A missing closing fence (-1) must not truncate the payload.
    if closing != -1:
        json_str = json_str[:closing]
    return json.loads(json_str)  # type: ignore
69
-
70
-
71
@traceable(name="planning")
def write_plan(
    chat: List[Dict[str, str]],
    plan: Optional[List[Dict[str, Any]]],
    tool_desc: str,
    model: LLM,
) -> Tuple[str, List[Dict[str, Any]]]:
    """Ask the planner model for a (re)plan of subtasks.

    The last chat message must come from the user; its content is wrapped in
    the ``PLAN`` prompt together with the current plan and tool descriptions.

    Args:
        chat: Conversation so far; the final entry is the user requirement.
        plan: Existing plan (possibly with per-task results) or ``None``.
        tool_desc: Description of the tools available to the agent.
        model: LLM whose ``chat`` response is parsed as JSON.

    Returns:
        A tuple of the ``user_req`` and ``plan`` entries of the model's reply.

    Raises:
        ValueError: If the last chat message is not from the user.
    """
    # Get last user request
    if chat[-1]["role"] != "user":
        raise ValueError("Last chat message must be from the user.")
    user_requirements = chat[-1]["content"]

    context = USER_REQ_CONTEXT.format(user_requirement=user_requirements)
    prompt = PLAN.format(context=context, plan=str(plan), tool_desc=tool_desc)
    # Send a copy with the prompt swapped in so the caller's message keeps its
    # original content instead of being overwritten by the planning prompt.
    planning_chat = [*chat[:-1], {**chat[-1], "content": prompt}]
    new_plan = extract_json(model.chat(planning_chat))
    return new_plan["user_req"], new_plan["plan"]
88
-
89
-
90
def write_code(
    user_req: str,
    subtask: str,
    working_memory: str,
    tool_info: str,
    code: str,
    model: LLM,
) -> str:
    """Ask the model to write code for ``subtask`` and return the extracted code.

    The prompt is built from the ``CODE`` template using the user requirement,
    subtask, working memory, tool information and the previous code; the
    model's reply is passed through ``extract_code``.
    """
    context = USER_REQ_SUBTASK_WM_CONTEXT.format(
        user_requirement=user_req, working_memory=working_memory, subtask=subtask
    )
    user_prompt = CODE.format(context=context, tool_info=tool_info, code=code)
    response = model.chat(
        [
            {"role": "system", "content": CODE_SYS_MSG},
            {"role": "user", "content": user_prompt},
        ]
    )
    return extract_code(response)
111
-
112
-
113
def write_test(
    user_req: str, subtask: str, tool_info: str, _: str, code: str, model: LLM
) -> str:
    """Ask the model to write test code for ``code`` and return the extracted code.

    The prompt is built from the ``TEST`` template using the user requirement,
    subtask, tool information and the code under test. The fourth positional
    parameter is ignored.

    NOTE(review): ``write_and_exec_code`` calls its writer positionally as
    ``(user_req, subtask, retrieved_ltm, tool_info, orig_code, model)``, which
    matches ``write_code`` but puts the retrieved memory into ``tool_info``
    here and discards the real tool info -- confirm whether that is intended.
    """
    context = USER_REQ_SUBTASK_CONTEXT.format(
        user_requirement=user_req, subtask=subtask
    )
    user_prompt = TEST.format(context=context, tool_info=tool_info, code=code)
    response = model.chat(
        [
            {"role": "system", "content": CODE_SYS_MSG},
            {"role": "user", "content": user_prompt},
        ]
    )
    return extract_code(response)
129
-
130
-
131
def debug_code(
    user_req: str,
    subtask: str,
    retrieved_ltm: str,
    working_memory: str,
    model: LLM,
) -> Tuple[str, str]:
    """Ask the model to reflect on failed code and produce an improved version.

    Args:
        user_req: The user requirement.
        subtask: The subtask whose implementation failed.
        retrieved_ltm: Retrieved memory, inserted as the prompt's working memory.
        working_memory: Previous implementations/outputs, inserted as the
            prompt's ``previous_impl`` section.
        model: LLM to query. If it exposes a ``kwargs`` dict, JSON output is
            requested through ``response_format`` for this call only.

    Returns:
        A tuple of the improved code (passed through ``extract_code``) and the
        model's reflection, taken from the ``improved_impl`` and ``reflection``
        keys of the JSON reply.
    """
    prompt = DEBUG.format(
        debug_example=DEBUG_EXAMPLE,
        context=USER_REQ_SUBTASK_WM_CONTEXT.format(
            user_requirement=user_req,
            subtask=subtask,
            working_memory=retrieved_ltm,
        ),
        previous_impl=working_memory,
    )
    messages = [
        {"role": "system", "content": DEBUG_SYS_MSG},
        {"role": "user", "content": prompt},
    ]

    # Make debug model output JSON, remembering any pre-existing setting so it
    # can be put back afterwards.
    missing = object()
    previous_format: Any = missing
    overrides_format = hasattr(model, "kwargs")
    if overrides_format:
        previous_format = model.kwargs.get("response_format", missing)
        model.kwargs["response_format"] = {"type": "json_object"}
    try:
        code_and_ref = extract_json(model.chat(messages))
    finally:
        # Restore the model even when the call or the JSON parsing fails, so
        # the temporary JSON mode never leaks into later calls.
        if overrides_format:
            if previous_format is missing:
                model.kwargs.pop("response_format", None)
            else:
                model.kwargs["response_format"] = previous_format
    return extract_code(code_and_ref["improved_impl"]), code_and_ref["reflection"]
158
-
159
-
160
def write_and_exec_code(
    user_req: str,
    subtask: str,
    orig_code: str,
    code_writer_call: Callable[..., str],
    model: LLM,
    tool_info: str,
    exec: CodeInterpreter,
    retrieved_ltm: str,
    log_progress: Callable[[Dict[str, Any]], None],
    max_retry: int = 3,
    verbosity: int = 0,
) -> Tuple[bool, str, Execution, Dict[str, List[str]]]:
    """Write code for a subtask, run it, and debug it until it succeeds.

    The code is first produced by ``code_writer_call`` and executed with
    ``exec.exec_isolation``. While the execution is unsuccessful, up to
    ``max_retry`` rounds of ``debug_code`` followed by re-execution are run;
    each round's code, output and reflection are recorded in a working memory
    keyed by ``subtask``.

    Args:
        user_req: The user requirement.
        subtask: Instruction of the subtask being solved.
        orig_code: Code carried over from earlier tasks.
        code_writer_call: Writer invoked positionally as
            ``(user_req, subtask, retrieved_ltm, tool_info, orig_code, model)``.
        model: LLM used for writing and debugging.
        tool_info: Documentation of the recommended tools.
        exec: Code interpreter used to run the generated code.
        retrieved_ltm: Retrieved memory passed to the writer and debugger.
        log_progress: Callback receiving progress dictionaries.
        max_retry: Maximum number of debugging rounds.
        verbosity: ``2`` enables code printing and detailed logging.

    Returns:
        ``(success, code, result, working_memory)`` for the last execution.
    """
    counter = 0
    reflection = ""

    # NOTE(review): this positional order matches `write_code`; `write_test`
    # declares `tool_info` third, so it receives `retrieved_ltm` -- confirm.
    code = code_writer_call(
        user_req, subtask, retrieved_ltm, tool_info, orig_code, model
    )
    result = exec.exec_isolation(code)
    success = result.success
    # NOTE(review): indentation is not recoverable from the diff rendering;
    # the logging below is assumed to belong to the verbosity check -- confirm.
    if verbosity == 2:
        _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
        log_progress(
            {
                "log": f"Code success: {success}",
            }
        )
        log_progress(
            {
                "log": "Code:",
                "code": code,
            }
        )
        log_progress(
            {
                "log": "Result:",
                "result": result.to_json(),
            }
        )
        _LOGGER.info(f"\tCode success: {success}, result: {result.text(False)}")

    working_memory: Dict[str, List[str]] = {}
    while not success and counter < max_retry:
        if subtask not in working_memory:
            working_memory[subtask] = []

        # Record the failed attempt so the debugger can see it. The textual
        # output is used in both branches, consistent with the other calls.
        if reflection:
            working_memory[subtask].append(
                PREV_CODE_CONTEXT_WITH_REFLECTION.format(
                    code=code, result=result.text(), reflection=reflection
                )
            )
        else:
            working_memory[subtask].append(
                PREV_CODE_CONTEXT.format(code=code, result=result.text())
            )

        code, reflection = debug_code(
            user_req, subtask, retrieved_ltm, "\n".join(working_memory[subtask]), model
        )
        result = exec.exec_isolation(code)
        # Refresh the flag from the new run; without this the loop would keep
        # debugging working code until the retry budget is exhausted.
        success = result.success
        counter += 1
        if verbosity == 2:
            _CONSOLE.print(
                Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
            )
            log_progress(
                {
                    "log": "Debugging reflection:",
                    "reflection": reflection,
                }
            )
            log_progress(
                {
                    "log": "Result:",
                    "result": result.to_json(),
                }
            )
            _LOGGER.info(
                f"\tDebugging reflection: {reflection}, result: {result.text(False)}"
            )

        if success:
            # Keep the fix that finally worked alongside its reflection.
            working_memory[subtask].append(
                PREV_CODE_CONTEXT_WITH_REFLECTION.format(
                    reflection=reflection, code=code, result=result.text()
                )
            )

    return result.success, code, result, working_memory
251
-
252
-
253
@traceable(name="plan execution")
def run_plan(
    user_req: str,
    plan: List[Dict[str, Any]],
    coder: LLM,
    exec: CodeInterpreter,
    code: str,
    tool_recommender: Sim,
    log_progress: Callable[[Dict[str, Any]], None],
    long_term_memory: Optional[Sim] = None,
    verbosity: int = 0,
) -> Tuple[str, str, List[Dict[str, Any]], Dict[str, List[str]]]:
    """Execute every not-yet-successful task of ``plan`` in order.

    For each task, tools are looked up with ``tool_recommender``, optional
    long-term memory is retrieved, and ``write_and_exec_code`` is run with
    ``write_code`` for ``"code"`` tasks and ``write_test`` otherwise. The task
    dictionary is updated in place with ``success``, ``result`` and ``code``.
    Execution stops at the first task that fails.

    Returns:
        ``(current_code, current_test, plan, working_memory)``.
    """
    pending_tasks = [step for step in plan if not step.get("success")]
    latest_code = code
    latest_test = ""
    retrieved_ltm = ""
    working_memory: Dict[str, List[str]] = {}

    for task in pending_tasks:
        log_progress(
            {"log": "Going to run the following task(s) in sequence:", "task": task}
        )
        task_table = tabulate(
            tabular_data=[task],
            headers="keys",
            tablefmt="mixed_grid",
            maxcolwidths=_MAX_TABULATE_COL_WIDTH,
        )
        _LOGGER.info(f"\n{task_table}")

        instruction = task["instruction"]
        tools = tool_recommender.top_k(instruction, thresh=0.3)
        tool_info = "\n".join([tool["doc"] for tool in tools])

        # NOTE(review): nesting is inferred from the diff rendering; both log
        # calls are assumed to sit under the verbosity check -- confirm.
        if verbosity == 2:
            tool_names = [tool["desc"] for tool in tools]
            log_progress({"log": f"Tools retrieved: {tool_names}"})
            _LOGGER.info(f"Tools retrieved: {tool_names}")

        if long_term_memory is not None:
            memories = long_term_memory.top_k(instruction, 1)
            retrieved_ltm = "\n".join([memory["doc"] for memory in memories])

        is_code_task = task["type"] == "code"
        success, code, result, task_memory = write_and_exec_code(
            user_req,
            instruction,
            latest_code,
            write_code if is_code_task else write_test,
            coder,
            tool_info,
            exec,
            retrieved_ltm,
            log_progress,
            verbosity=verbosity,
        )
        if is_code_task:
            latest_code = code
        else:
            latest_test = code

        working_memory.update(task_memory)

        if verbosity == 1:
            _CONSOLE.print(
                Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
            )

        log_progress(
            {
                "log": f"Code success: {success}",
            }
        )
        log_progress(
            {
                "log": "Result:",
                "result": result.to_json(),
            }
        )
        _LOGGER.info(f"\tCode success: {success} result: {result.text(False)}")

        # Record the outcome on the task itself so a later replan can see it.
        task["success"] = success
        task["result"] = result
        task["code"] = code

        if not success:
            break

    return latest_code, latest_test, plan, working_memory
336
-
337
-
338
class DataInterpreter(Agent):
    """This version of Data Interpreter is an AI agentic framework geared towards
    outputting Python code to solve vision tasks. It is inspired by MetaGPT's Data
    Interpreter https://arxiv.org/abs/2402.18679. This version of Data Interpreter has
    several key features to help it generate code:

    - A planner to generate a plan of tasks to solve a user requirement. The planner
      can output code tasks or test tasks, where test tasks are used to verify the code.
    - Automatic debugging, if a task fails, the agent will attempt to debug the code
      using the failed output to fix it.
    - A tool recommender to recommend tools to use for a given task. LLM performance
      on tool retrieval starts to decrease as you add more tools, tool retrieval helps
      keep the number of tools to choose from low.
    - Memory retrieval, the agent can remember previous iterations on tasks to help it
      with new tasks.
    - Dynamic replanning, the agent can ask for feedback and replan remaining tasks
      based off of that feedback.
    """

    def __init__(
        self,
        timeout: int = 600,
        tool_recommender: Optional[Sim] = None,
        long_term_memory: Optional[Sim] = None,
        verbosity: int = 0,
        report_progress_callback: Optional[Callable[[Dict[str, Any]], None]] = None,
    ) -> None:
        """Create the agent.

        Args:
            timeout: Accepted for interface compatibility; not used here.
            tool_recommender: Index used to look up tools for a task. Defaults
                to a ``Sim`` over ``TOOLS_DF`` keyed on ``desc``.
            long_term_memory: Optional index with a ``doc`` column that is
                queried for each task.
            verbosity: Any non-zero value sets the module logger to INFO;
                the value is also forwarded to plan execution.
            report_progress_callback: Optional callable that receives every
                progress dictionary.

        Raises:
            ValueError: If ``long_term_memory`` has no ``doc`` column.
        """
        self.planner = OpenAILLM(temperature=0.0, json_mode=True)
        self.coder = OpenAILLM(temperature=0.0)
        self.exec = _EXECUTE
        self.report_progress_callback = report_progress_callback
        if tool_recommender is None:
            self.tool_recommender = Sim(TOOLS_DF, sim_key="desc")
        else:
            self.tool_recommender = tool_recommender
        self.verbosity = verbosity
        self._working_memory: Dict[str, List[str]] = {}
        if long_term_memory is not None:
            if "doc" not in long_term_memory.df.columns:
                raise ValueError("Long term memory must have a 'doc' column.")
        # Always set the attribute (possibly to None): chat_with_workflow reads
        # it unconditionally.
        self.long_term_memory = long_term_memory
        self.max_retries = 3
        if self.verbosity:
            _LOGGER.setLevel(logging.INFO)

    def __call__(
        self,
        input: Union[List[Dict[str, str]], str],
        media: Optional[Union[str, Path]] = None,
        plan: Optional[List[Dict[str, Any]]] = None,
    ) -> str:
        """Run the workflow and return only the generated code.

        A plain string ``input`` is wrapped as a single user message.
        """
        if isinstance(input, str):
            input = [{"role": "user", "content": input}]
        results = self.chat_with_workflow(input, media, plan)
        return results["code"]  # type: ignore

    @traceable
    def chat_with_workflow(
        self,
        chat: List[Dict[str, str]],
        media: Optional[Union[str, Path]] = None,
        plan: Optional[List[Dict[str, Any]]] = None,
    ) -> Dict[str, Any]:
        """Plan, execute and report on a user request.

        Args:
            chat: Conversation whose last message is the user request.
            media: Optional file name appended to every user message.
            plan: Optional previous plan; the code of its last successful
                task is used as the starting code.

        Returns:
            A dictionary with the keys ``code``, ``test``, ``success``,
            ``working_memory`` (a ``Sim`` built from the collected memory)
            and ``plan``.

        Raises:
            ValueError: If ``chat`` is empty.
        """
        if not chat:
            raise ValueError("Input cannot be empty.")

        # Work on copies of the messages so the caller's chat is not modified
        # when the media name (and later the planning prompt) is added.
        chat = [dict(message) for message in chat]

        if media is not None:
            # append file names to all user messages
            for chat_i in chat:
                if chat_i["role"] == "user":
                    chat_i["content"] += f" Image name {media}"

        working_code = ""
        if plan is not None:
            # grab the latest working code from a previous plan
            for task in plan:
                if "success" in task and "code" in task and task["success"]:
                    working_code = task["code"]

        user_req, plan = write_plan(chat, plan, TOOL_DESCRIPTIONS, self.planner)
        self.log_progress(
            {
                "log": "Plans:",
                "plan": plan,
            }
        )
        plan_table = tabulate(
            tabular_data=plan,
            headers="keys",
            tablefmt="mixed_grid",
            maxcolwidths=_MAX_TABULATE_COL_WIDTH,
        )
        _LOGGER.info(f"Plan:\n{plan_table}")

        working_test = ""
        working_memory: Dict[str, List[str]] = {}
        success = False
        retries = 0

        while not success and retries < self.max_retries:
            working_code, working_test, plan, working_memory_i = run_plan(
                user_req,
                plan,
                self.coder,
                self.exec,
                working_code,
                self.tool_recommender,
                self.log_progress,
                self.long_term_memory,
                self.verbosity,
            )
            success = all(task.get("success", False) for task in plan)
            working_memory.update(working_memory_i)

            if not success:
                # return to user and request feedback
                break

            retries += 1

        self.log_progress(
            {
                "log": f"The Vision Agent V2 has concluded this chat.\nSuccess: {success}",
                "finished": True,
            }
        )

        return {
            "code": working_code,
            "test": working_test,
            "success": success,
            "working_memory": build_working_memory(working_memory),
            "plan": plan,
        }

    def log_progress(self, data: Dict[str, Any]) -> None:
        """Forward ``data`` to the progress callback, if one was provided."""
        if self.report_progress_callback is not None:
            self.report_progress_callback(data)
@@ -1,186 +0,0 @@
1
- USER_REQ_CONTEXT = """
2
- ## User Requirement
3
- {user_requirement}
4
- """
5
-
6
- USER_REQ_SUBTASK_CONTEXT = """
7
- ## User Requirement
8
- {user_requirement}
9
-
10
- ## Current Subtask
11
- {subtask}
12
- """
13
-
14
- USER_REQ_SUBTASK_WM_CONTEXT = """
15
- ## User Requirement
16
- {user_requirement}
17
-
18
- ## Current Subtask
19
- {subtask}
20
-
21
- ## Previous Task
22
- {working_memory}
23
- """
24
-
25
- PLAN = """
26
- # Context
27
- {context}
28
-
29
- # Current Plan
30
- {plan}
31
-
32
- # Tools Available
33
- {tool_desc}
34
-
35
- # Task:
36
- Based on the context and the tools you have available, write a plan of subtasks to achieve the user request that adhere to the following requirements:
37
- - For each subtask, you should provide instructions on what to do. Write detailed subtasks, ensure they are large enough to be meaningful, encompassing multiple lines of code.
38
- - You do not need to have the agent rewrite any tool functionality you already have, you should instead instruct it to utilize one or more of those tools in each subtask.
39
- - You can have agents either write coding tasks, to code some functionality or testing tasks to test previous functionality.
40
- - If a current plan exists, examine each item in the plan to determine if it was successful. If there was an item that failed, i.e. 'success': False, then you should rewrite that item and all subsequent items to ensure that the rewritten plan is successful.
41
-
42
- Output a list of jsons in the following format:
43
-
44
- ```json
45
- {{
46
- "user_req": str, # "a summarized version of the user requirement"
47
- "plan":
48
- [
49
- {{
50
- "task_id": int, # "unique identifier for a task in plan, can be an ordinal"
51
- "dependent_task_ids": list[int], # "ids of tasks prerequisite to this task"
52
- "instruction": str, # "what you should do in this task, one short phrase or sentence"
53
- "type": str, # "the type of the task, tasks can either be 'code' for coding tasks or 'test' for testing tasks"
54
- }},
55
- ...
56
- ]
57
- }}
58
- ```
59
- """
60
-
61
-
62
- CODE_SYS_MSG = """You are an AI Python assistant. You need to help user to achieve their goal by implementing a function. Your code will be run in a jupyter notebook environment so don't use asyncio.run. Instead, use await if you need to call an async function. Do not use 'display' for showing images, instead use matplotlib or PIL."""
63
-
64
-
65
- CODE = """
66
- # Context
67
- {context}
68
-
69
- # Tool Info for Current Subtask
70
- {tool_info}
71
-
72
- # Previous Code
73
- {code}
74
-
75
- # Constraints
76
- - Write a function that accomplishes the 'Current Subtask'. You are supplied code from a previous task under 'Previous Code', do not delete or change previous code unless it contains a bug or it is necessary to complete the 'Current Subtask'.
77
- - Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools import *` import.
78
- - You may receive previous trials and errors under 'Previous Task', this is code, output and reflections from previous tasks. You can use these to avoid running into the same issues when writing your code.
79
- - Use the `save_json` function from `vision_agent.tools` to save your output as a json file.
80
- - Write clean, readable, and well-documented code.
81
-
82
- # Output
83
- While some concise thoughts are helpful, code is absolutely required. If possible, execute your defined functions in the code output. Output code in the following format:
84
- ```python
85
- from vision_agent.tools import *
86
-
87
- # your code goes here
88
- ```
89
- """
90
-
91
-
92
- DEBUG_SYS_MSG = """You are an AI Python assistant. You will be given your previous implementation code of a task, runtime error results, and a hint to change the implementation appropriately. Your code will be run in a jupyter notebook environment. Write your full implementation."""
93
-
94
-
95
- DEBUG_EXAMPLE = '''
96
- [previous impl]:
97
- ```python
98
- def add(a: int, b: int) -> int:
99
- """Given integers a and b, return the total value of a and b."""
100
- return a - b
101
- ```
102
-
103
- [previous output]
104
- Tests failed:
105
- assert add(1, 2) == 3 # output: -1
106
- assert add(1, 3) == 4 # output: -2
107
-
108
- [reflection on previous impl]:
109
- The implementation failed the test cases where the input integers are 1 and 2. The issue arises because the code does not add the two integers together, but instead subtracts the second integer from the first. To fix this issue, we should change the operator from `-` to `+` in the return statement. This will ensure that the function returns the correct output for the given input.
110
-
111
- [improved impl]:
112
- def add(a: int, b: int) -> int:
113
- """Given integers a and b, return the total value of a and b."""
114
- return a + b
115
- '''
116
-
117
-
118
- PREV_CODE_CONTEXT = """
119
- [previous impl]
120
- ```python
121
- {code}
122
- ```
123
-
124
- [previous output]
125
- {result}
126
- """
127
-
128
-
129
- PREV_CODE_CONTEXT_WITH_REFLECTION = """
130
- [reflection on previous impl]
131
- {reflection}
132
-
133
- [new impl]
134
- ```python
135
- {code}
136
- ```
137
-
138
- [new output]
139
- {result}
140
-
141
- """
142
-
143
- # don't need [previous impl] because it will come from PREV_CODE_CONTEXT or PREV_CODE_CONTEXT_WITH_REFLECTION
144
- DEBUG = """
145
- [example]
146
- Here is an example of debugging with reflection.
147
- {debug_example}
148
- [/example]
149
-
150
- [context]
151
- {context}
152
-
153
- {previous_impl}
154
-
155
- [instruction]
156
- Analyze your previous code and error in [context] step by step, provide me with improved method and code. Remember to follow [context] requirement. Because you are writing code in a jupyter notebook, you can run `!pip install` to install missing packages. Output a json following the format:
157
- ```json
158
- {{
159
- "reflection": str = "Reflection on previous implementation",
160
- "improved_impl": str = "Refined code after reflection.",
161
- }}
162
- ```
163
- """
164
-
165
-
166
- TEST = """
167
- # Context
168
- {context}
169
-
170
- # Tool Info for Current Subtask
171
- {tool_info}
172
-
173
- # Code to Test
174
- {code}
175
-
176
- # Constraints
177
- - Write code to test the functionality of the provided code according to the 'Current Subtask'. If you cannot test the code, then write code to visualize the result by calling the code.
178
- - Always prioritize using pre-defined tools for the same functionality.
179
- - Write clean, readable, and well-documented code.
180
-
181
- # Output
182
- While some concise thoughts are helpful, code is absolutely required. Always output one and only one code block in your response. Output code in the following format:
183
- ```python
184
- your code
185
- ```
186
- """