vision-agent 0.2.29__py3-none-any.whl → 0.2.31__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/__init__.py +2 -2
- vision_agent/agent/agent.py +2 -2
- vision_agent/agent/agent_coder.py +8 -8
- vision_agent/agent/{vision_agent_v2.py → data_interpreter.py} +12 -12
- vision_agent/agent/{vision_agent_v2_prompts.py → data_interpreter_prompts.py} +3 -3
- vision_agent/agent/easytool.py +8 -8
- vision_agent/agent/easytool_v2.py +778 -0
- vision_agent/agent/easytool_v2_prompts.py +152 -0
- vision_agent/agent/reflexion.py +8 -8
- vision_agent/agent/vision_agent.py +360 -691
- vision_agent/agent/vision_agent_prompts.py +231 -149
- vision_agent/llm/llm.py +3 -4
- vision_agent/lmm/lmm.py +6 -6
- vision_agent/tools/__init__.py +21 -22
- vision_agent/tools/easytool_tools.py +1242 -0
- vision_agent/tools/tools.py +533 -1090
- vision_agent-0.2.31.dist-info/METADATA +175 -0
- vision_agent-0.2.31.dist-info/RECORD +36 -0
- vision_agent/agent/vision_agent_v3.py +0 -386
- vision_agent/agent/vision_agent_v3_prompts.py +0 -226
- vision_agent/tools/tools_v2.py +0 -685
- vision_agent-0.2.29.dist-info/METADATA +0 -226
- vision_agent-0.2.29.dist-info/RECORD +0 -36
- {vision_agent-0.2.29.dist-info → vision_agent-0.2.31.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.29.dist-info → vision_agent-0.2.31.dist-info}/WHEEL +0 -0
vision_agent/agent/__init__.py
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
from .agent import Agent
|
2
2
|
from .agent_coder import AgentCoder
|
3
|
+
from .data_interpreter import DataInterpreter
|
3
4
|
from .easytool import EasyTool
|
5
|
+
from .easytool_v2 import EasyToolV2
|
4
6
|
from .reflexion import Reflexion
|
5
7
|
from .vision_agent import VisionAgent
|
6
|
-
from .vision_agent_v2 import VisionAgentV2
|
7
|
-
from .vision_agent_v3 import VisionAgentV3
|
vision_agent/agent/agent.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
from abc import ABC, abstractmethod
|
2
2
|
from pathlib import Path
|
3
|
-
from typing import Dict, List, Optional, Union
|
3
|
+
from typing import Any, Dict, List, Optional, Union
|
4
4
|
|
5
5
|
|
6
6
|
class Agent(ABC):
|
@@ -8,7 +8,7 @@ class Agent(ABC):
|
|
8
8
|
def __call__(
|
9
9
|
self,
|
10
10
|
input: Union[List[Dict[str, str]], str],
|
11
|
-
|
11
|
+
media: Optional[Union[str, Path]] = None,
|
12
12
|
) -> str:
|
13
13
|
pass
|
14
14
|
|
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import sys
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Dict, List, Optional, Union
|
6
|
+
from typing import Any, Dict, List, Optional, Union
|
7
7
|
|
8
8
|
from rich.console import Console
|
9
9
|
from rich.syntax import Syntax
|
@@ -18,7 +18,7 @@ from vision_agent.agent.agent_coder_prompts import (
|
|
18
18
|
)
|
19
19
|
from vision_agent.llm import LLM, OpenAILLM
|
20
20
|
from vision_agent.lmm import LMM, OpenAILMM
|
21
|
-
from vision_agent.tools
|
21
|
+
from vision_agent.tools import TOOL_DOCSTRING, UTILITIES_DOCSTRING
|
22
22
|
from vision_agent.utils import Execute
|
23
23
|
|
24
24
|
IMPORT_HELPER = """
|
@@ -38,7 +38,7 @@ import numpy as np
|
|
38
38
|
import string
|
39
39
|
from typing import *
|
40
40
|
from collections import *
|
41
|
-
from vision_agent.tools
|
41
|
+
from vision_agent.tools import *
|
42
42
|
"""
|
43
43
|
logging.basicConfig(stream=sys.stdout)
|
44
44
|
_LOGGER = logging.getLogger(__name__)
|
@@ -150,20 +150,20 @@ class AgentCoder(Agent):
|
|
150
150
|
def __call__(
|
151
151
|
self,
|
152
152
|
input: Union[List[Dict[str, str]], str],
|
153
|
-
|
153
|
+
media: Optional[Union[str, Path]] = None,
|
154
154
|
) -> str:
|
155
155
|
if isinstance(input, str):
|
156
156
|
input = [{"role": "user", "content": input}]
|
157
|
-
return self.chat(input,
|
157
|
+
return self.chat(input, media)
|
158
158
|
|
159
159
|
def chat(
|
160
160
|
self,
|
161
161
|
input: List[Dict[str, str]],
|
162
|
-
|
162
|
+
media: Optional[Union[str, Path]] = None,
|
163
163
|
) -> str:
|
164
164
|
question = input[0]["content"]
|
165
|
-
if
|
166
|
-
question += f" Input file path: {os.path.abspath(
|
165
|
+
if media:
|
166
|
+
question += f" Input file path: {os.path.abspath(media)}"
|
167
167
|
|
168
168
|
code = ""
|
169
169
|
feedback = ""
|
@@ -10,7 +10,7 @@ from rich.syntax import Syntax
|
|
10
10
|
from tabulate import tabulate
|
11
11
|
|
12
12
|
from vision_agent.agent import Agent
|
13
|
-
from vision_agent.agent.
|
13
|
+
from vision_agent.agent.data_interpreter_prompts import (
|
14
14
|
CODE,
|
15
15
|
CODE_SYS_MSG,
|
16
16
|
DEBUG,
|
@@ -25,7 +25,7 @@ from vision_agent.agent.vision_agent_v2_prompts import (
|
|
25
25
|
USER_REQ_SUBTASK_WM_CONTEXT,
|
26
26
|
)
|
27
27
|
from vision_agent.llm import LLM, OpenAILLM
|
28
|
-
from vision_agent.tools
|
28
|
+
from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF
|
29
29
|
from vision_agent.utils import Execute, Sim
|
30
30
|
|
31
31
|
logging.basicConfig(level=logging.INFO)
|
@@ -331,11 +331,11 @@ def run_plan(
|
|
331
331
|
return current_code, current_test, plan, working_memory
|
332
332
|
|
333
333
|
|
334
|
-
class
|
335
|
-
"""
|
336
|
-
solve vision tasks. It is inspired by MetaGPT's Data
|
337
|
-
https://arxiv.org/abs/2402.18679.
|
338
|
-
generate code:
|
334
|
+
class DataInterpreter(Agent):
|
335
|
+
"""This version of Data Interpreter is an AI agentic framework geared towards
|
336
|
+
outputting Python code to solve vision tasks. It is inspired by MetaGPT's Data
|
337
|
+
Interpreter https://arxiv.org/abs/2402.18679. This version of Data Interpreter has
|
338
|
+
several key features to help it generate code:
|
339
339
|
|
340
340
|
- A planner to generate a plan of tasks to solve a user requirement. The planner
|
341
341
|
can output code tasks or test tasks, where test tasks are used to verify the code.
|
@@ -379,29 +379,29 @@ class VisionAgentV2(Agent):
|
|
379
379
|
def __call__(
|
380
380
|
self,
|
381
381
|
input: Union[List[Dict[str, str]], str],
|
382
|
-
|
382
|
+
media: Optional[Union[str, Path]] = None,
|
383
383
|
plan: Optional[List[Dict[str, Any]]] = None,
|
384
384
|
) -> str:
|
385
385
|
if isinstance(input, str):
|
386
386
|
input = [{"role": "user", "content": input}]
|
387
|
-
results = self.chat_with_workflow(input,
|
387
|
+
results = self.chat_with_workflow(input, media, plan)
|
388
388
|
return results["code"] # type: ignore
|
389
389
|
|
390
390
|
@traceable
|
391
391
|
def chat_with_workflow(
|
392
392
|
self,
|
393
393
|
chat: List[Dict[str, str]],
|
394
|
-
|
394
|
+
media: Optional[Union[str, Path]] = None,
|
395
395
|
plan: Optional[List[Dict[str, Any]]] = None,
|
396
396
|
) -> Dict[str, Any]:
|
397
397
|
if len(chat) == 0:
|
398
398
|
raise ValueError("Input cannot be empty.")
|
399
399
|
|
400
|
-
if
|
400
|
+
if media is not None:
|
401
401
|
# append file names to all user messages
|
402
402
|
for chat_i in chat:
|
403
403
|
if chat_i["role"] == "user":
|
404
|
-
chat_i["content"] += f" Image name {
|
404
|
+
chat_i["content"] += f" Image name {media}"
|
405
405
|
|
406
406
|
working_code = ""
|
407
407
|
if plan is not None:
|
@@ -74,15 +74,15 @@ CODE = """
|
|
74
74
|
|
75
75
|
# Constraints
|
76
76
|
- Write a function that accomplishes the 'Current Subtask'. You are supplied code from a previous task under 'Previous Code', do not delete or change previous code unless it contains a bug or it is necessary to complete the 'Current Subtask'.
|
77
|
-
- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools
|
77
|
+
- Always prioritize using pre-defined tools or code for the same functionality from 'Tool Info' when working on 'Current Subtask'. You have access to all these tools through the `from vision_agent.tools import *` import.
|
78
78
|
- You may receive previous trials and errors under 'Previous Task', this is code, output and reflections from previous tasks. You can use these to avoid running into the same issues when writing your code.
|
79
|
-
- Use the `save_json` function from `vision_agent.tools
|
79
|
+
- Use the `save_json` function from `vision_agent.tools` to save your output as a json file.
|
80
80
|
- Write clean, readable, and well-documented code.
|
81
81
|
|
82
82
|
# Output
|
83
83
|
While some concise thoughts are helpful, code is absolutely required. If possible, execute your defined functions in the code output. Output code in the following format:
|
84
84
|
```python
|
85
|
-
from vision_agent.tools
|
85
|
+
from vision_agent.tools import *
|
86
86
|
|
87
87
|
# your code goes here
|
88
88
|
```
|
vision_agent/agent/easytool.py
CHANGED
@@ -6,7 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
6
6
|
|
7
7
|
from vision_agent.llm import LLM, OpenAILLM
|
8
8
|
from vision_agent.lmm import LMM
|
9
|
-
from vision_agent.tools import TOOLS
|
9
|
+
from vision_agent.tools.easytool_tools import TOOLS
|
10
10
|
|
11
11
|
from .agent import Agent
|
12
12
|
from .easytool_prompts import (
|
@@ -272,7 +272,7 @@ class EasyTool(Agent):
|
|
272
272
|
def __call__(
|
273
273
|
self,
|
274
274
|
input: Union[List[Dict[str, str]], str],
|
275
|
-
|
275
|
+
media: Optional[Union[str, Path]] = None,
|
276
276
|
) -> str:
|
277
277
|
"""Invoke the vision agent.
|
278
278
|
|
@@ -285,14 +285,14 @@ class EasyTool(Agent):
|
|
285
285
|
"""
|
286
286
|
if isinstance(input, str):
|
287
287
|
input = [{"role": "user", "content": input}]
|
288
|
-
return self.chat(input,
|
288
|
+
return self.chat(input, media=media)
|
289
289
|
|
290
290
|
def chat_with_workflow(
|
291
|
-
self, chat: List[Dict[str, str]],
|
291
|
+
self, chat: List[Dict[str, str]], media: Optional[Union[str, Path]] = None
|
292
292
|
) -> Tuple[str, List[Dict]]:
|
293
293
|
question = chat[0]["content"]
|
294
|
-
if
|
295
|
-
question += f" Image name: {
|
294
|
+
if media:
|
295
|
+
question += f" Image name: {media}"
|
296
296
|
tasks = task_decompose(
|
297
297
|
self.task_model,
|
298
298
|
question,
|
@@ -340,7 +340,7 @@ class EasyTool(Agent):
|
|
340
340
|
return answer_summarize(self.answer_model, question, answers), all_tool_results
|
341
341
|
|
342
342
|
def chat(
|
343
|
-
self, chat: List[Dict[str, str]],
|
343
|
+
self, chat: List[Dict[str, str]], media: Optional[Union[str, Path]] = None
|
344
344
|
) -> str:
|
345
|
-
answer, _ = self.chat_with_workflow(chat,
|
345
|
+
answer, _ = self.chat_with_workflow(chat, media=media)
|
346
346
|
return answer
|