vision-agent 0.2.204__tar.gz → 0.2.206__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.204 → vision_agent-0.2.206}/PKG-INFO +1 -1
- {vision_agent-0.2.204 → vision_agent-0.2.206}/pyproject.toml +1 -1
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner_v2.py +3 -1
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/planner_tools.py +71 -36
- {vision_agent-0.2.204 → vision_agent-0.2.206}/LICENSE +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/README.md +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/.sim_tools/df.csv +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/.sim_tools/embs.npy +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/README.md +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/types.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner_v2.py
@@ -367,8 +367,10 @@ def replace_interaction_with_obs(chat: List[AgentMessage]) -> List[AgentMessage]
|
|
367
367
|
response = json.loads(chat[i + 1].content)
|
368
368
|
function_name = response["function_name"]
|
369
369
|
tool_doc = get_tool_documentation(function_name)
|
370
|
+
if "box_threshold" in response:
|
371
|
+
tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"
|
370
372
|
new_chat.append(AgentMessage(role="observation", content=tool_doc))
|
371
|
-
except json.JSONDecodeError:
|
373
|
+
except (json.JSONDecodeError, KeyError):
|
372
374
|
raise ValueError(f"Invalid JSON in interaction response: {chat_i}")
|
373
375
|
else:
|
374
376
|
new_chat.append(chat_i)
|
{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/tools/planner_tools.py
@@ -1,6 +1,8 @@
|
|
1
|
+
import inspect
|
1
2
|
import logging
|
2
3
|
import shutil
|
3
4
|
import tempfile
|
5
|
+
from functools import lru_cache
|
4
6
|
from typing import Any, Callable, Dict, List, Optional, Tuple, cast
|
5
7
|
|
6
8
|
import libcst as cst
|
@@ -31,15 +33,19 @@ from vision_agent.utils.execute import (
|
|
31
33
|
MimeType,
|
32
34
|
)
|
33
35
|
from vision_agent.utils.image_utils import convert_to_b64
|
34
|
-
from vision_agent.utils.sim import load_cached_sim
|
36
|
+
from vision_agent.utils.sim import Sim, load_cached_sim
|
35
37
|
|
36
38
|
TOOL_FUNCTIONS = {tool.__name__: tool for tool in T.TOOLS}
|
37
|
-
TOOL_RECOMMENDER = load_cached_sim(T.TOOLS_DF)
|
38
39
|
|
39
40
|
_LOGGER = logging.getLogger(__name__)
|
40
41
|
EXAMPLES = f"\n{TEST_TOOLS_EXAMPLE1}\n{TEST_TOOLS_EXAMPLE2}\n"
|
41
42
|
|
42
43
|
|
44
|
+
@lru_cache(maxsize=1)
|
45
|
+
def get_tool_recommender() -> Sim:
|
46
|
+
return load_cached_sim(T.TOOLS_DF)
|
47
|
+
|
48
|
+
|
43
49
|
def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
|
44
50
|
return_str = "[get_tool_for_task output]\n"
|
45
51
|
if tool_thoughts.strip() != "":
|
@@ -51,7 +57,7 @@ def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
|
|
51
57
|
|
52
58
|
|
53
59
|
def extract_tool_info(
|
54
|
-
tool_choice_context: Dict[str, Any]
|
60
|
+
tool_choice_context: Dict[str, Any],
|
55
61
|
) -> Tuple[Optional[Callable], str, str, str]:
|
56
62
|
tool_thoughts = tool_choice_context.get("thoughts", "")
|
57
63
|
tool_docstring = ""
|
@@ -63,12 +69,55 @@ def extract_tool_info(
|
|
63
69
|
return tool, tool_thoughts, tool_docstring, ""
|
64
70
|
|
65
71
|
|
72
|
+
def replace_box_threshold(code: str, functions: List[str], box_threshold: float) -> str:
|
73
|
+
class ReplaceBoxThresholdTransformer(cst.CSTTransformer):
|
74
|
+
def leave_Call(
|
75
|
+
self, original_node: cst.Call, updated_node: cst.Call
|
76
|
+
) -> cst.Call:
|
77
|
+
if (
|
78
|
+
isinstance(updated_node.func, cst.Name)
|
79
|
+
and updated_node.func.value in functions
|
80
|
+
) or (
|
81
|
+
isinstance(updated_node.func, cst.Attribute)
|
82
|
+
and updated_node.func.attr.value in functions
|
83
|
+
):
|
84
|
+
new_args = []
|
85
|
+
found = False
|
86
|
+
for arg in updated_node.args:
|
87
|
+
if arg.keyword and arg.keyword.value == "box_threshold":
|
88
|
+
new_arg = arg.with_changes(value=cst.Float(str(box_threshold)))
|
89
|
+
new_args.append(new_arg)
|
90
|
+
found = True
|
91
|
+
else:
|
92
|
+
new_args.append(arg)
|
93
|
+
|
94
|
+
if not found:
|
95
|
+
new_args.append(
|
96
|
+
cst.Arg(
|
97
|
+
keyword=cst.Name("box_threshold"),
|
98
|
+
value=cst.Float(str(box_threshold)),
|
99
|
+
equal=cst.AssignEqual(
|
100
|
+
whitespace_before=cst.SimpleWhitespace(""),
|
101
|
+
whitespace_after=cst.SimpleWhitespace(""),
|
102
|
+
),
|
103
|
+
)
|
104
|
+
)
|
105
|
+
return updated_node.with_changes(args=new_args)
|
106
|
+
return updated_node
|
107
|
+
|
108
|
+
tree = cst.parse_module(code)
|
109
|
+
transformer = ReplaceBoxThresholdTransformer()
|
110
|
+
new_tree = tree.visit(transformer)
|
111
|
+
return new_tree.code
|
112
|
+
|
113
|
+
|
66
114
|
def run_tool_testing(
|
67
115
|
task: str,
|
68
116
|
image_paths: List[str],
|
69
117
|
lmm: LMM,
|
70
118
|
exclude_tools: Optional[List[str]],
|
71
119
|
code_interpreter: CodeInterpreter,
|
120
|
+
process_code: Callable[[str], str] = lambda x: x,
|
72
121
|
) -> tuple[str, str, Execution]:
|
73
122
|
"""Helper function to generate and run tool testing code."""
|
74
123
|
query = lmm.generate(CATEGORIZE_TOOL_REQUEST.format(task=task))
|
@@ -80,7 +129,7 @@ def run_tool_testing(
|
|
80
129
|
f"I need models from the {category.strip()} category of tools. {task}"
|
81
130
|
)
|
82
131
|
|
83
|
-
tool_docs = TOOL_RECOMMENDER.top_k(category, k=10, thresh=0.2)
|
132
|
+
tool_docs = get_tool_recommender().top_k(category, k=10, thresh=0.2)
|
84
133
|
if exclude_tools is not None and len(exclude_tools) > 0:
|
85
134
|
cleaned_tool_docs = []
|
86
135
|
for tool_doc in tool_docs:
|
@@ -101,6 +150,7 @@ def run_tool_testing(
|
|
101
150
|
code = extract_tag(response, "code") # type: ignore
|
102
151
|
if code is None:
|
103
152
|
raise ValueError(f"Could not extract code from response: {response}")
|
153
|
+
code = process_code(code)
|
104
154
|
tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
|
105
155
|
tool_output_str = tool_output.text(include_results=False).strip()
|
106
156
|
|
@@ -119,6 +169,7 @@ def run_tool_testing(
|
|
119
169
|
media=str(image_paths),
|
120
170
|
)
|
121
171
|
code = extract_code(lmm.generate(prompt, media=image_paths)) # type: ignore
|
172
|
+
code = process_code(code)
|
122
173
|
tool_output = code_interpreter.exec_isolation(
|
123
174
|
DefaultImports.prepend_imports(code)
|
124
175
|
)
|
@@ -200,7 +251,9 @@ def get_tool_for_task(
|
|
200
251
|
context=f"<code>\n{code}\n</code>\n<tool_output>\n{tool_output_str}\n</tool_output>",
|
201
252
|
previous_attempts=error_message,
|
202
253
|
)
|
203
|
-
tool_choice_context_dict = extract_json(lmm.generate(prompt, media=image_paths))  # type: ignore
|
254
|
+
tool_choice_context_dict = extract_json(
|
255
|
+
lmm.generate(prompt, media=image_paths) # type: ignore
|
256
|
+
)
|
204
257
|
tool, tool_thoughts, tool_docstring, error_message = extract_tool_info(
|
205
258
|
tool_choice_context_dict
|
206
259
|
)
|
@@ -221,36 +274,7 @@ def get_tool_documentation(tool_name: str) -> str:
|
|
221
274
|
def get_tool_for_task_human_reviewer(
|
222
275
|
task: str, images: List[np.ndarray], exclude_tools: Optional[List[str]] = None
|
223
276
|
) -> None:
|
224
|
-
# NOTE: this
|
225
|
-
"""Given a task and one or more images this function will find a tool to accomplish
|
226
|
-
the jobs. It prints the tool documentation and thoughts on why it chose the tool.
|
227
|
-
|
228
|
-
It can produce tools for the following types of tasks:
|
229
|
-
- Object detection and counting
|
230
|
-
- Classification
|
231
|
-
- Segmentation
|
232
|
-
- OCR
|
233
|
-
- VQA
|
234
|
-
- Depth and pose estimation
|
235
|
-
- Video object tracking
|
236
|
-
|
237
|
-
Wait until the documentation is printed to use the function so you know what the
|
238
|
-
input and output signatures are.
|
239
|
-
|
240
|
-
Parameters:
|
241
|
-
task: str: The task to accomplish.
|
242
|
-
images: List[np.ndarray]: The images to use for the task.
|
243
|
-
exclude_tools: Optional[List[str]]: A list of tool names to exclude from the
|
244
|
-
recommendations. This is helpful if you are calling get_tool_for_task twice
|
245
|
-
and do not want the same tool recommended.
|
246
|
-
|
247
|
-
Returns:
|
248
|
-
The tool to use for the task is printed to stdout
|
249
|
-
|
250
|
-
Examples
|
251
|
-
--------
|
252
|
-
>>> get_tool_for_task("Give me an OCR model that can find 'hot chocolate' in the image", [image])
|
253
|
-
"""
|
277
|
+
# NOTE: this will have the same documentation as get_tool_for_task
|
254
278
|
lmm = AnthropicLMM()
|
255
279
|
|
256
280
|
with (
|
@@ -263,8 +287,19 @@ def get_tool_for_task_human_reviewer(
|
|
263
287
|
Image.fromarray(image).save(image_path)
|
264
288
|
image_paths.append(image_path)
|
265
289
|
|
290
|
+
tools = [
|
291
|
+
t.__name__
|
292
|
+
for t in T.TOOLS
|
293
|
+
if inspect.signature(t).parameters.get("box_threshold") # type: ignore
|
294
|
+
]
|
295
|
+
|
266
296
|
_, _, tool_output = run_tool_testing(
|
267
|
-
task, image_paths, lmm, exclude_tools, code_interpreter
|
297
|
+
task,
|
298
|
+
image_paths,
|
299
|
+
lmm,
|
300
|
+
exclude_tools,
|
301
|
+
code_interpreter,
|
302
|
+
process_code=lambda x: replace_box_threshold(x, tools, 0.05),
|
268
303
|
)
|
269
304
|
|
270
305
|
# need to re-display results for the outer notebook to see them
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_coder_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.204 → vision_agent-0.2.206}/vision_agent/agent/vision_agent_planner_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|