vision-agent 0.2.203__tar.gz → 0.2.207__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.203 → vision_agent-0.2.207}/PKG-INFO +1 -1
- {vision_agent-0.2.203 → vision_agent-0.2.207}/pyproject.toml +1 -1
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_v2.py +2 -2
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_v2.py +3 -1
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_prompts.py +1 -1
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/planner_tools.py +64 -36
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/tools.py +7 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/LICENSE +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/README.md +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/.sim_tools/df.csv +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/.sim_tools/embs.npy +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/README.md +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/types.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/utils/video.py +0 -0
@@ -34,7 +34,7 @@ from vision_agent.utils.execute import (
|
|
34
34
|
CodeInterpreterFactory,
|
35
35
|
Execution,
|
36
36
|
)
|
37
|
-
from vision_agent.utils.sim import Sim
|
37
|
+
from vision_agent.utils.sim import Sim
|
38
38
|
|
39
39
|
_CONSOLE = Console()
|
40
40
|
|
@@ -316,7 +316,7 @@ class VisionAgentCoderV2(AgentCoder):
|
|
316
316
|
elif isinstance(tool_recommender, Sim):
|
317
317
|
self.tool_recommender = tool_recommender
|
318
318
|
else:
|
319
|
-
self.tool_recommender =
|
319
|
+
self.tool_recommender = T.get_tool_recommender()
|
320
320
|
|
321
321
|
self.verbose = verbose
|
322
322
|
self.code_sandbox_runtime = code_sandbox_runtime
|
@@ -367,8 +367,10 @@ def replace_interaction_with_obs(chat: List[AgentMessage]) -> List[AgentMessage]
|
|
367
367
|
response = json.loads(chat[i + 1].content)
|
368
368
|
function_name = response["function_name"]
|
369
369
|
tool_doc = get_tool_documentation(function_name)
|
370
|
+
if "box_threshold" in response:
|
371
|
+
tool_doc = f"Use the following function with box_threshold={response['box_threshold']}\n\n{tool_doc}"
|
370
372
|
new_chat.append(AgentMessage(role="observation", content=tool_doc))
|
371
|
-
except json.JSONDecodeError:
|
373
|
+
except (json.JSONDecodeError, KeyError):
|
372
374
|
raise ValueError(f"Invalid JSON in interaction response: {chat_i}")
|
373
375
|
else:
|
374
376
|
new_chat.append(chat_i)
|
@@ -1,3 +1,4 @@
|
|
1
|
+
import inspect
|
1
2
|
import logging
|
2
3
|
import shutil
|
3
4
|
import tempfile
|
@@ -31,10 +32,8 @@ from vision_agent.utils.execute import (
|
|
31
32
|
MimeType,
|
32
33
|
)
|
33
34
|
from vision_agent.utils.image_utils import convert_to_b64
|
34
|
-
from vision_agent.utils.sim import load_cached_sim
|
35
35
|
|
36
36
|
TOOL_FUNCTIONS = {tool.__name__: tool for tool in T.TOOLS}
|
37
|
-
TOOL_RECOMMENDER = load_cached_sim(T.TOOLS_DF)
|
38
37
|
|
39
38
|
_LOGGER = logging.getLogger(__name__)
|
40
39
|
EXAMPLES = f"\n{TEST_TOOLS_EXAMPLE1}\n{TEST_TOOLS_EXAMPLE2}\n"
|
@@ -51,7 +50,7 @@ def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
|
|
51
50
|
|
52
51
|
|
53
52
|
def extract_tool_info(
|
54
|
-
tool_choice_context: Dict[str, Any]
|
53
|
+
tool_choice_context: Dict[str, Any],
|
55
54
|
) -> Tuple[Optional[Callable], str, str, str]:
|
56
55
|
tool_thoughts = tool_choice_context.get("thoughts", "")
|
57
56
|
tool_docstring = ""
|
@@ -63,12 +62,55 @@ def extract_tool_info(
|
|
63
62
|
return tool, tool_thoughts, tool_docstring, ""
|
64
63
|
|
65
64
|
|
65
|
+
def replace_box_threshold(code: str, functions: List[str], box_threshold: float) -> str:
    """Force a `box_threshold` keyword argument onto selected calls in `code`.

    The source is parsed with libcst. Every call whose callee is a bare name
    (`foo(...)`) or an attribute access (`mod.foo(...)`) whose final name is
    listed in `functions` is rewritten: an existing `box_threshold=` keyword
    argument has its value replaced, otherwise `box_threshold=<value>` is
    appended as the last argument. Calls to any other function are left as is.

    Parameters:
        code: str: The Python source code to rewrite.
        functions: List[str]: Names of the functions whose calls are rewritten.
        box_threshold: float: The threshold value written into each matching
            call.

    Returns:
        str: The rewritten source code.
    """
    # Build the lookup set once instead of scanning the list for every call node.
    target_names = set(functions)
    # repr(float(...)) always yields a float literal ("1" -> "1.0"), so an
    # integer threshold is not rejected when the cst.Float node is built.
    # For real floats the text is the same as str(box_threshold).
    threshold_literal = repr(float(box_threshold))

    class ReplaceBoxThresholdTransformer(cst.CSTTransformer):
        def leave_Call(
            self, original_node: cst.Call, updated_node: cst.Call
        ) -> cst.Call:
            callee = updated_node.func
            if isinstance(callee, cst.Name):
                callee_name = callee.value
            elif isinstance(callee, cst.Attribute):
                callee_name = callee.attr.value
            else:
                # Subscripts, nested calls, lambdas, ... are never rewritten.
                return updated_node

            if callee_name not in target_names:
                return updated_node

            new_args = []
            found = False
            for arg in updated_node.args:
                # Only a keyword argument is recognised here; a threshold that
                # is passed positionally is not detected by this check.
                if arg.keyword and arg.keyword.value == "box_threshold":
                    new_args.append(
                        arg.with_changes(value=cst.Float(threshold_literal))
                    )
                    found = True
                else:
                    new_args.append(arg)

            if not found:
                new_args.append(
                    cst.Arg(
                        keyword=cst.Name("box_threshold"),
                        value=cst.Float(threshold_literal),
                        # Render as `box_threshold=0.05`, no spaces around "=".
                        equal=cst.AssignEqual(
                            whitespace_before=cst.SimpleWhitespace(""),
                            whitespace_after=cst.SimpleWhitespace(""),
                        ),
                    )
                )
            return updated_node.with_changes(args=new_args)

    tree = cst.parse_module(code)
    new_tree = tree.visit(ReplaceBoxThresholdTransformer())
    return new_tree.code
|
105
|
+
|
106
|
+
|
66
107
|
def run_tool_testing(
|
67
108
|
task: str,
|
68
109
|
image_paths: List[str],
|
69
110
|
lmm: LMM,
|
70
111
|
exclude_tools: Optional[List[str]],
|
71
112
|
code_interpreter: CodeInterpreter,
|
113
|
+
process_code: Callable[[str], str] = lambda x: x,
|
72
114
|
) -> tuple[str, str, Execution]:
|
73
115
|
"""Helper function to generate and run tool testing code."""
|
74
116
|
query = lmm.generate(CATEGORIZE_TOOL_REQUEST.format(task=task))
|
@@ -80,7 +122,7 @@ def run_tool_testing(
|
|
80
122
|
f"I need models from the {category.strip()} category of tools. {task}"
|
81
123
|
)
|
82
124
|
|
83
|
-
tool_docs =
|
125
|
+
tool_docs = T.get_tool_recommender().top_k(category, k=10, thresh=0.2)
|
84
126
|
if exclude_tools is not None and len(exclude_tools) > 0:
|
85
127
|
cleaned_tool_docs = []
|
86
128
|
for tool_doc in tool_docs:
|
@@ -101,6 +143,7 @@ def run_tool_testing(
|
|
101
143
|
code = extract_tag(response, "code") # type: ignore
|
102
144
|
if code is None:
|
103
145
|
raise ValueError(f"Could not extract code from response: {response}")
|
146
|
+
code = process_code(code)
|
104
147
|
tool_output = code_interpreter.exec_isolation(DefaultImports.prepend_imports(code))
|
105
148
|
tool_output_str = tool_output.text(include_results=False).strip()
|
106
149
|
|
@@ -119,6 +162,7 @@ def run_tool_testing(
|
|
119
162
|
media=str(image_paths),
|
120
163
|
)
|
121
164
|
code = extract_code(lmm.generate(prompt, media=image_paths)) # type: ignore
|
165
|
+
code = process_code(code)
|
122
166
|
tool_output = code_interpreter.exec_isolation(
|
123
167
|
DefaultImports.prepend_imports(code)
|
124
168
|
)
|
@@ -200,7 +244,9 @@ def get_tool_for_task(
|
|
200
244
|
context=f"<code>\n{code}\n</code>\n<tool_output>\n{tool_output_str}\n</tool_output>",
|
201
245
|
previous_attempts=error_message,
|
202
246
|
)
|
203
|
-
tool_choice_context_dict = extract_json(
|
247
|
+
tool_choice_context_dict = extract_json(
|
248
|
+
lmm.generate(prompt, media=image_paths) # type: ignore
|
249
|
+
)
|
204
250
|
tool, tool_thoughts, tool_docstring, error_message = extract_tool_info(
|
205
251
|
tool_choice_context_dict
|
206
252
|
)
|
@@ -221,36 +267,7 @@ def get_tool_documentation(tool_name: str) -> str:
|
|
221
267
|
def get_tool_for_task_human_reviewer(
|
222
268
|
task: str, images: List[np.ndarray], exclude_tools: Optional[List[str]] = None
|
223
269
|
) -> None:
|
224
|
-
# NOTE: this
|
225
|
-
"""Given a task and one or more images this function will find a tool to accomplish
|
226
|
-
the jobs. It prints the tool documentation and thoughts on why it chose the tool.
|
227
|
-
|
228
|
-
It can produce tools for the following types of tasks:
|
229
|
-
- Object detection and counting
|
230
|
-
- Classification
|
231
|
-
- Segmentation
|
232
|
-
- OCR
|
233
|
-
- VQA
|
234
|
-
- Depth and pose estimation
|
235
|
-
- Video object tracking
|
236
|
-
|
237
|
-
Wait until the documentation is printed to use the function so you know what the
|
238
|
-
input and output signatures are.
|
239
|
-
|
240
|
-
Parameters:
|
241
|
-
task: str: The task to accomplish.
|
242
|
-
images: List[np.ndarray]: The images to use for the task.
|
243
|
-
exclude_tools: Optional[List[str]]: A list of tool names to exclude from the
|
244
|
-
recommendations. This is helpful if you are calling get_tool_for_task twice
|
245
|
-
and do not want the same tool recommended.
|
246
|
-
|
247
|
-
Returns:
|
248
|
-
The tool to use for the task is printed to stdout
|
249
|
-
|
250
|
-
Examples
|
251
|
-
--------
|
252
|
-
>>> get_tool_for_task("Give me an OCR model that can find 'hot chocolate' in the image", [image])
|
253
|
-
"""
|
270
|
+
# NOTE: this will have the same documentation as get_tool_for_task
|
254
271
|
lmm = AnthropicLMM()
|
255
272
|
|
256
273
|
with (
|
@@ -263,8 +280,19 @@ def get_tool_for_task_human_reviewer(
|
|
263
280
|
Image.fromarray(image).save(image_path)
|
264
281
|
image_paths.append(image_path)
|
265
282
|
|
283
|
+
tools = [
|
284
|
+
t.__name__
|
285
|
+
for t in T.TOOLS
|
286
|
+
if inspect.signature(t).parameters.get("box_threshold") # type: ignore
|
287
|
+
]
|
288
|
+
|
266
289
|
_, _, tool_output = run_tool_testing(
|
267
|
-
task, image_paths, lmm, exclude_tools, code_interpreter
|
290
|
+
task,
|
291
|
+
image_paths,
|
292
|
+
lmm,
|
293
|
+
exclude_tools,
|
294
|
+
code_interpreter,
|
295
|
+
process_code=lambda x: replace_box_threshold(x, tools, 0.05),
|
268
296
|
)
|
269
297
|
|
270
298
|
# need to re-display results for the outer notebook to see them
|
@@ -4,6 +4,7 @@ import logging
|
|
4
4
|
import os
|
5
5
|
import tempfile
|
6
6
|
import urllib.request
|
7
|
+
from functools import lru_cache
|
7
8
|
from importlib import resources
|
8
9
|
from pathlib import Path
|
9
10
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
@@ -45,6 +46,7 @@ from vision_agent.utils.image_utils import (
|
|
45
46
|
rle_decode,
|
46
47
|
rle_decode_array,
|
47
48
|
)
|
49
|
+
from vision_agent.utils.sim import Sim, load_cached_sim
|
48
50
|
from vision_agent.utils.video import (
|
49
51
|
extract_frames_from_video,
|
50
52
|
frames_to_bytes,
|
@@ -80,6 +82,11 @@ _OCR_URL = "https://app.landing.ai/ocr/v1/detect-text"
|
|
80
82
|
_LOGGER = logging.getLogger(__name__)
|
81
83
|
|
82
84
|
|
85
|
+
@lru_cache(maxsize=1)
def get_tool_recommender() -> Sim:
    """Return the tool recommender built from `TOOLS_DF`.

    The function takes no arguments and is wrapped in `lru_cache(maxsize=1)`,
    so `load_cached_sim(TOOLS_DF)` runs on the first call and later calls
    reuse the cached result.

    Returns:
        Sim: The object returned by `load_cached_sim(TOOLS_DF)`.
    """
    recommender = load_cached_sim(TOOLS_DF)
    return recommender
|
88
|
+
|
89
|
+
|
83
90
|
def grounding_dino(
|
84
91
|
prompt: str,
|
85
92
|
image: np.ndarray,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_coder_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
{vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.203 → vision_agent-0.2.207}/vision_agent/agent/vision_agent_planner_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|