vision-agent 0.2.205__py3-none-any.whl → 0.2.207__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent_coder_v2.py +2 -2
- vision_agent/tools/__init__.py +1 -0
- vision_agent/tools/planner_tools.py +5 -5
- vision_agent/tools/tools.py +7 -0
- {vision_agent-0.2.205.dist-info → vision_agent-0.2.207.dist-info}/METADATA +1 -1
- {vision_agent-0.2.205.dist-info → vision_agent-0.2.207.dist-info}/RECORD +8 -8
- {vision_agent-0.2.205.dist-info → vision_agent-0.2.207.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.205.dist-info → vision_agent-0.2.207.dist-info}/WHEEL +0 -0
@@ -34,7 +34,7 @@ from vision_agent.utils.execute import (
|
|
34
34
|
CodeInterpreterFactory,
|
35
35
|
Execution,
|
36
36
|
)
|
37
|
-
from vision_agent.utils.sim import Sim
|
37
|
+
from vision_agent.utils.sim import Sim
|
38
38
|
|
39
39
|
_CONSOLE = Console()
|
40
40
|
|
@@ -316,7 +316,7 @@ class VisionAgentCoderV2(AgentCoder):
|
|
316
316
|
elif isinstance(tool_recommender, Sim):
|
317
317
|
self.tool_recommender = tool_recommender
|
318
318
|
else:
|
319
|
-
self.tool_recommender =
|
319
|
+
self.tool_recommender = T.get_tool_recommender()
|
320
320
|
|
321
321
|
self.verbose = verbose
|
322
322
|
self.code_sandbox_runtime = code_sandbox_runtime
|
vision_agent/tools/__init__.py
CHANGED
@@ -32,10 +32,8 @@ from vision_agent.utils.execute import (
|
|
32
32
|
MimeType,
|
33
33
|
)
|
34
34
|
from vision_agent.utils.image_utils import convert_to_b64
|
35
|
-
from vision_agent.utils.sim import load_cached_sim
|
36
35
|
|
37
36
|
TOOL_FUNCTIONS = {tool.__name__: tool for tool in T.TOOLS}
|
38
|
-
TOOL_RECOMMENDER = load_cached_sim(T.TOOLS_DF)
|
39
37
|
|
40
38
|
_LOGGER = logging.getLogger(__name__)
|
41
39
|
EXAMPLES = f"\n{TEST_TOOLS_EXAMPLE1}\n{TEST_TOOLS_EXAMPLE2}\n"
|
@@ -52,7 +50,7 @@ def format_tool_output(tool_thoughts: str, tool_docstring: str) -> str:
|
|
52
50
|
|
53
51
|
|
54
52
|
def extract_tool_info(
|
55
|
-
tool_choice_context: Dict[str, Any]
|
53
|
+
tool_choice_context: Dict[str, Any],
|
56
54
|
) -> Tuple[Optional[Callable], str, str, str]:
|
57
55
|
tool_thoughts = tool_choice_context.get("thoughts", "")
|
58
56
|
tool_docstring = ""
|
@@ -124,7 +122,7 @@ def run_tool_testing(
|
|
124
122
|
f"I need models from the {category.strip()} category of tools. {task}"
|
125
123
|
)
|
126
124
|
|
127
|
-
tool_docs =
|
125
|
+
tool_docs = T.get_tool_recommender().top_k(category, k=10, thresh=0.2)
|
128
126
|
if exclude_tools is not None and len(exclude_tools) > 0:
|
129
127
|
cleaned_tool_docs = []
|
130
128
|
for tool_doc in tool_docs:
|
@@ -246,7 +244,9 @@ def get_tool_for_task(
|
|
246
244
|
context=f"<code>\n{code}\n</code>\n<tool_output>\n{tool_output_str}\n</tool_output>",
|
247
245
|
previous_attempts=error_message,
|
248
246
|
)
|
249
|
-
tool_choice_context_dict = extract_json(
|
247
|
+
tool_choice_context_dict = extract_json(
|
248
|
+
lmm.generate(prompt, media=image_paths) # type: ignore
|
249
|
+
)
|
250
250
|
tool, tool_thoughts, tool_docstring, error_message = extract_tool_info(
|
251
251
|
tool_choice_context_dict
|
252
252
|
)
|
vision_agent/tools/tools.py
CHANGED
@@ -4,6 +4,7 @@ import logging
|
|
4
4
|
import os
|
5
5
|
import tempfile
|
6
6
|
import urllib.request
|
7
|
+
from functools import lru_cache
|
7
8
|
from importlib import resources
|
8
9
|
from pathlib import Path
|
9
10
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
@@ -45,6 +46,7 @@ from vision_agent.utils.image_utils import (
|
|
45
46
|
rle_decode,
|
46
47
|
rle_decode_array,
|
47
48
|
)
|
49
|
+
from vision_agent.utils.sim import Sim, load_cached_sim
|
48
50
|
from vision_agent.utils.video import (
|
49
51
|
extract_frames_from_video,
|
50
52
|
frames_to_bytes,
|
@@ -80,6 +82,11 @@ _OCR_URL = "https://app.landing.ai/ocr/v1/detect-text"
|
|
80
82
|
_LOGGER = logging.getLogger(__name__)
|
81
83
|
|
82
84
|
|
85
|
+
@lru_cache(maxsize=1)
|
86
|
+
def get_tool_recommender() -> Sim:
|
87
|
+
return load_cached_sim(TOOLS_DF)
|
88
|
+
|
89
|
+
|
83
90
|
def grounding_dino(
|
84
91
|
prompt: str,
|
85
92
|
image: np.ndarray,
|
@@ -10,7 +10,7 @@ vision_agent/agent/vision_agent.py,sha256=I75bEU-os9Lf9OSICKfvQ_H_ftg-zOwgTwWnu4
|
|
10
10
|
vision_agent/agent/vision_agent_coder.py,sha256=ANwUuCO4JpTYJs4s6ynSRFcdjZFUVuSoSfcqp8ZQDDQ,27451
|
11
11
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
|
12
12
|
vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=9v5HwbNidSzYUEFl6ZMniWWOmyLITM_moWLtKVaTen8,4845
|
13
|
-
vision_agent/agent/vision_agent_coder_v2.py,sha256=
|
13
|
+
vision_agent/agent/vision_agent_coder_v2.py,sha256=WKYPJAliupxnF2TP5jZlinqxnID37xnYSDNGMwoFKwU,16092
|
14
14
|
vision_agent/agent/vision_agent_planner.py,sha256=KWMA7XemcSmc_jn-MwdWz9wnKDtj-sYQ9tINi70_OoU,18583
|
15
15
|
vision_agent/agent/vision_agent_planner_prompts.py,sha256=Y3jz9HRf8fz9NLUseN7cTgZqewP0RazxR7vw1sPhcn0,6691
|
16
16
|
vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=Tzon3h5iZdHJglesk8GVS-2myNf5-fhf7HUbkpZWHQk,33143
|
@@ -26,12 +26,12 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
|
|
26
26
|
vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
|
27
27
|
vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
|
28
28
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
29
|
-
vision_agent/tools/__init__.py,sha256=
|
29
|
+
vision_agent/tools/__init__.py,sha256=R6Ua9j1ragY-omt1OLivCwwFY_x6-nn-Ic9vTTvzUss,2904
|
30
30
|
vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
|
31
|
-
vision_agent/tools/planner_tools.py,sha256=
|
31
|
+
vision_agent/tools/planner_tools.py,sha256=gzCCdruzAmVXSgEVRjFdc9qLhe8rlJ-O4Mi7NnI4LsA,13364
|
32
32
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
33
33
|
vision_agent/tools/tool_utils.py,sha256=AT7rMcpKwZgIErfgfSvHS0gmtvd8KMHJoHnu5aMlgO0,10259
|
34
|
-
vision_agent/tools/tools.py,sha256=
|
34
|
+
vision_agent/tools/tools.py,sha256=bq5a7ZVgPDz7sxnwJ0VoZtXAUo7ACChCBXUjFE3iETI,87760
|
35
35
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
36
36
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
37
37
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -40,7 +40,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
|
|
40
40
|
vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
|
41
41
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
42
42
|
vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
|
43
|
-
vision_agent-0.2.
|
44
|
-
vision_agent-0.2.
|
45
|
-
vision_agent-0.2.
|
46
|
-
vision_agent-0.2.
|
43
|
+
vision_agent-0.2.207.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
44
|
+
vision_agent-0.2.207.dist-info/METADATA,sha256=jkbYpz8kjqd2ijaSzduHZBDs0CU67a7kZ2gPF4Oq72w,19026
|
45
|
+
vision_agent-0.2.207.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
46
|
+
vision_agent-0.2.207.dist-info/RECORD,,
|
File without changes
|
File without changes
|