vision-agent 0.2.32__py3-none-any.whl → 0.2.34__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +112 -56
- vision_agent/tools/__init__.py +32 -0
- vision_agent/tools/tools.py +82 -17
- {vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/METADATA +33 -4
- {vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/RECORD +7 -7
- {vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/WHEEL +0 -0
vision_agent/agent/vision_agent.py
CHANGED
@@ -9,6 +9,7 @@ from rich.console import Console
 from rich.syntax import Syntax
 from tabulate import tabulate
 
+import vision_agent.tools as T
 from vision_agent.agent import Agent
 from vision_agent.agent.vision_agent_prompts import (
     CODE,
@@ -22,7 +23,6 @@ from vision_agent.agent.vision_agent_prompts import (
 )
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM, OpenAILMM
-from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF, UTILITIES_DOCSTRING
 from vision_agent.utils import Execute
 from vision_agent.utils.sim import Sim
 
@@ -31,6 +31,7 @@ _LOGGER = logging.getLogger(__name__)
 _MAX_TABULATE_COL_WIDTH = 80
 _EXECUTE = Execute(600)
 _CONSOLE = Console()
+_DEFAULT_IMPORT = "\n".join(T.__new_tools__)
 
 
 def format_memory(memory: List[Dict[str, str]]) -> str:
@@ -125,6 +126,12 @@ def write_and_test_code(
     max_retries: int = 3,
     input_media: Optional[Union[str, Path]] = None,
 ) -> Dict[str, Any]:
+    log_progress(
+        {
+            "type": "code",
+            "status": "started",
+        }
+    )
     code = extract_code(
         coder(CODE.format(docstring=tool_info, question=task, feedback=working_memory))
     )
@@ -140,35 +147,44 @@ def write_and_test_code(
         )
     )
 
-    success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
+    log_progress(
+        {
+            "type": "code",
+            "status": "running",
+            "payload": {
+                "code": code,
+                "test": test,
+            },
+        }
+    )
+    success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
+    log_progress(
+        {
+            "type": "code",
+            "status": "completed" if success else "failed",
+            "payload": {
+                "code": code,
+                "test": test,
+                "result": result,
+            },
+        }
+    )
     if verbosity == 2:
         _LOGGER.info("Initial code and tests:")
-        log_progress(
-            {
-                "log": "Code:",
-                "code": code,
-            }
-        )
-        log_progress(
-            {
-                "log": "Test:",
-                "code": test,
-            }
-        )
         _CONSOLE.print(
             Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
         )
-        log_progress(
-            {
-                "log": "Result:",
-                "result": result,
-            }
-        )
         _LOGGER.info(f"Initial result: {result}")
 
     count = 0
     new_working_memory = []
     while not success and count < max_retries:
+        log_progress(
+            {
+                "type": "code",
+                "status": "started",
+            }
+        )
         fixed_code_and_test = extract_json(
             debugger(
                 FIX_BUG.format(
@@ -180,18 +196,33 @@ def write_and_test_code(
         code = extract_code(fixed_code_and_test["code"])
         if fixed_code_and_test["test"].strip() != "":
             test = extract_code(fixed_code_and_test["test"])
+        log_progress(
+            {
+                "type": "code",
+                "status": "running",
+                "payload": {
+                    "code": code,
+                    "test": test,
+                },
+            }
+        )
         new_working_memory.append(
             {"code": f"{code}\n{test}", "feedback": fixed_code_and_test["reflections"]}
         )

-        success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
+        success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
+        log_progress(
+            {
+                "type": "code",
+                "status": "completed" if success else "failed",
+                "payload": {
+                    "code": code,
+                    "test": test,
+                    "result": result,
+                },
+            }
+        )
         if verbosity == 2:
-            log_progress(
-                {
-                    "log": f"Debug attempt {count + 1}, reflection:",
-                    "result": fixed_code_and_test["reflections"],
-                }
-            )
             _LOGGER.info(
                 f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
             )
@@ -200,12 +231,6 @@ def write_and_test_code(
                     f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
                 )
             )
-            log_progress(
-                {
-                    "log": "Debug result:",
-                    "result": result,
-                }
-            )
             _LOGGER.info(f"Debug result: {result}")
         count += 1
 
@@ -231,19 +256,26 @@ def retrieve_tools(
     log_progress: Callable[[Dict[str, Any]], None],
     verbosity: int = 0,
 ) -> str:
+    log_progress(
+        {
+            "type": "tools",
+            "status": "started",
+        }
+    )
     tool_info = []
     tool_desc = []
     for task in plan:
         tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
         tool_info.extend([e["doc"] for e in tools])
         tool_desc.extend([e["desc"] for e in tools])
+    log_progress(
+        {
+            "type": "tools",
+            "status": "completed",
+            "payload": tools,
+        }
+    )
     if verbosity == 2:
-        log_progress(
-            {
-                "log": "Retrieved tools:",
-                "tools": tool_desc,
-            }
-        )
         _LOGGER.info(f"Tools: {tool_desc}")
     tool_info_set = set(tool_info)
     return "\n\n".join(tool_info_set)
@@ -300,7 +332,7 @@ class VisionAgent(Agent):
         )
 
         self.tool_recommender = (
-            Sim(TOOLS_DF, sim_key="desc")
+            Sim(T.TOOLS_DF, sim_key="desc")
             if tool_recommender is None
             else tool_recommender
         )
@@ -358,6 +390,10 @@ class VisionAgent(Agent):
             if chat_i["role"] == "user":
                 chat_i["content"] += f" Image name {media}"
 
+        # re-grab custom tools
+        global _DEFAULT_IMPORT
+        _DEFAULT_IMPORT = "\n".join(T.__new_tools__)
+
         code = ""
         test = ""
         working_memory: List[Dict[str, str]] = []
@@ -367,21 +403,29 @@ class VisionAgent(Agent):
         retries = 0
 
         while not success and retries < self.max_retries:
+            self.log_progress(
+                {
+                    "type": "plans",
+                    "status": "started",
+                }
+            )
             plan_i = write_plan(
                 chat,
-                TOOL_DESCRIPTIONS,
+                T.TOOL_DESCRIPTIONS,
                 format_memory(working_memory),
                 self.planner,
                 media=[media] if media else None,
             )
             plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
+
+            self.log_progress(
+                {
+                    "type": "plans",
+                    "status": "completed",
+                    "payload": plan_i,
+                }
+            )
             if self.verbosity >= 1:
-                self.log_progress(
-                    {
-                        "log": "Going to run the following plan(s) in sequence:\n",
-                        "plan": plan_i,
-                    }
-                )

                 _LOGGER.info(
                     f"""
@@ -397,7 +441,7 @@ class VisionAgent(Agent):
             results = write_and_test_code(
                 FULL_TASK.format(user_request=chat[0]["content"], subtasks=plan_i_str),
                 tool_info,
-                UTILITIES_DOCSTRING,
+                T.UTILITIES_DOCSTRING,
                 format_memory(working_memory),
                 self.coder,
                 self.tester,
@@ -413,6 +457,12 @@ class VisionAgent(Agent):
             plan.append({"code": code, "test": test, "plan": plan_i})
 
             if self_reflection:
+                self.log_progress(
+                    {
+                        "type": "self_reflection",
+                        "status": "started",
+                    }
+                )
                 reflection = reflect(
                     chat,
                     FULL_TASK.format(
@@ -422,23 +472,29 @@ class VisionAgent(Agent):
                     self.planner,
                 )
                 if self.verbosity > 0:
-                    self.log_progress(
-                        {
-                            "log": "Reflection:",
-                            "reflection": reflection,
-                        }
-                    )
                     _LOGGER.info(f"Reflection: {reflection}")
                 feedback = cast(str, reflection["feedback"])
                 success = cast(bool, reflection["success"])
+                self.log_progress(
+                    {
+                        "type": "self_reflection",
+                        "status": "completed" if success else "failed",
+                        "payload": reflection,
+                    }
+                )
                 working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
 
             retries += 1
 
         self.log_progress(
             {
-                "
-                "
+                "type": "final_code",
+                "status": "completed" if success else "failed",
+                "payload": {
+                    "code": code,
+                    "test": test,
+                    "result": results["test_result"],
+                },
             }
         )
 
vision_agent/tools/__init__.py
CHANGED
@@ -1,3 +1,5 @@
+from typing import Callable, List, Optional
+
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
     TOOL_DESCRIPTIONS,
@@ -16,9 +18,39 @@ from .tools import (
     load_image,
     ocr,
     overlay_bounding_boxes,
+    overlay_heat_map,
     overlay_segmentation_masks,
     save_image,
     save_json,
     visual_prompt_counting,
     zero_shot_counting,
 )
+
+__new_tools__ = [
+    "import vision_agent as va",
+    "from vision_agent.tools import register_tool",
+]
+
+
+def register_tool(imports: Optional[List] = None) -> Callable:
+    def decorator(tool: Callable) -> Callable:
+        import inspect
+
+        from .tools import get_tool_descriptions, get_tool_documentation, get_tools_df
+
+        global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING
+
+        if tool not in TOOLS:
+            TOOLS.append(tool)
+            TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
+            TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
+            TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
+
+        globals()[tool.__name__] = tool
+        if imports is not None:
+            for import_ in imports:
+                __new_tools__.append(import_)
+        __new_tools__.append(inspect.getsource(tool))
+        return tool
+
+    return decorator
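
`register_tool` appends the decorated function to the module-level registries (`TOOLS`, `TOOLS_DF`, `TOOL_DESCRIPTIONS`, `TOOL_DOCSTRING`) and records its source, plus any extra import lines, in `__new_tools__`, which the agent prepends to generated code via `_DEFAULT_IMPORT`. A rough sketch of a registration is shown below; `count_red_pixels` and `image.jpg` are hypothetical and only illustrate the required docstring format.

```python
import vision_agent as va
from vision_agent.tools import register_tool


@register_tool(imports=["import numpy as np"])
def count_red_pixels(image_path: str) -> int:
    """'count_red_pixels' counts the pixels whose red channel dominates.

    Parameters:
        image_path (str): The path to the image.

    Returns:
        int: The number of red-dominant pixels.

    Example
    -------
    >>> count_red_pixels("image.jpg")
    """
    import numpy as np
    from PIL import Image

    arr = np.array(Image.open(image_path).convert("RGB"))
    return int(((arr[..., 0] > arr[..., 1]) & (arr[..., 0] > arr[..., 2])).sum())


# After registration the tool is exposed on vision_agent.tools, its docstring
# feeds the recommender tables, and its source is appended to __new_tools__.
print(va.tools.TOOL_DESCRIPTIONS.splitlines()[-1])
```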
vision_agent/tools/tools.py
CHANGED
@@ -203,7 +203,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the detected text, bbox,
-
+        and confidence score.
 
     Example
     -------
@@ -247,14 +247,16 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
 
 
 def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
-    """'zero_shot_counting' is a tool that counts the dominant foreground object given
-
+    """'zero_shot_counting' is a tool that counts the dominant foreground object given
+    an image and no other information about the content. It returns only the count of
+    the objects in the image.
 
     Parameters:
         image (np.ndarray): The image that contains lot of instances of a single object
 
     Returns:
-        Dict[str, Any]: A dictionary containing the key 'count' and the count as a
+        Dict[str, Any]: A dictionary containing the key 'count' and the count as a
+        value. E.g. {count: 12}.
 
     Example
     -------
@@ -276,14 +278,16 @@ def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
 def visual_prompt_counting(
     image: np.ndarray, visual_prompt: Dict[str, List[float]]
 ) -> Dict[str, Any]:
-    """'visual_prompt_counting' is a tool that counts the dominant foreground object
+    """'visual_prompt_counting' is a tool that counts the dominant foreground object
+    given an image and a visual prompt which is a bounding box describing the object.
     It returns only the count of the objects in the image.
 
     Parameters:
         image (np.ndarray): The image that contains lot of instances of a single object
 
     Returns:
-        Dict[str, Any]: A dictionary containing the key 'count' and the count as a
+        Dict[str, Any]: A dictionary containing the key 'count' and the count as a
+        value. E.g. {count: 12}.
 
     Example
     -------
@@ -308,15 +312,17 @@ def visual_prompt_counting(
 
 
 def image_question_answering(image: np.ndarray, prompt: str) -> str:
-    """'image_question_answering_' is a tool that can answer questions about the visual
-    It returns an answer to the
+    """'image_question_answering_' is a tool that can answer questions about the visual
+    contents of an image given a question and an image. It returns an answer to the
+    question
 
     Parameters:
         image (np.ndarray): The reference image used for the question
         prompt (str): The question about the image
 
     Returns:
-        str: A string which is the answer to the given prompt. E.g. {'text': 'This
+        str: A string which is the answer to the given prompt. E.g. {'text': 'This
+        image contains a cat sitting on a table with a bowl of milk.'}.
 
     Example
     -------
@@ -338,14 +344,16 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str:
 
 def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
     """'clip' is a tool that can classify an image given a list of input classes or tags.
-    It returns the same list of the input classes along with their probability scores
+    It returns the same list of the input classes along with their probability scores
+    based on image content.
 
     Parameters:
         image (np.ndarray): The image to classify or tag
         classes (List[str]): The list of classes or tags that is associated with the image
 
     Returns:
-        Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
+        Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
+        contains a list of given labels and other a list of scores.
 
     Example
     -------
@@ -366,8 +374,8 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
 
 
 def image_caption(image: np.ndarray) -> str:
-    """'image_caption' is a tool that can caption an image based on its contents.
-
+    """'image_caption' is a tool that can caption an image based on its contents. It
+    returns a text describing the image.
 
     Parameters:
         image (np.ndarray): The image to caption
@@ -619,6 +627,51 @@ def overlay_segmentation_masks(
     return np.array(pil_image.convert("RGB"))
 
 
+def overlay_heat_map(
+    image: np.ndarray, heat_map: Dict[str, Any], alpha: float = 0.8
+) -> np.ndarray:
+    """'display_heat_map' is a utility function that displays a heat map on an image.
+
+    Parameters:
+        image (np.ndarray): The image to display the heat map on.
+        heat_map (Dict[str, Any]): A dictionary containing the heat map under the key
+            'heat_map'.
+        alpha (float, optional): The transparency of the overlay. Defaults to 0.8.
+
+    Returns:
+        np.ndarray: The image with the heat map displayed.
+
+    Example
+    -------
+    >>> image_with_heat_map = display_heat_map(
+        image,
+        {
+            'heat_map': array([[0, 0, 0, ..., 0, 0, 0],
+                [0, 0, 0, ..., 0, 0, 0],
+                ...,
+                [0, 0, 0, ..., 0, 0, 0],
+                [0, 0, 0, ..., 125, 125, 125]], dtype=uint8),
+        },
+    )
+    """
+    pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
+
+    if "heat_map" not in heat_map or len(heat_map["heat_map"]) == 0:
+        return image
+
+    pil_image = pil_image.convert("L")
+    mask = Image.fromarray(heat_map["heat_map"])
+    mask = mask.resize(pil_image.size)
+
+    overlay = Image.new("RGBA", mask.size)
+    odraw = ImageDraw.Draw(overlay)
+    odraw.bitmap((0, 0), mask, fill=(255, 0, 0, round(alpha * 255)))
+    combined = Image.alpha_composite(
+        pil_image.convert("RGBA"), overlay.resize(pil_image.size)
+    )
+    return np.array(combined.convert("RGB"))
+
+
 def get_tool_documentation(funcs: List[Callable[..., Any]]) -> str:
     docstrings = ""
     for func in funcs:
@@ -634,9 +687,13 @@ def get_tool_descriptions(funcs: List[Callable[..., Any]]) -> str:
         if description is None:
             description = ""
 
-
-        description
-
+        if "Parameters:" in description:
+            description = (
+                description[: description.find("Parameters:")]
+                .replace("\n", " ")
+                .strip()
+            )
+
         description = " ".join(description.split())
         descriptions += f"- {func.__name__}{inspect.signature(func)}: {description}\n"
     return descriptions
@@ -676,10 +733,18 @@ TOOLS = [
     save_image,
     overlay_bounding_boxes,
     overlay_segmentation_masks,
+    overlay_heat_map,
 ]
 TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
 TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
 TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
 UTILITIES_DOCSTRING = get_tool_documentation(
-    [
+    [
+        save_json,
+        load_image,
+        save_image,
+        overlay_bounding_boxes,
+        overlay_segmentation_masks,
+        overlay_heat_map,
+    ]
 )
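
The new `overlay_heat_map` utility expects the heat map as a `uint8` array under the `'heat_map'` key and alpha-blends it in red over a grayscale copy of the image. A small self-contained sketch with a synthetic heat map follows; the file name and array values are made up for illustration.

```python
import numpy as np

from vision_agent.tools import load_image, overlay_heat_map

image = load_image("image.jpg")

# Synthetic heat map: zeros everywhere except a block in the lower-right
# corner, mirroring the uint8 array shown in the docstring example above.
heat = np.zeros(image.shape[:2], dtype=np.uint8)
heat[-40:, -120:] = 125

overlaid = overlay_heat_map(image, {"heat_map": heat}, alpha=0.6)
print(overlaid.shape)  # same height and width as the input, RGB
```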
{vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.32
+Version: 0.2.34
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -153,6 +153,34 @@ you. For example:
 }]
 ```
 
+You can also add custom tools to the agent:
+
+```python
+import vision_agent as va
+
+@va.tools.register_tool(imports=["import numpy as np"])
+def custom_tool(image_path: str) -> str:
+    """My custom tool documentation.
+
+    Parameters:
+        image_path (str): The path to the image.
+
+    Returns:
+        str: The result of the tool.
+
+    Example
+    -------
+    >>> custom_tool("image.jpg")
+    """
+
+    import numpy as np
+    return np.zeros((10, 10))
+```
+
+You need to ensure you call `@va.tools.register_tool` with any imports it might use and
+ensure the documentation is in the same format above with description, `Parameters:`,
+`Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
+
 ### Azure Setup
 If you want to use Azure OpenAI models, you can set the environment variable:
 
@@ -166,9 +194,10 @@ You can then run Vision Agent using the Azure OpenAI models:
 ```python
 >>> import vision_agent as va
 >>> agent = va.agent.VisionAgent(
->>>
->>>
->>>
+>>>     planner=va.llm.AzureOpenAILLM(),
+>>>     coder=va.lmm.AzureOpenAILMM(),
+>>>     tester=va.lmm.AzureOpenAILMM(),
+>>>     debugger=va.lmm.AzureOpenAILMM(),
 >>> )
 ```
 
{vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/RECORD
CHANGED
@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
 vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
 vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=
+vision_agent/agent/vision_agent.py,sha256=SAk1-UWVxdpjMbcUsx2afbgQO8VjbwfKUKdM_MUs8Ck,16640
 vision_agent/agent/vision_agent_prompts.py,sha256=0YbiS59IEWbiE43gCvOqfWrpudIAhTn8FHzXW0Y-Gaw,8201
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
@@ -19,18 +19,18 @@ vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,
 vision_agent/llm/llm.py,sha256=UZ73GqQHE-NKOJWsrOTWfmdHYsbCBkJ5rZ7dhcSCHHw,5951
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
 vision_agent/lmm/lmm.py,sha256=NwcZYLTzi95LSMAk0sTtw7G_zBLa9lU-DHM5GUUCiK4,10622
-vision_agent/tools/__init__.py,sha256=
+vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYvE,1512
 vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
-vision_agent/tools/tools.py,sha256=
+vision_agent/tools/tools.py,sha256=nXjefpW9L-Xuos73ObDqpmJfOyUAJVrzoiHsxEE7O10,23346
 vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
 vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
 vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
 vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
 vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
 vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
+vision_agent-0.2.34.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.34.dist-info/METADATA,sha256=G7TLFwGHMZmxNOCXouYlajbIwhIE4YTbyRCOOeBVpPY,6698
+vision_agent-0.2.34.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.34.dist-info/RECORD,,
{vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/LICENSE
File without changes

{vision_agent-0.2.32.dist-info → vision_agent-0.2.34.dist-info}/WHEEL
File without changes