vision-agent 0.2.31__py3-none-any.whl → 0.2.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/agent_coder.py +9 -3
- vision_agent/agent/vision_agent.py +23 -9
- vision_agent/agent/vision_agent_prompts.py +7 -5
- vision_agent/tools/__init__.py +32 -0
- vision_agent/tools/tools.py +82 -17
- {vision_agent-0.2.31.dist-info → vision_agent-0.2.33.dist-info}/METADATA +33 -4
- {vision_agent-0.2.31.dist-info → vision_agent-0.2.33.dist-info}/RECORD +9 -9
- {vision_agent-0.2.31.dist-info → vision_agent-0.2.33.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.31.dist-info → vision_agent-0.2.33.dist-info}/WHEEL +0 -0
@@ -67,11 +67,17 @@ def parse_file_name(s: str) -> str:
|
|
67
67
|
return "".join([p for p in s.split(" ") if p.endswith(".png")])
|
68
68
|
|
69
69
|
|
70
|
-
def write_program(
|
70
|
+
def write_program(
|
71
|
+
question: str, feedback: str, model: LLM, media: Optional[Union[str, Path]] = None
|
72
|
+
) -> str:
|
71
73
|
prompt = PROGRAM.format(
|
72
74
|
docstring=TOOL_DOCSTRING, question=question, feedback=feedback
|
73
75
|
)
|
74
|
-
|
76
|
+
if isinstance(model, OpenAILMM):
|
77
|
+
completion = model(prompt, images=[media] if media else None)
|
78
|
+
else:
|
79
|
+
completion = model(prompt)
|
80
|
+
|
75
81
|
return preprocess_data(completion)
|
76
82
|
|
77
83
|
|
@@ -168,7 +174,7 @@ class AgentCoder(Agent):
|
|
168
174
|
code = ""
|
169
175
|
feedback = ""
|
170
176
|
for _ in range(self.max_turns):
|
171
|
-
code = write_program(question, feedback, self.coder_agent)
|
177
|
+
code = write_program(question, feedback, self.coder_agent, media=media)
|
172
178
|
if self.verbose:
|
173
179
|
_CONSOLE.print(
|
174
180
|
Syntax(code, "python", theme="gruvbox-dark", line_numbers=True)
|
@@ -9,6 +9,7 @@ from rich.console import Console
|
|
9
9
|
from rich.syntax import Syntax
|
10
10
|
from tabulate import tabulate
|
11
11
|
|
12
|
+
import vision_agent.tools as T
|
12
13
|
from vision_agent.agent import Agent
|
13
14
|
from vision_agent.agent.vision_agent_prompts import (
|
14
15
|
CODE,
|
@@ -21,7 +22,7 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
21
22
|
USER_REQ,
|
22
23
|
)
|
23
24
|
from vision_agent.llm import LLM, OpenAILLM
|
24
|
-
from vision_agent.
|
25
|
+
from vision_agent.lmm import LMM, OpenAILMM
|
25
26
|
from vision_agent.utils import Execute
|
26
27
|
from vision_agent.utils.sim import Sim
|
27
28
|
|
@@ -30,6 +31,7 @@ _LOGGER = logging.getLogger(__name__)
|
|
30
31
|
_MAX_TABULATE_COL_WIDTH = 80
|
31
32
|
_EXECUTE = Execute(600)
|
32
33
|
_CONSOLE = Console()
|
34
|
+
_DEFAULT_IMPORT = "\n".join(T.__new_tools__)
|
33
35
|
|
34
36
|
|
35
37
|
def format_memory(memory: List[Dict[str, str]]) -> str:
|
@@ -77,7 +79,8 @@ def write_plan(
|
|
77
79
|
chat: List[Dict[str, str]],
|
78
80
|
tool_desc: str,
|
79
81
|
working_memory: str,
|
80
|
-
model: LLM,
|
82
|
+
model: Union[LLM, LMM],
|
83
|
+
media: Optional[List[Union[str, Path]]] = None,
|
81
84
|
) -> List[Dict[str, str]]:
|
82
85
|
chat = copy.deepcopy(chat)
|
83
86
|
if chat[-1]["role"] != "user":
|
@@ -87,7 +90,10 @@ def write_plan(
|
|
87
90
|
context = USER_REQ.format(user_request=user_request)
|
88
91
|
prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
|
89
92
|
chat[-1]["content"] = prompt
|
90
|
-
|
93
|
+
if isinstance(model, OpenAILMM):
|
94
|
+
return extract_json(model.chat(chat, images=media))["plan"] # type: ignore
|
95
|
+
else:
|
96
|
+
return extract_json(model.chat(chat))["plan"] # type: ignore
|
91
97
|
|
92
98
|
|
93
99
|
def reflect(
|
@@ -135,7 +141,7 @@ def write_and_test_code(
|
|
135
141
|
)
|
136
142
|
)
|
137
143
|
|
138
|
-
success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
|
144
|
+
success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
|
139
145
|
if verbosity == 2:
|
140
146
|
_LOGGER.info("Initial code and tests:")
|
141
147
|
log_progress(
|
@@ -179,7 +185,7 @@ def write_and_test_code(
|
|
179
185
|
{"code": f"{code}\n{test}", "feedback": fixed_code_and_test["reflections"]}
|
180
186
|
)
|
181
187
|
|
182
|
-
success, result = _EXECUTE.run_isolation(f"{code}\n{test}")
|
188
|
+
success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
|
183
189
|
if verbosity == 2:
|
184
190
|
log_progress(
|
185
191
|
{
|
@@ -295,7 +301,7 @@ class VisionAgent(Agent):
|
|
295
301
|
)
|
296
302
|
|
297
303
|
self.tool_recommender = (
|
298
|
-
Sim(TOOLS_DF, sim_key="desc")
|
304
|
+
Sim(T.TOOLS_DF, sim_key="desc")
|
299
305
|
if tool_recommender is None
|
300
306
|
else tool_recommender
|
301
307
|
)
|
@@ -324,7 +330,7 @@ class VisionAgent(Agent):
|
|
324
330
|
input = [{"role": "user", "content": input}]
|
325
331
|
results = self.chat_with_workflow(input, media)
|
326
332
|
results.pop("working_memory")
|
327
|
-
return results
|
333
|
+
return results # type: ignore
|
328
334
|
|
329
335
|
def chat_with_workflow(
|
330
336
|
self,
|
@@ -353,6 +359,10 @@ class VisionAgent(Agent):
|
|
353
359
|
if chat_i["role"] == "user":
|
354
360
|
chat_i["content"] += f" Image name {media}"
|
355
361
|
|
362
|
+
# re-grab custom tools
|
363
|
+
global _DEFAULT_IMPORT
|
364
|
+
_DEFAULT_IMPORT = "\n".join(T.__new_tools__)
|
365
|
+
|
356
366
|
code = ""
|
357
367
|
test = ""
|
358
368
|
working_memory: List[Dict[str, str]] = []
|
@@ -363,7 +373,11 @@ class VisionAgent(Agent):
|
|
363
373
|
|
364
374
|
while not success and retries < self.max_retries:
|
365
375
|
plan_i = write_plan(
|
366
|
-
chat,
|
376
|
+
chat,
|
377
|
+
T.TOOL_DESCRIPTIONS,
|
378
|
+
format_memory(working_memory),
|
379
|
+
self.planner,
|
380
|
+
media=[media] if media else None,
|
367
381
|
)
|
368
382
|
plan_i_str = "\n-".join([e["instructions"] for e in plan_i])
|
369
383
|
if self.verbosity >= 1:
|
@@ -388,7 +402,7 @@ class VisionAgent(Agent):
|
|
388
402
|
results = write_and_test_code(
|
389
403
|
FULL_TASK.format(user_request=chat[0]["content"], subtasks=plan_i_str),
|
390
404
|
tool_info,
|
391
|
-
UTILITIES_DOCSTRING,
|
405
|
+
T.UTILITIES_DOCSTRING,
|
392
406
|
format_memory(working_memory),
|
393
407
|
self.coder,
|
394
408
|
self.tester,
|
@@ -169,11 +169,13 @@ This is the documentation for the functions you have access to. You may call any
|
|
169
169
|
1. Verify the fundamental functionality under normal conditions.
|
170
170
|
2. Ensure each test case is well-documented with comments explaining the scenario it covers.
|
171
171
|
3. Your test case MUST run only on the given image which is {media}
|
172
|
-
4.
|
173
|
-
5. DO NOT
|
174
|
-
6. DO NOT
|
175
|
-
7. DO NOT
|
176
|
-
8.
|
172
|
+
4. Your test case MUST run only with the given values which is available in the question - {question}
|
173
|
+
5. DO NOT use any non-existent or dummy image or video files that are not provided by the user's instructions.
|
174
|
+
6. DO NOT mock any functions, you must test their functionality as is.
|
175
|
+
7. DO NOT assert the output value, run the code and assert only the output format or data structure.
|
176
|
+
8. DO NOT use try except block to handle the error, let the error be raised if the code is incorrect.
|
177
|
+
9. DO NOT import the testing function as it will available in the testing environment.
|
178
|
+
10. Print the output of the function that is being tested.
|
177
179
|
"""
|
178
180
|
|
179
181
|
|
vision_agent/tools/__init__.py
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
from typing import Callable, List, Optional
|
2
|
+
|
1
3
|
from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
|
2
4
|
from .tools import (
|
3
5
|
TOOL_DESCRIPTIONS,
|
@@ -16,9 +18,39 @@ from .tools import (
|
|
16
18
|
load_image,
|
17
19
|
ocr,
|
18
20
|
overlay_bounding_boxes,
|
21
|
+
overlay_heat_map,
|
19
22
|
overlay_segmentation_masks,
|
20
23
|
save_image,
|
21
24
|
save_json,
|
22
25
|
visual_prompt_counting,
|
23
26
|
zero_shot_counting,
|
24
27
|
)
|
28
|
+
|
29
|
+
__new_tools__ = [
|
30
|
+
"import vision_agent as va",
|
31
|
+
"from vision_agent.tools import register_tool",
|
32
|
+
]
|
33
|
+
|
34
|
+
|
35
|
+
def register_tool(imports: Optional[List] = None) -> Callable:
|
36
|
+
def decorator(tool: Callable) -> Callable:
|
37
|
+
import inspect
|
38
|
+
|
39
|
+
from .tools import get_tool_descriptions, get_tool_documentation, get_tools_df
|
40
|
+
|
41
|
+
global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING
|
42
|
+
|
43
|
+
if tool not in TOOLS:
|
44
|
+
TOOLS.append(tool)
|
45
|
+
TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
46
|
+
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
47
|
+
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
48
|
+
|
49
|
+
globals()[tool.__name__] = tool
|
50
|
+
if imports is not None:
|
51
|
+
for import_ in imports:
|
52
|
+
__new_tools__.append(import_)
|
53
|
+
__new_tools__.append(inspect.getsource(tool))
|
54
|
+
return tool
|
55
|
+
|
56
|
+
return decorator
|
vision_agent/tools/tools.py
CHANGED
@@ -203,7 +203,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
203
203
|
|
204
204
|
Returns:
|
205
205
|
List[Dict[str, Any]]: A list of dictionaries containing the detected text, bbox,
|
206
|
-
|
206
|
+
and confidence score.
|
207
207
|
|
208
208
|
Example
|
209
209
|
-------
|
@@ -247,14 +247,16 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
247
247
|
|
248
248
|
|
249
249
|
def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
|
250
|
-
"""'zero_shot_counting' is a tool that counts the dominant foreground object given
|
251
|
-
|
250
|
+
"""'zero_shot_counting' is a tool that counts the dominant foreground object given
|
251
|
+
an image and no other information about the content. It returns only the count of
|
252
|
+
the objects in the image.
|
252
253
|
|
253
254
|
Parameters:
|
254
255
|
image (np.ndarray): The image that contains lot of instances of a single object
|
255
256
|
|
256
257
|
Returns:
|
257
|
-
Dict[str, Any]: A dictionary containing the key 'count' and the count as a
|
258
|
+
Dict[str, Any]: A dictionary containing the key 'count' and the count as a
|
259
|
+
value. E.g. {count: 12}.
|
258
260
|
|
259
261
|
Example
|
260
262
|
-------
|
@@ -276,14 +278,16 @@ def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
|
|
276
278
|
def visual_prompt_counting(
|
277
279
|
image: np.ndarray, visual_prompt: Dict[str, List[float]]
|
278
280
|
) -> Dict[str, Any]:
|
279
|
-
"""'visual_prompt_counting' is a tool that counts the dominant foreground object
|
281
|
+
"""'visual_prompt_counting' is a tool that counts the dominant foreground object
|
282
|
+
given an image and a visual prompt which is a bounding box describing the object.
|
280
283
|
It returns only the count of the objects in the image.
|
281
284
|
|
282
285
|
Parameters:
|
283
286
|
image (np.ndarray): The image that contains lot of instances of a single object
|
284
287
|
|
285
288
|
Returns:
|
286
|
-
Dict[str, Any]: A dictionary containing the key 'count' and the count as a
|
289
|
+
Dict[str, Any]: A dictionary containing the key 'count' and the count as a
|
290
|
+
value. E.g. {count: 12}.
|
287
291
|
|
288
292
|
Example
|
289
293
|
-------
|
@@ -308,15 +312,17 @@ def visual_prompt_counting(
|
|
308
312
|
|
309
313
|
|
310
314
|
def image_question_answering(image: np.ndarray, prompt: str) -> str:
|
311
|
-
"""'image_question_answering_' is a tool that can answer questions about the visual
|
312
|
-
It returns an answer to the
|
315
|
+
"""'image_question_answering_' is a tool that can answer questions about the visual
|
316
|
+
contents of an image given a question and an image. It returns an answer to the
|
317
|
+
question
|
313
318
|
|
314
319
|
Parameters:
|
315
320
|
image (np.ndarray): The reference image used for the question
|
316
321
|
prompt (str): The question about the image
|
317
322
|
|
318
323
|
Returns:
|
319
|
-
str: A string which is the answer to the given prompt. E.g. {'text': 'This
|
324
|
+
str: A string which is the answer to the given prompt. E.g. {'text': 'This
|
325
|
+
image contains a cat sitting on a table with a bowl of milk.'}.
|
320
326
|
|
321
327
|
Example
|
322
328
|
-------
|
@@ -338,14 +344,16 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str:
|
|
338
344
|
|
339
345
|
def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
|
340
346
|
"""'clip' is a tool that can classify an image given a list of input classes or tags.
|
341
|
-
It returns the same list of the input classes along with their probability scores
|
347
|
+
It returns the same list of the input classes along with their probability scores
|
348
|
+
based on image content.
|
342
349
|
|
343
350
|
Parameters:
|
344
351
|
image (np.ndarray): The image to classify or tag
|
345
352
|
classes (List[str]): The list of classes or tags that is associated with the image
|
346
353
|
|
347
354
|
Returns:
|
348
|
-
Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
|
355
|
+
Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
|
356
|
+
contains a list of given labels and other a list of scores.
|
349
357
|
|
350
358
|
Example
|
351
359
|
-------
|
@@ -366,8 +374,8 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
|
|
366
374
|
|
367
375
|
|
368
376
|
def image_caption(image: np.ndarray) -> str:
|
369
|
-
"""'image_caption' is a tool that can caption an image based on its contents.
|
370
|
-
|
377
|
+
"""'image_caption' is a tool that can caption an image based on its contents. It
|
378
|
+
returns a text describing the image.
|
371
379
|
|
372
380
|
Parameters:
|
373
381
|
image (np.ndarray): The image to caption
|
@@ -619,6 +627,51 @@ def overlay_segmentation_masks(
|
|
619
627
|
return np.array(pil_image.convert("RGB"))
|
620
628
|
|
621
629
|
|
630
|
+
def overlay_heat_map(
|
631
|
+
image: np.ndarray, heat_map: Dict[str, Any], alpha: float = 0.8
|
632
|
+
) -> np.ndarray:
|
633
|
+
"""'display_heat_map' is a utility function that displays a heat map on an image.
|
634
|
+
|
635
|
+
Parameters:
|
636
|
+
image (np.ndarray): The image to display the heat map on.
|
637
|
+
heat_map (Dict[str, Any]): A dictionary containing the heat map under the key
|
638
|
+
'heat_map'.
|
639
|
+
alpha (float, optional): The transparency of the overlay. Defaults to 0.8.
|
640
|
+
|
641
|
+
Returns:
|
642
|
+
np.ndarray: The image with the heat map displayed.
|
643
|
+
|
644
|
+
Example
|
645
|
+
-------
|
646
|
+
>>> image_with_heat_map = display_heat_map(
|
647
|
+
image,
|
648
|
+
{
|
649
|
+
'heat_map': array([[0, 0, 0, ..., 0, 0, 0],
|
650
|
+
[0, 0, 0, ..., 0, 0, 0],
|
651
|
+
...,
|
652
|
+
[0, 0, 0, ..., 0, 0, 0],
|
653
|
+
[0, 0, 0, ..., 125, 125, 125]], dtype=uint8),
|
654
|
+
},
|
655
|
+
)
|
656
|
+
"""
|
657
|
+
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
|
658
|
+
|
659
|
+
if "heat_map" not in heat_map or len(heat_map["heat_map"]) == 0:
|
660
|
+
return image
|
661
|
+
|
662
|
+
pil_image = pil_image.convert("L")
|
663
|
+
mask = Image.fromarray(heat_map["heat_map"])
|
664
|
+
mask = mask.resize(pil_image.size)
|
665
|
+
|
666
|
+
overlay = Image.new("RGBA", mask.size)
|
667
|
+
odraw = ImageDraw.Draw(overlay)
|
668
|
+
odraw.bitmap((0, 0), mask, fill=(255, 0, 0, round(alpha * 255)))
|
669
|
+
combined = Image.alpha_composite(
|
670
|
+
pil_image.convert("RGBA"), overlay.resize(pil_image.size)
|
671
|
+
)
|
672
|
+
return np.array(combined.convert("RGB"))
|
673
|
+
|
674
|
+
|
622
675
|
def get_tool_documentation(funcs: List[Callable[..., Any]]) -> str:
|
623
676
|
docstrings = ""
|
624
677
|
for func in funcs:
|
@@ -634,9 +687,13 @@ def get_tool_descriptions(funcs: List[Callable[..., Any]]) -> str:
|
|
634
687
|
if description is None:
|
635
688
|
description = ""
|
636
689
|
|
637
|
-
|
638
|
-
description
|
639
|
-
|
690
|
+
if "Parameters:" in description:
|
691
|
+
description = (
|
692
|
+
description[: description.find("Parameters:")]
|
693
|
+
.replace("\n", " ")
|
694
|
+
.strip()
|
695
|
+
)
|
696
|
+
|
640
697
|
description = " ".join(description.split())
|
641
698
|
descriptions += f"- {func.__name__}{inspect.signature(func)}: {description}\n"
|
642
699
|
return descriptions
|
@@ -676,10 +733,18 @@ TOOLS = [
|
|
676
733
|
save_image,
|
677
734
|
overlay_bounding_boxes,
|
678
735
|
overlay_segmentation_masks,
|
736
|
+
overlay_heat_map,
|
679
737
|
]
|
680
738
|
TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
681
739
|
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
682
740
|
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
683
741
|
UTILITIES_DOCSTRING = get_tool_documentation(
|
684
|
-
[
|
742
|
+
[
|
743
|
+
save_json,
|
744
|
+
load_image,
|
745
|
+
save_image,
|
746
|
+
overlay_bounding_boxes,
|
747
|
+
overlay_segmentation_masks,
|
748
|
+
overlay_heat_map,
|
749
|
+
]
|
685
750
|
)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.31
|
3
|
+
Version: 0.2.33
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -153,6 +153,34 @@ you. For example:
|
|
153
153
|
}]
|
154
154
|
```
|
155
155
|
|
156
|
+
You can also add custom tools to the agent:
|
157
|
+
|
158
|
+
```python
|
159
|
+
import vision_agent as va
|
160
|
+
|
161
|
+
@va.tools.register_tool(imports=["import numpy as np"])
|
162
|
+
def custom_tool(image_path: str) -> str:
|
163
|
+
"""My custom tool documentation.
|
164
|
+
|
165
|
+
Parameters:
|
166
|
+
image_path (str): The path to the image.
|
167
|
+
|
168
|
+
Returns:
|
169
|
+
str: The result of the tool.
|
170
|
+
|
171
|
+
Example
|
172
|
+
-------
|
173
|
+
>>> custom_tool("image.jpg")
|
174
|
+
"""
|
175
|
+
|
176
|
+
import numpy as np
|
177
|
+
return np.zeros((10, 10))
|
178
|
+
```
|
179
|
+
|
180
|
+
You need to ensure you call `@va.tools.register_tool` with any imports it might use and
|
181
|
+
ensure the documentation is in the same format as above with description, `Parameters:`,
|
182
|
+
`Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
|
183
|
+
|
156
184
|
### Azure Setup
|
157
185
|
If you want to use Azure OpenAI models, you can set the environment variable:
|
158
186
|
|
@@ -166,9 +194,10 @@ You can then run Vision Agent using the Azure OpenAI models:
|
|
166
194
|
```python
|
167
195
|
>>> import vision_agent as va
|
168
196
|
>>> agent = va.agent.VisionAgent(
|
169
|
-
>>>
|
170
|
-
>>>
|
171
|
-
>>>
|
197
|
+
>>> planner=va.llm.AzureOpenAILLM(),
|
198
|
+
>>> coder=va.lmm.AzureOpenAILMM(),
|
199
|
+
>>> tester=va.lmm.AzureOpenAILMM(),
|
200
|
+
>>> debugger=va.lmm.AzureOpenAILMM(),
|
172
201
|
>>> )
|
173
202
|
```
|
174
203
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
vision_agent/__init__.py,sha256=GVLHCeK_R-zgldpbcPmOzJat-BkadvkuRCMxDvTIcXs,108
|
2
2
|
vision_agent/agent/__init__.py,sha256=iiC5eknTQnv87iSwAoHqBthJ3g2Zm6D0dWbYPDfuQ7A,245
|
3
3
|
vision_agent/agent/agent.py,sha256=TXh93MOwmArNRieOkYrhliq1rf7wIkhxvCdTiGhTqFs,538
|
4
|
-
vision_agent/agent/agent_coder.py,sha256=
|
4
|
+
vision_agent/agent/agent_coder.py,sha256=o66bpopKwo1kRDxiW6SujueMvLZ3NC5dawQKFrI3e_w,7171
|
5
5
|
vision_agent/agent/agent_coder_prompts.py,sha256=CJe3v7xvHQ32u3RQAXQga_Tk_4UgU64RBAMHZ3S70KY,5538
|
6
6
|
vision_agent/agent/data_interpreter.py,sha256=YCREEHWiyTYpKT8hibotylEkx1kF5AH0k9wnmymwPBY,15143
|
7
7
|
vision_agent/agent/data_interpreter_prompts.py,sha256=RDJggOfXwGaEoIcTYGX41ZEayCgYei1AootDOc_SN2g,6134
|
@@ -11,26 +11,26 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
|
|
11
11
|
vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
|
12
12
|
vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
|
13
13
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
14
|
-
vision_agent/agent/vision_agent.py,sha256=
|
15
|
-
vision_agent/agent/vision_agent_prompts.py,sha256=
|
14
|
+
vision_agent/agent/vision_agent.py,sha256=0_3hRcN4b0O52kDk8eq8TMIwBO2FA2XCC4JxAWK0ZEU,15482
|
15
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=0YbiS59IEWbiE43gCvOqfWrpudIAhTn8FHzXW0Y-Gaw,8201
|
16
16
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
18
18
|
vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
|
19
19
|
vision_agent/llm/llm.py,sha256=UZ73GqQHE-NKOJWsrOTWfmdHYsbCBkJ5rZ7dhcSCHHw,5951
|
20
20
|
vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
|
21
21
|
vision_agent/lmm/lmm.py,sha256=NwcZYLTzi95LSMAk0sTtw7G_zBLa9lU-DHM5GUUCiK4,10622
|
22
|
-
vision_agent/tools/__init__.py,sha256=
|
22
|
+
vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYvE,1512
|
23
23
|
vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
24
24
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
25
25
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
26
|
-
vision_agent/tools/tools.py,sha256=
|
26
|
+
vision_agent/tools/tools.py,sha256=nXjefpW9L-Xuos73ObDqpmJfOyUAJVrzoiHsxEE7O10,23346
|
27
27
|
vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
|
28
28
|
vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
|
29
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
30
30
|
vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
32
|
vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
36
|
-
vision_agent-0.2.
|
33
|
+
vision_agent-0.2.33.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.33.dist-info/METADATA,sha256=IAp4r_1YMih7p1kgZQ9rmjrRqD1vM0ManpXMYOMoLW4,6698
|
35
|
+
vision_agent-0.2.33.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.33.dist-info/RECORD,,
|
File without changes
|
File without changes
|