vision-agent 0.2.130__py3-none-any.whl → 0.2.132__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +46 -6
- vision_agent/agent/vision_agent_prompts.py +49 -14
- vision_agent/tools/meta_tools.py +79 -10
- vision_agent/tools/tools.py +1 -12
- {vision_agent-0.2.130.dist-info → vision_agent-0.2.132.dist-info}/METADATA +1 -1
- {vision_agent-0.2.130.dist-info → vision_agent-0.2.132.dist-info}/RECORD +8 -8
- {vision_agent-0.2.130.dist-info → vision_agent-0.2.132.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.130.dist-info → vision_agent-0.2.132.dist-info}/WHEEL +0 -0
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import tempfile
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
|
7
7
|
|
8
8
|
from vision_agent.agent import Agent
|
9
9
|
from vision_agent.agent.agent_utils import extract_json
|
@@ -13,8 +13,8 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
13
13
|
VA_CODE,
|
14
14
|
)
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
|
-
from vision_agent.tools import META_TOOL_DOCSTRING
|
17
|
-
from vision_agent.tools.meta_tools import Artifacts
|
16
|
+
from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
|
17
|
+
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
18
|
from vision_agent.utils import CodeInterpreterFactory
|
19
19
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
20
20
|
|
@@ -87,11 +87,18 @@ def run_code_action(
|
|
87
87
|
return result, obs
|
88
88
|
|
89
89
|
|
90
|
-
def parse_execution(
|
90
|
+
def parse_execution(
|
91
|
+
response: str,
|
92
|
+
test_multi_plan: bool = True,
|
93
|
+
customed_tool_names: Optional[List[str]] = None,
|
94
|
+
) -> Optional[str]:
|
91
95
|
code = None
|
92
96
|
if "<execute_python>" in response:
|
93
97
|
code = response[response.find("<execute_python>") + len("<execute_python>") :]
|
94
98
|
code = code[: code.find("</execute_python>")]
|
99
|
+
|
100
|
+
if code is not None:
|
101
|
+
code = use_extra_vision_agent_args(code, test_multi_plan, customed_tool_names)
|
95
102
|
return code
|
96
103
|
|
97
104
|
|
@@ -116,6 +123,7 @@ class VisionAgent(Agent):
|
|
116
123
|
verbosity: int = 0,
|
117
124
|
local_artifacts_path: Optional[Union[str, Path]] = None,
|
118
125
|
code_sandbox_runtime: Optional[str] = None,
|
126
|
+
callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
|
119
127
|
) -> None:
|
120
128
|
"""Initialize the VisionAgent.
|
121
129
|
|
@@ -134,6 +142,7 @@ class VisionAgent(Agent):
|
|
134
142
|
self.max_iterations = 100
|
135
143
|
self.verbosity = verbosity
|
136
144
|
self.code_sandbox_runtime = code_sandbox_runtime
|
145
|
+
self.callback_message = callback_message
|
137
146
|
if self.verbosity >= 1:
|
138
147
|
_LOGGER.setLevel(logging.INFO)
|
139
148
|
self.local_artifacts_path = cast(
|
@@ -174,6 +183,8 @@ class VisionAgent(Agent):
|
|
174
183
|
self,
|
175
184
|
chat: List[Message],
|
176
185
|
artifacts: Optional[Artifacts] = None,
|
186
|
+
test_multi_plan: bool = True,
|
187
|
+
customized_tool_names: Optional[List[str]] = None,
|
177
188
|
) -> Tuple[List[Message], Artifacts]:
|
178
189
|
"""Chat with VisionAgent, it will use code to execute actions to accomplish
|
179
190
|
its tasks.
|
@@ -184,6 +195,12 @@ class VisionAgent(Agent):
|
|
184
195
|
or if it contains media files, it should be in the format of:
|
185
196
|
[{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]
|
186
197
|
artifacts (Optional[Artifacts]): The artifacts to use in the task.
|
198
|
+
test_multi_plan (bool): If True, it will test tools for multiple plans and
|
199
|
+
pick the best one based off of the tool results. If False, it will go
|
200
|
+
with the first plan.
|
201
|
+
customized_tool_names (List[str]): A list of customized tools for agent to
|
202
|
+
pick and use. If not provided, default to full tool set from
|
203
|
+
vision_agent.tools.
|
187
204
|
|
188
205
|
Returns:
|
189
206
|
List[Message]: The conversation response.
|
@@ -205,7 +222,14 @@ class VisionAgent(Agent):
|
|
205
222
|
for chat_i in int_chat:
|
206
223
|
if "media" in chat_i:
|
207
224
|
for media in chat_i["media"]:
|
208
|
-
media
|
225
|
+
if type(media) is str and media.startswith(("http", "https")):
|
226
|
+
# TODO: Ideally we should not call VA.tools here; revisit how to better support remote images later.
|
227
|
+
file_path = Path(media).name
|
228
|
+
ndarray = load_image(media)
|
229
|
+
save_image(ndarray, file_path)
|
230
|
+
media = file_path
|
231
|
+
else:
|
232
|
+
media = cast(str, media)
|
209
233
|
artifacts.artifacts[Path(media).name] = open(media, "rb").read()
|
210
234
|
|
211
235
|
media_remote_path = (
|
@@ -247,6 +271,7 @@ class VisionAgent(Agent):
|
|
247
271
|
artifacts_loaded = artifacts.show()
|
248
272
|
int_chat.append({"role": "observation", "content": artifacts_loaded})
|
249
273
|
orig_chat.append({"role": "observation", "content": artifacts_loaded})
|
274
|
+
self.streaming_message({"role": "observation", "content": artifacts_loaded})
|
250
275
|
|
251
276
|
while not finished and iterations < self.max_iterations:
|
252
277
|
response = run_conversation(self.agent, int_chat)
|
@@ -259,10 +284,14 @@ class VisionAgent(Agent):
|
|
259
284
|
if last_response == response:
|
260
285
|
response["let_user_respond"] = True
|
261
286
|
|
287
|
+
self.streaming_message({"role": "assistant", "content": response})
|
288
|
+
|
262
289
|
if response["let_user_respond"]:
|
263
290
|
break
|
264
291
|
|
265
|
-
code_action = parse_execution(
|
292
|
+
code_action = parse_execution(
|
293
|
+
response["response"], test_multi_plan, customized_tool_names
|
294
|
+
)
|
266
295
|
|
267
296
|
if code_action is not None:
|
268
297
|
result, obs = run_code_action(
|
@@ -276,6 +305,13 @@ class VisionAgent(Agent):
|
|
276
305
|
orig_chat.append(
|
277
306
|
{"role": "observation", "content": obs, "execution": result}
|
278
307
|
)
|
308
|
+
self.streaming_message(
|
309
|
+
{
|
310
|
+
"role": "observation",
|
311
|
+
"content": obs,
|
312
|
+
"execution": result,
|
313
|
+
}
|
314
|
+
)
|
279
315
|
|
280
316
|
iterations += 1
|
281
317
|
last_response = response
|
@@ -288,5 +324,9 @@ class VisionAgent(Agent):
|
|
288
324
|
artifacts.save()
|
289
325
|
return orig_chat, artifacts
|
290
326
|
|
327
|
+
def streaming_message(self, message: Dict[str, Any]) -> None:
|
328
|
+
if self.callback_message:
|
329
|
+
self.callback_message(message)
|
330
|
+
|
291
331
|
def log_progress(self, data: Dict[str, Any]) -> None:
|
292
332
|
pass
|
@@ -1,5 +1,5 @@
|
|
1
1
|
VA_CODE = """
|
2
|
-
**Role**: You are a helpful
|
2
|
+
**Role**: You are a helpful agent that assists users with writing code.
|
3
3
|
|
4
4
|
**Task**: As a conversational agent, you are required to understand the user's request and provide a helpful response. Use a Chain-of-Thought approach to break down the problem, create a plan, and then provide a response. Ensure that your response is clear, concise, and helpful. You can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_python>. You are given access to an `artifacts` object which contains files shared between you and the user. `artifacts` will be automatically saved every time you execute python code.
|
5
5
|
|
@@ -56,7 +56,9 @@ OBSERVATION:
|
|
56
56
|
|
57
57
|
|
58
58
|
AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect dogs and shown the output, do the results look good to you?", "let_user_respond": true}
|
59
|
+
"""
|
59
60
|
|
61
|
+
EXAMPLES_CODE1_EXTRA = """
|
60
62
|
USER: The image only has one dog, can you fix this?
|
61
63
|
|
62
64
|
[Artifacts loaded]
|
@@ -105,25 +107,24 @@ AGENT: {"thoughts": "I will use the generate_vision_code to count the workers wi
|
|
105
107
|
|
106
108
|
OBSERVATION:
|
107
109
|
[Artifact code.py]
|
108
|
-
0|from vision_agent.tools import load_image,
|
110
|
+
0|from vision_agent.tools import load_image, florence2_phrase_grounding, closest_box_distance, overlay_bounding_boxes, save_image
|
109
111
|
1|def count_workers_with_helmets(image_path: str, output_path: str):
|
110
112
|
2| image = load_image(image_path)
|
111
|
-
3| detections =
|
113
|
+
3| detections = florence2_phrase_grounding("worker, helmet", image)
|
112
114
|
4| workers = [d for d in detections if d['label'] == 'worker']
|
113
115
|
5| helmets = [d for d in detections if d['label'] == 'helmet']
|
114
116
|
6| count = 0
|
115
117
|
6| for worker in workers:
|
116
|
-
7|
|
117
|
-
8|
|
118
|
-
9|
|
119
|
-
10|
|
120
|
-
11|
|
121
|
-
|
122
|
-
13|
|
123
|
-
14|
|
124
|
-
15|
|
125
|
-
16|
|
126
|
-
15| return count
|
118
|
+
7| person_has_helmet = False
|
119
|
+
8| for helmet in helmets:
|
120
|
+
9| if closest_box_distance(worker['box'], helmet['box']) < 0.01:
|
121
|
+
10| person_has_helmet = True
|
122
|
+
11| break
|
123
|
+
11| if person_has_helmet:
|
124
|
+
13| count += 1
|
125
|
+
14| overlay_bounding_boxes(image, detections)
|
126
|
+
15| save_image(image, output_path)
|
127
|
+
16| return count
|
127
128
|
[End of artifact]
|
128
129
|
|
129
130
|
AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visualization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
@@ -133,4 +134,38 @@ OBSERVATION:
|
|
133
134
|
2
|
134
135
|
|
135
136
|
AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.", "let_user_respond": true}
|
137
|
+
|
138
|
+
USER: The detections are slightly off. Can you fine tune florence2 using these labels? "[{'image_path': 'image1.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}]"
|
139
|
+
|
140
|
+
AGENT: {"thoughts": "Because the user has supplied me with labels I can call florence2_fine_tuning on their behalf to fine tune the model", "response": "I will fine tune florence2 with the labels you provided <execute_python>florence2_fine_tuning([{'image_path': 'image1.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}], "phrase_grounding")</execute_python>", "let_user_respond": false}
|
141
|
+
|
142
|
+
OBSERVATION:
|
143
|
+
[Florence2 fine tuning id: 23b3b022-5ebf-4798-9373-20ef36429abf]
|
144
|
+
|
145
|
+
|
146
|
+
AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "I will now update the code to use the fine tuned model. <execute_python>use_florence2_fine_tuning(artifacts, "code.py", "phrase_grounding", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
|
147
|
+
|
148
|
+
OBSERVATION:
|
149
|
+
[Artifact code.py edits]
|
150
|
+
---
|
151
|
+
+++
|
152
|
+
@@ -1,7 +1,7 @@
|
153
|
+
from vision_agent.tools import load_image, florence2_phrase_grounding, closest_box_distance, overlay_bounding_boxes, save_image
|
154
|
+
def count_workers_with_helmets(image_path: str, output_path: str):
|
155
|
+
image = load_image(image_path)
|
156
|
+
- detections = florence2_phrase_grounding("worker, helmet", image)
|
157
|
+
+ detections = florence2_phrase_grounding("worker, helmet", image, "23b3b022-5ebf-4798-9373-20ef36429abf")
|
158
|
+
workers = [d for d in detections if d['label'] == 'worker']
|
159
|
+
helmets = [d for d in detections if d['label'] == 'helmet']
|
160
|
+
count = 0
|
161
|
+
[End of edits]
|
162
|
+
|
163
|
+
|
164
|
+
AGENT: {"thoughts": "I have updated the code to use the fine tuned model, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visualization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
165
|
+
|
166
|
+
OBSERVATION:
|
167
|
+
----- stdout -----
|
168
|
+
3
|
169
|
+
|
170
|
+
AGENT: {"thoughts": "Three workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "I have updated the code using the fine tuned florence2_phrase_grounding model to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.", "let_user_respond": true}
|
136
171
|
"""
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -297,7 +297,12 @@ def edit_code_artifact(
|
|
297
297
|
|
298
298
|
|
299
299
|
def generate_vision_code(
|
300
|
-
artifacts: Artifacts,
|
300
|
+
artifacts: Artifacts,
|
301
|
+
name: str,
|
302
|
+
chat: str,
|
303
|
+
media: List[str],
|
304
|
+
test_multi_plan: bool = True,
|
305
|
+
customized_tool_names: Optional[List[str]] = None,
|
301
306
|
) -> str:
|
302
307
|
"""Generates python code to solve vision based tasks.
|
303
308
|
|
@@ -306,6 +311,8 @@ def generate_vision_code(
|
|
306
311
|
name (str): The name of the artifact to save the code to.
|
307
312
|
chat (str): The chat message from the user.
|
308
313
|
media (List[str]): The media files to use.
|
314
|
+
test_multi_plan (bool): Do not change this parameter.
|
315
|
+
customized_tool_names (Optional[List[str]]): Do not change this parameter.
|
309
316
|
|
310
317
|
Returns:
|
311
318
|
str: The generated code.
|
@@ -330,7 +337,11 @@ def generate_vision_code(
|
|
330
337
|
agent = va.agent.VisionAgentCoder()
|
331
338
|
|
332
339
|
fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
|
333
|
-
response = agent.chat_with_workflow(
|
340
|
+
response = agent.chat_with_workflow(
|
341
|
+
fixed_chat,
|
342
|
+
test_multi_plan=test_multi_plan,
|
343
|
+
customized_tool_names=customized_tool_names,
|
344
|
+
)
|
334
345
|
redisplay_results(response["test_result"])
|
335
346
|
code = response["code"]
|
336
347
|
artifacts[name] = code
|
@@ -342,7 +353,11 @@ def generate_vision_code(
|
|
342
353
|
|
343
354
|
|
344
355
|
def edit_vision_code(
|
345
|
-
artifacts: Artifacts,
|
356
|
+
artifacts: Artifacts,
|
357
|
+
name: str,
|
358
|
+
chat_history: List[str],
|
359
|
+
media: List[str],
|
360
|
+
customized_tool_names: Optional[List[str]] = None,
|
346
361
|
) -> str:
|
347
362
|
"""Edits python code to solve a vision based task.
|
348
363
|
|
@@ -350,6 +365,7 @@ def edit_vision_code(
|
|
350
365
|
artifacts (Artifacts): The artifacts object to save the code to.
|
351
366
|
name (str): The file path to the code.
|
352
367
|
chat_history (List[str]): The chat history used to generate the code.
|
368
|
+
customized_tool_names (Optional[List[str]]): Do not change this parameter.
|
353
369
|
|
354
370
|
Returns:
|
355
371
|
str: The edited code.
|
@@ -386,7 +402,11 @@ def edit_vision_code(
|
|
386
402
|
fixed_chat_history.append({"role": "assistant", "content": code})
|
387
403
|
fixed_chat_history.append({"role": "user", "content": chat})
|
388
404
|
|
389
|
-
response = agent.chat_with_workflow(
|
405
|
+
response = agent.chat_with_workflow(
|
406
|
+
fixed_chat_history,
|
407
|
+
test_multi_plan=False,
|
408
|
+
customized_tool_names=customized_tool_names,
|
409
|
+
)
|
390
410
|
redisplay_results(response["test_result"])
|
391
411
|
code = response["code"]
|
392
412
|
artifacts[name] = code
|
@@ -425,18 +445,19 @@ def get_tool_descriptions() -> str:
|
|
425
445
|
|
426
446
|
|
427
447
|
def florence2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> str:
|
428
|
-
"""
|
448
|
+
"""DO NOT use this function unless the user has supplied you with bboxes.
|
449
|
+
'florence2_fine_tuning' is a tool that fine-tunes florence2 to be able to detect
|
429
450
|
objects in an image based on a given dataset. It returns the fine tuning job id.
|
430
451
|
|
431
452
|
Parameters:
|
432
|
-
bboxes (List[BboxInput]): A list of BboxInput containing the
|
433
|
-
|
453
|
+
bboxes (List[BboxInput]): A list of BboxInput containing the image path, labels
|
454
|
+
and bounding boxes. The coordinates are unnormalized.
|
434
455
|
task (str): The florencev2 fine-tuning task. The options are
|
435
456
|
'phrase_grounding'.
|
436
457
|
|
437
458
|
Returns:
|
438
|
-
|
439
|
-
|
459
|
+
str: The fine tuning job id, this id will be used to retrieve the fine tuned
|
460
|
+
model.
|
440
461
|
|
441
462
|
Example
|
442
463
|
-------
|
@@ -473,6 +494,54 @@ def get_diff(before: str, after: str) -> str:
|
|
473
494
|
)
|
474
495
|
|
475
496
|
|
497
|
+
def get_diff_with_prompts(name: str, before: str, after: str) -> str:
|
498
|
+
diff = get_diff(before, after)
|
499
|
+
return f"[Artifact {name} edits]\n{diff}\n[End of edits]"
|
500
|
+
|
501
|
+
|
502
|
+
def use_extra_vision_agent_args(
|
503
|
+
code: str,
|
504
|
+
test_multi_plan: bool = True,
|
505
|
+
customized_tool_names: Optional[List[str]] = None,
|
506
|
+
) -> str:
|
507
|
+
"""This is for forcing arguments passed by the user to VisionAgent into the
|
508
|
+
VisionAgentCoder call.
|
509
|
+
|
510
|
+
Parameters:
|
511
|
+
code (str): The code to edit.
|
512
|
+
test_multi_plan (bool): Do not change this parameter.
|
513
|
+
customized_tool_names (Optional[List[str]]): Do not change this parameter.
|
514
|
+
|
515
|
+
Returns:
|
516
|
+
str: The edited code.
|
517
|
+
"""
|
518
|
+
generate_pattern = r"generate_vision_code\(\s*([^\)]+)\)"
|
519
|
+
|
520
|
+
def generate_replacer(match: re.Match) -> str:
|
521
|
+
arg = match.group(1)
|
522
|
+
out_str = f"generate_vision_code({arg}, test_multi_plan={test_multi_plan}"
|
523
|
+
if customized_tool_names is not None:
|
524
|
+
out_str += f", customized_tool_names={customized_tool_names})"
|
525
|
+
else:
|
526
|
+
out_str += ")"
|
527
|
+
return out_str
|
528
|
+
|
529
|
+
edit_pattern = r"edit_vision_code\(\s*([^\)]+)\)"
|
530
|
+
|
531
|
+
def edit_replacer(match: re.Match) -> str:
|
532
|
+
arg = match.group(1)
|
533
|
+
out_str = f"edit_vision_code({arg}"
|
534
|
+
if customized_tool_names is not None:
|
535
|
+
out_str += f", customized_tool_names={customized_tool_names})"
|
536
|
+
else:
|
537
|
+
out_str += ")"
|
538
|
+
return out_str
|
539
|
+
|
540
|
+
new_code = re.sub(generate_pattern, generate_replacer, code)
|
541
|
+
new_code = re.sub(edit_pattern, edit_replacer, new_code)
|
542
|
+
return new_code
|
543
|
+
|
544
|
+
|
476
545
|
def use_florence2_fine_tuning(
|
477
546
|
artifacts: Artifacts, name: str, task: str, fine_tune_id: str
|
478
547
|
) -> str:
|
@@ -521,7 +590,7 @@ def use_florence2_fine_tuning(
|
|
521
590
|
|
522
591
|
artifacts[name] = new_code
|
523
592
|
|
524
|
-
diff =
|
593
|
+
diff = get_diff_with_prompts(name, code, new_code)
|
525
594
|
print(diff)
|
526
595
|
return diff
|
527
596
|
|
vision_agent/tools/tools.py
CHANGED
@@ -1945,15 +1945,4 @@ TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
|
1945
1945
|
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
1946
1946
|
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
1947
1947
|
TOOLS_INFO = get_tools_info(FUNCTION_TOOLS) # type: ignore
|
1948
|
-
UTILITIES_DOCSTRING = get_tool_documentation(
|
1949
|
-
[
|
1950
|
-
save_json,
|
1951
|
-
load_image,
|
1952
|
-
save_image,
|
1953
|
-
save_video,
|
1954
|
-
overlay_bounding_boxes,
|
1955
|
-
overlay_segmentation_masks,
|
1956
|
-
overlay_heat_map,
|
1957
|
-
overlay_counting_results,
|
1958
|
-
]
|
1959
|
-
)
|
1948
|
+
UTILITIES_DOCSTRING = get_tool_documentation(UTIL_TOOLS) # type: ignore
|
@@ -2,10 +2,10 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=nfxdY5W5UME7JhwFcsB3j2-L5zsYZzJWdlS2R8U_9lE,13224
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=_2QQd_nTGojkk2ZOiMevVCY6-eUA9q1QdCWH7-Noq4w,34237
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=nj4iRRSAWYHjKqyUSp12aTCV1D5iUVCHeezVXoozS4M,12687
|
8
|
-
vision_agent/agent/vision_agent_prompts.py,sha256
|
8
|
+
vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
|
9
9
|
vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
|
11
11
|
vision_agent/clients/landing_public_api.py,sha256=lU2ev6E8NICmR8DMUljuGcVFy5VNJQ4WQkWC8WnnJEc,1503
|
@@ -15,10 +15,10 @@ vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,
|
|
15
15
|
vision_agent/lmm/lmm.py,sha256=092oefI65_QSRvQm2znXkjTdzlZTh-Ni_38610kfbJg,16836
|
16
16
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
17
|
vision_agent/tools/__init__.py,sha256=nx60_hujcnLz3d2wQlCbcerUmT6R2vxRy66IsQjdB3M,2364
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=qbf_dzVmhf4zhv-xY1zaqRFshDlvj_7ilFQtSr70hdQ,21213
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=PjdataKjPpiFSq1QBAAWHJUGPPn4p4dr07TPSlhXvFk,7758
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=ywfolLhf8OWnavXTTEYscUvOUM0ECNy-ff3WLMdUhhQ,68465
|
22
22
|
vision_agent/tools/tools_types.py,sha256=rLpCUODPY0yI65SLOTJOxfHFfqWM3WjOq-AYX25Chjk,2356
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=GmJqu_3WhBMEwP4HToMMp8EwgftliHSpv5nd-QEDOcs,4528
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.132.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.132.dist-info/METADATA,sha256=s0AXiV6qjDjTUrzFqHL-50QJ6r7sxlJrwkSKNIGgklc,12295
|
32
|
+
vision_agent-0.2.132.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.132.dist-info/RECORD,,
|
File without changes
|
File without changes
|