vision-agent 0.2.130__tar.gz → 0.2.132__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.130 → vision_agent-0.2.132}/PKG-INFO +1 -1
- {vision_agent-0.2.130 → vision_agent-0.2.132}/pyproject.toml +1 -1
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/vision_agent.py +46 -6
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_prompts.py +49 -14
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/tools/meta_tools.py +79 -10
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/tools/tools.py +1 -12
- {vision_agent-0.2.130 → vision_agent-0.2.132}/LICENSE +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/README.md +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/utils/video.py +0 -0
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import tempfile
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
|
7
7
|
|
8
8
|
from vision_agent.agent import Agent
|
9
9
|
from vision_agent.agent.agent_utils import extract_json
|
@@ -13,8 +13,8 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
13
13
|
VA_CODE,
|
14
14
|
)
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
|
-
from vision_agent.tools import META_TOOL_DOCSTRING
|
17
|
-
from vision_agent.tools.meta_tools import Artifacts
|
16
|
+
from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
|
17
|
+
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
18
|
from vision_agent.utils import CodeInterpreterFactory
|
19
19
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
20
20
|
|
@@ -87,11 +87,18 @@ def run_code_action(
|
|
87
87
|
return result, obs
|
88
88
|
|
89
89
|
|
90
|
-
def parse_execution(
|
90
|
+
def parse_execution(
|
91
|
+
response: str,
|
92
|
+
test_multi_plan: bool = True,
|
93
|
+
customed_tool_names: Optional[List[str]] = None,
|
94
|
+
) -> Optional[str]:
|
91
95
|
code = None
|
92
96
|
if "<execute_python>" in response:
|
93
97
|
code = response[response.find("<execute_python>") + len("<execute_python>") :]
|
94
98
|
code = code[: code.find("</execute_python>")]
|
99
|
+
|
100
|
+
if code is not None:
|
101
|
+
code = use_extra_vision_agent_args(code, test_multi_plan, customed_tool_names)
|
95
102
|
return code
|
96
103
|
|
97
104
|
|
@@ -116,6 +123,7 @@ class VisionAgent(Agent):
|
|
116
123
|
verbosity: int = 0,
|
117
124
|
local_artifacts_path: Optional[Union[str, Path]] = None,
|
118
125
|
code_sandbox_runtime: Optional[str] = None,
|
126
|
+
callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
|
119
127
|
) -> None:
|
120
128
|
"""Initialize the VisionAgent.
|
121
129
|
|
@@ -134,6 +142,7 @@ class VisionAgent(Agent):
|
|
134
142
|
self.max_iterations = 100
|
135
143
|
self.verbosity = verbosity
|
136
144
|
self.code_sandbox_runtime = code_sandbox_runtime
|
145
|
+
self.callback_message = callback_message
|
137
146
|
if self.verbosity >= 1:
|
138
147
|
_LOGGER.setLevel(logging.INFO)
|
139
148
|
self.local_artifacts_path = cast(
|
@@ -174,6 +183,8 @@ class VisionAgent(Agent):
|
|
174
183
|
self,
|
175
184
|
chat: List[Message],
|
176
185
|
artifacts: Optional[Artifacts] = None,
|
186
|
+
test_multi_plan: bool = True,
|
187
|
+
customized_tool_names: Optional[List[str]] = None,
|
177
188
|
) -> Tuple[List[Message], Artifacts]:
|
178
189
|
"""Chat with VisionAgent, it will use code to execute actions to accomplish
|
179
190
|
its tasks.
|
@@ -184,6 +195,12 @@ class VisionAgent(Agent):
|
|
184
195
|
or if it contains media files, it should be in the format of:
|
185
196
|
[{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]
|
186
197
|
artifacts (Optional[Artifacts]): The artifacts to use in the task.
|
198
|
+
test_multi_plan (bool): If True, it will test tools for multiple plans and
|
199
|
+
pick the best one based off of the tool results. If False, it will go
|
200
|
+
with the first plan.
|
201
|
+
customized_tool_names (List[str]): A list of customized tools for agent to
|
202
|
+
pick and use. If not provided, default to full tool set from
|
203
|
+
vision_agent.tools.
|
187
204
|
|
188
205
|
Returns:
|
189
206
|
List[Message]: The conversation response.
|
@@ -205,7 +222,14 @@ class VisionAgent(Agent):
|
|
205
222
|
for chat_i in int_chat:
|
206
223
|
if "media" in chat_i:
|
207
224
|
for media in chat_i["media"]:
|
208
|
-
media
|
225
|
+
if type(media) is str and media.startswith(("http", "https")):
|
226
|
+
# TODO: Ideally we should not call VA.tools here; revisit how to better support remote images later.
|
227
|
+
file_path = Path(media).name
|
228
|
+
ndarray = load_image(media)
|
229
|
+
save_image(ndarray, file_path)
|
230
|
+
media = file_path
|
231
|
+
else:
|
232
|
+
media = cast(str, media)
|
209
233
|
artifacts.artifacts[Path(media).name] = open(media, "rb").read()
|
210
234
|
|
211
235
|
media_remote_path = (
|
@@ -247,6 +271,7 @@ class VisionAgent(Agent):
|
|
247
271
|
artifacts_loaded = artifacts.show()
|
248
272
|
int_chat.append({"role": "observation", "content": artifacts_loaded})
|
249
273
|
orig_chat.append({"role": "observation", "content": artifacts_loaded})
|
274
|
+
self.streaming_message({"role": "observation", "content": artifacts_loaded})
|
250
275
|
|
251
276
|
while not finished and iterations < self.max_iterations:
|
252
277
|
response = run_conversation(self.agent, int_chat)
|
@@ -259,10 +284,14 @@ class VisionAgent(Agent):
|
|
259
284
|
if last_response == response:
|
260
285
|
response["let_user_respond"] = True
|
261
286
|
|
287
|
+
self.streaming_message({"role": "assistant", "content": response})
|
288
|
+
|
262
289
|
if response["let_user_respond"]:
|
263
290
|
break
|
264
291
|
|
265
|
-
code_action = parse_execution(
|
292
|
+
code_action = parse_execution(
|
293
|
+
response["response"], test_multi_plan, customized_tool_names
|
294
|
+
)
|
266
295
|
|
267
296
|
if code_action is not None:
|
268
297
|
result, obs = run_code_action(
|
@@ -276,6 +305,13 @@ class VisionAgent(Agent):
|
|
276
305
|
orig_chat.append(
|
277
306
|
{"role": "observation", "content": obs, "execution": result}
|
278
307
|
)
|
308
|
+
self.streaming_message(
|
309
|
+
{
|
310
|
+
"role": "observation",
|
311
|
+
"content": obs,
|
312
|
+
"execution": result,
|
313
|
+
}
|
314
|
+
)
|
279
315
|
|
280
316
|
iterations += 1
|
281
317
|
last_response = response
|
@@ -288,5 +324,9 @@ class VisionAgent(Agent):
|
|
288
324
|
artifacts.save()
|
289
325
|
return orig_chat, artifacts
|
290
326
|
|
327
|
+
def streaming_message(self, message: Dict[str, Any]) -> None:
|
328
|
+
if self.callback_message:
|
329
|
+
self.callback_message(message)
|
330
|
+
|
291
331
|
def log_progress(self, data: Dict[str, Any]) -> None:
|
292
332
|
pass
|
@@ -1,5 +1,5 @@
|
|
1
1
|
VA_CODE = """
|
2
|
-
**Role**: You are a helpful
|
2
|
+
**Role**: You are a helpful agent that assists users with writing code.
|
3
3
|
|
4
4
|
**Task**: As a conversational agent, you are required to understand the user's request and provide a helpful response. Use a Chain-of-Thought approach to break down the problem, create a plan, and then provide a response. Ensure that your response is clear, concise, and helpful. You can use an interactive Python (Jupyter Notebook) environment, executing code with <execute_python>. You are given access to an `artifacts` object which contains files shared between you and the user. `artifacts` will be automatically saved every time you execute Python code.
|
5
5
|
|
@@ -56,7 +56,9 @@ OBSERVATION:
|
|
56
56
|
|
57
57
|
|
58
58
|
AGENT: {"thoughts": "Two dogs are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to detect dogs and shown the output, do the results look good to you?", "let_user_respond": true}
|
59
|
+
"""
|
59
60
|
|
61
|
+
EXAMPLES_CODE1_EXTRA = """
|
60
62
|
USER: The image only has one dog, can you fix this?
|
61
63
|
|
62
64
|
[Artifacts loaded]
|
@@ -105,25 +107,24 @@ AGENT: {"thoughts": "I will use the generate_vision_code to count the workers wi
|
|
105
107
|
|
106
108
|
OBSERVATION:
|
107
109
|
[Artifact code.py]
|
108
|
-
0|from vision_agent.tools import load_image,
|
110
|
+
0|from vision_agent.tools import load_image, florence2_phrase_grounding, closest_box_distance, overlay_bounding_boxes, save_image
|
109
111
|
1|def count_workers_with_helmets(image_path: str, output_path: str):
|
110
112
|
2| image = load_image(image_path)
|
111
|
-
3| detections =
|
113
|
+
3| detections = florence2_phrase_grounding("worker, helmet", image)
|
112
114
|
4| workers = [d for d in detections if d['label'] == 'worker']
|
113
115
|
5| helmets = [d for d in detections if d['label'] == 'helmet']
|
114
116
|
6| count = 0
|
115
117
|
6| for worker in workers:
|
116
|
-
7|
|
117
|
-
8|
|
118
|
-
9|
|
119
|
-
10|
|
120
|
-
11|
|
121
|
-
|
122
|
-
13|
|
123
|
-
14|
|
124
|
-
15|
|
125
|
-
16|
|
126
|
-
15| return count
|
118
|
+
7| person_has_helmet = False
|
119
|
+
8| for helmet in helmets:
|
120
|
+
9| if closest_box_distance(worker['box'], helmet['box']) < 0.01:
|
121
|
+
10| person_has_helmet = True
|
122
|
+
11| break
|
123
|
+
11| if person_has_helmet:
|
124
|
+
13| count += 1
|
125
|
+
14| overlay_bounding_boxes(image, detections)
|
126
|
+
15| save_image(image, output_path)
|
127
|
+
16| return count
|
127
128
|
[End of artifact]
|
128
129
|
|
129
130
|
AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visualization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
@@ -133,4 +134,38 @@ OBSERVATION:
|
|
133
134
|
2
|
134
135
|
|
135
136
|
AGENT: {"thoughts": "Two workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "I have written the code to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.", "let_user_respond": true}
|
137
|
+
|
138
|
+
USER: The detections are slightly off. Can you fine tune florence2 using these labels? "[{'image_path': 'image1.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}]"
|
139
|
+
|
140
|
+
AGENT: {"thoughts": "Because the user has supplied me with labels I can call florence2_fine_tuning on their behalf to fine tune the model", "response": "I will fine tune florence2 with the labels you provided <execute_python>florence2_fine_tuning([{'image_path': 'image1.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}, {'image_path': 'image2.png', 'labels': ['worker', 'helmet'], 'bboxes': [[235, 118, 294, 241], [232, 118, 294, 128]]}], "phrase_grounding")</execute_python>", "let_user_respond": false}
|
141
|
+
|
142
|
+
OBSERVATION:
|
143
|
+
[Florence2 fine tuning id: 23b3b022-5ebf-4798-9373-20ef36429abf]
|
144
|
+
|
145
|
+
|
146
|
+
AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "I will now update the code to use the fine tuned model. <execute_python>use_florence2_fine_tuning(artifacts, "code.py", "phrase_grounding", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
|
147
|
+
|
148
|
+
OBSERVATION:
|
149
|
+
[Artifact code.py edits]
|
150
|
+
---
|
151
|
+
+++
|
152
|
+
@@ -1,7 +1,7 @@
|
153
|
+
from vision_agent.tools import load_image, florence2_phrase_grounding, closest_box_distance, overlay_bounding_boxes, save_image
|
154
|
+
def count_workers_with_helmets(image_path: str, output_path: str):
|
155
|
+
image = load_image(image_path)
|
156
|
+
- detections = florence2_phrase_grounding("worker, helmet", image)
|
157
|
+
+ detections = florence2_phrase_grounding("worker, helmet", image, "23b3b022-5ebf-4798-9373-20ef36429abf")
|
158
|
+
workers = [d for d in detections if d['label'] == 'worker']
|
159
|
+
helmets = [d for d in detections if d['label'] == 'helmet']
|
160
|
+
count = 0
|
161
|
+
[End of edits]
|
162
|
+
|
163
|
+
|
164
|
+
AGENT: {"thoughts": "I have updated the code to use the fine tuned model, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visualization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
165
|
+
|
166
|
+
OBSERVATION:
|
167
|
+
----- stdout -----
|
168
|
+
3
|
169
|
+
|
170
|
+
AGENT: {"thoughts": "Three workers with helmets are detected, I will show this to the user and ask them if the result looks good.", "response": "I have updated the code using the fine tuned florence2_phrase_grounding model to count the workers wearing helmets in code.py and saved the visualization under 'workers_viz.png'.", "let_user_respond": true}
|
136
171
|
"""
|
@@ -297,7 +297,12 @@ def edit_code_artifact(
|
|
297
297
|
|
298
298
|
|
299
299
|
def generate_vision_code(
|
300
|
-
artifacts: Artifacts,
|
300
|
+
artifacts: Artifacts,
|
301
|
+
name: str,
|
302
|
+
chat: str,
|
303
|
+
media: List[str],
|
304
|
+
test_multi_plan: bool = True,
|
305
|
+
customized_tool_names: Optional[List[str]] = None,
|
301
306
|
) -> str:
|
302
307
|
"""Generates python code to solve vision based tasks.
|
303
308
|
|
@@ -306,6 +311,8 @@ def generate_vision_code(
|
|
306
311
|
name (str): The name of the artifact to save the code to.
|
307
312
|
chat (str): The chat message from the user.
|
308
313
|
media (List[str]): The media files to use.
|
314
|
+
test_multi_plan (bool): Do not change this parameter.
|
315
|
+
customized_tool_names (Optional[List[str]]): Do not change this parameter.
|
309
316
|
|
310
317
|
Returns:
|
311
318
|
str: The generated code.
|
@@ -330,7 +337,11 @@ def generate_vision_code(
|
|
330
337
|
agent = va.agent.VisionAgentCoder()
|
331
338
|
|
332
339
|
fixed_chat: List[Message] = [{"role": "user", "content": chat, "media": media}]
|
333
|
-
response = agent.chat_with_workflow(
|
340
|
+
response = agent.chat_with_workflow(
|
341
|
+
fixed_chat,
|
342
|
+
test_multi_plan=test_multi_plan,
|
343
|
+
customized_tool_names=customized_tool_names,
|
344
|
+
)
|
334
345
|
redisplay_results(response["test_result"])
|
335
346
|
code = response["code"]
|
336
347
|
artifacts[name] = code
|
@@ -342,7 +353,11 @@ def generate_vision_code(
|
|
342
353
|
|
343
354
|
|
344
355
|
def edit_vision_code(
|
345
|
-
artifacts: Artifacts,
|
356
|
+
artifacts: Artifacts,
|
357
|
+
name: str,
|
358
|
+
chat_history: List[str],
|
359
|
+
media: List[str],
|
360
|
+
customized_tool_names: Optional[List[str]] = None,
|
346
361
|
) -> str:
|
347
362
|
"""Edits python code to solve a vision based task.
|
348
363
|
|
@@ -350,6 +365,7 @@ def edit_vision_code(
|
|
350
365
|
artifacts (Artifacts): The artifacts object to save the code to.
|
351
366
|
name (str): The file path to the code.
|
352
367
|
chat_history (List[str]): The chat history used to generate the code.
|
368
|
+
customized_tool_names (Optional[List[str]]): Do not change this parameter.
|
353
369
|
|
354
370
|
Returns:
|
355
371
|
str: The edited code.
|
@@ -386,7 +402,11 @@ def edit_vision_code(
|
|
386
402
|
fixed_chat_history.append({"role": "assistant", "content": code})
|
387
403
|
fixed_chat_history.append({"role": "user", "content": chat})
|
388
404
|
|
389
|
-
response = agent.chat_with_workflow(
|
405
|
+
response = agent.chat_with_workflow(
|
406
|
+
fixed_chat_history,
|
407
|
+
test_multi_plan=False,
|
408
|
+
customized_tool_names=customized_tool_names,
|
409
|
+
)
|
390
410
|
redisplay_results(response["test_result"])
|
391
411
|
code = response["code"]
|
392
412
|
artifacts[name] = code
|
@@ -425,18 +445,19 @@ def get_tool_descriptions() -> str:
|
|
425
445
|
|
426
446
|
|
427
447
|
def florence2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> str:
|
428
|
-
"""
|
448
|
+
"""DO NOT use this function unless the user has supplied you with bboxes.
|
449
|
+
'florence2_fine_tuning' is a tool that fine-tunes florence2 to be able to detect
|
429
450
|
objects in an image based on a given dataset. It returns the fine tuning job id.
|
430
451
|
|
431
452
|
Parameters:
|
432
|
-
bboxes (List[BboxInput]): A list of BboxInput containing the
|
433
|
-
|
453
|
+
bboxes (List[BboxInput]): A list of BboxInput containing the image path, labels
|
454
|
+
and bounding boxes. The coordinates are unnormalized.
|
434
455
|
task (str): The florencev2 fine-tuning task. The options are
|
435
456
|
'phrase_grounding'.
|
436
457
|
|
437
458
|
Returns:
|
438
|
-
|
439
|
-
|
459
|
+
str: The fine tuning job id, this id will be used to retrieve the fine tuned
|
460
|
+
model.
|
440
461
|
|
441
462
|
Example
|
442
463
|
-------
|
@@ -473,6 +494,54 @@ def get_diff(before: str, after: str) -> str:
|
|
473
494
|
)
|
474
495
|
|
475
496
|
|
497
|
+
def get_diff_with_prompts(name: str, before: str, after: str) -> str:
|
498
|
+
diff = get_diff(before, after)
|
499
|
+
return f"[Artifact {name} edits]\n{diff}\n[End of edits]"
|
500
|
+
|
501
|
+
|
502
|
+
def use_extra_vision_agent_args(
|
503
|
+
code: str,
|
504
|
+
test_multi_plan: bool = True,
|
505
|
+
customized_tool_names: Optional[List[str]] = None,
|
506
|
+
) -> str:
|
507
|
+
"""This is for forcing arguments passed by the user to VisionAgent into the
|
508
|
+
VisionAgentCoder call.
|
509
|
+
|
510
|
+
Parameters:
|
511
|
+
code (str): The code to edit.
|
512
|
+
test_multi_plan (bool): Do not change this parameter.
|
513
|
+
customized_tool_names (Optional[List[str]]): Do not change this parameter.
|
514
|
+
|
515
|
+
Returns:
|
516
|
+
str: The edited code.
|
517
|
+
"""
|
518
|
+
generate_pattern = r"generate_vision_code\(\s*([^\)]+)\)"
|
519
|
+
|
520
|
+
def generate_replacer(match: re.Match) -> str:
|
521
|
+
arg = match.group(1)
|
522
|
+
out_str = f"generate_vision_code({arg}, test_multi_plan={test_multi_plan}"
|
523
|
+
if customized_tool_names is not None:
|
524
|
+
out_str += f", customized_tool_names={customized_tool_names})"
|
525
|
+
else:
|
526
|
+
out_str += ")"
|
527
|
+
return out_str
|
528
|
+
|
529
|
+
edit_pattern = r"edit_vision_code\(\s*([^\)]+)\)"
|
530
|
+
|
531
|
+
def edit_replacer(match: re.Match) -> str:
|
532
|
+
arg = match.group(1)
|
533
|
+
out_str = f"edit_vision_code({arg}"
|
534
|
+
if customized_tool_names is not None:
|
535
|
+
out_str += f", customized_tool_names={customized_tool_names})"
|
536
|
+
else:
|
537
|
+
out_str += ")"
|
538
|
+
return out_str
|
539
|
+
|
540
|
+
new_code = re.sub(generate_pattern, generate_replacer, code)
|
541
|
+
new_code = re.sub(edit_pattern, edit_replacer, new_code)
|
542
|
+
return new_code
|
543
|
+
|
544
|
+
|
476
545
|
def use_florence2_fine_tuning(
|
477
546
|
artifacts: Artifacts, name: str, task: str, fine_tune_id: str
|
478
547
|
) -> str:
|
@@ -521,7 +590,7 @@ def use_florence2_fine_tuning(
|
|
521
590
|
|
522
591
|
artifacts[name] = new_code
|
523
592
|
|
524
|
-
diff =
|
593
|
+
diff = get_diff_with_prompts(name, code, new_code)
|
525
594
|
print(diff)
|
526
595
|
return diff
|
527
596
|
|
@@ -1945,15 +1945,4 @@ TOOLS_DF = get_tools_df(TOOLS) # type: ignore
|
|
1945
1945
|
TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS) # type: ignore
|
1946
1946
|
TOOL_DOCSTRING = get_tool_documentation(TOOLS) # type: ignore
|
1947
1947
|
TOOLS_INFO = get_tools_info(FUNCTION_TOOLS) # type: ignore
|
1948
|
-
UTILITIES_DOCSTRING = get_tool_documentation(
|
1949
|
-
[
|
1950
|
-
save_json,
|
1951
|
-
load_image,
|
1952
|
-
save_image,
|
1953
|
-
save_video,
|
1954
|
-
overlay_bounding_boxes,
|
1955
|
-
overlay_segmentation_masks,
|
1956
|
-
overlay_heat_map,
|
1957
|
-
overlay_counting_results,
|
1958
|
-
]
|
1959
|
-
)
|
1948
|
+
UTILITIES_DOCSTRING = get_tool_documentation(UTIL_TOOLS) # type: ignore
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.130 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|