vision-agent 0.2.148__tar.gz → 0.2.149__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.148 → vision_agent-0.2.149}/PKG-INFO +1 -1
- {vision_agent-0.2.148 → vision_agent-0.2.149}/pyproject.toml +1 -1
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent.py +62 -41
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_coder.py +4 -4
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_prompts.py +5 -5
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/meta_tools.py +51 -54
- {vision_agent-0.2.148 → vision_agent-0.2.149}/LICENSE +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/README.md +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/video.py +0 -0
@@ -87,7 +87,7 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
|
87
87
|
return extract_json(orch([message], stream=False)) # type: ignore
|
88
88
|
|
89
89
|
|
90
|
-
def
|
90
|
+
def execute_code_action(
|
91
91
|
code: str, code_interpreter: CodeInterpreter, artifact_remote_path: str
|
92
92
|
) -> Tuple[Execution, str]:
|
93
93
|
result = code_interpreter.exec_isolation(
|
@@ -106,19 +106,53 @@ def parse_execution(
|
|
106
106
|
customed_tool_names: Optional[List[str]] = None,
|
107
107
|
) -> Optional[str]:
|
108
108
|
code = None
|
109
|
-
|
110
|
-
|
111
|
-
|
109
|
+
remaining = response
|
110
|
+
all_code = []
|
111
|
+
while "<execute_python>" in remaining:
|
112
|
+
code_i = remaining[
|
113
|
+
remaining.find("<execute_python>") + len("<execute_python>") :
|
114
|
+
]
|
115
|
+
code_i = code_i[: code_i.find("</execute_python>")]
|
116
|
+
remaining = remaining[
|
117
|
+
remaining.find("</execute_python>") + len("</execute_python>") :
|
118
|
+
]
|
119
|
+
all_code.append(code_i)
|
120
|
+
|
121
|
+
if len(all_code) > 0:
|
122
|
+
code = "\n".join(all_code)
|
112
123
|
|
113
124
|
if code is not None:
|
114
125
|
code = use_extra_vision_agent_args(code, test_multi_plan, customed_tool_names)
|
115
126
|
return code
|
116
127
|
|
117
128
|
|
129
|
+
def execute_user_code_action(
|
130
|
+
last_user_message: Message,
|
131
|
+
code_interpreter: CodeInterpreter,
|
132
|
+
artifact_remote_path: str,
|
133
|
+
) -> Tuple[Optional[Execution], Optional[str]]:
|
134
|
+
user_result = None
|
135
|
+
user_obs = None
|
136
|
+
|
137
|
+
if last_user_message["role"] != "user":
|
138
|
+
return user_result, user_obs
|
139
|
+
|
140
|
+
last_user_content = cast(str, last_user_message.get("content", ""))
|
141
|
+
|
142
|
+
user_code_action = parse_execution(last_user_content, False)
|
143
|
+
if user_code_action is not None:
|
144
|
+
user_result, user_obs = execute_code_action(
|
145
|
+
user_code_action, code_interpreter, artifact_remote_path
|
146
|
+
)
|
147
|
+
if user_result.error:
|
148
|
+
user_obs += f"\n{user_result.error}"
|
149
|
+
return user_result, user_obs
|
150
|
+
|
151
|
+
|
118
152
|
class VisionAgent(Agent):
|
119
153
|
"""Vision Agent is an agent that can chat with the user and call tools or other
|
120
154
|
agents to generate code for it. Vision Agent uses python code to execute actions
|
121
|
-
for the user. Vision Agent is inspired by by
|
155
|
+
for the user. Vision Agent is inspired by OpenDevin
|
122
156
|
https://github.com/OpenDevin/OpenDevin and CodeAct https://arxiv.org/abs/2402.01030
|
123
157
|
|
124
158
|
Example
|
@@ -278,9 +312,24 @@ class VisionAgent(Agent):
|
|
278
312
|
orig_chat.append({"role": "observation", "content": artifacts_loaded})
|
279
313
|
self.streaming_message({"role": "observation", "content": artifacts_loaded})
|
280
314
|
|
281
|
-
|
282
|
-
last_user_message, code_interpreter, remote_artifacts_path
|
315
|
+
user_result, user_obs = execute_user_code_action(
|
316
|
+
last_user_message, code_interpreter, str(remote_artifacts_path)
|
283
317
|
)
|
318
|
+
finished = user_result is not None and user_obs is not None
|
319
|
+
if user_result is not None and user_obs is not None:
|
320
|
+
# be sure to update the chat with user execution results
|
321
|
+
chat_elt: Message = {"role": "observation", "content": user_obs}
|
322
|
+
int_chat.append(chat_elt)
|
323
|
+
chat_elt["execution"] = user_result
|
324
|
+
orig_chat.append(chat_elt)
|
325
|
+
self.streaming_message(
|
326
|
+
{
|
327
|
+
"role": "observation",
|
328
|
+
"content": user_obs,
|
329
|
+
"execution": user_result,
|
330
|
+
"finished": finished,
|
331
|
+
}
|
332
|
+
)
|
284
333
|
|
285
334
|
while not finished and iterations < self.max_iterations:
|
286
335
|
response = run_conversation(self.agent, int_chat)
|
@@ -322,7 +371,7 @@ class VisionAgent(Agent):
|
|
322
371
|
)
|
323
372
|
|
324
373
|
if code_action is not None:
|
325
|
-
result, obs =
|
374
|
+
result, obs = execute_code_action(
|
326
375
|
code_action, code_interpreter, str(remote_artifacts_path)
|
327
376
|
)
|
328
377
|
|
@@ -331,17 +380,17 @@ class VisionAgent(Agent):
|
|
331
380
|
if self.verbosity >= 1:
|
332
381
|
_LOGGER.info(obs)
|
333
382
|
|
334
|
-
|
383
|
+
obs_chat_elt: Message = {"role": "observation", "content": obs}
|
335
384
|
if media_obs and result.success:
|
336
|
-
|
385
|
+
obs_chat_elt["media"] = [
|
337
386
|
Path(code_interpreter.remote_path) / media_ob
|
338
387
|
for media_ob in media_obs
|
339
388
|
]
|
340
389
|
|
341
390
|
# don't add execution results to internal chat
|
342
|
-
int_chat.append(
|
343
|
-
|
344
|
-
orig_chat.append(
|
391
|
+
int_chat.append(obs_chat_elt)
|
392
|
+
obs_chat_elt["execution"] = result
|
393
|
+
orig_chat.append(obs_chat_elt)
|
345
394
|
self.streaming_message(
|
346
395
|
{
|
347
396
|
"role": "observation",
|
@@ -362,34 +411,6 @@ class VisionAgent(Agent):
|
|
362
411
|
artifacts.save()
|
363
412
|
return orig_chat, artifacts
|
364
413
|
|
365
|
-
def execute_user_code_action(
|
366
|
-
self,
|
367
|
-
last_user_message: Message,
|
368
|
-
code_interpreter: CodeInterpreter,
|
369
|
-
remote_artifacts_path: Path,
|
370
|
-
) -> bool:
|
371
|
-
if last_user_message["role"] != "user":
|
372
|
-
return False
|
373
|
-
user_code_action = parse_execution(
|
374
|
-
cast(str, last_user_message.get("content", "")), False
|
375
|
-
)
|
376
|
-
if user_code_action is not None:
|
377
|
-
user_result, user_obs = run_code_action(
|
378
|
-
user_code_action, code_interpreter, str(remote_artifacts_path)
|
379
|
-
)
|
380
|
-
if self.verbosity >= 1:
|
381
|
-
_LOGGER.info(user_obs)
|
382
|
-
self.streaming_message(
|
383
|
-
{
|
384
|
-
"role": "observation",
|
385
|
-
"content": user_obs,
|
386
|
-
"execution": user_result,
|
387
|
-
"finished": True,
|
388
|
-
}
|
389
|
-
)
|
390
|
-
return True
|
391
|
-
return False
|
392
|
-
|
393
414
|
def streaming_message(self, message: Dict[str, Any]) -> None:
|
394
415
|
if self.callback_message:
|
395
416
|
self.callback_message(message)
|
@@ -691,7 +691,7 @@ class VisionAgentCoder(Agent):
|
|
691
691
|
chat: List[Message],
|
692
692
|
test_multi_plan: bool = True,
|
693
693
|
display_visualization: bool = False,
|
694
|
-
|
694
|
+
custom_tool_names: Optional[List[str]] = None,
|
695
695
|
) -> Dict[str, Any]:
|
696
696
|
"""Chat with VisionAgentCoder and return intermediate information regarding the
|
697
697
|
task.
|
@@ -707,8 +707,8 @@ class VisionAgentCoder(Agent):
|
|
707
707
|
with the first plan.
|
708
708
|
display_visualization (bool): If True, it opens a new window locally to
|
709
709
|
show the image(s) created by visualization code (if there is any).
|
710
|
-
|
711
|
-
If not provided, default to full tool set from vision_agent.tools.
|
710
|
+
custom_tool_names (List[str]): A list of custom tools for the agent to pick
|
711
|
+
and use. If not provided, default to full tool set from vision_agent.tools.
|
712
712
|
|
713
713
|
Returns:
|
714
714
|
Dict[str, Any]: A dictionary containing the code, test, test result, plan,
|
@@ -760,7 +760,7 @@ class VisionAgentCoder(Agent):
|
|
760
760
|
success = False
|
761
761
|
|
762
762
|
plans = self._create_plans(
|
763
|
-
int_chat,
|
763
|
+
int_chat, custom_tool_names, working_memory, self.planner
|
764
764
|
)
|
765
765
|
|
766
766
|
if test_multi_plan:
|
@@ -26,7 +26,9 @@ Here is the current conversation so far:
|
|
26
26
|
|
27
27
|
**Instructions**:
|
28
28
|
1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
|
29
|
-
2. **
|
29
|
+
2. **Code Generation**: Only use code provided in the Documentation in your <execute_python> tags. Only use `edit_vision_code` to modify code written by `generate_vision_code`.
|
30
|
+
3. **Execute**: Do only what the user asked you to do and no more. If you need to ask the user a question, set `let_user_respond` to `true`.
|
31
|
+
4. **Output in JSON**: Respond in the following format in JSON:
|
30
32
|
|
31
33
|
```json
|
32
34
|
{{"thoughts": <your thoughts>, "response": <your response to the user>, "let_user_respond": <a boolean whether or not to let the user respond>}}.
|
@@ -149,7 +151,7 @@ OBSERVATION:
|
|
149
151
|
16| return count
|
150
152
|
[End of artifact]
|
151
153
|
|
152
|
-
AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
154
|
+
AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png', 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
153
155
|
|
154
156
|
OBSERVATION:
|
155
157
|
----- stdout -----
|
@@ -164,7 +166,6 @@ AGENT: {"thoughts": "Because the user has supplied me with labels I can call obj
|
|
164
166
|
OBSERVATION:
|
165
167
|
[Fine tuning id: 23b3b022-5ebf-4798-9373-20ef36429abf]
|
166
168
|
|
167
|
-
|
168
169
|
AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "I will not update the code to use the fine tuned model. <execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
|
169
170
|
|
170
171
|
OBSERVATION:
|
@@ -182,8 +183,7 @@ OBSERVATION:
|
|
182
183
|
count = 0
|
183
184
|
[End of edits]
|
184
185
|
|
185
|
-
|
186
|
-
AGENT: {"thoughts": "I have updated the code to use the fine tuned model, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
186
|
+
AGENT: {"thoughts": "I have updated the code to use the fine tuned model, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png', 'workers_viz.png')</execute_python>", "let_user_respond": false}
|
187
187
|
|
188
188
|
OBSERVATION:
|
189
189
|
----- stdout -----
|
@@ -8,6 +8,7 @@ import tempfile
|
|
8
8
|
from pathlib import Path
|
9
9
|
from typing import Any, Dict, List, Optional, Union
|
10
10
|
|
11
|
+
import numpy as np
|
11
12
|
from IPython.display import display
|
12
13
|
|
13
14
|
import vision_agent as va
|
@@ -17,7 +18,8 @@ from vision_agent.tools.tool_utils import get_tool_documentation
|
|
17
18
|
from vision_agent.tools.tools import TOOL_DESCRIPTIONS
|
18
19
|
from vision_agent.tools.tools_types import BboxInput, BboxInputBase64, PromptTask
|
19
20
|
from vision_agent.utils.execute import Execution, MimeType
|
20
|
-
from vision_agent.utils.image_utils import convert_to_b64
|
21
|
+
from vision_agent.utils.image_utils import convert_to_b64, numpy_to_bytes
|
22
|
+
from vision_agent.utils.video import frames_to_bytes
|
21
23
|
|
22
24
|
# These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
|
23
25
|
|
@@ -328,7 +330,7 @@ def generate_vision_code(
|
|
328
330
|
chat: str,
|
329
331
|
media: List[str],
|
330
332
|
test_multi_plan: bool = True,
|
331
|
-
|
333
|
+
custom_tool_names: Optional[List[str]] = None,
|
332
334
|
) -> str:
|
333
335
|
"""Generates python code to solve vision based tasks.
|
334
336
|
|
@@ -338,7 +340,7 @@ def generate_vision_code(
|
|
338
340
|
chat (str): The chat message from the user.
|
339
341
|
media (List[str]): The media files to use.
|
340
342
|
test_multi_plan (bool): Do not change this parameter.
|
341
|
-
|
343
|
+
custom_tool_names (Optional[List[str]]): Do not change this parameter.
|
342
344
|
|
343
345
|
Returns:
|
344
346
|
str: The generated code.
|
@@ -366,7 +368,7 @@ def generate_vision_code(
|
|
366
368
|
response = agent.chat_with_workflow(
|
367
369
|
fixed_chat,
|
368
370
|
test_multi_plan=test_multi_plan,
|
369
|
-
|
371
|
+
custom_tool_names=custom_tool_names,
|
370
372
|
)
|
371
373
|
redisplay_results(response["test_result"])
|
372
374
|
code = response["code"]
|
@@ -432,19 +434,21 @@ def edit_vision_code(
|
|
432
434
|
|
433
435
|
# Append latest code to second to last message from assistant
|
434
436
|
fixed_chat_history: List[Message] = []
|
437
|
+
user_message = "Previous user requests:"
|
435
438
|
for i, chat in enumerate(chat_history):
|
436
|
-
if i
|
437
|
-
|
438
|
-
|
439
|
-
fixed_chat_history.append(
|
440
|
-
|
439
|
+
if i < len(chat_history) - 1:
|
440
|
+
user_message += " " + chat
|
441
|
+
else:
|
442
|
+
fixed_chat_history.append(
|
443
|
+
{"role": "user", "content": user_message, "media": media}
|
444
|
+
)
|
441
445
|
fixed_chat_history.append({"role": "assistant", "content": code})
|
442
446
|
fixed_chat_history.append({"role": "user", "content": chat})
|
443
447
|
|
444
448
|
response = agent.chat_with_workflow(
|
445
449
|
fixed_chat_history,
|
446
450
|
test_multi_plan=False,
|
447
|
-
|
451
|
+
custom_tool_names=customized_tool_names,
|
448
452
|
)
|
449
453
|
redisplay_results(response["test_result"])
|
450
454
|
code = response["code"]
|
@@ -467,17 +471,34 @@ def edit_vision_code(
|
|
467
471
|
return view_lines(code_lines, 0, total_lines, name, total_lines)
|
468
472
|
|
469
473
|
|
470
|
-
def write_media_artifact(
|
474
|
+
def write_media_artifact(
|
475
|
+
artifacts: Artifacts,
|
476
|
+
name: str,
|
477
|
+
media: Union[str, np.ndarray, List[np.ndarray]],
|
478
|
+
fps: Optional[float] = None,
|
479
|
+
) -> str:
|
471
480
|
"""Writes a media file to the artifacts object.
|
472
481
|
|
473
482
|
Parameters:
|
474
483
|
artifacts (Artifacts): The artifacts object to save the media to.
|
475
|
-
|
484
|
+
name (str): The name of the media artifact to save.
|
485
|
+
media (Union[str, np.ndarray, List[np.ndarray]]): The media to save, can either
|
486
|
+
be a file path, single image or list of frames for a video.
|
487
|
+
fps (Optional[float]): The frames per second if you are writing a video.
|
476
488
|
"""
|
477
|
-
|
478
|
-
media
|
479
|
-
|
480
|
-
|
489
|
+
if isinstance(media, str):
|
490
|
+
with open(media, "rb") as f:
|
491
|
+
media_bytes = f.read()
|
492
|
+
elif isinstance(media, list):
|
493
|
+
media_bytes = frames_to_bytes(media, fps=fps if fps is not None else 1.0)
|
494
|
+
elif isinstance(media, np.ndarray):
|
495
|
+
media_bytes = numpy_to_bytes(media)
|
496
|
+
else:
|
497
|
+
print(f"[Invalid media type {type(media)}]")
|
498
|
+
return f"[Invalid media type {type(media)}]"
|
499
|
+
artifacts[name] = media_bytes
|
500
|
+
print(f"[Media {name} saved]")
|
501
|
+
return f"[Media {name} saved]"
|
481
502
|
|
482
503
|
|
483
504
|
def list_artifacts(artifacts: Artifacts) -> str:
|
@@ -491,16 +512,14 @@ def check_and_load_image(code: str) -> List[str]:
|
|
491
512
|
if not code.strip():
|
492
513
|
return []
|
493
514
|
|
494
|
-
pattern = r"
|
495
|
-
|
496
|
-
|
497
|
-
name = match.group(2)
|
498
|
-
return [name]
|
499
|
-
return []
|
515
|
+
pattern = r"view_media_artifact\(\s*([^\)]+),\s*['\"]([^\)]+)['\"]\s*\)"
|
516
|
+
matches = re.findall(pattern, code)
|
517
|
+
return [match[1] for match in matches]
|
500
518
|
|
501
519
|
|
502
520
|
def view_media_artifact(artifacts: Artifacts, name: str) -> str:
|
503
|
-
"""
|
521
|
+
"""Allows you to view the media artifact with the given name. This does not show
|
522
|
+
the media to the user, the user can already see all media saved in the artifacts.
|
504
523
|
|
505
524
|
Parameters:
|
506
525
|
artifacts (Artifacts): The artifacts object to show the image from.
|
@@ -598,7 +617,7 @@ def use_extra_vision_agent_args(
|
|
598
617
|
arg = match.group(1)
|
599
618
|
out_str = f"generate_vision_code({arg}, test_multi_plan={test_multi_plan}"
|
600
619
|
if customized_tool_names is not None:
|
601
|
-
out_str += f",
|
620
|
+
out_str += f", custom_tool_names={customized_tool_names})"
|
602
621
|
else:
|
603
622
|
out_str += ")"
|
604
623
|
return out_str
|
@@ -609,7 +628,7 @@ def use_extra_vision_agent_args(
|
|
609
628
|
arg = match.group(1)
|
610
629
|
out_str = f"edit_vision_code({arg}"
|
611
630
|
if customized_tool_names is not None:
|
612
|
-
out_str += f",
|
631
|
+
out_str += f", custom_tool_names={customized_tool_names})"
|
613
632
|
else:
|
614
633
|
out_str += ")"
|
615
634
|
return out_str
|
@@ -646,50 +665,28 @@ def use_object_detection_fine_tuning(
|
|
646
665
|
|
647
666
|
patterns_with_fine_tune_id = [
|
648
667
|
(
|
649
|
-
r'florence2_phrase_grounding\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
|
668
|
+
r'florence2_phrase_grounding\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
|
650
669
|
lambda match: f'florence2_phrase_grounding("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
|
651
670
|
),
|
652
671
|
(
|
653
|
-
r'owl_v2_image\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
|
672
|
+
r'owl_v2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
|
654
673
|
lambda match: f'owl_v2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
|
655
674
|
),
|
656
675
|
(
|
657
|
-
r'florence2_sam2_image\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
|
676
|
+
r'florence2_sam2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
|
658
677
|
lambda match: f'florence2_sam2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
|
659
678
|
),
|
660
679
|
]
|
661
680
|
|
662
|
-
patterns_without_fine_tune_id = [
|
663
|
-
(
|
664
|
-
r"florence2_phrase_grounding\(\s*([^\)]+)\s*\)",
|
665
|
-
lambda match: f'florence2_phrase_grounding({match.group(1)}, "{fine_tune_id}")',
|
666
|
-
),
|
667
|
-
(
|
668
|
-
r"owl_v2_image\(\s*([^\)]+)\s*\)",
|
669
|
-
lambda match: f'owl_v2_image({match.group(1)}, "{fine_tune_id}")',
|
670
|
-
),
|
671
|
-
(
|
672
|
-
r"florence2_sam2_image\(\s*([^\)]+)\s*\)",
|
673
|
-
lambda match: f'florence2_sam2_image({match.group(1)}, "{fine_tune_id}")',
|
674
|
-
),
|
675
|
-
]
|
676
|
-
|
677
681
|
new_code = code
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
):
|
682
|
+
for (
|
683
|
+
pattern_with_fine_tune_id,
|
684
|
+
replacer_with_fine_tune_id,
|
685
|
+
) in patterns_with_fine_tune_id:
|
682
686
|
if re.search(pattern_with_fine_tune_id, new_code):
|
683
687
|
new_code = re.sub(
|
684
688
|
pattern_with_fine_tune_id, replacer_with_fine_tune_id, new_code
|
685
689
|
)
|
686
|
-
else:
|
687
|
-
(pattern_without_fine_tune_id, replacer_without_fine_tune_id) = (
|
688
|
-
patterns_without_fine_tune_id[index]
|
689
|
-
)
|
690
|
-
new_code = re.sub(
|
691
|
-
pattern_without_fine_tune_id, replacer_without_fine_tune_id, new_code
|
692
|
-
)
|
693
690
|
|
694
691
|
if new_code == code:
|
695
692
|
output_str = (
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|