vision-agent 0.2.148__tar.gz → 0.2.149__tar.gz

Files changed (33)
  1. {vision_agent-0.2.148 → vision_agent-0.2.149}/PKG-INFO +1 -1
  2. {vision_agent-0.2.148 → vision_agent-0.2.149}/pyproject.toml +1 -1
  3. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent.py +62 -41
  4. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_coder.py +4 -4
  5. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_prompts.py +5 -5
  6. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/meta_tools.py +51 -54
  7. {vision_agent-0.2.148 → vision_agent-0.2.149}/LICENSE +0 -0
  8. {vision_agent-0.2.148 → vision_agent-0.2.149}/README.md +0 -0
  9. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/__init__.py +0 -0
  10. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/__init__.py +0 -0
  11. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/agent.py +0 -0
  12. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/agent_utils.py +0 -0
  13. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  14. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/clients/__init__.py +0 -0
  15. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/clients/http.py +0 -0
  16. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/clients/landing_public_api.py +0 -0
  17. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/fonts/__init__.py +0 -0
  18. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  19. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/lmm/__init__.py +0 -0
  20. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/lmm/lmm.py +0 -0
  21. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/lmm/types.py +0 -0
  22. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/__init__.py +0 -0
  23. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/prompts.py +0 -0
  24. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/tool_utils.py +0 -0
  25. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/tools.py +0 -0
  26. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/tools_types.py +0 -0
  27. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/__init__.py +0 -0
  28. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/exceptions.py +0 -0
  29. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/execute.py +0 -0
  30. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/image_utils.py +0 -0
  31. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/sim.py +0 -0
  32. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/type_defs.py +0 -0
  33. {vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/utils/video.py +0 -0
{vision_agent-0.2.148 → vision_agent-0.2.149}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: vision-agent
- Version: 0.2.148
+ Version: 0.2.149
  Summary: Toolset for Vision Agent
  Author: Landing AI
  Author-email: dev@landing.ai
{vision_agent-0.2.148 → vision_agent-0.2.149}/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 
  [tool.poetry]
  name = "vision-agent"
- version = "0.2.148"
+ version = "0.2.149"
  description = "Toolset for Vision Agent"
  authors = ["Landing AI <dev@landing.ai>"]
  readme = "README.md"
{vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent.py
@@ -87,7 +87,7 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
  return extract_json(orch([message], stream=False)) # type: ignore
 
 
- def run_code_action(
+ def execute_code_action(
  code: str, code_interpreter: CodeInterpreter, artifact_remote_path: str
  ) -> Tuple[Execution, str]:
  result = code_interpreter.exec_isolation(
@@ -106,19 +106,53 @@ def parse_execution(
  customed_tool_names: Optional[List[str]] = None,
  ) -> Optional[str]:
  code = None
- if "<execute_python>" in response:
- code = response[response.find("<execute_python>") + len("<execute_python>") :]
- code = code[: code.find("</execute_python>")]
+ remaining = response
+ all_code = []
+ while "<execute_python>" in remaining:
+ code_i = remaining[
+ remaining.find("<execute_python>") + len("<execute_python>") :
+ ]
+ code_i = code_i[: code_i.find("</execute_python>")]
+ remaining = remaining[
+ remaining.find("</execute_python>") + len("</execute_python>") :
+ ]
+ all_code.append(code_i)
+
+ if len(all_code) > 0:
+ code = "\n".join(all_code)
 
  if code is not None:
  code = use_extra_vision_agent_args(code, test_multi_plan, customed_tool_names)
  return code
 
 
+ def execute_user_code_action(
+ last_user_message: Message,
+ code_interpreter: CodeInterpreter,
+ artifact_remote_path: str,
+ ) -> Tuple[Optional[Execution], Optional[str]]:
+ user_result = None
+ user_obs = None
+
+ if last_user_message["role"] != "user":
+ return user_result, user_obs
+
+ last_user_content = cast(str, last_user_message.get("content", ""))
+
+ user_code_action = parse_execution(last_user_content, False)
+ if user_code_action is not None:
+ user_result, user_obs = execute_code_action(
+ user_code_action, code_interpreter, artifact_remote_path
+ )
+ if user_result.error:
+ user_obs += f"\n{user_result.error}"
+ return user_result, user_obs
+
+
  class VisionAgent(Agent):
  """Vision Agent is an agent that can chat with the user and call tools or other
  agents to generate code for it. Vision Agent uses python code to execute actions
- for the user. Vision Agent is inspired by by OpenDev
+ for the user. Vision Agent is inspired by by OpenDevin
  https://github.com/OpenDevin/OpenDevin and CodeAct https://arxiv.org/abs/2402.01030
 
  Example
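
For reference, a runnable sketch of the new multi-block extraction introduced in parse_execution above; the sample response string is illustrative.

# All <execute_python>...</execute_python> blocks are now collected and joined,
# instead of only the first block being executed.
response = (
    "<execute_python>x = 1</execute_python> some reasoning text "
    "<execute_python>print(x + 1)</execute_python>"
)
remaining, all_code = response, []
while "<execute_python>" in remaining:
    code_i = remaining[remaining.find("<execute_python>") + len("<execute_python>") :]
    code_i = code_i[: code_i.find("</execute_python>")]
    remaining = remaining[remaining.find("</execute_python>") + len("</execute_python>") :]
    all_code.append(code_i)
print("\n".join(all_code))  # x = 1
                            # print(x + 1)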
@@ -278,9 +312,24 @@ class VisionAgent(Agent):
  orig_chat.append({"role": "observation", "content": artifacts_loaded})
  self.streaming_message({"role": "observation", "content": artifacts_loaded})
 
- finished = self.execute_user_code_action(
- last_user_message, code_interpreter, remote_artifacts_path
+ user_result, user_obs = execute_user_code_action(
+ last_user_message, code_interpreter, str(remote_artifacts_path)
  )
+ finished = user_result is not None and user_obs is not None
+ if user_result is not None and user_obs is not None:
+ # be sure to update the chat with user execution results
+ chat_elt: Message = {"role": "observation", "content": user_obs}
+ int_chat.append(chat_elt)
+ chat_elt["execution"] = user_result
+ orig_chat.append(chat_elt)
+ self.streaming_message(
+ {
+ "role": "observation",
+ "content": user_obs,
+ "execution": user_result,
+ "finished": finished,
+ }
+ )
 
  while not finished and iterations < self.max_iterations:
  response = run_conversation(self.agent, int_chat)
@@ -322,7 +371,7 @@ class VisionAgent(Agent):
  )
 
  if code_action is not None:
- result, obs = run_code_action(
+ result, obs = execute_code_action(
  code_action, code_interpreter, str(remote_artifacts_path)
  )
 
@@ -331,17 +380,17 @@ class VisionAgent(Agent):
  if self.verbosity >= 1:
  _LOGGER.info(obs)
 
- chat_elt: Message = {"role": "observation", "content": obs}
+ obs_chat_elt: Message = {"role": "observation", "content": obs}
  if media_obs and result.success:
- chat_elt["media"] = [
+ obs_chat_elt["media"] = [
  Path(code_interpreter.remote_path) / media_ob
  for media_ob in media_obs
  ]
 
  # don't add execution results to internal chat
- int_chat.append(chat_elt)
- chat_elt["execution"] = result
- orig_chat.append(chat_elt)
+ int_chat.append(obs_chat_elt)
+ obs_chat_elt["execution"] = result
+ orig_chat.append(obs_chat_elt)
  self.streaming_message(
  {
  "role": "observation",
@@ -362,34 +411,6 @@ class VisionAgent(Agent):
  artifacts.save()
  return orig_chat, artifacts
 
- def execute_user_code_action(
- self,
- last_user_message: Message,
- code_interpreter: CodeInterpreter,
- remote_artifacts_path: Path,
- ) -> bool:
- if last_user_message["role"] != "user":
- return False
- user_code_action = parse_execution(
- cast(str, last_user_message.get("content", "")), False
- )
- if user_code_action is not None:
- user_result, user_obs = run_code_action(
- user_code_action, code_interpreter, str(remote_artifacts_path)
- )
- if self.verbosity >= 1:
- _LOGGER.info(user_obs)
- self.streaming_message(
- {
- "role": "observation",
- "content": user_obs,
- "execution": user_result,
- "finished": True,
- }
- )
- return True
- return False
-
  def streaming_message(self, message: Dict[str, Any]) -> None:
  if self.callback_message:
  self.callback_message(message)
{vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_coder.py
@@ -691,7 +691,7 @@ class VisionAgentCoder(Agent):
  chat: List[Message],
  test_multi_plan: bool = True,
  display_visualization: bool = False,
- customized_tool_names: Optional[List[str]] = None,
+ custom_tool_names: Optional[List[str]] = None,
  ) -> Dict[str, Any]:
  """Chat with VisionAgentCoder and return intermediate information regarding the
  task.
@@ -707,8 +707,8 @@
  with the first plan.
  display_visualization (bool): If True, it opens a new window locally to
  show the image(s) created by visualization code (if there is any).
- customized_tool_names (List[str]): A list of customized tools for agent to pick and use.
- If not provided, default to full tool set from vision_agent.tools.
+ custom_tool_names (List[str]): A list of custom tools for the agent to pick
+ and use. If not provided, default to full tool set from vision_agent.tools.
 
  Returns:
  Dict[str, Any]: A dictionary containing the code, test, test result, plan,
@@ -760,7 +760,7 @@ class VisionAgentCoder(Agent):
  success = False
 
  plans = self._create_plans(
- int_chat, customized_tool_names, working_memory, self.planner
+ int_chat, custom_tool_names, working_memory, self.planner
  )
 
  if test_multi_plan:
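
For context, a hedged usage sketch of the renamed keyword on chat_with_workflow; the prompt, media path, and tool names below are illustrative, not taken from the diff.

from vision_agent.agent import VisionAgentCoder

agent = VisionAgentCoder()
result = agent.chat_with_workflow(
    [{"role": "user", "content": "Count the workers wearing helmets", "media": ["workers.png"]}],
    test_multi_plan=True,
    custom_tool_names=["owl_v2_image", "overlay_bounding_boxes"],  # illustrative tool names
)
print(result["code"])  # the generated code, per the documented return dict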
{vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/agent/vision_agent_prompts.py
@@ -26,7 +26,9 @@ Here is the current conversation so far:
 
  **Instructions**:
  1. **Understand and Clarify**: Make sure you understand the task, ask clarifying questions if the task is not clear.
- 2. **Output in JSON**: Respond in the following format in JSON:
+ 2. **Code Generation**: Only use code provided in the Documentation in your <execute_python> tags. Only use `edit_vision_code` to modify code written by `generate_vision_code`.
+ 3. **Execute**: Do only what the user asked you to do and no more. If you need to ask the user a question, set `let_user_respond` to `true`.
+ 4. **Output in JSON**: Respond in the following format in JSON:
 
  ```json
  {{"thoughts": <your thoughts>, "response": <your response to the user>, "let_user_respond": <a boolean whether or not to let the user respond>}}.
@@ -149,7 +151,7 @@ OBSERVATION:
  16| return count
  [End of artifact]
 
- AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
+ AGENT: {"thoughts": "I have generated the code to count the workers with helmets in the image, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png', 'workers_viz.png')</execute_python>", "let_user_respond": false}
 
  OBSERVATION:
  ----- stdout -----
@@ -164,7 +166,6 @@ AGENT: {"thoughts": "Because the user has supplied me with labels I can call obj
  OBSERVATION:
  [Fine tuning id: 23b3b022-5ebf-4798-9373-20ef36429abf]
 
-
  AGENT: {"thoughts": "The model has finished fine tuning, I will now replace the original florence2_phrase_grounding call with the fine tuning id.", "response": "I will not update the code to use the fine tuned model. <execute_python>use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")</execute_python>", "let_user_respond": false}
 
  OBSERVATION:
@@ -182,8 +183,7 @@ OBSERVATION:
  count = 0
  [End of edits]
 
-
- AGENT: {"thoughts": "I have updated the code to use the fine tuned model, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png')</execute_python>", "let_user_respond": false}
+ AGENT: {"thoughts": "I have updated the code to use the fine tuned model, I must now run the code and print the output and write the visualization to the artifacts so I can see the result and the user can see the visaulization.", "response": "<execute_python>from code import count_workers_with_helmets\n print(count_workers_with_helmets('/path/to/images/workers.png', 'workers_viz.png'))\n write_media_artifact(artifacts, 'workers_viz.png', 'workers_viz.png')</execute_python>", "let_user_respond": false}
 
  OBSERVATION:
  ----- stdout -----
{vision_agent-0.2.148 → vision_agent-0.2.149}/vision_agent/tools/meta_tools.py
@@ -8,6 +8,7 @@ import tempfile
  from pathlib import Path
  from typing import Any, Dict, List, Optional, Union
 
+ import numpy as np
  from IPython.display import display
 
  import vision_agent as va
@@ -17,7 +18,8 @@ from vision_agent.tools.tool_utils import get_tool_documentation
  from vision_agent.tools.tools import TOOL_DESCRIPTIONS
  from vision_agent.tools.tools_types import BboxInput, BboxInputBase64, PromptTask
  from vision_agent.utils.execute import Execution, MimeType
- from vision_agent.utils.image_utils import convert_to_b64
+ from vision_agent.utils.image_utils import convert_to_b64, numpy_to_bytes
+ from vision_agent.utils.video import frames_to_bytes
 
  # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 
@@ -328,7 +330,7 @@ def generate_vision_code(
  chat: str,
  media: List[str],
  test_multi_plan: bool = True,
- customized_tool_names: Optional[List[str]] = None,
+ custom_tool_names: Optional[List[str]] = None,
  ) -> str:
  """Generates python code to solve vision based tasks.
 
@@ -338,7 +340,7 @@ def generate_vision_code(
  chat (str): The chat message from the user.
  media (List[str]): The media files to use.
  test_multi_plan (bool): Do not change this parameter.
- customized_tool_names (Optional[List[str]]): Do not change this parameter.
+ custom_tool_names (Optional[List[str]]): Do not change this parameter.
 
  Returns:
  str: The generated code.
@@ -366,7 +368,7 @@ def generate_vision_code(
  response = agent.chat_with_workflow(
  fixed_chat,
  test_multi_plan=test_multi_plan,
- customized_tool_names=customized_tool_names,
+ custom_tool_names=custom_tool_names,
  )
  redisplay_results(response["test_result"])
  code = response["code"]
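
A hedged usage sketch of the renamed keyword on the generate_vision_code meta tool; the leading artifacts and name arguments (defined above this hunk), the prompt, and the paths are illustrative.

code = generate_vision_code(
    artifacts,
    "code.py",
    "Count the number of workers wearing helmets in the image",
    media=["/path/to/images/workers.png"],
    custom_tool_names=["florence2_phrase_grounding"],  # was customized_tool_names in 0.2.148
)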
@@ -432,19 +434,21 @@ def edit_vision_code(
 
  # Append latest code to second to last message from assistant
  fixed_chat_history: List[Message] = []
+ user_message = "Previous user requests:"
  for i, chat in enumerate(chat_history):
- if i == 0:
- fixed_chat_history.append({"role": "user", "content": chat, "media": media})
- elif i > 0 and i < len(chat_history) - 1:
- fixed_chat_history.append({"role": "user", "content": chat})
- elif i == len(chat_history) - 1:
+ if i < len(chat_history) - 1:
+ user_message += " " + chat
+ else:
+ fixed_chat_history.append(
+ {"role": "user", "content": user_message, "media": media}
+ )
  fixed_chat_history.append({"role": "assistant", "content": code})
  fixed_chat_history.append({"role": "user", "content": chat})
 
  response = agent.chat_with_workflow(
  fixed_chat_history,
  test_multi_plan=False,
- customized_tool_names=customized_tool_names,
+ custom_tool_names=customized_tool_names,
  )
  redisplay_results(response["test_result"])
  code = response["code"]
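
For illustration, the flattened history the hunk above now produces for a three-turn edit session; the request strings, media path, and code value are made up.

chat_history = [
    "Detect the workers in the image",
    "Only count workers wearing helmets",
    "Also draw bounding boxes on the output image",
]
# fixed_chat_history passed to the coder agent ends up as:
# [
#   {"role": "user",
#    "content": "Previous user requests: Detect the workers in the image Only count workers wearing helmets",
#    "media": ["/path/to/images/workers.png"]},
#   {"role": "assistant", "content": code},  # the latest generated code
#   {"role": "user", "content": "Also draw bounding boxes on the output image"},
# ]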
@@ -467,17 +471,34 @@ def edit_vision_code(
  return view_lines(code_lines, 0, total_lines, name, total_lines)
 
 
- def write_media_artifact(artifacts: Artifacts, local_path: str) -> str:
+ def write_media_artifact(
+ artifacts: Artifacts,
+ name: str,
+ media: Union[str, np.ndarray, List[np.ndarray]],
+ fps: Optional[float] = None,
+ ) -> str:
  """Writes a media file to the artifacts object.
 
  Parameters:
  artifacts (Artifacts): The artifacts object to save the media to.
- local_path (str): The local path to the media file.
+ name (str): The name of the media artifact to save.
+ media (Union[str, np.ndarray, List[np.ndarray]]): The media to save, can either
+ be a file path, single image or list of frames for a video.
+ fps (Optional[float]): The frames per second if you are writing a video.
  """
- with open(local_path, "rb") as f:
- media = f.read()
- artifacts[Path(local_path).name] = media
- return f"[Media {Path(local_path).name} saved]"
+ if isinstance(media, str):
+ with open(media, "rb") as f:
+ media_bytes = f.read()
+ elif isinstance(media, list):
+ media_bytes = frames_to_bytes(media, fps=fps if fps is not None else 1.0)
+ elif isinstance(media, np.ndarray):
+ media_bytes = numpy_to_bytes(media)
+ else:
+ print(f"[Invalid media type {type(media)}]")
+ return f"[Invalid media type {type(media)}]"
+ artifacts[name] = media_bytes
+ print(f"[Media {name} saved]")
+ return f"[Media {name} saved]"
 
 
  def list_artifacts(artifacts: Artifacts) -> str:
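
A hedged usage sketch of the new write_media_artifact signature; the artifacts object, file names, and frame data are illustrative.

import numpy as np

write_media_artifact(artifacts, "photo.png", "/path/to/photo.png")  # copy a file on disk
write_media_artifact(artifacts, "mask.png", np.zeros((480, 640, 3), dtype=np.uint8))  # single image
frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(30)]
write_media_artifact(artifacts, "clip.mp4", frames, fps=10.0)  # list of frames saved as video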
@@ -491,16 +512,14 @@ def check_and_load_image(code: str) -> List[str]:
  if not code.strip():
  return []
 
- pattern = r"show_media_artifact\(\s*([^\)]+),\s*['\"]([^\)]+)['\"]\s*\)"
- match = re.search(pattern, code)
- if match:
- name = match.group(2)
- return [name]
- return []
+ pattern = r"view_media_artifact\(\s*([^\)]+),\s*['\"]([^\)]+)['\"]\s*\)"
+ matches = re.findall(pattern, code)
+ return [match[1] for match in matches]
 
 
  def view_media_artifact(artifacts: Artifacts, name: str) -> str:
- """Views the image artifact with the given name.
+ """Allows you to view the media artifact with the given name. This does not show
+ the media to the user, the user can already see all media saved in the artifacts.
 
  Parameters:
  artifacts (Artifacts): The artifacts object to show the image from.
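
A minimal sketch of the updated matching: every view_media_artifact call in the generated code is now collected, not just the first; the sample code string is illustrative.

import re

pattern = r"view_media_artifact\(\s*([^\)]+),\s*['\"]([^\)]+)['\"]\s*\)"
code = (
    "view_media_artifact(artifacts, 'workers_viz.png')\n"
    "view_media_artifact(artifacts, 'helmets_viz.png')"
)
print([m[1] for m in re.findall(pattern, code)])  # ['workers_viz.png', 'helmets_viz.png']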
@@ -598,7 +617,7 @@ def use_extra_vision_agent_args(
  arg = match.group(1)
  out_str = f"generate_vision_code({arg}, test_multi_plan={test_multi_plan}"
  if customized_tool_names is not None:
- out_str += f", customized_tool_names={customized_tool_names})"
+ out_str += f", custom_tool_names={customized_tool_names})"
  else:
  out_str += ")"
  return out_str
@@ -609,7 +628,7 @@ def use_extra_vision_agent_args(
  arg = match.group(1)
  out_str = f"edit_vision_code({arg}"
  if customized_tool_names is not None:
- out_str += f", customized_tool_names={customized_tool_names})"
+ out_str += f", custom_tool_names={customized_tool_names})"
  else:
  out_str += ")"
  return out_str
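
A hedged sketch of the keyword appending shown above; the regex and the sample call string are assumptions for illustration, not the exact helper in meta_tools.py.

import re

code = 'generate_vision_code(artifacts, "code.py", "Count workers with helmets", ["workers.png"])'
match = re.search(r"generate_vision_code\(\s*([^\)]+)\s*\)", code)  # assumed matcher
arg = match.group(1)
out_str = f"generate_vision_code({arg}, test_multi_plan=True"
out_str += f", custom_tool_names={['florence2_phrase_grounding']})"
print(out_str)
# generate_vision_code(artifacts, "code.py", "Count workers with helmets", ["workers.png"], test_multi_plan=True, custom_tool_names=['florence2_phrase_grounding'])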
@@ -646,50 +665,28 @@ def use_object_detection_fine_tuning(
 
  patterns_with_fine_tune_id = [
  (
- r'florence2_phrase_grounding\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
+ r'florence2_phrase_grounding\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
  lambda match: f'florence2_phrase_grounding("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
  ),
  (
- r'owl_v2_image\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
+ r'owl_v2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
  lambda match: f'owl_v2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
  ),
  (
- r'florence2_sam2_image\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
+ r'florence2_sam2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
  lambda match: f'florence2_sam2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
  ),
  ]
 
- patterns_without_fine_tune_id = [
- (
- r"florence2_phrase_grounding\(\s*([^\)]+)\s*\)",
- lambda match: f'florence2_phrase_grounding({match.group(1)}, "{fine_tune_id}")',
- ),
- (
- r"owl_v2_image\(\s*([^\)]+)\s*\)",
- lambda match: f'owl_v2_image({match.group(1)}, "{fine_tune_id}")',
- ),
- (
- r"florence2_sam2_image\(\s*([^\)]+)\s*\)",
- lambda match: f'florence2_sam2_image({match.group(1)}, "{fine_tune_id}")',
- ),
- ]
-
  new_code = code
-
- for index, (pattern_with_fine_tune_id, replacer_with_fine_tune_id) in enumerate(
- patterns_with_fine_tune_id
- ):
+ for (
+ pattern_with_fine_tune_id,
+ replacer_with_fine_tune_id,
+ ) in patterns_with_fine_tune_id:
  if re.search(pattern_with_fine_tune_id, new_code):
  new_code = re.sub(
  pattern_with_fine_tune_id, replacer_with_fine_tune_id, new_code
  )
- else:
- (pattern_without_fine_tune_id, replacer_without_fine_tune_id) = (
- patterns_without_fine_tune_id[index]
- )
- new_code = re.sub(
- pattern_without_fine_tune_id, replacer_without_fine_tune_id, new_code
- )
 
  if new_code == code:
  output_str = (
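
A minimal sketch of why the patterns were loosened: single-quoted prompts are now rewritten too; the code string below is illustrative and the fine-tune id is the one used in the prompt examples.

import re

pattern = r'florence2_phrase_grounding\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)'
fine_tune_id = "23b3b022-5ebf-4798-9373-20ef36429abf"
code = "dets = florence2_phrase_grounding('worker, helmet', image)"
new_code = re.sub(
    pattern,
    lambda m: f'florence2_phrase_grounding("{m.group(1)}", {m.group(2)}, "{fine_tune_id}")',
    code,
)
print(new_code)
# dets = florence2_phrase_grounding("worker, helmet", image, "23b3b022-5ebf-4798-9373-20ef36429abf")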