vision-agent 0.2.161__tar.gz → 0.2.163__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (36) hide show
  1. {vision_agent-0.2.161 → vision_agent-0.2.163}/PKG-INFO +8 -7
  2. {vision_agent-0.2.161 → vision_agent-0.2.163}/README.md +6 -6
  3. {vision_agent-0.2.161 → vision_agent-0.2.163}/pyproject.toml +2 -1
  4. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/agent/__init__.py +8 -0
  5. vision_agent-0.2.163/vision_agent/agent/agent_utils.py +181 -0
  6. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/agent/vision_agent.py +54 -22
  7. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/agent/vision_agent_coder.py +222 -512
  8. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/agent/vision_agent_coder_prompts.py +12 -221
  9. vision_agent-0.2.163/vision_agent/agent/vision_agent_planner.py +583 -0
  10. vision_agent-0.2.163/vision_agent/agent/vision_agent_planner_prompts.py +199 -0
  11. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/tools/__init__.py +0 -1
  12. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/tools/meta_tools.py +107 -35
  13. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/tools/tools.py +2 -2
  14. vision_agent-0.2.161/vision_agent/agent/agent_utils.py +0 -85
  15. {vision_agent-0.2.161 → vision_agent-0.2.163}/LICENSE +0 -0
  16. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/__init__.py +0 -0
  17. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/agent/agent.py +0 -0
  18. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/agent/vision_agent_prompts.py +0 -0
  19. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/clients/__init__.py +0 -0
  20. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/clients/http.py +0 -0
  21. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/clients/landing_public_api.py +0 -0
  22. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/fonts/__init__.py +0 -0
  23. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  24. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/lmm/__init__.py +0 -0
  25. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/lmm/lmm.py +0 -0
  26. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/lmm/types.py +0 -0
  27. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/tools/prompts.py +0 -0
  28. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/tools/tool_utils.py +0 -0
  29. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/tools/tools_types.py +0 -0
  30. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/utils/__init__.py +0 -0
  31. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/utils/exceptions.py +0 -0
  32. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/utils/image_utils.py +0 -0
  34. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/utils/sim.py +0 -0
  35. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/utils/type_defs.py +0 -0
  36. {vision_agent-0.2.161 → vision_agent-0.2.163}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.161
3
+ Version: 0.2.163
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -27,6 +27,7 @@ Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
27
27
  Requires-Dist: pydantic (==2.7.4)
28
28
  Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
29
29
  Requires-Dist: pytube (==15.0.0)
30
+ Requires-Dist: redbaron (>=0.9.2,<0.10.0)
30
31
  Requires-Dist: requests (>=2.0.0,<3.0.0)
31
32
  Requires-Dist: rich (>=13.7.1,<14.0.0)
32
33
  Requires-Dist: scipy (>=1.13.0,<1.14.0)
@@ -142,7 +143,7 @@ continuing, for example it may want to execute code and look at the output befor
142
143
  letting the user respond.
143
144
 
144
145
  ### Chatting and Artifacts
145
- If you run `chat_with_code` you will also notice an `Artifact` object. `Artifact`'s
146
+ If you run `chat_with_artifacts` you will also notice an `Artifact` object. `Artifact`s
146
147
  are a way to sync files between local and remote environments. The agent will read and
147
148
  write to the artifact object, which is just a pickle object, when it wants to save or
148
149
  load files.
@@ -159,7 +160,7 @@ with open("image.png", "rb") as f:
159
160
  artifacts["image.png"] = f.read()
160
161
 
161
162
  agent = va.agent.VisionAgent()
162
- response, artifacts = agent.chat_with_code(
163
+ response, artifacts = agent.chat_with_artifacts(
163
164
  [
164
165
  {
165
166
  "role": "user",
@@ -339,11 +340,11 @@ mode by passing in the verbose argument:
339
340
  ```
340
341
 
341
342
  ### Detailed Usage
342
- You can also have it return more information by calling `chat_with_workflow`. The format
343
+ You can also have it return more information by calling `generate_code`. The format
343
344
  of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
344
345
 
345
346
  ```python
346
- >>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
347
+ >>> results = agent.generate_code([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
347
348
  >>> print(results)
348
349
  {
349
350
  "code": "from vision_agent.tools import ..."
@@ -372,7 +373,7 @@ conv = [
372
373
  "media": ["workers.png"],
373
374
  }
374
375
  ]
375
- result = agent.chat_with_workflow(conv)
376
+ result = agent.generate_code(conv)
376
377
  code = result["code"]
377
378
  conv.append({"role": "assistant", "content": code})
378
379
  conv.append(
@@ -381,7 +382,7 @@ conv.append(
381
382
  "content": "Can you also return the number of workers wearing safety gear?",
382
383
  }
383
384
  )
384
- result = agent.chat_with_workflow(conv)
385
+ result = agent.generate_code(conv)
385
386
  ```
386
387
 
387
388
 
@@ -101,7 +101,7 @@ continuing, for example it may want to execute code and look at the output befor
101
101
  letting the user respond.
102
102
 
103
103
  ### Chatting and Artifacts
104
- If you run `chat_with_code` you will also notice an `Artifact` object. `Artifact`'s
104
+ If you run `chat_with_artifacts` you will also notice an `Artifact` object. `Artifact`s
105
105
  are a way to sync files between local and remote environments. The agent will read and
106
106
  write to the artifact object, which is just a pickle object, when it wants to save or
107
107
  load files.
@@ -118,7 +118,7 @@ with open("image.png", "rb") as f:
118
118
  artifacts["image.png"] = f.read()
119
119
 
120
120
  agent = va.agent.VisionAgent()
121
- response, artifacts = agent.chat_with_code(
121
+ response, artifacts = agent.chat_with_artifacts(
122
122
  [
123
123
  {
124
124
  "role": "user",
@@ -298,11 +298,11 @@ mode by passing in the verbose argument:
298
298
  ```
299
299
 
300
300
  ### Detailed Usage
301
- You can also have it return more information by calling `chat_with_workflow`. The format
301
+ You can also have it return more information by calling `generate_code`. The format
302
302
  of the input is a list of dictionaries with the keys `role`, `content`, and `media`:
303
303
 
304
304
  ```python
305
- >>> results = agent.chat_with_workflow([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
305
+ >>> results = agent.generate_code([{"role": "user", "content": "What percentage of the area of the jar is filled with coffee beans?", "media": ["jar.jpg"]}])
306
306
  >>> print(results)
307
307
  {
308
308
  "code": "from vision_agent.tools import ..."
@@ -331,7 +331,7 @@ conv = [
331
331
  "media": ["workers.png"],
332
332
  }
333
333
  ]
334
- result = agent.chat_with_workflow(conv)
334
+ result = agent.generate_code(conv)
335
335
  code = result["code"]
336
336
  conv.append({"role": "assistant", "content": code})
337
337
  conv.append(
@@ -340,7 +340,7 @@ conv.append(
340
340
  "content": "Can you also return the number of workers wearing safety gear?",
341
341
  }
342
342
  )
343
- result = agent.chat_with_workflow(conv)
343
+ result = agent.generate_code(conv)
344
344
  ```
345
345
 
346
346
 
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.161"
7
+ version = "0.2.163"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -43,6 +43,7 @@ pytube = "15.0.0"
43
43
  anthropic = "^0.31.0"
44
44
  pydantic = "2.7.4"
45
45
  av = "^11.0.0"
46
+ redbaron = "^0.9.2"
46
47
 
47
48
  [tool.poetry.group.dev.dependencies]
48
49
  autoflake = "1.*"
@@ -7,3 +7,11 @@ from .vision_agent_coder import (
7
7
  OpenAIVisionAgentCoder,
8
8
  VisionAgentCoder,
9
9
  )
10
+ from .vision_agent_planner import (
11
+ AnthropicVisionAgentPlanner,
12
+ AzureVisionAgentPlanner,
13
+ OllamaVisionAgentPlanner,
14
+ OpenAIVisionAgentPlanner,
15
+ PlanContext,
16
+ VisionAgentPlanner,
17
+ )
@@ -0,0 +1,181 @@
1
+ import json
2
+ import logging
3
+ import re
4
+ import sys
5
+ from typing import Any, Dict, List, Optional
6
+
7
+ from rich.console import Console
8
+ from rich.style import Style
9
+ from rich.syntax import Syntax
10
+
11
+ import vision_agent.tools as T
12
+
13
+ logging.basicConfig(stream=sys.stdout)
14
+ _LOGGER = logging.getLogger(__name__)
15
+ _CONSOLE = Console()
16
+ _MAX_TABULATE_COL_WIDTH = 80
17
+
18
+
19
+ def _extract_sub_json(json_str: str) -> Optional[Dict[str, Any]]:
20
+ json_pattern = r"\{.*\}"
21
+ match = re.search(json_pattern, json_str, re.DOTALL)
22
+ if match:
23
+ json_str = match.group()
24
+ try:
25
+ # remove trailing comma
26
+ trailing_bracket_pattern = r",\s+\}"
27
+ json_str = re.sub(trailing_bracket_pattern, "}", json_str, flags=re.DOTALL)
28
+
29
+ json_dict = json.loads(json_str)
30
+ return json_dict # type: ignore
31
+ except json.JSONDecodeError:
32
+ return None
33
+ return None
34
+
35
+
36
+ def _find_markdown_json(json_str: str) -> str:
37
+ pattern = r"```json(.*?)```"
38
+ match = re.search(pattern, json_str, re.DOTALL)
39
+ if match:
40
+ return match.group(1).strip()
41
+ return json_str
42
+
43
+
44
+ def _strip_markdown_code(inp_str: str) -> str:
45
+ pattern = r"```python.*?```"
46
+ cleaned_str = re.sub(pattern, "", inp_str, flags=re.DOTALL)
47
+ return cleaned_str
48
+
49
+
50
def extract_json(json_str: str) -> Dict[str, Any]:
    """Decode JSON from a (possibly sloppy) model response.

    Attempts, in order:
      1. the text with newlines flattened to spaces, parsed as-is;
      2. the same text with Python-style ``: True`` / ``: False`` lowered and
         single quotes swapped for double quotes;
      3. the same text with only the boolean rewrite applied;
      4. a fallback that strips ```python blocks, looks inside a ```json
         block and parses the outermost ``{...}`` span.

    The strict parse runs first so that valid JSON is never altered by the
    repair heuristics (e.g. a string value containing ``: True`` or quotes).

    Parameters:
        json_str (str): Raw text expected to contain a JSON object.

    Returns:
        Dict[str, Any]: The decoded JSON value.

    Raises:
        ValueError: If no attempt yields valid JSON.
    """
    flattened = json_str.replace("\n", " ").strip()

    # Already-valid JSON must be returned untouched.
    try:
        return json.loads(flattened)  # type: ignore
    except json.JSONDecodeError:
        pass

    pythonish = flattened.replace(": True", ": true").replace(": False", ": false")

    # sometimes the json is in single quotes
    try:
        return json.loads(pythonish.replace("'", '"'))  # type: ignore
    except json.JSONDecodeError:
        pass

    try:
        return json.loads(pythonish)  # type: ignore
    except json.JSONDecodeError:
        # don't replace quotes here or booleans since it can also introduce errors
        remaining = _strip_markdown_code(flattened)
        remaining = _find_markdown_json(remaining)
        json_dict = _extract_sub_json(remaining)

        if json_dict is None:
            error_msg = f"Could not extract JSON from the given str: {json_str}"
            _LOGGER.exception(error_msg)
            raise ValueError(error_msg)

        return json_dict
78
+
79
+
80
def extract_code(code: str) -> str:
    """Return the contents of the first ```python fenced block in ``code``.

    Parameters:
        code (str): Text that may wrap code in a ```python fence.

    Returns:
        str: The text between the opening ```python marker and the next
            ``` marker. If the block is never closed, everything after the
            opening marker is returned. If there is no ```python marker the
            input is returned unchanged.
    """
    # Prefer a fence that starts on its own line, then fall back to any fence.
    for fence in ("\n```python", "```python"):
        start = code.find(fence)
        if start != -1:
            break
    else:
        return code

    body = code[start + len(fence) :]
    end = body.find("```")
    # A missing closing fence yields -1; slicing with it would silently drop
    # the last character, so only truncate when the fence really exists.
    if end != -1:
        body = body[:end]
    if body.startswith("python\n"):
        body = body[len("python\n") :]
    return body
93
+
94
+
95
def extract_tag(
    content: str,
    tag: str,
) -> Optional[str]:
    """Collect the text inside every ``<tag>...</tag>`` pair in ``content``.

    Parameters:
        content (str): Text to scan.
        tag (str): Tag name without angle brackets.

    Returns:
        Optional[str]: The inner texts joined with newlines, in order of
            appearance, or None when no complete pair exists. Scanning stops
            at the first opening tag that has no closing tag after it.
    """
    open_tag = f"<{tag}>"
    close_tag = f"</{tag}>"
    pieces = []
    cursor = 0

    while True:
        start = content.find(open_tag, cursor)
        if start == -1:
            break
        start += len(open_tag)
        # Look for the closing tag only after the opening tag, so a stray
        # closing tag earlier in the text cannot cause the same section to be
        # collected twice.
        end = content.find(close_tag, start)
        if end == -1:
            break
        pieces.append(content[start:end])
        cursor = end + len(close_tag)

    if not pieces:
        return None
    return "\n".join(pieces)
114
+
115
+
116
def remove_installs_from_code(code: str) -> str:
    """Delete every line that starts with ``!pip install`` from ``code``.

    Each matching line is removed together with its own line terminator;
    all other lines, including blank ones, are left untouched. A pip line at
    the very start or very end of the text is handled as well.

    Parameters:
        code (str): Source text that may contain notebook-style pip lines.

    Returns:
        str: The text without the ``!pip install`` lines.
    """
    # Match one line at a time: ``[^\n]*`` cannot cross a newline, so code
    # following the pip line is never swallowed.
    pattern = r"^!pip install[^\n]*\n?"
    return re.sub(pattern, "", code, flags=re.MULTILINE)
120
+
121
+
122
def format_memory(memory: List[Dict[str, str]]) -> str:
    """Render feedback entries as numbered markdown sections.

    Each entry contributes its ``code`` (inside a python fence), its
    ``feedback`` and, when present, its ``edits``; entries are numbered by
    their position in ``memory``.
    """
    chunks = []
    for idx, entry in enumerate(memory):
        chunks.append(f"### Feedback {idx}:\n")
        chunks.append(f"Code {idx}:\n```python\n{entry['code']}```\n\n")
        chunks.append(f"Feedback {idx}: {entry['feedback']}\n\n")
        if "edits" in entry:
            chunks.append(f"Edits {idx}:\n{entry['edits']}\n")
        chunks.append("\n")
    return "".join(chunks)
133
+
134
+
135
def format_plans(plans: Dict[str, Any]) -> str:
    """Render each plan as a ``name: thoughts`` line followed by its instructions.

    Every plan value is read through its ``thoughts`` and ``instructions``
    keys; each instruction is emitted on its own dash-prefixed line.
    """
    sections = []
    for name, plan in plans.items():
        header = "\n" + f"{name}: {plan['thoughts']}\n"
        steps = " -" + "\n -".join(plan["instructions"])
        sections.append(header + steps)
    return "".join(sections)
142
+
143
+
144
class DefaultImports:
    """Holds the import statements placed in front of code before it is run or shown."""

    # Statements that are always included, in this order.
    common_imports = [
        "import os",
        "import numpy as np",
        "from vision_agent.tools import *",
        "from typing import *",
        "from pillow_heif import register_heif_opener",
        "register_heif_opener()",
    ]

    @staticmethod
    def to_code_string() -> str:
        """Return the common imports followed by ``T.__new_tools__``, one per line."""
        import_lines = DefaultImports.common_imports + T.__new_tools__
        return "\n".join(import_lines)

    @staticmethod
    def prepend_imports(code: str) -> str:
        """Return ``code`` with the default import block and a blank line in front.

        NOTE: be sure to run this method after the custom tools have been registered.
        """
        header = DefaultImports.to_code_string()
        return header + "\n\n" + code
166
+
167
+
168
def print_code(title: str, code: str, test: Optional[str] = None) -> None:
    """Print a titled, syntax-highlighted listing of ``code`` to the console.

    The code is shown with the default imports prepended. When ``test`` is
    given and non-empty it is printed in a second highlighted section.
    """
    rule = "=" * 30
    title_style = Style(bgcolor="dark_orange3", bold=True)
    highlighted_code = Syntax(
        DefaultImports.prepend_imports(code),
        "python",
        theme="gruvbox-dark",
        line_numbers=True,
    )

    _CONSOLE.print(title, style=title_style)
    _CONSOLE.print(rule + " Code " + rule)
    _CONSOLE.print(highlighted_code)

    if not test:
        return
    _CONSOLE.print(rule + " Test " + rule)
    _CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))
@@ -14,8 +14,8 @@ from vision_agent.agent.vision_agent_prompts import (
14
14
  VA_CODE,
15
15
  )
16
16
  from vision_agent.lmm import LMM, AnthropicLMM, Message, OpenAILMM
17
- from vision_agent.tools import META_TOOL_DOCSTRING
18
17
  from vision_agent.tools.meta_tools import (
18
+ META_TOOL_DOCSTRING,
19
19
  Artifacts,
20
20
  check_and_load_image,
21
21
  use_extra_vision_agent_args,
@@ -103,7 +103,7 @@ def execute_code_action(
103
103
  def parse_execution(
104
104
  response: str,
105
105
  test_multi_plan: bool = True,
106
- customed_tool_names: Optional[List[str]] = None,
106
+ custom_tool_names: Optional[List[str]] = None,
107
107
  ) -> Optional[str]:
108
108
  code = None
109
109
  remaining = response
@@ -122,7 +122,7 @@ def parse_execution(
122
122
  code = "\n".join(all_code)
123
123
 
124
124
  if code is not None:
125
- code = use_extra_vision_agent_args(code, test_multi_plan, customed_tool_names)
125
+ code = use_extra_vision_agent_args(code, test_multi_plan, custom_tool_names)
126
126
  return code
127
127
 
128
128
 
@@ -195,9 +195,8 @@ class VisionAgent(Agent):
195
195
  agent: Optional[LMM] = None,
196
196
  verbosity: int = 0,
197
197
  local_artifacts_path: Optional[Union[str, Path]] = None,
198
- code_sandbox_runtime: Optional[str] = None,
199
198
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
200
- code_interpreter: Optional[CodeInterpreter] = None,
199
+ code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
201
200
  ) -> None:
202
201
  """Initialize the VisionAgent.
203
202
 
@@ -207,14 +206,17 @@ class VisionAgent(Agent):
207
206
  verbosity (int): The verbosity level of the agent.
208
207
  local_artifacts_path (Optional[Union[str, Path]]): The path to the local
209
208
  artifacts file.
210
- code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
211
- code_interpreter (Optional[CodeInterpreter]): if not None, use this CodeInterpreter
209
+ callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
210
+ function to send intermediate update messages.
211
+ code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
212
+ it can be one of: None, "local" or "e2b". If None, it will read from
213
+ the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
214
+ object is provided it will use that.
212
215
  """
213
216
 
214
217
  self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent
215
218
  self.max_iterations = 12
216
219
  self.verbosity = verbosity
217
- self.code_sandbox_runtime = code_sandbox_runtime
218
220
  self.code_interpreter = code_interpreter
219
221
  self.callback_message = callback_message
220
222
  if self.verbosity >= 1:
@@ -233,7 +235,7 @@ class VisionAgent(Agent):
233
235
  input: Union[str, List[Message]],
234
236
  media: Optional[Union[str, Path]] = None,
235
237
  artifacts: Optional[Artifacts] = None,
236
- ) -> List[Message]:
238
+ ) -> str:
237
239
  """Chat with VisionAgent and get the conversation response.
238
240
 
239
241
  Parameters:
@@ -250,15 +252,33 @@ class VisionAgent(Agent):
250
252
  input = [{"role": "user", "content": input}]
251
253
  if media is not None:
252
254
  input[0]["media"] = [media]
253
- results, _ = self.chat_with_code(input, artifacts)
254
- return results
255
+ results, _ = self.chat_with_artifacts(input, artifacts)
256
+ return results[-1]["content"] # type: ignore
257
+
258
def chat(
    self,
    chat: List[Message],
) -> List[Message]:
    """Run a conversation through ``chat_with_artifacts`` and return only the messages.

    Parameters:
        chat (List[Message]): A conversation in the format of:
            [{"role": "user", "content": "describe your task here..."}]
            or if it contains media files, it should be in the format of:
            [{"role": "user", "content": "describe your task here...", "media": ["image1.jpg", "image2.jpg"]}]

    Returns:
        List[Message]: The conversation response, i.e. the first element of
            the value returned by ``chat_with_artifacts``.
    """
    conversation_and_artifacts = self.chat_with_artifacts(chat)
    return conversation_and_artifacts[0]
255
275
 
256
- def chat_with_code(
276
+ def chat_with_artifacts(
257
277
  self,
258
278
  chat: List[Message],
259
279
  artifacts: Optional[Artifacts] = None,
260
280
  test_multi_plan: bool = True,
261
- customized_tool_names: Optional[List[str]] = None,
281
+ custom_tool_names: Optional[List[str]] = None,
262
282
  ) -> Tuple[List[Message], Artifacts]:
263
283
  """Chat with VisionAgent, it will use code to execute actions to accomplish
264
284
  its tasks.
@@ -272,7 +292,7 @@ class VisionAgent(Agent):
272
292
  test_multi_plan (bool): If True, it will test tools for multiple plans and
273
293
  pick the best one based off of the tool results. If False, it will go
274
294
  with the first plan.
275
- customized_tool_names (List[str]): A list of customized tools for agent to
295
+ custom_tool_names (List[str]): A list of customized tools for agent to
276
296
  pick and use. If not provided, default to full tool set from
277
297
  vision_agent.tools.
278
298
 
@@ -287,11 +307,13 @@ class VisionAgent(Agent):
287
307
  # this is setting remote artifacts path
288
308
  artifacts = Artifacts(WORKSPACE / "artifacts.pkl")
289
309
 
310
+ # NOTE: each chat should have a dedicated code interpreter instance to avoid concurrency issues
290
311
  code_interpreter = (
291
312
  self.code_interpreter
292
313
  if self.code_interpreter is not None
314
+ and not isinstance(self.code_interpreter, str)
293
315
  else CodeInterpreterFactory.new_instance(
294
- code_sandbox_runtime=self.code_sandbox_runtime,
316
+ code_sandbox_runtime=self.code_interpreter,
295
317
  )
296
318
  )
297
319
  with code_interpreter:
@@ -389,7 +411,7 @@ class VisionAgent(Agent):
389
411
  finished = response["let_user_respond"]
390
412
 
391
413
  code_action = parse_execution(
392
- response["response"], test_multi_plan, customized_tool_names
414
+ response["response"], test_multi_plan, custom_tool_names
393
415
  )
394
416
 
395
417
  if last_response == response:
@@ -480,8 +502,8 @@ class OpenAIVisionAgent(VisionAgent):
480
502
  agent: Optional[LMM] = None,
481
503
  verbosity: int = 0,
482
504
  local_artifacts_path: Optional[Union[str, Path]] = None,
483
- code_sandbox_runtime: Optional[str] = None,
484
505
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
506
+ code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
485
507
  ) -> None:
486
508
  """Initialize the VisionAgent using OpenAI LMMs.
487
509
 
@@ -491,7 +513,12 @@ class OpenAIVisionAgent(VisionAgent):
491
513
  verbosity (int): The verbosity level of the agent.
492
514
  local_artifacts_path (Optional[Union[str, Path]]): The path to the local
493
515
  artifacts file.
494
- code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
516
+ callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
517
+ function to send intermediate update messages.
518
+ code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
519
+ it can be one of: None, "local" or "e2b". If None, it will read from
520
+ the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
521
+ object is provided it will use that.
495
522
  """
496
523
 
497
524
  agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
@@ -499,8 +526,8 @@ class OpenAIVisionAgent(VisionAgent):
499
526
  agent,
500
527
  verbosity,
501
528
  local_artifacts_path,
502
- code_sandbox_runtime,
503
529
  callback_message,
530
+ code_interpreter,
504
531
  )
505
532
 
506
533
 
@@ -510,8 +537,8 @@ class AnthropicVisionAgent(VisionAgent):
510
537
  agent: Optional[LMM] = None,
511
538
  verbosity: int = 0,
512
539
  local_artifacts_path: Optional[Union[str, Path]] = None,
513
- code_sandbox_runtime: Optional[str] = None,
514
540
  callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
541
+ code_interpreter: Optional[Union[str, CodeInterpreter]] = None,
515
542
  ) -> None:
516
543
  """Initialize the VisionAgent using Anthropic LMMs.
517
544
 
@@ -521,7 +548,12 @@ class AnthropicVisionAgent(VisionAgent):
521
548
  verbosity (int): The verbosity level of the agent.
522
549
  local_artifacts_path (Optional[Union[str, Path]]): The path to the local
523
550
  artifacts file.
524
- code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
551
+ callback_message (Optional[Callable[[Dict[str, Any]], None]]): Callback
552
+ function to send intermediate update messages.
553
+ code_interpreter (Optional[Union[str, CodeInterpreter]]): For string values
554
+ it can be one of: None, "local" or "e2b". If None, it will read from
555
+ the environment variable "CODE_SANDBOX_RUNTIME". If a CodeInterpreter
556
+ object is provided it will use that.
525
557
  """
526
558
 
527
559
  agent = AnthropicLMM(temperature=0.0) if agent is None else agent
@@ -529,6 +561,6 @@ class AnthropicVisionAgent(VisionAgent):
529
561
  agent,
530
562
  verbosity,
531
563
  local_artifacts_path,
532
- code_sandbox_runtime,
533
564
  callback_message,
565
+ code_interpreter,
534
566
  )