vision-agent 0.2.140__tar.gz → 0.2.142__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. {vision_agent-0.2.140 → vision_agent-0.2.142}/PKG-INFO +60 -12
  2. {vision_agent-0.2.140 → vision_agent-0.2.142}/README.md +59 -11
  3. {vision_agent-0.2.140 → vision_agent-0.2.142}/pyproject.toml +1 -1
  4. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/agent/__init__.py +2 -1
  5. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/agent/agent_utils.py +8 -2
  6. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/agent/vision_agent.py +97 -17
  7. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/agent/vision_agent_coder.py +93 -66
  8. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/agent/vision_agent_coder_prompts.py +53 -19
  9. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/agent/vision_agent_prompts.py +31 -9
  10. vision_agent-0.2.142/vision_agent/lmm/__init__.py +2 -0
  11. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/lmm/lmm.py +6 -9
  12. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/tools/__init__.py +1 -1
  13. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/tools/meta_tools.py +65 -33
  14. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/tools/tools.py +115 -30
  15. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/tools/tools_types.py +1 -0
  16. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/utils/image_utils.py +18 -7
  17. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/utils/video.py +2 -1
  18. vision_agent-0.2.140/vision_agent/lmm/__init__.py +0 -2
  19. {vision_agent-0.2.140 → vision_agent-0.2.142}/LICENSE +0 -0
  20. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/__init__.py +0 -0
  21. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/agent/agent.py +0 -0
  22. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/clients/__init__.py +0 -0
  23. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/clients/http.py +0 -0
  24. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/clients/landing_public_api.py +0 -0
  25. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/fonts/__init__.py +0 -0
  26. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  27. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/lmm/types.py +0 -0
  28. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/tools/prompts.py +0 -0
  29. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/tools/tool_utils.py +0 -0
  30. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/utils/__init__.py +0 -0
  31. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/utils/exceptions.py +0 -0
  32. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/utils/execute.py +0 -0
  33. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/utils/sim.py +0 -0
  34. {vision_agent-0.2.140 → vision_agent-0.2.142}/vision_agent/utils/type_defs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.140
3
+ Version: 0.2.142
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -74,10 +74,11 @@ To get started, you can install the library using pip:
74
74
  pip install vision-agent
75
75
  ```
76
76
 
77
- Ensure you have an OpenAI API key and set it as an environment variable (if you are
78
- using Azure OpenAI please see the Azure setup section):
77
+ Ensure you have an Anthropic key and an OpenAI API key and set them in your environment
78
+ variables (if you are using Azure OpenAI please see the Azure setup section):
79
79
 
80
80
  ```bash
81
+ export ANTHROPIC_API_KEY="your-api-key"
81
82
  export OPENAI_API_KEY="your-api-key"
82
83
  ```
83
84
 
@@ -112,6 +113,9 @@ You can find more details about the streamlit app [here](examples/chat/).
112
113
  >>> resp = agent(resp)
113
114
  ```
114
115
 
116
+ `VisionAgent` currently utilizes Claude-3.5 as its default LMM and uses OpenAI for
117
+ embeddings for tool searching.
118
+
115
119
  ### Vision Agent Coder
116
120
  #### Basic Usage
117
121
  You can interact with the agent as you would with any LLM or LMM model:
@@ -173,7 +177,8 @@ of the input is a list of dictionaries with the keys `role`, `content`, and `med
173
177
  "code": "from vision_agent.tools import ..."
174
178
  "test": "calculate_filled_percentage('jar.jpg')",
175
179
  "test_result": "...",
176
- "plan": [{"code": "...", "test": "...", "plan": "..."}, ...],
180
+ "plans": {"plan1": {"thoughts": "..."}, ...},
181
+ "plan_thoughts": "...",
177
182
  "working_memory": ...,
178
183
  }
179
184
  ```
@@ -210,20 +215,25 @@ result = agent.chat_with_workflow(conv)
210
215
  ### Tools
211
216
  There are a variety of tools for the model or the user to use. Some are executed locally
212
217
  while others are hosted for you. You can easily access them yourself, for example if
213
- you want to run `owl_v2` and visualize the output you can run:
218
+ you want to run `owl_v2_image` and visualize the output you can run:
214
219
 
215
220
  ```python
216
221
  import vision_agent.tools as T
217
222
  import matplotlib.pyplot as plt
218
223
 
219
224
  image = T.load_image("dogs.jpg")
220
- dets = T.owl_v2("dogs", image)
225
+ dets = T.owl_v2_image("dogs", image)
221
226
  viz = T.overlay_bounding_boxes(image, dets)
222
227
  plt.imshow(viz)
223
228
  plt.show()
224
229
  ```
225
230
 
226
- You can also add custom tools to the agent:
231
+ You can find all available tools in `vision_agent/tools/tools.py`; however,
232
+ `VisionAgentCoder` only utilizes a subset of tools that have been tested and provide
233
+ the best performance. Those can be found in the same file under the `TOOLS` variable.
234
+
235
+ If you can't find the tool you are looking for you can also add custom tools to the
236
+ agent:
227
237
 
228
238
  ```python
229
239
  import vision_agent as va
@@ -258,9 +268,48 @@ Can't find the tool you need and want add it to `VisionAgent`? Check out our
258
268
  we add the source code for all the tools used in `VisionAgent`.
259
269
 
260
270
  ## Additional Backends
271
+ ### Anthropic
272
+ `AnthropicVisionAgentCoder` uses Anthropic. To get started you just need to get an
273
+ Anthropic API key and set it in your environment variables:
274
+
275
+ ```bash
276
+ export ANTHROPIC_API_KEY="your-api-key"
277
+ ```
278
+
279
+ Because Anthropic does not support embedding models, the default embedding model used
280
+ is the OpenAI model so you will also need to set your OpenAI API key:
281
+
282
+ ```bash
283
+ export OPENAI_API_KEY="your-api-key"
284
+ ```
285
+
286
+ Usage is the same as `VisionAgentCoder`:
287
+
288
+ ```python
289
+ >>> import vision_agent as va
290
+ >>> agent = va.agent.AnthropicVisionAgentCoder()
291
+ >>> agent("Count the apples in the image", media="apples.jpg")
292
+ ```
293
+
294
+ ### OpenAI
295
+ `OpenAIVisionAgentCoder` uses OpenAI. To get started you just need to get an OpenAI API
296
+ key and set it in your environment variables:
297
+
298
+ ```bash
299
+ export OPENAI_API_KEY="your-api-key"
300
+ ```
301
+
302
+ Usage is the same as `VisionAgentCoder`:
303
+
304
+ ```python
305
+ >>> import vision_agent as va
306
+ >>> agent = va.agent.OpenAIVisionAgentCoder()
307
+ >>> agent("Count the apples in the image", media="apples.jpg")
308
+ ```
309
+
310
+
261
311
  ### Ollama
262
- We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
263
- a few models:
312
+ `OllamaVisionAgentCoder` uses Ollama. To get started you must download a few models:
264
313
 
265
314
  ```bash
266
315
  ollama pull llama3.1
@@ -281,9 +330,8 @@ tools. You can use it just like you would use `VisionAgentCoder`:
281
330
  > WARNING: VisionAgent doesn't work well unless the underlying LMM is sufficiently powerful. Do not expect good results or even working code with smaller models like Llama 3.1 8B.
282
331
 
283
332
  ### Azure OpenAI
284
- We also provide a `AzureVisionAgentCoder` that uses Azure OpenAI models. To get started
285
- follow the Azure Setup section below. You can use it just like you would use=
286
- `VisionAgentCoder`:
333
+ `AzureVisionAgentCoder` uses Azure OpenAI models. To get started follow the Azure Setup
334
+ section below. You can use it just like you would use `VisionAgentCoder`:
287
335
 
288
336
  ```python
289
337
  >>> import vision_agent as va
@@ -33,10 +33,11 @@ To get started, you can install the library using pip:
33
33
  pip install vision-agent
34
34
  ```
35
35
 
36
- Ensure you have an OpenAI API key and set it as an environment variable (if you are
37
- using Azure OpenAI please see the Azure setup section):
36
+ Ensure you have an Anthropic key and an OpenAI API key and set them in your environment
37
+ variables (if you are using Azure OpenAI please see the Azure setup section):
38
38
 
39
39
  ```bash
40
+ export ANTHROPIC_API_KEY="your-api-key"
40
41
  export OPENAI_API_KEY="your-api-key"
41
42
  ```
42
43
 
@@ -71,6 +72,9 @@ You can find more details about the streamlit app [here](examples/chat/).
71
72
  >>> resp = agent(resp)
72
73
  ```
73
74
 
75
+ `VisionAgent` currently utilizes Claude-3.5 as its default LMM and uses OpenAI for
76
+ embeddings for tool searching.
77
+
74
78
  ### Vision Agent Coder
75
79
  #### Basic Usage
76
80
  You can interact with the agent as you would with any LLM or LMM model:
@@ -132,7 +136,8 @@ of the input is a list of dictionaries with the keys `role`, `content`, and `med
132
136
  "code": "from vision_agent.tools import ..."
133
137
  "test": "calculate_filled_percentage('jar.jpg')",
134
138
  "test_result": "...",
135
- "plan": [{"code": "...", "test": "...", "plan": "..."}, ...],
139
+ "plans": {"plan1": {"thoughts": "..."}, ...},
140
+ "plan_thoughts": "...",
136
141
  "working_memory": ...,
137
142
  }
138
143
  ```
@@ -169,20 +174,25 @@ result = agent.chat_with_workflow(conv)
169
174
  ### Tools
170
175
  There are a variety of tools for the model or the user to use. Some are executed locally
171
176
  while others are hosted for you. You can easily access them yourself, for example if
172
- you want to run `owl_v2` and visualize the output you can run:
177
+ you want to run `owl_v2_image` and visualize the output you can run:
173
178
 
174
179
  ```python
175
180
  import vision_agent.tools as T
176
181
  import matplotlib.pyplot as plt
177
182
 
178
183
  image = T.load_image("dogs.jpg")
179
- dets = T.owl_v2("dogs", image)
184
+ dets = T.owl_v2_image("dogs", image)
180
185
  viz = T.overlay_bounding_boxes(image, dets)
181
186
  plt.imshow(viz)
182
187
  plt.show()
183
188
  ```
184
189
 
185
- You can also add custom tools to the agent:
190
+ You can find all available tools in `vision_agent/tools/tools.py`; however,
191
+ `VisionAgentCoder` only utilizes a subset of tools that have been tested and provide
192
+ the best performance. Those can be found in the same file under the `TOOLS` variable.
193
+
194
+ If you can't find the tool you are looking for you can also add custom tools to the
195
+ agent:
186
196
 
187
197
  ```python
188
198
  import vision_agent as va
@@ -217,9 +227,48 @@ Can't find the tool you need and want add it to `VisionAgent`? Check out our
217
227
  we add the source code for all the tools used in `VisionAgent`.
218
228
 
219
229
  ## Additional Backends
230
+ ### Anthropic
231
+ `AnthropicVisionAgentCoder` uses Anthropic. To get started you just need to get an
232
+ Anthropic API key and set it in your environment variables:
233
+
234
+ ```bash
235
+ export ANTHROPIC_API_KEY="your-api-key"
236
+ ```
237
+
238
+ Because Anthropic does not support embedding models, the default embedding model used
239
+ is the OpenAI model so you will also need to set your OpenAI API key:
240
+
241
+ ```bash
242
+ export OPENAI_API_KEY="your-api-key"
243
+ ```
244
+
245
+ Usage is the same as `VisionAgentCoder`:
246
+
247
+ ```python
248
+ >>> import vision_agent as va
249
+ >>> agent = va.agent.AnthropicVisionAgentCoder()
250
+ >>> agent("Count the apples in the image", media="apples.jpg")
251
+ ```
252
+
253
+ ### OpenAI
254
+ `OpenAIVisionAgentCoder` uses OpenAI. To get started you just need to get an OpenAI API
255
+ key and set it in your environment variables:
256
+
257
+ ```bash
258
+ export OPENAI_API_KEY="your-api-key"
259
+ ```
260
+
261
+ Usage is the same as `VisionAgentCoder`:
262
+
263
+ ```python
264
+ >>> import vision_agent as va
265
+ >>> agent = va.agent.OpenAIVisionAgentCoder()
266
+ >>> agent("Count the apples in the image", media="apples.jpg")
267
+ ```
268
+
269
+
220
270
  ### Ollama
221
- We also provide a `VisionAgentCoder` that uses Ollama. To get started you must download
222
- a few models:
271
+ `OllamaVisionAgentCoder` uses Ollama. To get started you must download a few models:
223
272
 
224
273
  ```bash
225
274
  ollama pull llama3.1
@@ -240,9 +289,8 @@ tools. You can use it just like you would use `VisionAgentCoder`:
240
289
  > WARNING: VisionAgent doesn't work well unless the underlying LMM is sufficiently powerful. Do not expect good results or even working code with smaller models like Llama 3.1 8B.
241
290
 
242
291
  ### Azure OpenAI
243
- We also provide a `AzureVisionAgentCoder` that uses Azure OpenAI models. To get started
244
- follow the Azure Setup section below. You can use it just like you would use=
245
- `VisionAgentCoder`:
292
+ `AzureVisionAgentCoder` uses Azure OpenAI models. To get started follow the Azure Setup
293
+ section below. You can use it just like you would use `VisionAgentCoder`:
246
294
 
247
295
  ```python
248
296
  >>> import vision_agent as va
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.140"
7
+ version = "0.2.142"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -1,8 +1,9 @@
1
1
  from .agent import Agent
2
2
  from .vision_agent import VisionAgent
3
3
  from .vision_agent_coder import (
4
+ AnthropicVisionAgentCoder,
4
5
  AzureVisionAgentCoder,
5
- ClaudeVisionAgentCoder,
6
6
  OllamaVisionAgentCoder,
7
+ OpenAIVisionAgentCoder,
7
8
  VisionAgentCoder,
8
9
  )
@@ -40,12 +40,18 @@ def _strip_markdown_code(inp_str: str) -> str:
40
40
 
41
41
 
42
42
  def extract_json(json_str: str) -> Dict[str, Any]:
43
- json_str = json_str.replace("\n", " ").strip()
43
+ json_str_mod = json_str.replace("\n", " ").strip()
44
+ json_str_mod = json_str_mod.replace("'", '"')
45
+ json_str_mod = json_str_mod.replace(": True", ": true").replace(
46
+ ": False", ": false"
47
+ )
44
48
 
45
49
  try:
46
- return json.loads(json_str) # type: ignore
50
+ return json.loads(json_str_mod) # type: ignore
47
51
  except json.JSONDecodeError:
48
52
  json_orig = json_str
53
+ # don't replace quotes here or booleans since it can also introduce errors
54
+ json_str = json_str.replace("\n", " ").strip()
49
55
  json_str = _strip_markdown_code(json_str)
50
56
  json_str = _find_markdown_json(json_str)
51
57
  json_dict = _extract_sub_json(json_str)
@@ -3,18 +3,23 @@ import logging
3
3
  import os
4
4
  import tempfile
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
6
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
7
7
 
8
8
  from vision_agent.agent import Agent
9
9
  from vision_agent.agent.agent_utils import extract_json
10
10
  from vision_agent.agent.vision_agent_prompts import (
11
11
  EXAMPLES_CODE1,
12
12
  EXAMPLES_CODE2,
13
+ EXAMPLES_CODE3,
13
14
  VA_CODE,
14
15
  )
15
- from vision_agent.lmm import LMM, Message, OpenAILMM
16
+ from vision_agent.lmm import LMM, AnthropicLMM, Message, OpenAILMM
16
17
  from vision_agent.tools import META_TOOL_DOCSTRING
17
- from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
18
+ from vision_agent.tools.meta_tools import (
19
+ Artifacts,
20
+ check_and_load_image,
21
+ use_extra_vision_agent_args,
22
+ )
18
23
  from vision_agent.utils import CodeInterpreterFactory
19
24
  from vision_agent.utils.execute import CodeInterpreter, Execution
20
25
 
@@ -30,7 +35,7 @@ class BoilerplateCode:
30
35
  pre_code = [
31
36
  "from typing import *",
32
37
  "from vision_agent.utils.execute import CodeInterpreter",
33
- "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, write_media_artifact, florence2_fine_tuning, use_florence2_fine_tuning",
38
+ "from vision_agent.tools.meta_tools import Artifacts, open_code_artifact, create_code_artifact, edit_code_artifact, get_tool_descriptions, generate_vision_code, edit_vision_code, write_media_artifact, view_media_artifact, object_detection_fine_tuning, use_object_detection_fine_tuning",
34
39
  "artifacts = Artifacts('{remote_path}')",
35
40
  "artifacts.load('{remote_path}')",
36
41
  ]
@@ -68,10 +73,18 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
68
73
 
69
74
  prompt = VA_CODE.format(
70
75
  documentation=META_TOOL_DOCSTRING,
71
- examples=f"{EXAMPLES_CODE1}\n{EXAMPLES_CODE2}",
76
+ examples=f"{EXAMPLES_CODE1}\n{EXAMPLES_CODE2}\n{EXAMPLES_CODE3}",
72
77
  conversation=conversation,
73
78
  )
74
- return extract_json(orch([{"role": "user", "content": prompt}], stream=False)) # type: ignore
79
+ message: Message = {"role": "user", "content": prompt}
80
+ # only add recent media so we don't overload the model with old images
81
+ if (
82
+ chat[-1]["role"] == "observation"
83
+ and "media" in chat[-1]
84
+ and len(chat[-1]["media"]) > 0 # type: ignore
85
+ ):
86
+ message["media"] = chat[-1]["media"]
87
+ return extract_json(orch([message], stream=False)) # type: ignore
75
88
 
76
89
 
77
90
  def run_code_action(
@@ -136,10 +149,8 @@ class VisionAgent(Agent):
136
149
  code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
137
150
  """
138
151
 
139
- self.agent = (
140
- OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
141
- )
142
- self.max_iterations = 100
152
+ self.agent = AnthropicLMM(temperature=0.0) if agent is None else agent
153
+ self.max_iterations = 12
143
154
  self.verbosity = verbosity
144
155
  self.code_sandbox_runtime = code_sandbox_runtime
145
156
  self.callback_message = callback_message
@@ -267,7 +278,8 @@ class VisionAgent(Agent):
267
278
  orig_chat.append({"role": "observation", "content": artifacts_loaded})
268
279
  self.streaming_message({"role": "observation", "content": artifacts_loaded})
269
280
 
270
- if isinstance(last_user_message_content, str):
281
+ if int_chat[-1]["role"] == "user":
282
+ last_user_message_content = cast(str, int_chat[-1].get("content", ""))
271
283
  user_code_action = parse_execution(last_user_message_content, False)
272
284
  if user_code_action is not None:
273
285
  user_result, user_obs = run_code_action(
@@ -309,8 +321,7 @@ class VisionAgent(Agent):
309
321
  else:
310
322
  self.streaming_message({"role": "assistant", "content": response})
311
323
 
312
- if response["let_user_respond"]:
313
- break
324
+ finished = response["let_user_respond"]
314
325
 
315
326
  code_action = parse_execution(
316
327
  response["response"], test_multi_plan, customized_tool_names
@@ -321,13 +332,22 @@ class VisionAgent(Agent):
321
332
  code_action, code_interpreter, str(remote_artifacts_path)
322
333
  )
323
334
 
335
+ media_obs = check_and_load_image(code_action)
336
+
324
337
  if self.verbosity >= 1:
325
338
  _LOGGER.info(obs)
339
+
340
+ chat_elt: Message = {"role": "observation", "content": obs}
341
+ if media_obs and result.success:
342
+ chat_elt["media"] = [
343
+ Path(code_interpreter.remote_path) / media_ob
344
+ for media_ob in media_obs
345
+ ]
346
+
326
347
  # don't add execution results to internal chat
327
- int_chat.append({"role": "observation", "content": obs})
328
- orig_chat.append(
329
- {"role": "observation", "content": obs, "execution": result}
330
- )
348
+ int_chat.append(chat_elt)
349
+ chat_elt["execution"] = result
350
+ orig_chat.append(chat_elt)
331
351
  self.streaming_message(
332
352
  {
333
353
  "role": "observation",
@@ -353,3 +373,63 @@ class VisionAgent(Agent):
353
373
 
354
374
  def log_progress(self, data: Dict[str, Any]) -> None:
355
375
  pass
376
+
377
+
378
+ class OpenAIVisionAgent(VisionAgent):
379
+ def __init__(
380
+ self,
381
+ agent: Optional[LMM] = None,
382
+ verbosity: int = 0,
383
+ local_artifacts_path: Optional[Union[str, Path]] = None,
384
+ code_sandbox_runtime: Optional[str] = None,
385
+ callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
386
+ ) -> None:
387
+ """Initialize the VisionAgent using OpenAI LMMs.
388
+
389
+ Parameters:
390
+ agent (Optional[LMM]): The agent to use for conversation and orchestration
391
+ of other agents.
392
+ verbosity (int): The verbosity level of the agent.
393
+ local_artifacts_path (Optional[Union[str, Path]]): The path to the local
394
+ artifacts file.
395
+ code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
396
+ """
397
+
398
+ agent = OpenAILMM(temperature=0.0, json_mode=True) if agent is None else agent
399
+ super().__init__(
400
+ agent,
401
+ verbosity,
402
+ local_artifacts_path,
403
+ code_sandbox_runtime,
404
+ callback_message,
405
+ )
406
+
407
+
408
+ class AnthropicVisionAgent(VisionAgent):
409
+ def __init__(
410
+ self,
411
+ agent: Optional[LMM] = None,
412
+ verbosity: int = 0,
413
+ local_artifacts_path: Optional[Union[str, Path]] = None,
414
+ code_sandbox_runtime: Optional[str] = None,
415
+ callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
416
+ ) -> None:
417
+ """Initialize the VisionAgent using Anthropic LMMs.
418
+
419
+ Parameters:
420
+ agent (Optional[LMM]): The agent to use for conversation and orchestration
421
+ of other agents.
422
+ verbosity (int): The verbosity level of the agent.
423
+ local_artifacts_path (Optional[Union[str, Path]]): The path to the local
424
+ artifacts file.
425
+ code_sandbox_runtime (Optional[str]): The code sandbox runtime to use.
426
+ """
427
+
428
+ agent = AnthropicLMM(temperature=0.0) if agent is None else agent
429
+ super().__init__(
430
+ agent,
431
+ verbosity,
432
+ local_artifacts_path,
433
+ code_sandbox_runtime,
434
+ callback_message,
435
+ )