vision-agent 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import tempfile
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Tuple, Union, cast
6
+ from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
7
7
 
8
8
  from vision_agent.agent import Agent
9
9
  from vision_agent.agent.agent_utils import extract_json
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
13
13
  VA_CODE,
14
14
  )
15
15
  from vision_agent.lmm import LMM, Message, OpenAILMM
16
- from vision_agent.tools import META_TOOL_DOCSTRING
16
+ from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
17
17
  from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
18
18
  from vision_agent.utils import CodeInterpreterFactory
19
19
  from vision_agent.utils.execute import CodeInterpreter, Execution
@@ -123,6 +123,7 @@ class VisionAgent(Agent):
123
123
  verbosity: int = 0,
124
124
  local_artifacts_path: Optional[Union[str, Path]] = None,
125
125
  code_sandbox_runtime: Optional[str] = None,
126
+ callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
126
127
  ) -> None:
127
128
  """Initialize the VisionAgent.
128
129
 
@@ -141,6 +142,7 @@ class VisionAgent(Agent):
141
142
  self.max_iterations = 100
142
143
  self.verbosity = verbosity
143
144
  self.code_sandbox_runtime = code_sandbox_runtime
145
+ self.callback_message = callback_message
144
146
  if self.verbosity >= 1:
145
147
  _LOGGER.setLevel(logging.INFO)
146
148
  self.local_artifacts_path = cast(
@@ -220,7 +222,14 @@ class VisionAgent(Agent):
220
222
  for chat_i in int_chat:
221
223
  if "media" in chat_i:
222
224
  for media in chat_i["media"]:
223
- media = cast(str, media)
225
+ if type(media) is str and media.startswith(("http", "https")):
226
+ # TODO: Ideally we should not call VA.tools here; revisit how to better support remote images later.
227
+ file_path = Path(media).name
228
+ ndarray = load_image(media)
229
+ save_image(ndarray, file_path)
230
+ media = file_path
231
+ else:
232
+ media = cast(str, media)
224
233
  artifacts.artifacts[Path(media).name] = open(media, "rb").read()
225
234
 
226
235
  media_remote_path = (
@@ -262,6 +271,7 @@ class VisionAgent(Agent):
262
271
  artifacts_loaded = artifacts.show()
263
272
  int_chat.append({"role": "observation", "content": artifacts_loaded})
264
273
  orig_chat.append({"role": "observation", "content": artifacts_loaded})
274
+ self.streaming_message({"role": "observation", "content": artifacts_loaded})
265
275
 
266
276
  while not finished and iterations < self.max_iterations:
267
277
  response = run_conversation(self.agent, int_chat)
@@ -274,6 +284,8 @@ class VisionAgent(Agent):
274
284
  if last_response == response:
275
285
  response["let_user_respond"] = True
276
286
 
287
+ self.streaming_message({"role": "assistant", "content": response})
288
+
277
289
  if response["let_user_respond"]:
278
290
  break
279
291
 
@@ -293,6 +305,13 @@ class VisionAgent(Agent):
293
305
  orig_chat.append(
294
306
  {"role": "observation", "content": obs, "execution": result}
295
307
  )
308
+ self.streaming_message(
309
+ {
310
+ "role": "observation",
311
+ "content": obs,
312
+ "execution": result,
313
+ }
314
+ )
296
315
 
297
316
  iterations += 1
298
317
  last_response = response
@@ -305,5 +324,9 @@ class VisionAgent(Agent):
305
324
  artifacts.save()
306
325
  return orig_chat, artifacts
307
326
 
327
+ def streaming_message(self, message: Dict[str, Any]) -> None:
328
+ if self.callback_message:
329
+ self.callback_message(message)
330
+
308
331
  def log_progress(self, data: Dict[str, Any]) -> None:
309
332
  pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.131
3
+ Version: 0.2.132
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
3
3
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
5
- vision_agent/agent/vision_agent.py,sha256=7Xa_TjjbUXhlPmKpmXGCAIdT0-PJzRL2rFaACszTXX0,12001
5
+ vision_agent/agent/vision_agent.py,sha256=nfxdY5W5UME7JhwFcsB3j2-L5zsYZzJWdlS2R8U_9lE,13224
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=_2QQd_nTGojkk2ZOiMevVCY6-eUA9q1QdCWH7-Noq4w,34237
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=nj4iRRSAWYHjKqyUSp12aTCV1D5iUVCHeezVXoozS4M,12687
8
8
  vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=GmJqu_3WhBMEwP4HToMMp8EwgftliHSpv5nd-QEDOcs,4528
30
- vision_agent-0.2.131.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.131.dist-info/METADATA,sha256=LCIVXm-Le9Uw6Vp-XMvmmkhMRPRJJlBZmJPF28Bn6Hs,12295
32
- vision_agent-0.2.131.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.131.dist-info/RECORD,,
30
+ vision_agent-0.2.132.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.132.dist-info/METADATA,sha256=s0AXiV6qjDjTUrzFqHL-50QJ6r7sxlJrwkSKNIGgklc,12295
32
+ vision_agent-0.2.132.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.132.dist-info/RECORD,,