vision-agent 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import tempfile
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Tuple, Union, cast
6
+ from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
7
7
 
8
8
  from vision_agent.agent import Agent
9
9
  from vision_agent.agent.agent_utils import extract_json
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
13
13
  VA_CODE,
14
14
  )
15
15
  from vision_agent.lmm import LMM, Message, OpenAILMM
16
- from vision_agent.tools import META_TOOL_DOCSTRING
16
+ from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
17
17
  from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
18
18
  from vision_agent.utils import CodeInterpreterFactory
19
19
  from vision_agent.utils.execute import CodeInterpreter, Execution
@@ -123,6 +123,7 @@ class VisionAgent(Agent):
123
123
  verbosity: int = 0,
124
124
  local_artifacts_path: Optional[Union[str, Path]] = None,
125
125
  code_sandbox_runtime: Optional[str] = None,
126
+ callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
126
127
  ) -> None:
127
128
  """Initialize the VisionAgent.
128
129
 
@@ -141,6 +142,7 @@ class VisionAgent(Agent):
141
142
  self.max_iterations = 100
142
143
  self.verbosity = verbosity
143
144
  self.code_sandbox_runtime = code_sandbox_runtime
145
+ self.callback_message = callback_message
144
146
  if self.verbosity >= 1:
145
147
  _LOGGER.setLevel(logging.INFO)
146
148
  self.local_artifacts_path = cast(
@@ -220,7 +222,14 @@ class VisionAgent(Agent):
220
222
  for chat_i in int_chat:
221
223
  if "media" in chat_i:
222
224
  for media in chat_i["media"]:
223
- media = cast(str, media)
225
+ if type(media) is str and media.startswith(("http", "https")):
226
+ # TODO: Ideally we should not call VA.tools here; we should revisit how to better support remote images later
227
+ file_path = Path(media).name
228
+ ndarray = load_image(media)
229
+ save_image(ndarray, file_path)
230
+ media = file_path
231
+ else:
232
+ media = cast(str, media)
224
233
  artifacts.artifacts[Path(media).name] = open(media, "rb").read()
225
234
 
226
235
  media_remote_path = (
@@ -262,6 +271,7 @@ class VisionAgent(Agent):
262
271
  artifacts_loaded = artifacts.show()
263
272
  int_chat.append({"role": "observation", "content": artifacts_loaded})
264
273
  orig_chat.append({"role": "observation", "content": artifacts_loaded})
274
+ self.streaming_message({"role": "observation", "content": artifacts_loaded})
265
275
 
266
276
  while not finished and iterations < self.max_iterations:
267
277
  response = run_conversation(self.agent, int_chat)
@@ -274,6 +284,8 @@ class VisionAgent(Agent):
274
284
  if last_response == response:
275
285
  response["let_user_respond"] = True
276
286
 
287
+ self.streaming_message({"role": "assistant", "content": response})
288
+
277
289
  if response["let_user_respond"]:
278
290
  break
279
291
 
@@ -293,6 +305,13 @@ class VisionAgent(Agent):
293
305
  orig_chat.append(
294
306
  {"role": "observation", "content": obs, "execution": result}
295
307
  )
308
+ self.streaming_message(
309
+ {
310
+ "role": "observation",
311
+ "content": obs,
312
+ "execution": result,
313
+ }
314
+ )
296
315
 
297
316
  iterations += 1
298
317
  last_response = response
@@ -305,5 +324,9 @@ class VisionAgent(Agent):
305
324
  artifacts.save()
306
325
  return orig_chat, artifacts
307
326
 
327
+ def streaming_message(self, message: Dict[str, Any]) -> None:
328
+ if self.callback_message:
329
+ self.callback_message(message)
330
+
308
331
  def log_progress(self, data: Dict[str, Any]) -> None:
309
332
  pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.131
3
+ Version: 0.2.132
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
3
3
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
5
- vision_agent/agent/vision_agent.py,sha256=7Xa_TjjbUXhlPmKpmXGCAIdT0-PJzRL2rFaACszTXX0,12001
5
+ vision_agent/agent/vision_agent.py,sha256=nfxdY5W5UME7JhwFcsB3j2-L5zsYZzJWdlS2R8U_9lE,13224
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=_2QQd_nTGojkk2ZOiMevVCY6-eUA9q1QdCWH7-Noq4w,34237
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=nj4iRRSAWYHjKqyUSp12aTCV1D5iUVCHeezVXoozS4M,12687
8
8
  vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7
27
27
  vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
28
28
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
29
29
  vision_agent/utils/video.py,sha256=GmJqu_3WhBMEwP4HToMMp8EwgftliHSpv5nd-QEDOcs,4528
30
- vision_agent-0.2.131.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
- vision_agent-0.2.131.dist-info/METADATA,sha256=LCIVXm-Le9Uw6Vp-XMvmmkhMRPRJJlBZmJPF28Bn6Hs,12295
32
- vision_agent-0.2.131.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
- vision_agent-0.2.131.dist-info/RECORD,,
30
+ vision_agent-0.2.132.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
31
+ vision_agent-0.2.132.dist-info/METADATA,sha256=s0AXiV6qjDjTUrzFqHL-50QJ6r7sxlJrwkSKNIGgklc,12295
32
+ vision_agent-0.2.132.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
33
+ vision_agent-0.2.132.dist-info/RECORD,,