vision-agent 0.2.131__tar.gz → 0.2.132__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33)
  1. {vision_agent-0.2.131 → vision_agent-0.2.132}/PKG-INFO +1 -1
  2. {vision_agent-0.2.131 → vision_agent-0.2.132}/pyproject.toml +1 -1
  3. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent.py +26 -3
  4. {vision_agent-0.2.131 → vision_agent-0.2.132}/LICENSE +0 -0
  5. {vision_agent-0.2.131 → vision_agent-0.2.132}/README.md +0 -0
  6. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/__init__.py +0 -0
  7. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/__init__.py +0 -0
  8. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/agent.py +0 -0
  9. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/agent_utils.py +0 -0
  10. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder.py +0 -0
  11. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  12. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_prompts.py +0 -0
  13. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/clients/__init__.py +0 -0
  14. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/clients/http.py +0 -0
  15. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/clients/landing_public_api.py +0 -0
  16. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/fonts/__init__.py +0 -0
  17. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  18. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/lmm/__init__.py +0 -0
  19. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/lmm/lmm.py +0 -0
  20. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/lmm/types.py +0 -0
  21. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/__init__.py +0 -0
  22. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/meta_tools.py +0 -0
  23. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/prompts.py +0 -0
  24. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/tool_utils.py +0 -0
  25. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/tools.py +0 -0
  26. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/tools_types.py +0 -0
  27. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/__init__.py +0 -0
  28. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/exceptions.py +0 -0
  29. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/execute.py +0 -0
  30. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/image_utils.py +0 -0
  31. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/sim.py +0 -0
  32. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/type_defs.py +0 -0
  33. {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.131
3
+ Version: 0.2.132
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.131"
7
+ version = "0.2.132"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -3,7 +3,7 @@ import logging
3
3
  import os
4
4
  import tempfile
5
5
  from pathlib import Path
6
- from typing import Any, Dict, List, Optional, Tuple, Union, cast
6
+ from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
7
7
 
8
8
  from vision_agent.agent import Agent
9
9
  from vision_agent.agent.agent_utils import extract_json
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
13
13
  VA_CODE,
14
14
  )
15
15
  from vision_agent.lmm import LMM, Message, OpenAILMM
16
- from vision_agent.tools import META_TOOL_DOCSTRING
16
+ from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
17
17
  from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
18
18
  from vision_agent.utils import CodeInterpreterFactory
19
19
  from vision_agent.utils.execute import CodeInterpreter, Execution
@@ -123,6 +123,7 @@ class VisionAgent(Agent):
123
123
  verbosity: int = 0,
124
124
  local_artifacts_path: Optional[Union[str, Path]] = None,
125
125
  code_sandbox_runtime: Optional[str] = None,
126
+ callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
126
127
  ) -> None:
127
128
  """Initialize the VisionAgent.
128
129
 
@@ -141,6 +142,7 @@ class VisionAgent(Agent):
141
142
  self.max_iterations = 100
142
143
  self.verbosity = verbosity
143
144
  self.code_sandbox_runtime = code_sandbox_runtime
145
+ self.callback_message = callback_message
144
146
  if self.verbosity >= 1:
145
147
  _LOGGER.setLevel(logging.INFO)
146
148
  self.local_artifacts_path = cast(
@@ -220,7 +222,14 @@ class VisionAgent(Agent):
220
222
  for chat_i in int_chat:
221
223
  if "media" in chat_i:
222
224
  for media in chat_i["media"]:
223
- media = cast(str, media)
225
+ if type(media) is str and media.startswith(("http", "https")):
226
+ # TODO: Ideally we should not call VA.tools here; revisit how to better support remote images later.
227
+ file_path = Path(media).name
228
+ ndarray = load_image(media)
229
+ save_image(ndarray, file_path)
230
+ media = file_path
231
+ else:
232
+ media = cast(str, media)
224
233
  artifacts.artifacts[Path(media).name] = open(media, "rb").read()
225
234
 
226
235
  media_remote_path = (
@@ -262,6 +271,7 @@ class VisionAgent(Agent):
262
271
  artifacts_loaded = artifacts.show()
263
272
  int_chat.append({"role": "observation", "content": artifacts_loaded})
264
273
  orig_chat.append({"role": "observation", "content": artifacts_loaded})
274
+ self.streaming_message({"role": "observation", "content": artifacts_loaded})
265
275
 
266
276
  while not finished and iterations < self.max_iterations:
267
277
  response = run_conversation(self.agent, int_chat)
@@ -274,6 +284,8 @@ class VisionAgent(Agent):
274
284
  if last_response == response:
275
285
  response["let_user_respond"] = True
276
286
 
287
+ self.streaming_message({"role": "assistant", "content": response})
288
+
277
289
  if response["let_user_respond"]:
278
290
  break
279
291
 
@@ -293,6 +305,13 @@ class VisionAgent(Agent):
293
305
  orig_chat.append(
294
306
  {"role": "observation", "content": obs, "execution": result}
295
307
  )
308
+ self.streaming_message(
309
+ {
310
+ "role": "observation",
311
+ "content": obs,
312
+ "execution": result,
313
+ }
314
+ )
296
315
 
297
316
  iterations += 1
298
317
  last_response = response
@@ -305,5 +324,9 @@ class VisionAgent(Agent):
305
324
  artifacts.save()
306
325
  return orig_chat, artifacts
307
326
 
327
+ def streaming_message(self, message: Dict[str, Any]) -> None:
328
+ if self.callback_message:
329
+ self.callback_message(message)
330
+
308
331
  def log_progress(self, data: Dict[str, Any]) -> None:
309
332
  pass
File without changes
File without changes