vision-agent 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +26 -3
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/METADATA +1 -1
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/RECORD +5 -5
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/WHEEL +0 -0
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import tempfile
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
|
7
7
|
|
8
8
|
from vision_agent.agent import Agent
|
9
9
|
from vision_agent.agent.agent_utils import extract_json
|
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
13
13
|
VA_CODE,
|
14
14
|
)
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
|
-
from vision_agent.tools import META_TOOL_DOCSTRING
|
16
|
+
from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
|
17
17
|
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
18
|
from vision_agent.utils import CodeInterpreterFactory
|
19
19
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
@@ -123,6 +123,7 @@ class VisionAgent(Agent):
|
|
123
123
|
verbosity: int = 0,
|
124
124
|
local_artifacts_path: Optional[Union[str, Path]] = None,
|
125
125
|
code_sandbox_runtime: Optional[str] = None,
|
126
|
+
callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
|
126
127
|
) -> None:
|
127
128
|
"""Initialize the VisionAgent.
|
128
129
|
|
@@ -141,6 +142,7 @@ class VisionAgent(Agent):
|
|
141
142
|
self.max_iterations = 100
|
142
143
|
self.verbosity = verbosity
|
143
144
|
self.code_sandbox_runtime = code_sandbox_runtime
|
145
|
+
self.callback_message = callback_message
|
144
146
|
if self.verbosity >= 1:
|
145
147
|
_LOGGER.setLevel(logging.INFO)
|
146
148
|
self.local_artifacts_path = cast(
|
@@ -220,7 +222,14 @@ class VisionAgent(Agent):
|
|
220
222
|
for chat_i in int_chat:
|
221
223
|
if "media" in chat_i:
|
222
224
|
for media in chat_i["media"]:
|
223
|
-
media
|
225
|
+
if type(media) is str and media.startswith(("http", "https")):
|
226
|
+
# TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
|
227
|
+
file_path = Path(media).name
|
228
|
+
ndarray = load_image(media)
|
229
|
+
save_image(ndarray, file_path)
|
230
|
+
media = file_path
|
231
|
+
else:
|
232
|
+
media = cast(str, media)
|
224
233
|
artifacts.artifacts[Path(media).name] = open(media, "rb").read()
|
225
234
|
|
226
235
|
media_remote_path = (
|
@@ -262,6 +271,7 @@ class VisionAgent(Agent):
|
|
262
271
|
artifacts_loaded = artifacts.show()
|
263
272
|
int_chat.append({"role": "observation", "content": artifacts_loaded})
|
264
273
|
orig_chat.append({"role": "observation", "content": artifacts_loaded})
|
274
|
+
self.streaming_message({"role": "observation", "content": artifacts_loaded})
|
265
275
|
|
266
276
|
while not finished and iterations < self.max_iterations:
|
267
277
|
response = run_conversation(self.agent, int_chat)
|
@@ -274,6 +284,8 @@ class VisionAgent(Agent):
|
|
274
284
|
if last_response == response:
|
275
285
|
response["let_user_respond"] = True
|
276
286
|
|
287
|
+
self.streaming_message({"role": "assistant", "content": response})
|
288
|
+
|
277
289
|
if response["let_user_respond"]:
|
278
290
|
break
|
279
291
|
|
@@ -293,6 +305,13 @@ class VisionAgent(Agent):
|
|
293
305
|
orig_chat.append(
|
294
306
|
{"role": "observation", "content": obs, "execution": result}
|
295
307
|
)
|
308
|
+
self.streaming_message(
|
309
|
+
{
|
310
|
+
"role": "observation",
|
311
|
+
"content": obs,
|
312
|
+
"execution": result,
|
313
|
+
}
|
314
|
+
)
|
296
315
|
|
297
316
|
iterations += 1
|
298
317
|
last_response = response
|
@@ -305,5 +324,9 @@ class VisionAgent(Agent):
|
|
305
324
|
artifacts.save()
|
306
325
|
return orig_chat, artifacts
|
307
326
|
|
327
|
+
def streaming_message(self, message: Dict[str, Any]) -> None:
|
328
|
+
if self.callback_message:
|
329
|
+
self.callback_message(message)
|
330
|
+
|
308
331
|
def log_progress(self, data: Dict[str, Any]) -> None:
|
309
332
|
pass
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=nfxdY5W5UME7JhwFcsB3j2-L5zsYZzJWdlS2R8U_9lE,13224
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=_2QQd_nTGojkk2ZOiMevVCY6-eUA9q1QdCWH7-Noq4w,34237
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=nj4iRRSAWYHjKqyUSp12aTCV1D5iUVCHeezVXoozS4M,12687
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=GmJqu_3WhBMEwP4HToMMp8EwgftliHSpv5nd-QEDOcs,4528
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.132.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.132.dist-info/METADATA,sha256=s0AXiV6qjDjTUrzFqHL-50QJ6r7sxlJrwkSKNIGgklc,12295
|
32
|
+
vision_agent-0.2.132.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.132.dist-info/RECORD,,
|
File without changes
|
File without changes
|