vision-agent 0.2.131__py3-none-any.whl → 0.2.132__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/agent/vision_agent.py +26 -3
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/METADATA +1 -1
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/RECORD +5 -5
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.131.dist-info → vision_agent-0.2.132.dist-info}/WHEEL +0 -0
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import tempfile
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
|
7
7
|
|
8
8
|
from vision_agent.agent import Agent
|
9
9
|
from vision_agent.agent.agent_utils import extract_json
|
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
13
13
|
VA_CODE,
|
14
14
|
)
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
|
-
from vision_agent.tools import META_TOOL_DOCSTRING
|
16
|
+
from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
|
17
17
|
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
18
|
from vision_agent.utils import CodeInterpreterFactory
|
19
19
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
@@ -123,6 +123,7 @@ class VisionAgent(Agent):
|
|
123
123
|
verbosity: int = 0,
|
124
124
|
local_artifacts_path: Optional[Union[str, Path]] = None,
|
125
125
|
code_sandbox_runtime: Optional[str] = None,
|
126
|
+
callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
|
126
127
|
) -> None:
|
127
128
|
"""Initialize the VisionAgent.
|
128
129
|
|
@@ -141,6 +142,7 @@ class VisionAgent(Agent):
|
|
141
142
|
self.max_iterations = 100
|
142
143
|
self.verbosity = verbosity
|
143
144
|
self.code_sandbox_runtime = code_sandbox_runtime
|
145
|
+
self.callback_message = callback_message
|
144
146
|
if self.verbosity >= 1:
|
145
147
|
_LOGGER.setLevel(logging.INFO)
|
146
148
|
self.local_artifacts_path = cast(
|
@@ -220,7 +222,14 @@ class VisionAgent(Agent):
|
|
220
222
|
for chat_i in int_chat:
|
221
223
|
if "media" in chat_i:
|
222
224
|
for media in chat_i["media"]:
|
223
|
-
media
|
225
|
+
if type(media) is str and media.startswith(("http", "https")):
|
226
|
+
# TODO: Ideally we should not call VA.tools here; revisit how to better support remote images later.
|
227
|
+
file_path = Path(media).name
|
228
|
+
ndarray = load_image(media)
|
229
|
+
save_image(ndarray, file_path)
|
230
|
+
media = file_path
|
231
|
+
else:
|
232
|
+
media = cast(str, media)
|
224
233
|
artifacts.artifacts[Path(media).name] = open(media, "rb").read()
|
225
234
|
|
226
235
|
media_remote_path = (
|
@@ -262,6 +271,7 @@ class VisionAgent(Agent):
|
|
262
271
|
artifacts_loaded = artifacts.show()
|
263
272
|
int_chat.append({"role": "observation", "content": artifacts_loaded})
|
264
273
|
orig_chat.append({"role": "observation", "content": artifacts_loaded})
|
274
|
+
self.streaming_message({"role": "observation", "content": artifacts_loaded})
|
265
275
|
|
266
276
|
while not finished and iterations < self.max_iterations:
|
267
277
|
response = run_conversation(self.agent, int_chat)
|
@@ -274,6 +284,8 @@ class VisionAgent(Agent):
|
|
274
284
|
if last_response == response:
|
275
285
|
response["let_user_respond"] = True
|
276
286
|
|
287
|
+
self.streaming_message({"role": "assistant", "content": response})
|
288
|
+
|
277
289
|
if response["let_user_respond"]:
|
278
290
|
break
|
279
291
|
|
@@ -293,6 +305,13 @@ class VisionAgent(Agent):
|
|
293
305
|
orig_chat.append(
|
294
306
|
{"role": "observation", "content": obs, "execution": result}
|
295
307
|
)
|
308
|
+
self.streaming_message(
|
309
|
+
{
|
310
|
+
"role": "observation",
|
311
|
+
"content": obs,
|
312
|
+
"execution": result,
|
313
|
+
}
|
314
|
+
)
|
296
315
|
|
297
316
|
iterations += 1
|
298
317
|
last_response = response
|
@@ -305,5 +324,9 @@ class VisionAgent(Agent):
|
|
305
324
|
artifacts.save()
|
306
325
|
return orig_chat, artifacts
|
307
326
|
|
327
|
+
def streaming_message(self, message: Dict[str, Any]) -> None:
|
328
|
+
if self.callback_message:
|
329
|
+
self.callback_message(message)
|
330
|
+
|
308
331
|
def log_progress(self, data: Dict[str, Any]) -> None:
|
309
332
|
pass
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=FRwiux1FGvGccetyUCtY46KP01fQteqorm-JtFepovI,176
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=22LiPhkJlS5mVeo2dIi259pc2NgA7PGHRpcbnrtKo78,1930
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=nfxdY5W5UME7JhwFcsB3j2-L5zsYZzJWdlS2R8U_9lE,13224
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=_2QQd_nTGojkk2ZOiMevVCY6-eUA9q1QdCWH7-Noq4w,34237
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=nj4iRRSAWYHjKqyUSp12aTCV1D5iUVCHeezVXoozS4M,12687
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=GmJqu_3WhBMEwP4HToMMp8EwgftliHSpv5nd-QEDOcs,4528
|
30
|
-
vision_agent-0.2.131.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.131.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
-
vision_agent-0.2.131.dist-info/RECORD,,
|
30
|
+
vision_agent-0.2.132.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.132.dist-info/METADATA,sha256=s0AXiV6qjDjTUrzFqHL-50QJ6r7sxlJrwkSKNIGgklc,12295
|
32
|
+
vision_agent-0.2.132.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.132.dist-info/RECORD,,
|
File without changes
|
File without changes
|