vision-agent 0.2.131__tar.gz → 0.2.132__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.131 → vision_agent-0.2.132}/PKG-INFO +1 -1
- {vision_agent-0.2.131 → vision_agent-0.2.132}/pyproject.toml +1 -1
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent.py +26 -3
- {vision_agent-0.2.131 → vision_agent-0.2.132}/LICENSE +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/README.md +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/utils/video.py +0 -0
@@ -3,7 +3,7 @@ import logging
|
|
3
3
|
import os
|
4
4
|
import tempfile
|
5
5
|
from pathlib import Path
|
6
|
-
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
6
|
+
from typing import Any, Dict, List, Optional, Tuple, Union, cast, Callable
|
7
7
|
|
8
8
|
from vision_agent.agent import Agent
|
9
9
|
from vision_agent.agent.agent_utils import extract_json
|
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
13
13
|
VA_CODE,
|
14
14
|
)
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
|
-
from vision_agent.tools import META_TOOL_DOCSTRING
|
16
|
+
from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
|
17
17
|
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
18
|
from vision_agent.utils import CodeInterpreterFactory
|
19
19
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
@@ -123,6 +123,7 @@ class VisionAgent(Agent):
|
|
123
123
|
verbosity: int = 0,
|
124
124
|
local_artifacts_path: Optional[Union[str, Path]] = None,
|
125
125
|
code_sandbox_runtime: Optional[str] = None,
|
126
|
+
callback_message: Optional[Callable[[Dict[str, Any]], None]] = None,
|
126
127
|
) -> None:
|
127
128
|
"""Initialize the VisionAgent.
|
128
129
|
|
@@ -141,6 +142,7 @@ class VisionAgent(Agent):
|
|
141
142
|
self.max_iterations = 100
|
142
143
|
self.verbosity = verbosity
|
143
144
|
self.code_sandbox_runtime = code_sandbox_runtime
|
145
|
+
self.callback_message = callback_message
|
144
146
|
if self.verbosity >= 1:
|
145
147
|
_LOGGER.setLevel(logging.INFO)
|
146
148
|
self.local_artifacts_path = cast(
|
@@ -220,7 +222,14 @@ class VisionAgent(Agent):
|
|
220
222
|
for chat_i in int_chat:
|
221
223
|
if "media" in chat_i:
|
222
224
|
for media in chat_i["media"]:
|
223
|
-
media
|
225
|
+
if type(media) is str and media.startswith(("http", "https")):
|
226
|
+
# TODO: Ideally we should not call VA.tools here; we should revisit how to better support remote images later
|
227
|
+
file_path = Path(media).name
|
228
|
+
ndarray = load_image(media)
|
229
|
+
save_image(ndarray, file_path)
|
230
|
+
media = file_path
|
231
|
+
else:
|
232
|
+
media = cast(str, media)
|
224
233
|
artifacts.artifacts[Path(media).name] = open(media, "rb").read()
|
225
234
|
|
226
235
|
media_remote_path = (
|
@@ -262,6 +271,7 @@ class VisionAgent(Agent):
|
|
262
271
|
artifacts_loaded = artifacts.show()
|
263
272
|
int_chat.append({"role": "observation", "content": artifacts_loaded})
|
264
273
|
orig_chat.append({"role": "observation", "content": artifacts_loaded})
|
274
|
+
self.streaming_message({"role": "observation", "content": artifacts_loaded})
|
265
275
|
|
266
276
|
while not finished and iterations < self.max_iterations:
|
267
277
|
response = run_conversation(self.agent, int_chat)
|
@@ -274,6 +284,8 @@ class VisionAgent(Agent):
|
|
274
284
|
if last_response == response:
|
275
285
|
response["let_user_respond"] = True
|
276
286
|
|
287
|
+
self.streaming_message({"role": "assistant", "content": response})
|
288
|
+
|
277
289
|
if response["let_user_respond"]:
|
278
290
|
break
|
279
291
|
|
@@ -293,6 +305,13 @@ class VisionAgent(Agent):
|
|
293
305
|
orig_chat.append(
|
294
306
|
{"role": "observation", "content": obs, "execution": result}
|
295
307
|
)
|
308
|
+
self.streaming_message(
|
309
|
+
{
|
310
|
+
"role": "observation",
|
311
|
+
"content": obs,
|
312
|
+
"execution": result,
|
313
|
+
}
|
314
|
+
)
|
296
315
|
|
297
316
|
iterations += 1
|
298
317
|
last_response = response
|
@@ -305,5 +324,9 @@ class VisionAgent(Agent):
|
|
305
324
|
artifacts.save()
|
306
325
|
return orig_chat, artifacts
|
307
326
|
|
327
|
+
def streaming_message(self, message: Dict[str, Any]) -> None:
|
328
|
+
if self.callback_message:
|
329
|
+
self.callback_message(message)
|
330
|
+
|
308
331
|
def log_progress(self, data: Dict[str, Any]) -> None:
|
309
332
|
pass
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.131 → vision_agent-0.2.132}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|