vision-agent 0.2.133.tar.gz → 0.2.135.tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.133 → vision_agent-0.2.135}/PKG-INFO +1 -1
- {vision_agent-0.2.133 → vision_agent-0.2.135}/pyproject.toml +1 -1
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent.py +15 -3
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/meta_tools.py +19 -14
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/execute.py +21 -1
- {vision_agent-0.2.133 → vision_agent-0.2.135}/LICENSE +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/README.md +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/utils/video.py +0 -0
@@ -15,6 +15,7 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
16
|
from vision_agent.tools import META_TOOL_DOCSTRING, save_image, load_image
|
17
17
|
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
|
+
from vision_agent.tools.tools import extract_frames, save_video
|
18
19
|
from vision_agent.utils import CodeInterpreterFactory
|
19
20
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
20
21
|
|
@@ -224,9 +225,20 @@ class VisionAgent(Agent):
|
|
224
225
|
for media in chat_i["media"]:
|
225
226
|
if type(media) is str and media.startswith(("http", "https")):
|
226
227
|
# TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
|
227
|
-
file_path =
|
228
|
-
|
229
|
-
|
228
|
+
file_path = str(
|
229
|
+
Path(self.local_artifacts_path).parent
|
230
|
+
/ Path(media).name
|
231
|
+
)
|
232
|
+
if file_path.lower().endswith(
|
233
|
+
".mp4"
|
234
|
+
) or file_path.lower().endswith(".mov"):
|
235
|
+
video_frames = extract_frames(media)
|
236
|
+
save_video(
|
237
|
+
[frame for frame, _ in video_frames], file_path
|
238
|
+
)
|
239
|
+
else:
|
240
|
+
ndarray = load_image(media)
|
241
|
+
save_image(ndarray, file_path)
|
230
242
|
media = file_path
|
231
243
|
else:
|
232
244
|
media = cast(str, media)
|
@@ -53,25 +53,27 @@ def redisplay_results(execution: Execution) -> None:
|
|
53
53
|
"""
|
54
54
|
for result in execution.results:
|
55
55
|
if result.text is not None:
|
56
|
-
display({MimeType.TEXT_PLAIN: result.text})
|
56
|
+
display({MimeType.TEXT_PLAIN: result.text}, raw=True)
|
57
57
|
if result.html is not None:
|
58
|
-
display({MimeType.TEXT_HTML: result.html})
|
58
|
+
display({MimeType.TEXT_HTML: result.html}, raw=True)
|
59
59
|
if result.markdown is not None:
|
60
|
-
display({MimeType.TEXT_MARKDOWN: result.markdown})
|
60
|
+
display({MimeType.TEXT_MARKDOWN: result.markdown}, raw=True)
|
61
61
|
if result.svg is not None:
|
62
|
-
display({MimeType.IMAGE_SVG: result.svg})
|
62
|
+
display({MimeType.IMAGE_SVG: result.svg}, raw=True)
|
63
63
|
if result.png is not None:
|
64
|
-
display({MimeType.IMAGE_PNG: result.png})
|
64
|
+
display({MimeType.IMAGE_PNG: result.png}, raw=True)
|
65
65
|
if result.jpeg is not None:
|
66
|
-
display({MimeType.IMAGE_JPEG: result.jpeg})
|
66
|
+
display({MimeType.IMAGE_JPEG: result.jpeg}, raw=True)
|
67
67
|
if result.mp4 is not None:
|
68
|
-
display({MimeType.VIDEO_MP4_B64: result.mp4})
|
68
|
+
display({MimeType.VIDEO_MP4_B64: result.mp4}, raw=True)
|
69
69
|
if result.latex is not None:
|
70
|
-
display({MimeType.TEXT_LATEX: result.latex})
|
70
|
+
display({MimeType.TEXT_LATEX: result.latex}, raw=True)
|
71
71
|
if result.json is not None:
|
72
|
-
display({MimeType.APPLICATION_JSON: result.json})
|
72
|
+
display({MimeType.APPLICATION_JSON: result.json}, raw=True)
|
73
|
+
if result.artifact_name is not None:
|
74
|
+
display({MimeType.TEXT_ARTIFACT_NAME: result.artifact_name}, raw=True)
|
73
75
|
if result.extra is not None:
|
74
|
-
display(result.extra)
|
76
|
+
display(result.extra, raw=True)
|
75
77
|
|
76
78
|
|
77
79
|
class Artifacts:
|
@@ -208,7 +210,7 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str:
|
|
208
210
|
return_str = f"[Artifact {name} created]"
|
209
211
|
print(return_str)
|
210
212
|
|
211
|
-
display({MimeType.
|
213
|
+
display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
|
212
214
|
return return_str
|
213
215
|
|
214
216
|
|
@@ -292,7 +294,7 @@ def edit_code_artifact(
|
|
292
294
|
|
293
295
|
artifacts[name] = "".join(edited_lines)
|
294
296
|
|
295
|
-
display({MimeType.
|
297
|
+
display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
|
296
298
|
return open_code_artifact(artifacts, name, cur_line)
|
297
299
|
|
298
300
|
|
@@ -348,7 +350,7 @@ def generate_vision_code(
|
|
348
350
|
code_lines = code.splitlines(keepends=True)
|
349
351
|
total_lines = len(code_lines)
|
350
352
|
|
351
|
-
display({MimeType.
|
353
|
+
display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
|
352
354
|
return view_lines(code_lines, 0, total_lines, name, total_lines)
|
353
355
|
|
354
356
|
|
@@ -413,7 +415,7 @@ def edit_vision_code(
|
|
413
415
|
code_lines = code.splitlines(keepends=True)
|
414
416
|
total_lines = len(code_lines)
|
415
417
|
|
416
|
-
display({MimeType.
|
418
|
+
display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
|
417
419
|
return view_lines(code_lines, 0, total_lines, name, total_lines)
|
418
420
|
|
419
421
|
|
@@ -427,6 +429,7 @@ def write_media_artifact(artifacts: Artifacts, local_path: str) -> str:
|
|
427
429
|
with open(local_path, "rb") as f:
|
428
430
|
media = f.read()
|
429
431
|
artifacts[Path(local_path).name] = media
|
432
|
+
display({MimeType.TEXT_ARTIFACT_NAME: Path(local_path).name}, raw=True)
|
430
433
|
return f"[Media {Path(local_path).name} saved]"
|
431
434
|
|
432
435
|
|
@@ -592,6 +595,8 @@ def use_florence2_fine_tuning(
|
|
592
595
|
|
593
596
|
diff = get_diff_with_prompts(name, code, new_code)
|
594
597
|
print(diff)
|
598
|
+
|
599
|
+
display({MimeType.TEXT_ARTIFACT_NAME: name}, raw=True)
|
595
600
|
return diff
|
596
601
|
|
597
602
|
|
@@ -56,6 +56,7 @@ class MimeType(str, Enum):
|
|
56
56
|
TEXT_LATEX = "text/latex"
|
57
57
|
APPLICATION_JSON = "application/json"
|
58
58
|
APPLICATION_JAVASCRIPT = "application/javascript"
|
59
|
+
TEXT_ARTIFACT_NAME = "text/artifact/name"
|
59
60
|
|
60
61
|
|
61
62
|
class FileSerializer:
|
@@ -103,6 +104,7 @@ class Result:
|
|
103
104
|
latex: Optional[str] = None
|
104
105
|
json: Optional[Dict[str, Any]] = None
|
105
106
|
javascript: Optional[str] = None
|
107
|
+
artifact_name: Optional[str] = None
|
106
108
|
extra: Optional[Dict[str, Any]] = None
|
107
109
|
"Extra data that can be included. Not part of the standard types."
|
108
110
|
|
@@ -127,6 +129,7 @@ class Result:
|
|
127
129
|
self.latex = data.pop(MimeType.TEXT_LATEX, None)
|
128
130
|
self.json = data.pop(MimeType.APPLICATION_JSON, None)
|
129
131
|
self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None)
|
132
|
+
self.artifact_name = data.pop(MimeType.TEXT_ARTIFACT_NAME, None)
|
130
133
|
self.extra = data
|
131
134
|
# Only keeping the PNG representation if both PNG and JPEG are present
|
132
135
|
if self.png and self.jpeg:
|
@@ -204,6 +207,8 @@ class Result:
|
|
204
207
|
formats.append("javascript")
|
205
208
|
if self.mp4:
|
206
209
|
formats.append("mp4")
|
210
|
+
if self.artifact_name:
|
211
|
+
formats.append("artifact_name")
|
207
212
|
if self.extra:
|
208
213
|
formats.extend(iter(self.extra))
|
209
214
|
return formats
|
@@ -691,8 +696,9 @@ class CodeInterpreterFactory:
|
|
691
696
|
if not code_sandbox_runtime:
|
692
697
|
code_sandbox_runtime = os.getenv("CODE_SANDBOX_RUNTIME", "local")
|
693
698
|
if code_sandbox_runtime == "e2b":
|
699
|
+
envs = _get_e2b_env()
|
694
700
|
instance: CodeInterpreter = E2BCodeInterpreter(
|
695
|
-
timeout=_SESSION_TIMEOUT, remote_path=remote_path
|
701
|
+
timeout=_SESSION_TIMEOUT, remote_path=remote_path, envs=envs
|
696
702
|
)
|
697
703
|
elif code_sandbox_runtime == "local":
|
698
704
|
instance = LocalCodeInterpreter(
|
@@ -705,6 +711,20 @@ class CodeInterpreterFactory:
|
|
705
711
|
return instance
|
706
712
|
|
707
713
|
|
714
|
+
def _get_e2b_env() -> Union[Dict[str, str], None]:
|
715
|
+
openai_api_key = os.getenv("OPENAI_API_KEY", "")
|
716
|
+
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY", "")
|
717
|
+
if openai_api_key or anthropic_api_key:
|
718
|
+
envs = {}
|
719
|
+
if openai_api_key:
|
720
|
+
envs["OPENAI_API_KEY"] = openai_api_key
|
721
|
+
if anthropic_api_key:
|
722
|
+
envs["ANTHROPIC_API_KEY"] = anthropic_api_key
|
723
|
+
else:
|
724
|
+
envs = None
|
725
|
+
return envs
|
726
|
+
|
727
|
+
|
708
728
|
def _parse_local_code_interpreter_outputs(outputs: List[Dict[str, Any]]) -> Execution:
|
709
729
|
"""Parse notebook cell outputs to Execution object. Output types:
|
710
730
|
https://nbformat.readthedocs.io/en/latest/format_description.html#code-cell-outputs
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.133 → vision_agent-0.2.135}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|