vision-agent 0.2.134__py3-none-any.whl → 0.2.136__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +2 -9
- vision_agent/tools/meta_tools.py +46 -14
- vision_agent/utils/execute.py +5 -0
- {vision_agent-0.2.134.dist-info → vision_agent-0.2.136.dist-info}/METADATA +1 -1
- {vision_agent-0.2.134.dist-info → vision_agent-0.2.136.dist-info}/RECORD +7 -7
- {vision_agent-0.2.134.dist-info → vision_agent-0.2.136.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.134.dist-info → vision_agent-0.2.136.dist-info}/WHEEL +0 -0
@@ -13,7 +13,7 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
13
13
|
VA_CODE,
|
14
14
|
)
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
|
-
from vision_agent.tools import META_TOOL_DOCSTRING
|
16
|
+
from vision_agent.tools import META_TOOL_DOCSTRING
|
17
17
|
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
18
|
from vision_agent.utils import CodeInterpreterFactory
|
19
19
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
@@ -222,14 +222,7 @@ class VisionAgent(Agent):
|
|
222
222
|
for chat_i in int_chat:
|
223
223
|
if "media" in chat_i:
|
224
224
|
for media in chat_i["media"]:
|
225
|
-
|
226
|
-
# TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
|
227
|
-
file_path = Path(media).name
|
228
|
-
ndarray = load_image(media)
|
229
|
-
save_image(ndarray, file_path)
|
230
|
-
media = file_path
|
231
|
-
else:
|
232
|
-
media = cast(str, media)
|
225
|
+
media = cast(str, media)
|
233
226
|
artifacts.artifacts[Path(media).name] = open(media, "rb").read()
|
234
227
|
|
235
228
|
media_remote_path = (
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import difflib
|
2
|
+
import json
|
2
3
|
import os
|
3
4
|
import pickle as pkl
|
4
5
|
import re
|
@@ -53,25 +54,27 @@ def redisplay_results(execution: Execution) -> None:
|
|
53
54
|
"""
|
54
55
|
for result in execution.results:
|
55
56
|
if result.text is not None:
|
56
|
-
display({MimeType.TEXT_PLAIN: result.text})
|
57
|
+
display({MimeType.TEXT_PLAIN: result.text}, raw=True)
|
57
58
|
if result.html is not None:
|
58
|
-
display({MimeType.TEXT_HTML: result.html})
|
59
|
+
display({MimeType.TEXT_HTML: result.html}, raw=True)
|
59
60
|
if result.markdown is not None:
|
60
|
-
display({MimeType.TEXT_MARKDOWN: result.markdown})
|
61
|
+
display({MimeType.TEXT_MARKDOWN: result.markdown}, raw=True)
|
61
62
|
if result.svg is not None:
|
62
|
-
display({MimeType.IMAGE_SVG: result.svg})
|
63
|
+
display({MimeType.IMAGE_SVG: result.svg}, raw=True)
|
63
64
|
if result.png is not None:
|
64
|
-
display({MimeType.IMAGE_PNG: result.png})
|
65
|
+
display({MimeType.IMAGE_PNG: result.png}, raw=True)
|
65
66
|
if result.jpeg is not None:
|
66
|
-
display({MimeType.IMAGE_JPEG: result.jpeg})
|
67
|
+
display({MimeType.IMAGE_JPEG: result.jpeg}, raw=True)
|
67
68
|
if result.mp4 is not None:
|
68
|
-
display({MimeType.VIDEO_MP4_B64: result.mp4})
|
69
|
+
display({MimeType.VIDEO_MP4_B64: result.mp4}, raw=True)
|
69
70
|
if result.latex is not None:
|
70
|
-
display({MimeType.TEXT_LATEX: result.latex})
|
71
|
+
display({MimeType.TEXT_LATEX: result.latex}, raw=True)
|
71
72
|
if result.json is not None:
|
72
|
-
display({MimeType.APPLICATION_JSON: result.json})
|
73
|
+
display({MimeType.APPLICATION_JSON: result.json}, raw=True)
|
74
|
+
if result.artifact is not None:
|
75
|
+
display({MimeType.APPLICATION_ARTIFACT: result.artifact}, raw=True)
|
73
76
|
if result.extra is not None:
|
74
|
-
display(result.extra)
|
77
|
+
display(result.extra, raw=True)
|
75
78
|
|
76
79
|
|
77
80
|
class Artifacts:
|
@@ -208,7 +211,14 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str:
|
|
208
211
|
return_str = f"[Artifact {name} created]"
|
209
212
|
print(return_str)
|
210
213
|
|
211
|
-
display(
|
214
|
+
display(
|
215
|
+
{
|
216
|
+
MimeType.APPLICATION_ARTIFACT: json.dumps(
|
217
|
+
{"name": name, "content": artifacts[name]}
|
218
|
+
)
|
219
|
+
},
|
220
|
+
raw=True,
|
221
|
+
)
|
212
222
|
return return_str
|
213
223
|
|
214
224
|
|
@@ -292,7 +302,14 @@ def edit_code_artifact(
|
|
292
302
|
|
293
303
|
artifacts[name] = "".join(edited_lines)
|
294
304
|
|
295
|
-
display(
|
305
|
+
display(
|
306
|
+
{
|
307
|
+
MimeType.APPLICATION_ARTIFACT: json.dumps(
|
308
|
+
{"name": name, "content": artifacts[name]}
|
309
|
+
)
|
310
|
+
},
|
311
|
+
raw=True,
|
312
|
+
)
|
296
313
|
return open_code_artifact(artifacts, name, cur_line)
|
297
314
|
|
298
315
|
|
@@ -348,7 +365,10 @@ def generate_vision_code(
|
|
348
365
|
code_lines = code.splitlines(keepends=True)
|
349
366
|
total_lines = len(code_lines)
|
350
367
|
|
351
|
-
display(
|
368
|
+
display(
|
369
|
+
{MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})},
|
370
|
+
raw=True,
|
371
|
+
)
|
352
372
|
return view_lines(code_lines, 0, total_lines, name, total_lines)
|
353
373
|
|
354
374
|
|
@@ -413,7 +433,10 @@ def edit_vision_code(
|
|
413
433
|
code_lines = code.splitlines(keepends=True)
|
414
434
|
total_lines = len(code_lines)
|
415
435
|
|
416
|
-
display(
|
436
|
+
display(
|
437
|
+
{MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})},
|
438
|
+
raw=True,
|
439
|
+
)
|
417
440
|
return view_lines(code_lines, 0, total_lines, name, total_lines)
|
418
441
|
|
419
442
|
|
@@ -592,6 +615,15 @@ def use_florence2_fine_tuning(
|
|
592
615
|
|
593
616
|
diff = get_diff_with_prompts(name, code, new_code)
|
594
617
|
print(diff)
|
618
|
+
|
619
|
+
display(
|
620
|
+
{
|
621
|
+
MimeType.APPLICATION_ARTIFACT: json.dumps(
|
622
|
+
{"name": name, "content": new_code}
|
623
|
+
)
|
624
|
+
},
|
625
|
+
raw=True,
|
626
|
+
)
|
595
627
|
return diff
|
596
628
|
|
597
629
|
|
vision_agent/utils/execute.py
CHANGED
@@ -56,6 +56,7 @@ class MimeType(str, Enum):
|
|
56
56
|
TEXT_LATEX = "text/latex"
|
57
57
|
APPLICATION_JSON = "application/json"
|
58
58
|
APPLICATION_JAVASCRIPT = "application/javascript"
|
59
|
+
APPLICATION_ARTIFACT = "application/artifact"
|
59
60
|
|
60
61
|
|
61
62
|
class FileSerializer:
|
@@ -103,6 +104,7 @@ class Result:
|
|
103
104
|
latex: Optional[str] = None
|
104
105
|
json: Optional[Dict[str, Any]] = None
|
105
106
|
javascript: Optional[str] = None
|
107
|
+
artifact_name: Optional[str] = None
|
106
108
|
extra: Optional[Dict[str, Any]] = None
|
107
109
|
"Extra data that can be included. Not part of the standard types."
|
108
110
|
|
@@ -127,6 +129,7 @@ class Result:
|
|
127
129
|
self.latex = data.pop(MimeType.TEXT_LATEX, None)
|
128
130
|
self.json = data.pop(MimeType.APPLICATION_JSON, None)
|
129
131
|
self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None)
|
132
|
+
self.artifact = data.pop(MimeType.APPLICATION_ARTIFACT, None)
|
130
133
|
self.extra = data
|
131
134
|
# Only keeping the PNG representation if both PNG and JPEG are present
|
132
135
|
if self.png and self.jpeg:
|
@@ -204,6 +207,8 @@ class Result:
|
|
204
207
|
formats.append("javascript")
|
205
208
|
if self.mp4:
|
206
209
|
formats.append("mp4")
|
210
|
+
if self.artifact:
|
211
|
+
formats.append("artifact")
|
207
212
|
if self.extra:
|
208
213
|
formats.extend(iter(self.extra))
|
209
214
|
return formats
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=TddDT4e3JVc68Dt0zSk0B4OBORx_R2WhAGK71uqEe2w,204
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=qOYQn-wJsa4j4YjFOBQ41xyklCg8Y94CIIGw9ZXmgIU,2053
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=fXip-WkQtRyWLBAH3SWlGsJr-zJRlN_e9C-5QDuNElk,12716
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=OI95goKTqVaEEPYwkn6bVsHsHZeifoBC8rjG9nD0Znc,36909
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=a7P19QscKNiaweke0zHPCfi5GQImpG-ZGKv_kXz0seg,13452
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
|
@@ -15,19 +15,19 @@ vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,
|
|
15
15
|
vision_agent/lmm/lmm.py,sha256=soWmEjtleQUSH2G3tYZWxOmteIqkgMVcmuZfx4mxszU,16838
|
16
16
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
17
|
vision_agent/tools/__init__.py,sha256=nufZNzbcLTuXwxFmvZNj99qE8EO2qtEPT8wFsuI9vyE,2397
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=eoraWbf_ivZrh8Rxiz3i5vNOaeUDR849UPXx7TnubzA,21979
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=ZYqzcw_e937reoNr7gJgyKjQ7Gudxz1ttfIyo7F65w8,7758
|
21
21
|
vision_agent/tools/tools.py,sha256=WKeB99ED0o_ISS_vZc-ch_1Dc8_Fl2fhnGlfVNwNouc,70024
|
22
22
|
vision_agent/tools/tools_types.py,sha256=rLpCUODPY0yI65SLOTJOxfHFfqWM3WjOq-AYX25Chjk,2356
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
|
-
vision_agent/utils/execute.py,sha256=
|
25
|
+
vision_agent/utils/execute.py,sha256=rusXshSZKQlN6Bav9rB8BtAWiYDT4M1REhvYIUPi9TE,27973
|
26
26
|
vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7GJrg,10746
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=hOjfEOZNcddYdoa0CoviXA4Vo9kwURKuojIJgLLJdp0,4745
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.136.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.136.dist-info/METADATA,sha256=4BUmLW-JtgdCv7TT0fTDSe0vEzPwwLBHJb-I8RRorCc,12252
|
32
|
+
vision_agent-0.2.136.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.136.dist-info/RECORD,,
|
File without changes
|
File without changes
|