vision-agent: vision_agent-0.2.135-py3-none-any.whl → vision_agent-0.2.136-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +2 -21
- vision_agent/tools/meta_tools.py +35 -8
- vision_agent/utils/execute.py +4 -4
- {vision_agent-0.2.135.dist-info → vision_agent-0.2.136.dist-info}/METADATA +1 -1
- {vision_agent-0.2.135.dist-info → vision_agent-0.2.136.dist-info}/RECORD +7 -7
- {vision_agent-0.2.135.dist-info → vision_agent-0.2.136.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.135.dist-info → vision_agent-0.2.136.dist-info}/WHEEL +0 -0
@@ -13,9 +13,8 @@ from vision_agent.agent.vision_agent_prompts import (
|
|
13
13
|
VA_CODE,
|
14
14
|
)
|
15
15
|
from vision_agent.lmm import LMM, Message, OpenAILMM
|
16
|
-
from vision_agent.tools import META_TOOL_DOCSTRING
|
16
|
+
from vision_agent.tools import META_TOOL_DOCSTRING
|
17
17
|
from vision_agent.tools.meta_tools import Artifacts, use_extra_vision_agent_args
|
18
|
-
from vision_agent.tools.tools import extract_frames, save_video
|
19
18
|
from vision_agent.utils import CodeInterpreterFactory
|
20
19
|
from vision_agent.utils.execute import CodeInterpreter, Execution
|
21
20
|
|
@@ -223,25 +222,7 @@ class VisionAgent(Agent):
|
|
223
222
|
for chat_i in int_chat:
|
224
223
|
if "media" in chat_i:
|
225
224
|
for media in chat_i["media"]:
|
226
|
-
|
227
|
-
# TODO: Ideally we should not call VA.tools here, we should come to revisit how to better support remote image later
|
228
|
-
file_path = str(
|
229
|
-
Path(self.local_artifacts_path).parent
|
230
|
-
/ Path(media).name
|
231
|
-
)
|
232
|
-
if file_path.lower().endswith(
|
233
|
-
".mp4"
|
234
|
-
) or file_path.lower().endswith(".mov"):
|
235
|
-
video_frames = extract_frames(media)
|
236
|
-
save_video(
|
237
|
-
[frame for frame, _ in video_frames], file_path
|
238
|
-
)
|
239
|
-
else:
|
240
|
-
ndarray = load_image(media)
|
241
|
-
save_image(ndarray, file_path)
|
242
|
-
media = file_path
|
243
|
-
else:
|
244
|
-
media = cast(str, media)
|
225
|
+
media = cast(str, media)
|
245
226
|
artifacts.artifacts[Path(media).name] = open(media, "rb").read()
|
246
227
|
|
247
228
|
media_remote_path = (
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
import difflib
|
2
|
+
import json
|
2
3
|
import os
|
3
4
|
import pickle as pkl
|
4
5
|
import re
|
@@ -70,8 +71,8 @@ def redisplay_results(execution: Execution) -> None:
|
|
70
71
|
display({MimeType.TEXT_LATEX: result.latex}, raw=True)
|
71
72
|
if result.json is not None:
|
72
73
|
display({MimeType.APPLICATION_JSON: result.json}, raw=True)
|
73
|
-
if result.
|
74
|
-
display({MimeType.
|
74
|
+
if result.artifact is not None:
|
75
|
+
display({MimeType.APPLICATION_ARTIFACT: result.artifact}, raw=True)
|
75
76
|
if result.extra is not None:
|
76
77
|
display(result.extra, raw=True)
|
77
78
|
|
@@ -210,7 +211,14 @@ def create_code_artifact(artifacts: Artifacts, name: str) -> str:
|
|
210
211
|
return_str = f"[Artifact {name} created]"
|
211
212
|
print(return_str)
|
212
213
|
|
213
|
-
display(
|
214
|
+
display(
|
215
|
+
{
|
216
|
+
MimeType.APPLICATION_ARTIFACT: json.dumps(
|
217
|
+
{"name": name, "content": artifacts[name]}
|
218
|
+
)
|
219
|
+
},
|
220
|
+
raw=True,
|
221
|
+
)
|
214
222
|
return return_str
|
215
223
|
|
216
224
|
|
@@ -294,7 +302,14 @@ def edit_code_artifact(
|
|
294
302
|
|
295
303
|
artifacts[name] = "".join(edited_lines)
|
296
304
|
|
297
|
-
display(
|
305
|
+
display(
|
306
|
+
{
|
307
|
+
MimeType.APPLICATION_ARTIFACT: json.dumps(
|
308
|
+
{"name": name, "content": artifacts[name]}
|
309
|
+
)
|
310
|
+
},
|
311
|
+
raw=True,
|
312
|
+
)
|
298
313
|
return open_code_artifact(artifacts, name, cur_line)
|
299
314
|
|
300
315
|
|
@@ -350,7 +365,10 @@ def generate_vision_code(
|
|
350
365
|
code_lines = code.splitlines(keepends=True)
|
351
366
|
total_lines = len(code_lines)
|
352
367
|
|
353
|
-
display(
|
368
|
+
display(
|
369
|
+
{MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})},
|
370
|
+
raw=True,
|
371
|
+
)
|
354
372
|
return view_lines(code_lines, 0, total_lines, name, total_lines)
|
355
373
|
|
356
374
|
|
@@ -415,7 +433,10 @@ def edit_vision_code(
|
|
415
433
|
code_lines = code.splitlines(keepends=True)
|
416
434
|
total_lines = len(code_lines)
|
417
435
|
|
418
|
-
display(
|
436
|
+
display(
|
437
|
+
{MimeType.APPLICATION_ARTIFACT: json.dumps({"name": name, "content": code})},
|
438
|
+
raw=True,
|
439
|
+
)
|
419
440
|
return view_lines(code_lines, 0, total_lines, name, total_lines)
|
420
441
|
|
421
442
|
|
@@ -429,7 +450,6 @@ def write_media_artifact(artifacts: Artifacts, local_path: str) -> str:
|
|
429
450
|
with open(local_path, "rb") as f:
|
430
451
|
media = f.read()
|
431
452
|
artifacts[Path(local_path).name] = media
|
432
|
-
display({MimeType.TEXT_ARTIFACT_NAME: Path(local_path).name}, raw=True)
|
433
453
|
return f"[Media {Path(local_path).name} saved]"
|
434
454
|
|
435
455
|
|
@@ -596,7 +616,14 @@ def use_florence2_fine_tuning(
|
|
596
616
|
diff = get_diff_with_prompts(name, code, new_code)
|
597
617
|
print(diff)
|
598
618
|
|
599
|
-
display(
|
619
|
+
display(
|
620
|
+
{
|
621
|
+
MimeType.APPLICATION_ARTIFACT: json.dumps(
|
622
|
+
{"name": name, "content": new_code}
|
623
|
+
)
|
624
|
+
},
|
625
|
+
raw=True,
|
626
|
+
)
|
600
627
|
return diff
|
601
628
|
|
602
629
|
|
vision_agent/utils/execute.py
CHANGED
@@ -56,7 +56,7 @@ class MimeType(str, Enum):
|
|
56
56
|
TEXT_LATEX = "text/latex"
|
57
57
|
APPLICATION_JSON = "application/json"
|
58
58
|
APPLICATION_JAVASCRIPT = "application/javascript"
|
59
|
-
|
59
|
+
APPLICATION_ARTIFACT = "application/artifact"
|
60
60
|
|
61
61
|
|
62
62
|
class FileSerializer:
|
@@ -129,7 +129,7 @@ class Result:
|
|
129
129
|
self.latex = data.pop(MimeType.TEXT_LATEX, None)
|
130
130
|
self.json = data.pop(MimeType.APPLICATION_JSON, None)
|
131
131
|
self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None)
|
132
|
-
self.
|
132
|
+
self.artifact = data.pop(MimeType.APPLICATION_ARTIFACT, None)
|
133
133
|
self.extra = data
|
134
134
|
# Only keeping the PNG representation if both PNG and JPEG are present
|
135
135
|
if self.png and self.jpeg:
|
@@ -207,8 +207,8 @@ class Result:
|
|
207
207
|
formats.append("javascript")
|
208
208
|
if self.mp4:
|
209
209
|
formats.append("mp4")
|
210
|
-
if self.
|
211
|
-
formats.append("
|
210
|
+
if self.artifact:
|
211
|
+
formats.append("artifact")
|
212
212
|
if self.extra:
|
213
213
|
formats.extend(iter(self.extra))
|
214
214
|
return formats
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=TddDT4e3JVc68Dt0zSk0B4OBORx_R2WhAGK71uqEe2w,204
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=qOYQn-wJsa4j4YjFOBQ41xyklCg8Y94CIIGw9ZXmgIU,2053
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=fXip-WkQtRyWLBAH3SWlGsJr-zJRlN_e9C-5QDuNElk,12716
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=OI95goKTqVaEEPYwkn6bVsHsHZeifoBC8rjG9nD0Znc,36909
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=a7P19QscKNiaweke0zHPCfi5GQImpG-ZGKv_kXz0seg,13452
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=-fXiIIb48duXVljWYcJ0Y4ZzfNnRFi3C5cKdF4SdDo8,10075
|
@@ -15,19 +15,19 @@ vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,
|
|
15
15
|
vision_agent/lmm/lmm.py,sha256=soWmEjtleQUSH2G3tYZWxOmteIqkgMVcmuZfx4mxszU,16838
|
16
16
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
17
|
vision_agent/tools/__init__.py,sha256=nufZNzbcLTuXwxFmvZNj99qE8EO2qtEPT8wFsuI9vyE,2397
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=eoraWbf_ivZrh8Rxiz3i5vNOaeUDR849UPXx7TnubzA,21979
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=ZYqzcw_e937reoNr7gJgyKjQ7Gudxz1ttfIyo7F65w8,7758
|
21
21
|
vision_agent/tools/tools.py,sha256=WKeB99ED0o_ISS_vZc-ch_1Dc8_Fl2fhnGlfVNwNouc,70024
|
22
22
|
vision_agent/tools/tools_types.py,sha256=rLpCUODPY0yI65SLOTJOxfHFfqWM3WjOq-AYX25Chjk,2356
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
25
|
-
vision_agent/utils/execute.py,sha256=
|
25
|
+
vision_agent/utils/execute.py,sha256=rusXshSZKQlN6Bav9rB8BtAWiYDT4M1REhvYIUPi9TE,27973
|
26
26
|
vision_agent/utils/image_utils.py,sha256=zTTOJFOieMzwIquTFnW7T6ssx9o6XfoZ0Unqyk7GJrg,10746
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=hOjfEOZNcddYdoa0CoviXA4Vo9kwURKuojIJgLLJdp0,4745
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.136.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.136.dist-info/METADATA,sha256=4BUmLW-JtgdCv7TT0fTDSe0vEzPwwLBHJb-I8RRorCc,12252
|
32
|
+
vision_agent-0.2.136.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.136.dist-info/RECORD,,
|
File without changes
|
File without changes
|