vision-agent 0.2.48__py3-none-any.whl → 0.2.50__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +64 -46
- vision_agent/tools/tools.py +8 -9
- {vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/METADATA +1 -1
- {vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/RECORD +6 -6
- {vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.48.dist-info → vision_agent-0.2.50.dist-info}/WHEEL +0 -0
@@ -36,11 +36,25 @@ logging.basicConfig(stream=sys.stdout)
|
|
36
36
|
_LOGGER = logging.getLogger(__name__)
|
37
37
|
_MAX_TABULATE_COL_WIDTH = 80
|
38
38
|
_CONSOLE = Console()
|
39
|
-
|
40
|
-
|
39
|
+
|
40
|
+
|
41
|
+
class DefaultImports:
|
42
|
+
"""Container for default imports used in the code execution."""
|
43
|
+
|
44
|
+
common_imports = [
|
41
45
|
"from typing import *",
|
42
46
|
]
|
43
|
-
|
47
|
+
|
48
|
+
@staticmethod
|
49
|
+
def to_code_string() -> str:
|
50
|
+
return "\n".join(DefaultImports.common_imports + T.__new_tools__)
|
51
|
+
|
52
|
+
@staticmethod
|
53
|
+
def prepend_imports(code: str) -> str:
|
54
|
+
"""Run this method to prepend the default imports to the code.
|
55
|
+
NOTE: be sure to run this method after the custom tools have been registered.
|
56
|
+
"""
|
57
|
+
return DefaultImports.to_code_string() + "\n\n" + code
|
44
58
|
|
45
59
|
|
46
60
|
def get_diff(before: str, after: str) -> str:
|
@@ -202,18 +216,20 @@ def write_and_test_code(
|
|
202
216
|
"type": "code",
|
203
217
|
"status": "running",
|
204
218
|
"payload": {
|
205
|
-
"code": code,
|
219
|
+
"code": DefaultImports.prepend_imports(code),
|
206
220
|
"test": test,
|
207
221
|
},
|
208
222
|
}
|
209
223
|
)
|
210
|
-
result = code_interpreter.exec_isolation(
|
224
|
+
result = code_interpreter.exec_isolation(
|
225
|
+
f"{DefaultImports.to_code_string()}\n{code}\n{test}"
|
226
|
+
)
|
211
227
|
log_progress(
|
212
228
|
{
|
213
229
|
"type": "code",
|
214
230
|
"status": "completed" if result.success else "failed",
|
215
231
|
"payload": {
|
216
|
-
"code": code,
|
232
|
+
"code": DefaultImports.prepend_imports(code),
|
217
233
|
"test": test,
|
218
234
|
"result": result.to_json(),
|
219
235
|
},
|
@@ -264,19 +280,21 @@ def write_and_test_code(
|
|
264
280
|
"type": "code",
|
265
281
|
"status": "running",
|
266
282
|
"payload": {
|
267
|
-
"code": code,
|
283
|
+
"code": DefaultImports.prepend_imports(code),
|
268
284
|
"test": test,
|
269
285
|
},
|
270
286
|
}
|
271
287
|
)
|
272
288
|
|
273
|
-
result = code_interpreter.exec_isolation(
|
289
|
+
result = code_interpreter.exec_isolation(
|
290
|
+
f"{DefaultImports.to_code_string()}\n{code}\n{test}"
|
291
|
+
)
|
274
292
|
log_progress(
|
275
293
|
{
|
276
294
|
"type": "code",
|
277
295
|
"status": "completed" if result.success else "failed",
|
278
296
|
"payload": {
|
279
|
-
"code": code,
|
297
|
+
"code": DefaultImports.prepend_imports(code),
|
280
298
|
"test": test,
|
281
299
|
"result": result.to_json(),
|
282
300
|
},
|
@@ -307,7 +325,14 @@ def write_and_test_code(
|
|
307
325
|
def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
|
308
326
|
_CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
|
309
327
|
_CONSOLE.print("=" * 30 + " Code " + "=" * 30)
|
310
|
-
_CONSOLE.print(
|
328
|
+
_CONSOLE.print(
|
329
|
+
Syntax(
|
330
|
+
DefaultImports.prepend_imports(code),
|
331
|
+
"python",
|
332
|
+
theme="gruvbox-dark",
|
333
|
+
line_numbers=True,
|
334
|
+
)
|
335
|
+
)
|
311
336
|
if test:
|
312
337
|
_CONSOLE.print("=" * 30 + " Test " + "=" * 30)
|
313
338
|
_CONSOLE.print(Syntax(test, "python", theme="gruvbox-dark", line_numbers=True))
|
@@ -464,10 +489,6 @@ class VisionAgent(Agent):
|
|
464
489
|
if chat_i["role"] == "user":
|
465
490
|
chat_i["content"] += f" Image name {media}"
|
466
491
|
|
467
|
-
# re-grab custom tools
|
468
|
-
global _DEFAULT_IMPORT
|
469
|
-
_DEFAULT_IMPORT = "\n".join(T.__new_tools__)
|
470
|
-
|
471
492
|
code = ""
|
472
493
|
test = ""
|
473
494
|
working_memory: List[Dict[str, str]] = []
|
@@ -531,38 +552,35 @@ class VisionAgent(Agent):
|
|
531
552
|
working_memory.extend(results["working_memory"]) # type: ignore
|
532
553
|
plan.append({"code": code, "test": test, "plan": plan_i})
|
533
554
|
|
534
|
-
if self_reflection:
|
535
|
-
self.log_progress(
|
536
|
-
{
|
537
|
-
"type": "self_reflection",
|
538
|
-
"status": "started",
|
539
|
-
}
|
540
|
-
)
|
541
|
-
reflection = reflect(
|
542
|
-
chat,
|
543
|
-
FULL_TASK.format(
|
544
|
-
user_request=chat[0]["content"], subtasks=plan_i_str
|
545
|
-
),
|
546
|
-
code,
|
547
|
-
self.planner,
|
548
|
-
)
|
549
|
-
if self.verbosity > 0:
|
550
|
-
_LOGGER.info(f"Reflection: {reflection}")
|
551
|
-
feedback = cast(str, reflection["feedback"])
|
552
|
-
success = cast(bool, reflection["success"])
|
553
|
-
self.log_progress(
|
554
|
-
{
|
555
|
-
"type": "self_reflection",
|
556
|
-
"status": "completed" if success else "failed",
|
557
|
-
"payload": reflection,
|
558
|
-
}
|
559
|
-
)
|
560
|
-
working_memory.append(
|
561
|
-
{"code": f"{code}\n{test}", "feedback": feedback}
|
562
|
-
)
|
563
|
-
else:
|
555
|
+
if not self_reflection:
|
564
556
|
break
|
565
557
|
|
558
|
+
self.log_progress(
|
559
|
+
{
|
560
|
+
"type": "self_reflection",
|
561
|
+
"status": "started",
|
562
|
+
}
|
563
|
+
)
|
564
|
+
reflection = reflect(
|
565
|
+
chat,
|
566
|
+
FULL_TASK.format(
|
567
|
+
user_request=chat[0]["content"], subtasks=plan_i_str
|
568
|
+
),
|
569
|
+
code,
|
570
|
+
self.planner,
|
571
|
+
)
|
572
|
+
if self.verbosity > 0:
|
573
|
+
_LOGGER.info(f"Reflection: {reflection}")
|
574
|
+
feedback = cast(str, reflection["feedback"])
|
575
|
+
success = cast(bool, reflection["success"])
|
576
|
+
self.log_progress(
|
577
|
+
{
|
578
|
+
"type": "self_reflection",
|
579
|
+
"status": "completed" if success else "failed",
|
580
|
+
"payload": reflection,
|
581
|
+
}
|
582
|
+
)
|
583
|
+
working_memory.append({"code": f"{code}\n{test}", "feedback": feedback})
|
566
584
|
retries += 1
|
567
585
|
|
568
586
|
execution_result = cast(Execution, results["test_result"])
|
@@ -571,7 +589,7 @@ class VisionAgent(Agent):
|
|
571
589
|
"type": "final_code",
|
572
590
|
"status": "completed" if success else "failed",
|
573
591
|
"payload": {
|
574
|
-
"code": code,
|
592
|
+
"code": DefaultImports.prepend_imports(code),
|
575
593
|
"test": test,
|
576
594
|
"result": execution_result.to_json(),
|
577
595
|
},
|
@@ -586,7 +604,7 @@ class VisionAgent(Agent):
|
|
586
604
|
play_video(res.mp4)
|
587
605
|
|
588
606
|
return {
|
589
|
-
"code": code,
|
607
|
+
"code": DefaultImports.prepend_imports(code),
|
590
608
|
"test": test,
|
591
609
|
"test_result": execution_result,
|
592
610
|
"plan": plan,
|
vision_agent/tools/tools.py
CHANGED
@@ -187,7 +187,7 @@ def extract_frames(
|
|
187
187
|
|
188
188
|
Returns:
|
189
189
|
List[Tuple[np.ndarray, float]]: A list of tuples containing the extracted frame
|
190
|
-
and the timestamp in seconds.
|
190
|
+
as a numpy array and the timestamp in seconds.
|
191
191
|
|
192
192
|
Example
|
193
193
|
-------
|
@@ -515,7 +515,7 @@ def save_json(data: Any, file_path: str) -> None:
|
|
515
515
|
|
516
516
|
|
517
517
|
def load_image(image_path: str) -> np.ndarray:
|
518
|
-
"""'load_image' is a utility function that loads an image from the given path.
|
518
|
+
"""'load_image' is a utility function that loads an image from the given file path string.
|
519
519
|
|
520
520
|
Parameters:
|
521
521
|
image_path (str): The path to the image.
|
@@ -527,7 +527,9 @@ def load_image(image_path: str) -> np.ndarray:
|
|
527
527
|
-------
|
528
528
|
>>> load_image("path/to/image.jpg")
|
529
529
|
"""
|
530
|
-
|
530
|
+
# NOTE: sometimes the generated code pass in a NumPy array
|
531
|
+
if isinstance(image_path, np.ndarray):
|
532
|
+
return image_path
|
531
533
|
image = Image.open(image_path).convert("RGB")
|
532
534
|
return np.array(image)
|
533
535
|
|
@@ -647,12 +649,9 @@ def overlay_bounding_boxes(
|
|
647
649
|
box = elt["bbox"]
|
648
650
|
scores = elt["score"]
|
649
651
|
|
650
|
-
box
|
651
|
-
|
652
|
-
|
653
|
-
int(box[2] * width),
|
654
|
-
int(box[3] * height),
|
655
|
-
]
|
652
|
+
# denormalize the box if it is normalized
|
653
|
+
box = denormalize_bbox(box, (height, width))
|
654
|
+
|
656
655
|
draw.rectangle(box, outline=color[label], width=4)
|
657
656
|
text = f"{label}: {scores:.2f}"
|
658
657
|
text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
|
@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
|
|
11
11
|
vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
|
12
12
|
vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
|
13
13
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
14
|
-
vision_agent/agent/vision_agent.py,sha256=
|
14
|
+
vision_agent/agent/vision_agent.py,sha256=0EqpLyyzpRGmT7fhS2XvLeUlktgCXTE5k1KGMQ8z3_s,20963
|
15
15
|
vision_agent/agent/vision_agent_prompts.py,sha256=hgnTlaYp2HMBHLi3e4faPb-DI5jQL9jfhKq9jyEUEgY,8370
|
16
16
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
17
17
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=Sng6dChynJJCYWjraXXM0tep_VPdnYl3L9vb0HMy_P
|
|
23
23
|
vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
|
24
24
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
25
25
|
vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
|
26
|
-
vision_agent/tools/tools.py,sha256=
|
26
|
+
vision_agent/tools/tools.py,sha256=Sc6tAYbH03TbrPKAT8XIj1YZIwhd9j2k4ia8iKHhxzM,26743
|
27
27
|
vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
|
28
28
|
vision_agent/utils/execute.py,sha256=GqoAodxtwTPBr1nujPTsWiZO2rBGvWVXTe8lgxY4d_g,20603
|
29
29
|
vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
|
30
30
|
vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
|
31
31
|
vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
|
32
32
|
vision_agent/utils/video.py,sha256=BJ9fomy2giAl038JThQP1WQZ-u4J4J_nsZB7QEWvlcQ,8767
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
36
|
-
vision_agent-0.2.
|
33
|
+
vision_agent-0.2.50.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
34
|
+
vision_agent-0.2.50.dist-info/METADATA,sha256=nLyeSFYnn4Bv_RyKzrP5iqnCRRkwCZT_d3euN1zgBOA,6817
|
35
|
+
vision_agent-0.2.50.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
36
|
+
vision_agent-0.2.50.dist-info/RECORD,,
|
File without changes
|
File without changes
|