vision-agent 0.2.46__py3-none-any.whl → 0.2.48__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -36,7 +36,11 @@ logging.basicConfig(stream=sys.stdout)
36
36
  _LOGGER = logging.getLogger(__name__)
37
37
  _MAX_TABULATE_COL_WIDTH = 80
38
38
  _CONSOLE = Console()
39
- _DEFAULT_IMPORT = "\n".join(T.__new_tools__)
39
+ _DEFAULT_IMPORT = "\n".join(T.__new_tools__) + "\n".join(
40
+ [
41
+ "from typing import *",
42
+ ]
43
+ )
40
44
 
41
45
 
42
46
  def get_diff(before: str, after: str) -> str:
@@ -22,7 +22,7 @@ from .tools import (
22
22
  overlay_segmentation_masks,
23
23
  save_image,
24
24
  save_json,
25
- save_video_to_result,
25
+ save_video,
26
26
  visual_prompt_counting,
27
27
  zero_shot_counting,
28
28
  )
@@ -5,12 +5,13 @@ import logging
5
5
  import tempfile
6
6
  from importlib import resources
7
7
  from pathlib import Path
8
- from typing import Any, Callable, Dict, List, Tuple, Union, cast
8
+ from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
9
9
 
10
10
  import cv2
11
11
  import numpy as np
12
12
  import pandas as pd
13
13
  import requests
14
+ from moviepy.editor import ImageSequenceClip
14
15
  from PIL import Image, ImageDraw, ImageFont
15
16
 
16
17
  from vision_agent.tools.tool_utils import _send_inference_request
@@ -545,24 +546,49 @@ def save_image(image: np.ndarray) -> str:
545
546
  >>> save_image(image)
546
547
  "/tmp/tmpabc123.png"
547
548
  """
549
+ from IPython.display import display
548
550
 
551
+ pil_image = Image.fromarray(image.astype(np.uint8))
552
+ display(pil_image)
549
553
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
550
- pil_image = Image.fromarray(image.astype(np.uint8))
551
554
  pil_image.save(f, "PNG")
552
555
  return f.name
553
556
 
554
557
 
555
- def save_video_to_result(video_uri: str) -> None:
556
- """'save_video_to_result' a utility function that saves a video into the result of the code execution (as an intermediate output).
557
- This function is required to run if user wants to visualize the video generated by the code.
558
+ def save_video(
559
+ frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 4
560
+ ) -> str:
561
+ """'save_video' is a utility function that saves a list of frames as a mp4 video file on disk.
558
562
 
559
563
  Parameters:
560
- video_uri (str): The URI to the video file. Currently only local file paths are supported.
564
+ frames (list[np.ndarray]): A list of frames to save.
565
+ output_video_path (str): The path to save the video file. If not provided, a temporary file will be created.
566
+ fps (float): The number of frames composes a second in the video.
567
+
568
+ Returns:
569
+ str: The path to the saved video file.
561
570
 
562
571
  Example
563
572
  -------
564
- >>> save_video_to_result("path/to/video.mp4")
573
+ >>> save_video(frames)
574
+ "/tmp/tmpvideo123.mp4"
565
575
  """
576
+ if fps <= 0:
577
+ _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
578
+ fps = 4
579
+ with ImageSequenceClip(frames, fps=fps) as video:
580
+ if output_video_path:
581
+ f = open(output_video_path, "wb")
582
+ else:
583
+ f = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) # type: ignore
584
+ video.write_videofile(f.name, codec="libx264")
585
+ f.close()
586
+ _save_video_to_result(f.name)
587
+ return f.name
588
+
589
+
590
+ def _save_video_to_result(video_uri: str) -> None:
591
+ """Saves a video into the result of the code execution (as an intermediate output)."""
566
592
  from IPython.display import display
567
593
 
568
594
  serializer = FileSerializer(video_uri)
@@ -595,8 +621,6 @@ def overlay_bounding_boxes(
595
621
  image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
596
622
  )
597
623
  """
598
- from IPython.display import display
599
-
600
624
  pil_image = Image.fromarray(image.astype(np.uint8))
601
625
 
602
626
  if len(set([box["label"] for box in bboxes])) > len(COLORS):
@@ -634,9 +658,6 @@ def overlay_bounding_boxes(
634
658
  text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
635
659
  draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
636
660
  draw.text((box[0], box[1]), text, fill="black", font=font)
637
-
638
- pil_image = pil_image.convert("RGB")
639
- display(pil_image)
640
661
  return np.array(pil_image)
641
662
 
642
663
 
@@ -668,8 +689,6 @@ def overlay_segmentation_masks(
668
689
  }],
669
690
  )
670
691
  """
671
- from IPython.display import display
672
-
673
692
  pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGBA")
674
693
 
675
694
  if len(set([mask["label"] for mask in masks])) > len(COLORS):
@@ -690,9 +709,6 @@ def overlay_segmentation_masks(
690
709
  np_mask[mask > 0, :] = color[label] + (255 * 0.5,)
691
710
  mask_img = Image.fromarray(np_mask.astype(np.uint8))
692
711
  pil_image = Image.alpha_composite(pil_image, mask_img)
693
-
694
- pil_image = pil_image.convert("RGB")
695
- display(pil_image)
696
712
  return np.array(pil_image)
697
713
 
698
714
 
@@ -723,8 +739,6 @@ def overlay_heat_map(
723
739
  },
724
740
  )
725
741
  """
726
- from IPython.display import display
727
-
728
742
  pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
729
743
 
730
744
  if "heat_map" not in heat_map or len(heat_map["heat_map"]) == 0:
@@ -740,10 +754,7 @@ def overlay_heat_map(
740
754
  combined = Image.alpha_composite(
741
755
  pil_image.convert("RGBA"), overlay.resize(pil_image.size)
742
756
  )
743
-
744
- pil_image = combined.convert("RGB")
745
- display(pil_image)
746
- return np.array(pil_image)
757
+ return np.array(combined)
747
758
 
748
759
 
749
760
  def get_tool_documentation(funcs: List[Callable[..., Any]]) -> str:
@@ -805,7 +816,7 @@ TOOLS = [
805
816
  save_json,
806
817
  load_image,
807
818
  save_image,
808
- save_video_to_result,
819
+ save_video,
809
820
  overlay_bounding_boxes,
810
821
  overlay_segmentation_masks,
811
822
  overlay_heat_map,
@@ -818,7 +829,7 @@ UTILITIES_DOCSTRING = get_tool_documentation(
818
829
  save_json,
819
830
  load_image,
820
831
  save_image,
821
- save_video_to_result,
832
+ save_video,
822
833
  overlay_bounding_boxes,
823
834
  overlay_segmentation_masks,
824
835
  overlay_heat_map,
@@ -401,6 +401,8 @@ class CodeInterpreter(abc.ABC):
401
401
 
402
402
 
403
403
  class E2BCodeInterpreter(CodeInterpreter):
404
+ KEEP_ALIVE_SEC: int = 300
405
+
404
406
  def __init__(self, *args: Any, **kwargs: Any) -> None:
405
407
  super().__init__(*args, **kwargs)
406
408
  assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
@@ -426,7 +428,13 @@ print(f"Vision Agent version: {va_version}")"""
426
428
  def restart_kernel(self) -> None:
427
429
  self.interpreter.notebook.restart_kernel()
428
430
 
431
+ @tenacity.retry(
432
+ wait=tenacity.wait_exponential_jitter(),
433
+ stop=tenacity.stop_after_attempt(2),
434
+ retry=tenacity.retry_if_exception_type(TimeoutError),
435
+ )
429
436
  def exec_cell(self, code: str) -> Execution:
437
+ self.interpreter.keep_alive(E2BCodeInterpreter.KEEP_ALIVE_SEC)
430
438
  execution = self.interpreter.notebook.exec_cell(code, timeout=self.timeout)
431
439
  return Execution.from_e2b_execution(execution)
432
440
 
@@ -31,7 +31,6 @@ def play_video(video_base64: str) -> None:
31
31
  # Display the first frame and wait for any key press to start the video
32
32
  ret, frame = cap.read()
33
33
  if ret:
34
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
35
34
  cv2.imshow("Video Player", frame)
36
35
  _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
37
36
  cv2.waitKey(0) # Wait for any key press
@@ -40,7 +39,6 @@ def play_video(video_base64: str) -> None:
40
39
  ret, frame = cap.read()
41
40
  if not ret:
42
41
  break
43
- frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
44
42
  cv2.imshow("Video Player", frame)
45
43
  # Press 'q' to exit the video
46
44
  if cv2.waitKey(200) & 0xFF == ord("q"):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.46
3
+ Version: 0.2.48
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
11
11
  vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
12
  vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
13
13
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
14
- vision_agent/agent/vision_agent.py,sha256=S0VJWsdr0NIYjikXvPrEX-njGMqOIA53r4Q4NYY0Lpo,20365
14
+ vision_agent/agent/vision_agent.py,sha256=X_LF2wRXVYAr8xMuJs3Omi8n06uVgLNgtF25sidKtfM,20424
15
15
  vision_agent/agent/vision_agent_prompts.py,sha256=hgnTlaYp2HMBHLi3e4faPb-DI5jQL9jfhKq9jyEUEgY,8370
16
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
@@ -19,18 +19,18 @@ vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,
19
19
  vision_agent/llm/llm.py,sha256=UZ73GqQHE-NKOJWsrOTWfmdHYsbCBkJ5rZ7dhcSCHHw,5951
20
20
  vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
21
21
  vision_agent/lmm/lmm.py,sha256=NwcZYLTzi95LSMAk0sTtw7G_zBLa9lU-DHM5GUUCiK4,10622
22
- vision_agent/tools/__init__.py,sha256=K_7knxmyTIcSEGL8c9wF8RpVh3GrMYfybFaq-2SUM1w,1538
22
+ vision_agent/tools/__init__.py,sha256=Sng6dChynJJCYWjraXXM0tep_VPdnYl3L9vb0HMy_Pc,1528
23
23
  vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
24
24
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
25
25
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
26
- vision_agent/tools/tools.py,sha256=SrNrIjyUKoTE3mCqGcy6nC-MeEzJ8uJCumlSkTvvPpg,26085
26
+ vision_agent/tools/tools.py,sha256=Vpn2SxtjEcnztovat6qMiH52gFsDHo3ikEPrAT4e5yc,26639
27
27
  vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
28
- vision_agent/utils/execute.py,sha256=GlpUGe3pg5KdSvRHLFfVcn9ptXBIp-QRoHT3Wa6aIMs,20318
28
+ vision_agent/utils/execute.py,sha256=GqoAodxtwTPBr1nujPTsWiZO2rBGvWVXTe8lgxY4d_g,20603
29
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
- vision_agent/utils/video.py,sha256=_u3UrEpcJzbclKyJYxF7SiDQGhE2gUc598diYYiEv34,8885
33
- vision_agent-0.2.46.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.46.dist-info/METADATA,sha256=FOlKABAkLUX8oqtjeE2q9EO6j8yeoiwyw3lWUpIe0ow,6817
35
- vision_agent-0.2.46.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.46.dist-info/RECORD,,
32
+ vision_agent/utils/video.py,sha256=BJ9fomy2giAl038JThQP1WQZ-u4J4J_nsZB7QEWvlcQ,8767
33
+ vision_agent-0.2.48.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.48.dist-info/METADATA,sha256=sJSWNAHN2-JMNb5hi4iA-HTzKNskLioIse9sdrMDuy4,6817
35
+ vision_agent-0.2.48.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.48.dist-info/RECORD,,