PyPI - vision-agent - Versions diffs - 0.2.47__py3-none-any.whl → 0.2.49__py3-none-any.whl - Mend

vision-agent 0.2.47__py3-none-any.whl → 0.2.49__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -36,7 +36,11 @@ logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _MAX_TABULATE_COL_WIDTH = 80
 _CONSOLE = Console()
-_DEFAULT_IMPORT = "\n".join(T.__new_tools__)
+_DEFAULT_IMPORT = "\n".join(T.__new_tools__) + "\n".join(
+    [
+        "from typing import *",
+    ]
+)
 def get_diff(before: str, after: str) -> str:

vision_agent/tools/__init__.py CHANGED Viewed

@@ -22,7 +22,7 @@ from .tools import (
     overlay_segmentation_masks,
     save_image,
     save_json,
-    save_video_to_result,
+    save_video,
     visual_prompt_counting,
     zero_shot_counting,
 )

vision_agent/tools/tools.py CHANGED Viewed

@@ -5,12 +5,13 @@ import logging
 import tempfile
 from importlib import resources
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Tuple, Union, cast
+from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
 import cv2
 import numpy as np
 import pandas as pd
 import requests
+from moviepy.editor import ImageSequenceClip
 from PIL import Image, ImageDraw, ImageFont
 from vision_agent.tools.tool_utils import _send_inference_request
@@ -545,24 +546,49 @@ def save_image(image: np.ndarray) -> str:
     >>> save_image(image)
     "/tmp/tmpabc123.png"
     """
+    from IPython.display import display
+    pil_image = Image.fromarray(image.astype(np.uint8))
+    display(pil_image)
     with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as f:
-        pil_image = Image.fromarray(image.astype(np.uint8))
         pil_image.save(f, "PNG")
     return f.name
-def save_video_to_result(video_uri: str) -> None:
-    """'save_video_to_result' a utility function that saves a video into the result of the code execution (as an intermediate output).
-    This function is required to run if user wants to visualize the video generated by the code.
+def save_video(
+    frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 4
+) -> str:
+    """'save_video' is a utility function that saves a list of frames as a mp4 video file on disk.
     Parameters:
-        video_uri (str): The URI to the video file. Currently only local file paths are supported.
+        frames (list[np.ndarray]): A list of frames to save.
+        output_video_path (str): The path to save the video file. If not provided, a temporary file will be created.
+        fps (float): The number of frames composes a second in the video.
+    Returns:
+        str: The path to the saved video file.
     Example
     -------
-    >>> save_video_to_result("path/to/video.mp4")
+    >>> save_video(frames)
+    "/tmp/tmpvideo123.mp4"
     """
+    if fps <= 0:
+        _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
+        fps = 4
+    with ImageSequenceClip(frames, fps=fps) as video:
+        if output_video_path:
+            f = open(output_video_path, "wb")
+        else:
+            f = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)  # type: ignore
+        video.write_videofile(f.name, codec="libx264")
+        f.close()
+        _save_video_to_result(f.name)
+        return f.name
+def _save_video_to_result(video_uri: str) -> None:
+    """Saves a video into the result of the code execution (as an intermediate output)."""
     from IPython.display import display
     serializer = FileSerializer(video_uri)
@@ -595,8 +621,6 @@ def overlay_bounding_boxes(
         image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
     )
     """
-    from IPython.display import display
     pil_image = Image.fromarray(image.astype(np.uint8))
     if len(set([box["label"] for box in bboxes])) > len(COLORS):
@@ -623,20 +647,14 @@ def overlay_bounding_boxes(
         box = elt["bbox"]
         scores = elt["score"]
-        box = [
-            int(box[0] * width),
-            int(box[1] * height),
-            int(box[2] * width),
-            int(box[3] * height),
-        ]
+        # denormalize the box if it is normalized
+        box = denormalize_bbox(box, (height, width))
         draw.rectangle(box, outline=color[label], width=4)
         text = f"{label}: {scores:.2f}"
         text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
         draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
         draw.text((box[0], box[1]), text, fill="black", font=font)
-    pil_image = pil_image.convert("RGB")
-    display(pil_image)
     return np.array(pil_image)
@@ -668,8 +686,6 @@ def overlay_segmentation_masks(
         }],
     )
     """
-    from IPython.display import display
     pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGBA")
     if len(set([mask["label"] for mask in masks])) > len(COLORS):
@@ -690,9 +706,6 @@ def overlay_segmentation_masks(
         np_mask[mask > 0, :] = color[label] + (255 * 0.5,)
         mask_img = Image.fromarray(np_mask.astype(np.uint8))
         pil_image = Image.alpha_composite(pil_image, mask_img)
-    pil_image = pil_image.convert("RGB")
-    display(pil_image)
     return np.array(pil_image)
@@ -723,8 +736,6 @@ def overlay_heat_map(
         },
     )
     """
-    from IPython.display import display
     pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
     if "heat_map" not in heat_map or len(heat_map["heat_map"]) == 0:
@@ -740,10 +751,7 @@ def overlay_heat_map(
     combined = Image.alpha_composite(
         pil_image.convert("RGBA"), overlay.resize(pil_image.size)
     )
-    pil_image = combined.convert("RGB")
-    display(pil_image)
-    return np.array(pil_image)
+    return np.array(combined)
 def get_tool_documentation(funcs: List[Callable[..., Any]]) -> str:
@@ -805,7 +813,7 @@ TOOLS = [
     save_json,
     load_image,
     save_image,
-    save_video_to_result,
+    save_video,
     overlay_bounding_boxes,
     overlay_segmentation_masks,
     overlay_heat_map,
@@ -818,7 +826,7 @@ UTILITIES_DOCSTRING = get_tool_documentation(
         save_json,
         load_image,
         save_image,
-        save_video_to_result,
+        save_video,
         overlay_bounding_boxes,
         overlay_segmentation_masks,
         overlay_heat_map,

vision_agent/utils/execute.py CHANGED Viewed

@@ -401,6 +401,8 @@ class CodeInterpreter(abc.ABC):
 class E2BCodeInterpreter(CodeInterpreter):
+    KEEP_ALIVE_SEC: int = 300
     def __init__(self, *args: Any, **kwargs: Any) -> None:
         super().__init__(*args, **kwargs)
         assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
@@ -432,6 +434,7 @@ print(f"Vision Agent version: {va_version}")"""
         retry=tenacity.retry_if_exception_type(TimeoutError),
     )
     def exec_cell(self, code: str) -> Execution:
+        self.interpreter.keep_alive(E2BCodeInterpreter.KEEP_ALIVE_SEC)
         execution = self.interpreter.notebook.exec_cell(code, timeout=self.timeout)
         return Execution.from_e2b_execution(execution)

vision_agent/utils/video.py CHANGED Viewed

@@ -31,7 +31,6 @@ def play_video(video_base64: str) -> None:
         # Display the first frame and wait for any key press to start the video
         ret, frame = cap.read()
         if ret:
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             cv2.imshow("Video Player", frame)
             _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
             cv2.waitKey(0)  # Wait for any key press
@@ -40,7 +39,6 @@ def play_video(video_base64: str) -> None:
             ret, frame = cap.read()
             if not ret:
                 break
-            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
             cv2.imshow("Video Player", frame)
             # Press 'q' to exit the video
             if cv2.waitKey(200) & 0xFF == ord("q"):

{vision_agent-0.2.47.dist-info → vision_agent-0.2.49.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.47
+Version: 0.2.49
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.47.dist-info → vision_agent-0.2.49.dist-info}/RECORD RENAMED Viewed

@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
 vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
 vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=S0VJWsdr0NIYjikXvPrEX-njGMqOIA53r4Q4NYY0Lpo,20365
+vision_agent/agent/vision_agent.py,sha256=X_LF2wRXVYAr8xMuJs3Omi8n06uVgLNgtF25sidKtfM,20424
 vision_agent/agent/vision_agent_prompts.py,sha256=hgnTlaYp2HMBHLi3e4faPb-DI5jQL9jfhKq9jyEUEgY,8370
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
@@ -19,18 +19,18 @@ vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,
 vision_agent/llm/llm.py,sha256=UZ73GqQHE-NKOJWsrOTWfmdHYsbCBkJ5rZ7dhcSCHHw,5951
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
 vision_agent/lmm/lmm.py,sha256=NwcZYLTzi95LSMAk0sTtw7G_zBLa9lU-DHM5GUUCiK4,10622
-vision_agent/tools/__init__.py,sha256=K_7knxmyTIcSEGL8c9wF8RpVh3GrMYfybFaq-2SUM1w,1538
+vision_agent/tools/__init__.py,sha256=Sng6dChynJJCYWjraXXM0tep_VPdnYl3L9vb0HMy_Pc,1528
 vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
-vision_agent/tools/tools.py,sha256=SrNrIjyUKoTE3mCqGcy6nC-MeEzJ8uJCumlSkTvvPpg,26085
+vision_agent/tools/tools.py,sha256=IuTxw-08UodemQAmiIQWdwpqg_Cjf-opGuqtYHv8nuk,26583
 vision_agent/utils/__init__.py,sha256=Ce4yPhoWanRsnTy3X7YzZNBYYRJsrJeT7N59WUf8GZM,209
-vision_agent/utils/execute.py,sha256=6sil3ktl6t8R8dV_RhYfb_s-z5m0c1_xtHCFzofIyqI,20501
+vision_agent/utils/execute.py,sha256=GqoAodxtwTPBr1nujPTsWiZO2rBGvWVXTe8lgxY4d_g,20603
 vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
 vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
 vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
-vision_agent/utils/video.py,sha256=_u3UrEpcJzbclKyJYxF7SiDQGhE2gUc598diYYiEv34,8885
-vision_agent-0.2.47.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.47.dist-info/METADATA,sha256=WuIuMBkKPAExXks1PVwavSOQhXXtHennV9WsvJ1sans,6817
-vision_agent-0.2.47.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.47.dist-info/RECORD,,
+vision_agent/utils/video.py,sha256=BJ9fomy2giAl038JThQP1WQZ-u4J4J_nsZB7QEWvlcQ,8767
+vision_agent-0.2.49.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.49.dist-info/METADATA,sha256=J8uaMXfLvURGCOujviCSb0aaCYOWQnAphcZHjD1bjWw,6817
+vision_agent-0.2.49.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.49.dist-info/RECORD,,

{vision_agent-0.2.47.dist-info → vision_agent-0.2.49.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.47.dist-info → vision_agent-0.2.49.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.47__py3-none-any.whl → 0.2.49__py3-none-any.whl

vision-agent 0.2.47__py3-none-any.whl → 0.2.49__py3-none-any.whl