vision-agent 0.2.95__py3-none-any.whl → 0.2.97__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +29 -14
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/METADATA +2 -2
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/RECORD +5 -5
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -9,6 +9,7 @@ from typing import Any, Dict, List, Optional, Tuple, Union, cast
 import cv2
 import numpy as np
 import requests
+from moviepy.editor import ImageSequenceClip
 from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener # type: ignore
 from pytube import YouTube # type: ignore
@@ -106,6 +107,7 @@ def grounding_dino(
             "visual_grounding" if model_size == "large" else "visual_grounding_tiny"
         ),
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "grounding_dino",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
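Every tool hunk in this diff makes the same one-line change: a "function_name" key naming the calling tool is appended to the payload passed to send_inference_request. A minimal sketch of the resulting request shape, modeled on the grounding_dino hunk above (the prompt, thresholds, and image value are illustrative placeholders; send_inference_request and the base64 encoding live elsewhere in tools.py):

from typing import Any, Dict

image_b64 = "<base64-encoded image>"  # placeholder; tools.py encodes the real image earlier

request_data: Dict[str, Any] = {
    "prompt": "person . car",  # illustrative prompt
    "image": image_b64,
    "tool": "visual_grounding",
    "kwargs": {"box_threshold": 0.20, "iou_threshold": 0.20},
    "function_name": "grounding_dino",  # the key added to every tool payload in this release
}
# data: Dict[str, Any] = send_inference_request(request_data, "tools")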
@@ -161,6 +163,7 @@ def owl_v2(
         "image": image_b64,
         "tool": "open_vocab_detection",
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "owl_v2",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
@@ -225,6 +228,7 @@ def grounding_sam(
         "image": image_b64,
         "tool": "visual_grounding_segment",
         "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "function_name": "grounding_sam",
     }
     data: Dict[str, Any] = send_inference_request(request_data, "tools")
     return_data = []
@@ -364,6 +368,7 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "zero_shot_counting",
+        "function_name": "loca_zero_shot_counting",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -399,6 +404,7 @@ def loca_visual_prompt_counting(
         "image": image_b64,
         "prompt": bbox_str,
         "tool": "few_shot_counting",
+        "function_name": "loca_visual_prompt_counting",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["heat_map"] = np.array(b64_to_pil(resp_data["heat_map"][0]))
@@ -428,6 +434,7 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
         "image": image_b64,
         "prompt": prompt,
         "tool": "image_question_answering_with_context",
+        "function_name": "florencev2_roberta_vqa",
     }

     answer = send_inference_request(data, "tools")
@@ -457,6 +464,7 @@ def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
         "image": image_b64,
         "prompt": prompt,
         "tool": "image_question_answering",
+        "function_name": "git_vqa_v2",
     }

     answer = send_inference_request(data, "tools")
@@ -487,6 +495,7 @@ def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
         "prompt": ",".join(classes),
         "image": image_b64,
         "tool": "closed_set_image_classification",
+        "function_name": "clip",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -514,6 +523,7 @@ def vit_image_classification(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "image_classification",
+        "function_name": "vit_image_classification",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = [round(prob, 4) for prob in resp_data["scores"]]
@@ -541,6 +551,7 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
     data = {
         "image": image_b64,
         "tool": "nsfw_image_classification",
+        "function_name": "vit_nsfw_classification",
     }
     resp_data = send_inference_request(data, "tools")
     resp_data["scores"] = round(resp_data["scores"], 4)
@@ -567,6 +578,7 @@ def blip_image_caption(image: np.ndarray) -> str:
     data = {
         "image": image_b64,
         "tool": "image_captioning",
+        "function_name": "blip_image_caption",
     }

     answer = send_inference_request(data, "tools")
@@ -595,6 +607,7 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
         "image": image_b64,
         "tool": "florence2_image_captioning",
         "detail_caption": detail_caption,
+        "function_name": "florencev2_image_caption",
     }

     answer = send_inference_request(data, "tools")
@@ -630,6 +643,7 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
     data = {
         "image": image_b64,
         "tool": "object_detection",
+        "function_name": "florencev2_object_detection",
     }

     answer = send_inference_request(data, "tools")
@@ -686,6 +700,7 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
     data = {
         "image": image_b64,
         "tool": "panoptic_segmentation",
+        "function_name": "detr_segmentation",
     }

     answer = send_inference_request(data, "tools")
@@ -728,6 +743,7 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_depth",
+        "function_name": "depth_anything_v2",
     }

     answer = send_inference_request(data, "tools")
@@ -759,6 +775,7 @@ def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_hed",
+        "function_name": "generate_soft_edge_image",
     }

     answer = send_inference_request(data, "tools")
@@ -791,6 +808,7 @@ def dpt_hybrid_midas(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_normal",
+        "function_name": "dpt_hybrid_midas",
     }

     answer = send_inference_request(data, "tools")
@@ -822,6 +840,7 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
     data = {
         "image": image_b64,
         "tool": "generate_pose",
+        "function_name": "generate_pose_image",
     }

     answer = send_inference_request(data, "tools")
@@ -862,6 +881,7 @@ def template_match(
         "image": image_b64,
         "template": template_image_b64,
         "tool": "template_match",
+        "function_name": "template_match",
     }

     answer = send_inference_request(data, "tools")
@@ -1044,20 +1064,15 @@ def save_video(
         _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
         fps = 4

-
-output_video_path
-
-
-
-
-
-
-video.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))
-video.release()
-
-_save_video_to_result(output_video_path)
-return output_video_path
+    with ImageSequenceClip(frames, fps=fps) as video:
+        if output_video_path:
+            f = open(output_video_path, "wb")
+        else:
+            f = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False) # type: ignore
+        video.write_videofile(f.name, codec="libx264")
+        f.close()
+        _save_video_to_result(f.name)
+        return f.name


 def _save_video_to_result(video_uri: str) -> None:
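The save_video rewrite above drops the cv2.VideoWriter loop in favor of moviepy's ImageSequenceClip, which is why the new import appears at the top of the file. A standalone sketch of the same write path, with dummy frames and a temporary output file standing in for the function's real arguments (the real function additionally validates fps and publishes the result through _save_video_to_result):

import tempfile

import numpy as np
from moviepy.editor import ImageSequenceClip

# Dummy RGB frames; save_video receives a List[np.ndarray] like this.
frames = [np.full((240, 320, 3), fill_value=i * 20, dtype=np.uint8) for i in range(12)]
fps = 4

# Encode the frames to .mp4 the way the new save_video does: build an
# ImageSequenceClip and let moviepy/ffmpeg write it out with libx264.
with ImageSequenceClip(frames, fps=fps) as video:
    f = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
    video.write_videofile(f.name, codec="libx264")
    f.close()

print(f.name)  # path to the written video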
{vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.95
+Version: 0.2.97
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -11,7 +11,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Requires-Dist: anthropic (>=0.31.0,<0.32.0)
 Requires-Dist: e2b (>=0.17.1,<0.18.0)
-Requires-Dist: e2b-code-interpreter (==0.0.
+Requires-Dist: e2b-code-interpreter (==0.0.11a27)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
{vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/RECORD
CHANGED
@@ -15,7 +15,7 @@ vision_agent/tools/__init__.py,sha256=UNiaJAOt1C709gaJ-a9h9BzKnY5JmoEUpgKftsOnyP
 vision_agent/tools/meta_tools.py,sha256=rmxgVzj-vJKeewHbue3qHru4sYsFLxlSZV-YH-eyH5w,13366
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=XoB-iae8hHrBQgJd3fV6-UjZAkClysobUaOM17IcHuE,4597
-vision_agent/tools/tools.py,sha256=
+vision_agent/tools/tools.py,sha256=fHD4qhn7cGG1O77J_BHfaRfW6LMQuj1OIu9xqYu6AG8,43220
 vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
 vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
 vision_agent/utils/execute.py,sha256=s43aUtuq7ZNjil2mxrddiz8EvvqlJwttkYlIiZouXqM,25125
@@ -23,7 +23,7 @@ vision_agent/utils/image_utils.py,sha256=y69wtNla0xHZ1h1x0-vv7nOyKUq69jtjSJBiDCn
 vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
 vision_agent/utils/type_defs.py,sha256=oVFJcicB-s_09lqvn61u0A5ncZsTqZArZledXWbrrg0,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
+vision_agent-0.2.97.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.97.dist-info/METADATA,sha256=00md0PT29fBJuyXl2LeWcrC3l5T6FXn85YE6Kmat60Q,10728
+vision_agent-0.2.97.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.97.dist-info/RECORD,,
{vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/LICENSE
File without changes
{vision_agent-0.2.95.dist-info → vision_agent-0.2.97.dist-info}/WHEEL
File without changes