PyPI - vision-agent - Versions diffs - 0.2.185__tar.gz → 0.2.186__tar.gz - Mend

vision-agent 0.2.185.tar.gz → 0.2.186.tar.gz

Files changed (35) hide show

{vision_agent-0.2.185 → vision_agent-0.2.186}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.185
+Version: 0.2.186
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -63,10 +63,10 @@ code to solve the task for them. Check out our discord for updates and roadmaps!
 ## Table of Contents
 - [🚀Quick Start](#quick-start)
 - [📚Documentation](#documentation)
-- [🔍🤖VisionAgent](#vision-agent-basic-usage)
+- [🔍🤖VisionAgent](#visionagent-basic-usage)
 - [🛠️Tools](#tools)
 - [🤖LMMs](#lmms)
-- [💻🤖VisionAgent Coder](#vision-agent-coder)
+- [💻🤖VisionAgent Coder](#visionagent-coder)
 - [🏗️Additional Backends](#additional-backends)
 ## Quick Start

{vision_agent-0.2.185 → vision_agent-0.2.186}/README.md RENAMED Viewed

@@ -21,10 +21,10 @@ code to solve the task for them. Check out our discord for updates and roadmaps!
 ## Table of Contents
 - [🚀Quick Start](#quick-start)
 - [📚Documentation](#documentation)
-- [🔍🤖VisionAgent](#vision-agent-basic-usage)
+- [🔍🤖VisionAgent](#visionagent-basic-usage)
 - [🛠️Tools](#tools)
 - [🤖LMMs](#lmms)
-- [💻🤖VisionAgent Coder](#vision-agent-coder)
+- [💻🤖VisionAgent Coder](#visionagent-coder)
 - [🏗️Additional Backends](#additional-backends)
 ## Quick Start

{vision_agent-0.2.185 → vision_agent-0.2.186}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.185"
+version = "0.2.186"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.185 → vision_agent-0.2.186}/vision_agent/tools/tools.py RENAMED Viewed

@@ -1057,23 +1057,25 @@ def video_temporal_localization(
     prompt: str,
     frames: List[np.ndarray],
     model: str = "qwen2vl",
-    chunk_length: Optional[float] = None,
-    chunk_length_seconds: Optional[float] = None,
     chunk_length_frames: Optional[int] = 2,
 ) -> List[float]:
-    """'video_temporal_localization' is a tool that can find objects in a video given a question about it.
-    It returns a list of floats with a value of 1.0 if the object to be found is present in the chunk of video being analyzed.
+    """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
+    value selected for the video. It can detect multiple objects independently per
+    chunk_length_frames given a text prompt such as a referring expression
+    but does not track objects across frames.
+    It returns a list of floats with a value of 1.0 if the objects are found in a given
+    chunk_length_frames of the video.
     Parameters:
         prompt (str): The question about the video
         frames (List[np.ndarray]): The reference frames used for the question
-        model (str): The model to use for the inference. Valid values are 'qwen2vl', 'gpt4o', 'internlm-xcomposer'
-        chunk_length (Optional[float]): length of each chunk in seconds
-        chunk_length_seconds (Optional[float]): alternative length for chunk in seconds
+        model (str): The model to use for the inference. Valid values are
+            'qwen2vl', 'gpt4o', 'internlm-xcomposer'
         chunk_length_frames (Optional[int]): length of each chunk in frames
     Returns:
-        List[float]: A list of floats with a value of 1.0 if the object to be found is present in the chunk of video
+        List[float]: A list of floats with a value of 1.0 if the objects to be found
+            are present in the chunk_length_frames of the video.
     Example
     -------
@@ -1088,10 +1090,6 @@ def video_temporal_localization(
         "model": model,
         "function_name": "video_temporal_localization",
     }
-    if chunk_length is not None:
-        payload["chunk_length"] = chunk_length
-    if chunk_length_seconds is not None:
-        payload["chunk_length_seconds"] = chunk_length_seconds
     if chunk_length_frames is not None:
         payload["chunk_length_frames"] = chunk_length_frames
@@ -1790,9 +1788,8 @@ def flux_image_inpainting(
             where 1 indicates areas to be inpainted and 0 indicates areas to be preserved.
     Returns:
-        np.ndarray:
-            The generated image(s) as a numpy array in RGB format
-            with values ranging from 0 to 255.
+        np.ndarray: The generated image(s) as a numpy array in RGB format with values
+            ranging from 0 to 255.
     -------
     Example:
@@ -2351,6 +2348,7 @@ FUNCTION_TOOLS = [
     closest_box_distance,
     qwen2_vl_images_vqa,
     qwen2_vl_video_vqa,
+    video_temporal_localization,
 ]
 UTIL_TOOLS = [