vision-agent 0.2.185__py3-none-any.whl → 0.2.186__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1057,23 +1057,25 @@ def video_temporal_localization(
1057
1057
  prompt: str,
1058
1058
  frames: List[np.ndarray],
1059
1059
  model: str = "qwen2vl",
1060
- chunk_length: Optional[float] = None,
1061
- chunk_length_seconds: Optional[float] = None,
1062
1060
  chunk_length_frames: Optional[int] = 2,
1063
1061
  ) -> List[float]:
1064
- """'video_temporal_localization' is a tool that can find objects in a video given a question about it.
1065
- It returns a list of floats with a value of 1.0 if the object to be found is present in the chunk of video being analyzed.
1062
+ """'video_temporal_localization' will run qwen2vl on each chunk_length_frames
1063
+ value selected for the video. It can detect multiple objects independently in
1064
+ each chunk of chunk_length_frames frames, given a text prompt such as a
1065
+ referring expression, but does not track objects across frames.
1066
+ It returns a list of floats with a value of 1.0 if the objects are found in a given
1067
+ chunk of chunk_length_frames frames of the video.
1066
1068
 
1067
1069
  Parameters:
1068
1070
  prompt (str): The question about the video
1069
1071
  frames (List[np.ndarray]): The reference frames used for the question
1070
- model (str): The model to use for the inference. Valid values are 'qwen2vl', 'gpt4o', 'internlm-xcomposer'
1071
- chunk_length (Optional[float]): length of each chunk in seconds
1072
- chunk_length_seconds (Optional[float]): alternative length for chunk in seconds
1072
+ model (str): The model to use for the inference. Valid values are
1073
+ 'qwen2vl', 'gpt4o', 'internlm-xcomposer'
1073
1074
  chunk_length_frames (Optional[int]): length of each chunk in frames
1074
1075
 
1075
1076
  Returns:
1076
- List[float]: A list of floats with a value of 1.0 if the object to be found is present in the chunk of video
1077
+ List[float]: A list of floats with a value of 1.0 if the objects to be found
1078
+ are present in a given chunk of chunk_length_frames frames of the video.
1077
1079
 
1078
1080
  Example
1079
1081
  -------
@@ -1088,10 +1090,6 @@ def video_temporal_localization(
1088
1090
  "model": model,
1089
1091
  "function_name": "video_temporal_localization",
1090
1092
  }
1091
- if chunk_length is not None:
1092
- payload["chunk_length"] = chunk_length
1093
- if chunk_length_seconds is not None:
1094
- payload["chunk_length_seconds"] = chunk_length_seconds
1095
1093
  if chunk_length_frames is not None:
1096
1094
  payload["chunk_length_frames"] = chunk_length_frames
1097
1095
 
@@ -1790,9 +1788,8 @@ def flux_image_inpainting(
1790
1788
  where 1 indicates areas to be inpainted and 0 indicates areas to be preserved.
1791
1789
 
1792
1790
  Returns:
1793
- np.ndarray:
1794
- The generated image(s) as a numpy array in RGB format
1795
- with values ranging from 0 to 255.
1791
+ np.ndarray: The generated image(s) as a numpy array in RGB format with values
1792
+ ranging from 0 to 255.
1796
1793
 
1797
1794
  -------
1798
1795
  Example:
@@ -2351,6 +2348,7 @@ FUNCTION_TOOLS = [
2351
2348
  closest_box_distance,
2352
2349
  qwen2_vl_images_vqa,
2353
2350
  qwen2_vl_video_vqa,
2351
+ video_temporal_localization,
2354
2352
  ]
2355
2353
 
2356
2354
  UTIL_TOOLS = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.185
3
+ Version: 0.2.186
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -63,10 +63,10 @@ code to solve the task for them. Check out our discord for updates and roadmaps!
63
63
  ## Table of Contents
64
64
  - [🚀Quick Start](#quick-start)
65
65
  - [📚Documentation](#documentation)
66
- - [🔍🤖VisionAgent](#vision-agent-basic-usage)
66
+ - [🔍🤖VisionAgent](#visionagent-basic-usage)
67
67
  - [🛠️Tools](#tools)
68
68
  - [🤖LMMs](#lmms)
69
- - [💻🤖VisionAgent Coder](#vision-agent-coder)
69
+ - [💻🤖VisionAgent Coder](#visionagent-coder)
70
70
  - [🏗️Additional Backends](#additional-backends)
71
71
 
72
72
  ## Quick Start
@@ -20,7 +20,7 @@ vision_agent/tools/__init__.py,sha256=KVP4_6qxOb2lpFdQgQtyDfdkMLL1O6wVZNK19MXp-x
20
20
  vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
21
21
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
22
22
  vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
23
- vision_agent/tools/tools.py,sha256=us3fOV3JIqFB9WidEX6NT65HwJbIxhh59RRvUcMIshI,83251
23
+ vision_agent/tools/tools.py,sha256=-oq8jzITi-yVYJ3ut5MuGJ65jd3ESRtHfw4SCAruMps,83059
24
24
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
25
25
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
26
26
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
29
29
  vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
30
30
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
31
31
  vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
32
- vision_agent-0.2.185.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
- vision_agent-0.2.185.dist-info/METADATA,sha256=Wgo1bRpQ3MgqxIDpBiN0Tj0YAUBwRtYCQ7DmhJwgKpY,18330
34
- vision_agent-0.2.185.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
- vision_agent-0.2.185.dist-info/RECORD,,
32
+ vision_agent-0.2.186.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
+ vision_agent-0.2.186.dist-info/METADATA,sha256=NQfESIRsq9-QWyPzNkyv6dSuRS6TGe5D2tZH4iJpeBU,18328
34
+ vision_agent-0.2.186.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
+ vision_agent-0.2.186.dist-info/RECORD,,