vision-agent 0.2.185__py3-none-any.whl → 0.2.186__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +13 -15
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/METADATA +3 -3
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/RECORD +5 -5
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.185.dist-info → vision_agent-0.2.186.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -1057,23 +1057,25 @@ def video_temporal_localization(
|
|
1057
1057
|
prompt: str,
|
1058
1058
|
frames: List[np.ndarray],
|
1059
1059
|
model: str = "qwen2vl",
|
1060
|
-
chunk_length: Optional[float] = None,
|
1061
|
-
chunk_length_seconds: Optional[float] = None,
|
1062
1060
|
chunk_length_frames: Optional[int] = 2,
|
1063
1061
|
) -> List[float]:
|
1064
|
-
"""'video_temporal_localization'
|
1065
|
-
|
1062
|
+
"""'video_temporal_localization' will run qwen2vl on each chunk_length_frames
|
1063
|
+
value selected for the video. It can detect multiple objects independently per
|
1064
|
+
chunk_length_frames given a text prompt such as a referring expression
|
1065
|
+
but does not track objects across frames.
|
1066
|
+
It returns a list of floats with a value of 1.0 if the objects are found in a given
|
1067
|
+
chunk_length_frames of the video.
|
1066
1068
|
|
1067
1069
|
Parameters:
|
1068
1070
|
prompt (str): The question about the video
|
1069
1071
|
frames (List[np.ndarray]): The reference frames used for the question
|
1070
|
-
model (str): The model to use for the inference. Valid values are
|
1071
|
-
|
1072
|
-
chunk_length_seconds (Optional[float]): alternative length for chunk in seconds
|
1072
|
+
model (str): The model to use for the inference. Valid values are
|
1073
|
+
'qwen2vl', 'gpt4o', 'internlm-xcomposer'
|
1073
1074
|
chunk_length_frames (Optional[int]): length of each chunk in frames
|
1074
1075
|
|
1075
1076
|
Returns:
|
1076
|
-
List[float]: A list of floats with a value of 1.0 if the
|
1077
|
+
List[float]: A list of floats with a value of 1.0 if the objects to be found
|
1078
|
+
are present in the chunk_length_frames of the video.
|
1077
1079
|
|
1078
1080
|
Example
|
1079
1081
|
-------
|
@@ -1088,10 +1090,6 @@ def video_temporal_localization(
|
|
1088
1090
|
"model": model,
|
1089
1091
|
"function_name": "video_temporal_localization",
|
1090
1092
|
}
|
1091
|
-
if chunk_length is not None:
|
1092
|
-
payload["chunk_length"] = chunk_length
|
1093
|
-
if chunk_length_seconds is not None:
|
1094
|
-
payload["chunk_length_seconds"] = chunk_length_seconds
|
1095
1093
|
if chunk_length_frames is not None:
|
1096
1094
|
payload["chunk_length_frames"] = chunk_length_frames
|
1097
1095
|
|
@@ -1790,9 +1788,8 @@ def flux_image_inpainting(
|
|
1790
1788
|
where 1 indicates areas to be inpainted and 0 indicates areas to be preserved.
|
1791
1789
|
|
1792
1790
|
Returns:
|
1793
|
-
np.ndarray:
|
1794
|
-
|
1795
|
-
with values ranging from 0 to 255.
|
1791
|
+
np.ndarray: The generated image(s) as a numpy array in RGB format with values
|
1792
|
+
ranging from 0 to 255.
|
1796
1793
|
|
1797
1794
|
-------
|
1798
1795
|
Example:
|
@@ -2351,6 +2348,7 @@ FUNCTION_TOOLS = [
|
|
2351
2348
|
closest_box_distance,
|
2352
2349
|
qwen2_vl_images_vqa,
|
2353
2350
|
qwen2_vl_video_vqa,
|
2351
|
+
video_temporal_localization,
|
2354
2352
|
]
|
2355
2353
|
|
2356
2354
|
UTIL_TOOLS = [
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.185
|
3
|
+
Version: 0.2.186
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -63,10 +63,10 @@ code to solve the task for them. Check out our discord for updates and roadmaps!
|
|
63
63
|
## Table of Contents
|
64
64
|
- [🚀Quick Start](#quick-start)
|
65
65
|
- [📚Documentation](#documentation)
|
66
|
-
- [🔍🤖VisionAgent](#
|
66
|
+
- [🔍🤖VisionAgent](#visionagent-basic-usage)
|
67
67
|
- [🛠️Tools](#tools)
|
68
68
|
- [🤖LMMs](#lmms)
|
69
|
-
- [💻🤖VisionAgent Coder](#
|
69
|
+
- [💻🤖VisionAgent Coder](#visionagent-coder)
|
70
70
|
- [🏗️Additional Backends](#additional-backends)
|
71
71
|
|
72
72
|
## Quick Start
|
@@ -20,7 +20,7 @@ vision_agent/tools/__init__.py,sha256=KVP4_6qxOb2lpFdQgQtyDfdkMLL1O6wVZNK19MXp-x
|
|
20
20
|
vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
|
-
vision_agent/tools/tools.py,sha256
|
23
|
+
vision_agent/tools/tools.py,sha256=-oq8jzITi-yVYJ3ut5MuGJ65jd3ESRtHfw4SCAruMps,83059
|
24
24
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
25
25
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
26
26
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
|
32
|
-
vision_agent-0.2.185.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.185.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
-
vision_agent-0.2.185.dist-info/RECORD,,
|
32
|
+
vision_agent-0.2.186.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.186.dist-info/METADATA,sha256=NQfESIRsq9-QWyPzNkyv6dSuRS6TGe5D2tZH4iJpeBU,18328
|
34
|
+
vision_agent-0.2.186.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.186.dist-info/RECORD,,
|
File without changes
|
File without changes
|