vision-agent 0.2.197__py3-none-any.whl → 0.2.199__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/.sim_tools/df.csv +18 -18
- vision_agent/.sim_tools/embs.npy +0 -0
- vision_agent/tools/tools.py +1 -1
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/METADATA +1 -1
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/RECORD +7 -7
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/WHEEL +0 -0
vision_agent/.sim_tools/df.csv
CHANGED
@@ -80,24 +80,6 @@ desc,doc,name
|
|
80
80
|
{'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
|
81
81
|
]
|
82
82
|
",ocr
|
83
|
-
'clip' is a tool that can classify an image or a cropped detection given a list of input classes or tags. It returns the same list of the input classes along with their probability scores based on image content.,"clip(image: numpy.ndarray, classes: List[str]) -> Dict[str, Any]:
|
84
|
-
'clip' is a tool that can classify an image or a cropped detection given a list
|
85
|
-
of input classes or tags. It returns the same list of the input classes along with
|
86
|
-
their probability scores based on image content.
|
87
|
-
|
88
|
-
Parameters:
|
89
|
-
image (np.ndarray): The image to classify or tag
|
90
|
-
classes (List[str]): The list of classes or tags that is associated with the image
|
91
|
-
|
92
|
-
Returns:
|
93
|
-
Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
|
94
|
-
contains a list of given labels and other a list of scores.
|
95
|
-
|
96
|
-
Example
|
97
|
-
-------
|
98
|
-
>>> clip(image, ['dog', 'cat', 'bird'])
|
99
|
-
{""labels"": [""dog"", ""cat"", ""bird""], ""scores"": [0.68, 0.30, 0.02]},
|
100
|
-
",clip
|
101
83
|
'vit_image_classification' is a tool that can classify an image. It returns a list of classes and their probability scores based on image content.,"vit_image_classification(image: numpy.ndarray) -> Dict[str, Any]:
|
102
84
|
'vit_image_classification' is a tool that can classify an image. It returns a
|
103
85
|
list of classes and their probability scores based on image content.
|
@@ -488,6 +470,24 @@ desc,doc,name
|
|
488
470
|
... )
|
489
471
|
>>> save_image(result, ""inpainted_room.png"")
|
490
472
|
",flux_image_inpainting
|
473
|
+
'siglip_classification' is a tool that can classify an image or a cropped detection given a list of input labels or tags. It returns the same list of the input labels along with their probability scores based on image content.,"siglip_classification(image: numpy.ndarray, labels: List[str]) -> Dict[str, Any]:
|
474
|
+
'siglip_classification' is a tool that can classify an image or a cropped detection given a list
|
475
|
+
of input labels or tags. It returns the same list of the input labels along with
|
476
|
+
their probability scores based on image content.
|
477
|
+
|
478
|
+
Parameters:
|
479
|
+
image (np.ndarray): The image to classify or tag
|
480
|
+
labels (List[str]): The list of labels or tags that is associated with the image
|
481
|
+
|
482
|
+
Returns:
|
483
|
+
Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
|
484
|
+
contains a list of given labels and other a list of scores.
|
485
|
+
|
486
|
+
Example
|
487
|
+
-------
|
488
|
+
>>> siglip_classification(image, ['dog', 'cat', 'bird'])
|
489
|
+
{""labels"": [""dog"", ""cat"", ""bird""], ""scores"": [0.68, 0.30, 0.02]},
|
490
|
+
",siglip_classification
|
491
491
|
"'extract_frames_and_timestamps' extracts frames and timestamps from a video which can be a file path, url or youtube link, returns a list of dictionaries with keys ""frame"" and ""timestamp"" where ""frame"" is a numpy array and ""timestamp"" is the relative time in seconds where the frame was captured. The frame is a numpy array.","extract_frames_and_timestamps(video_uri: Union[str, pathlib.Path], fps: float = 1) -> List[Dict[str, Union[numpy.ndarray, float]]]:
|
492
492
|
'extract_frames_and_timestamps' extracts frames and timestamps from a video
|
493
493
|
which can be a file path, url or youtube link, returns a list of dictionaries
|
vision_agent/.sim_tools/embs.npy
CHANGED
Binary file
|
vision_agent/tools/tools.py
CHANGED
@@ -2453,7 +2453,6 @@ FUNCTION_TOOLS = [
|
|
2453
2453
|
owl_v2_image,
|
2454
2454
|
owl_v2_video,
|
2455
2455
|
ocr,
|
2456
|
-
clip,
|
2457
2456
|
vit_image_classification,
|
2458
2457
|
vit_nsfw_classification,
|
2459
2458
|
countgd_counting,
|
@@ -2471,6 +2470,7 @@ FUNCTION_TOOLS = [
|
|
2471
2470
|
qwen2_vl_video_vqa,
|
2472
2471
|
video_temporal_localization,
|
2473
2472
|
flux_image_inpainting,
|
2473
|
+
siglip_classification,
|
2474
2474
|
]
|
2475
2475
|
|
2476
2476
|
UTIL_TOOLS = [
|
@@ -1,5 +1,5 @@
|
|
1
|
-
vision_agent/.sim_tools/df.csv,sha256=
|
2
|
-
vision_agent/.sim_tools/embs.npy,sha256=
|
1
|
+
vision_agent/.sim_tools/df.csv,sha256=0fmLwTDjnRTiqYwamTOdCPjruE6wZz0AVrONIPTHxZY,34086
|
2
|
+
vision_agent/.sim_tools/embs.npy,sha256=xF8Cg7Xd09QCTySj831aL1O2_0kRNaaH8XRJIRjgWzQ,356480
|
3
3
|
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
4
4
|
vision_agent/agent/__init__.py,sha256=j4W3zHXKE96o93ZziY62ZBWgicLYEink1rIU3gPsfwM,548
|
5
5
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
@@ -27,7 +27,7 @@ vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB
|
|
27
27
|
vision_agent/tools/planner_tools.py,sha256=FROahw_6Taqvytv6pOjCHUEypOfjsi_f8Vo1c5vz6Mw,8823
|
28
28
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
29
29
|
vision_agent/tools/tool_utils.py,sha256=GDGOmBCo4UfYz-DJ-olREJHPsqs5mzHu0YXiAnpNE8E,10179
|
30
|
-
vision_agent/tools/tools.py,sha256=
|
30
|
+
vision_agent/tools/tools.py,sha256=wXDs0m_Yb601FQVp5fPYYVtt4lHUeMnuqIbfDZhsE4Q,87852
|
31
31
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
32
32
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
33
33
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -36,7 +36,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
|
|
36
36
|
vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
|
37
37
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
38
38
|
vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
|
39
|
-
vision_agent-0.2.
|
40
|
-
vision_agent-0.2.
|
41
|
-
vision_agent-0.2.
|
42
|
-
vision_agent-0.2.
|
39
|
+
vision_agent-0.2.199.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
40
|
+
vision_agent-0.2.199.dist-info/METADATA,sha256=NbaPI49uOha3uZXbfOokpji32pilLujBz7DcmhaXW1M,19026
|
41
|
+
vision_agent-0.2.199.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
42
|
+
vision_agent-0.2.199.dist-info/RECORD,,
|
File without changes
|
File without changes
|