vision-agent 0.2.197__py3-none-any.whl → 0.2.199__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/.sim_tools/df.csv +18 -18
- vision_agent/.sim_tools/embs.npy +0 -0
- vision_agent/tools/tools.py +1 -1
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/METADATA +1 -1
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/RECORD +7 -7
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/WHEEL +0 -0
vision_agent/.sim_tools/df.csv
CHANGED
@@ -80,24 +80,6 @@ desc,doc,name
|
|
80
80
|
{'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
|
81
81
|
]
|
82
82
|
",ocr
|
83
|
-
'clip' is a tool that can classify an image or a cropped detection given a list of input classes or tags. It returns the same list of the input classes along with their probability scores based on image content.,"clip(image: numpy.ndarray, classes: List[str]) -> Dict[str, Any]:
|
84
|
-
'clip' is a tool that can classify an image or a cropped detection given a list
|
85
|
-
of input classes or tags. It returns the same list of the input classes along with
|
86
|
-
their probability scores based on image content.
|
87
|
-
|
88
|
-
Parameters:
|
89
|
-
image (np.ndarray): The image to classify or tag
|
90
|
-
classes (List[str]): The list of classes or tags that is associated with the image
|
91
|
-
|
92
|
-
Returns:
|
93
|
-
Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
|
94
|
-
contains a list of given labels and other a list of scores.
|
95
|
-
|
96
|
-
Example
|
97
|
-
-------
|
98
|
-
>>> clip(image, ['dog', 'cat', 'bird'])
|
99
|
-
{""labels"": [""dog"", ""cat"", ""bird""], ""scores"": [0.68, 0.30, 0.02]},
|
100
|
-
",clip
|
101
83
|
'vit_image_classification' is a tool that can classify an image. It returns a list of classes and their probability scores based on image content.,"vit_image_classification(image: numpy.ndarray) -> Dict[str, Any]:
|
102
84
|
'vit_image_classification' is a tool that can classify an image. It returns a
|
103
85
|
list of classes and their probability scores based on image content.
|
@@ -488,6 +470,24 @@ desc,doc,name
|
|
488
470
|
... )
|
489
471
|
>>> save_image(result, ""inpainted_room.png"")
|
490
472
|
",flux_image_inpainting
|
473
|
+
'siglip_classification' is a tool that can classify an image or a cropped detection given a list of input labels or tags. It returns the same list of the input labels along with their probability scores based on image content.,"siglip_classification(image: numpy.ndarray, labels: List[str]) -> Dict[str, Any]:
|
474
|
+
'siglip_classification' is a tool that can classify an image or a cropped detection given a list
|
475
|
+
of input labels or tags. It returns the same list of the input labels along with
|
476
|
+
their probability scores based on image content.
|
477
|
+
|
478
|
+
Parameters:
|
479
|
+
image (np.ndarray): The image to classify or tag
|
480
|
+
labels (List[str]): The list of labels or tags that is associated with the image
|
481
|
+
|
482
|
+
Returns:
|
483
|
+
Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
|
484
|
+
contains a list of given labels and other a list of scores.
|
485
|
+
|
486
|
+
Example
|
487
|
+
-------
|
488
|
+
>>> siglip_classification(image, ['dog', 'cat', 'bird'])
|
489
|
+
{""labels"": [""dog"", ""cat"", ""bird""], ""scores"": [0.68, 0.30, 0.02]},
|
490
|
+
",siglip_classification
|
491
491
|
"'extract_frames_and_timestamps' extracts frames and timestamps from a video which can be a file path, url or youtube link, returns a list of dictionaries with keys ""frame"" and ""timestamp"" where ""frame"" is a numpy array and ""timestamp"" is the relative time in seconds where the frame was captured. The frame is a numpy array.","extract_frames_and_timestamps(video_uri: Union[str, pathlib.Path], fps: float = 1) -> List[Dict[str, Union[numpy.ndarray, float]]]:
|
492
492
|
'extract_frames_and_timestamps' extracts frames and timestamps from a video
|
493
493
|
which can be a file path, url or youtube link, returns a list of dictionaries
|
vision_agent/.sim_tools/embs.npy
CHANGED
Binary file
|
vision_agent/tools/tools.py
CHANGED
@@ -2453,7 +2453,6 @@ FUNCTION_TOOLS = [
|
|
2453
2453
|
owl_v2_image,
|
2454
2454
|
owl_v2_video,
|
2455
2455
|
ocr,
|
2456
|
-
clip,
|
2457
2456
|
vit_image_classification,
|
2458
2457
|
vit_nsfw_classification,
|
2459
2458
|
countgd_counting,
|
@@ -2471,6 +2470,7 @@ FUNCTION_TOOLS = [
|
|
2471
2470
|
qwen2_vl_video_vqa,
|
2472
2471
|
video_temporal_localization,
|
2473
2472
|
flux_image_inpainting,
|
2473
|
+
siglip_classification,
|
2474
2474
|
]
|
2475
2475
|
|
2476
2476
|
UTIL_TOOLS = [
|
{vision_agent-0.2.197.dist-info → vision_agent-0.2.199.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
vision_agent/.sim_tools/df.csv,sha256=
|
2
|
-
vision_agent/.sim_tools/embs.npy,sha256=
|
1
|
+
vision_agent/.sim_tools/df.csv,sha256=0fmLwTDjnRTiqYwamTOdCPjruE6wZz0AVrONIPTHxZY,34086
|
2
|
+
vision_agent/.sim_tools/embs.npy,sha256=xF8Cg7Xd09QCTySj831aL1O2_0kRNaaH8XRJIRjgWzQ,356480
|
3
3
|
vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
4
4
|
vision_agent/agent/__init__.py,sha256=j4W3zHXKE96o93ZziY62ZBWgicLYEink1rIU3gPsfwM,548
|
5
5
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
@@ -27,7 +27,7 @@ vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB
|
|
27
27
|
vision_agent/tools/planner_tools.py,sha256=FROahw_6Taqvytv6pOjCHUEypOfjsi_f8Vo1c5vz6Mw,8823
|
28
28
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
29
29
|
vision_agent/tools/tool_utils.py,sha256=GDGOmBCo4UfYz-DJ-olREJHPsqs5mzHu0YXiAnpNE8E,10179
|
30
|
-
vision_agent/tools/tools.py,sha256=
|
30
|
+
vision_agent/tools/tools.py,sha256=wXDs0m_Yb601FQVp5fPYYVtt4lHUeMnuqIbfDZhsE4Q,87852
|
31
31
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
32
32
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
33
33
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -36,7 +36,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
|
|
36
36
|
vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
|
37
37
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
38
38
|
vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
|
39
|
-
vision_agent-0.2.197.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
40
|
-
vision_agent-0.2.
|
41
|
-
vision_agent-0.2.197.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
42
|
-
vision_agent-0.2.197.dist-info/RECORD,,
|
39
|
+
vision_agent-0.2.199.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
40
|
+
vision_agent-0.2.199.dist-info/METADATA,sha256=NbaPI49uOha3uZXbfOokpji32pilLujBz7DcmhaXW1M,19026
|
41
|
+
vision_agent-0.2.199.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
42
|
+
vision_agent-0.2.199.dist-info/RECORD,,
|
File without changes
|
File without changes
|