vision-agent 0.2.188__py3-none-any.whl → 0.2.190__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/tools/__init__.py +1 -0
- vision_agent/tools/tools.py +43 -0
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/METADATA +1 -1
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/RECORD +6 -6
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/WHEEL +0 -0
vision_agent/tools/__init__.py
CHANGED
vision_agent/tools/tools.py
CHANGED
@@ -1845,6 +1845,48 @@ def flux_image_inpainting(
|
|
1845
1845
|
return output_image
|
1846
1846
|
|
1847
1847
|
|
1848
|
+
def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any]:
    """'siglip_classification' is a tool that can classify an image or a cropped detection given a list
    of input labels or tags. It returns the same list of the input labels along with
    their probability scores based on image content.

    Parameters:
        image (np.ndarray): The image to classify or tag
        labels (List[str]): The list of labels or tags that is associated with the image

    Returns:
        Dict[str, Any]: A dictionary with two keys: "labels", the list of given
            labels, and "scores", the list of their scores. Both lists are empty
            when the image has zero height or width, or when no labels are given.

    Example
    -------
        >>> siglip_classification(image, ['dog', 'cat', 'bird'])
        {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]}
    """

    # Degenerate inputs: a zero-sized image (e.g. an empty crop) or an empty
    # label list cannot produce any scores, so skip the remote call entirely
    # and return the same empty result shape in both cases.
    if image.shape[0] < 1 or image.shape[1] < 1 or not labels:
        return {"labels": [], "scores": []}

    # Serialize the array so it can be attached to the request as a file part.
    image_file = numpy_to_bytes(image)
    files = [("image", image_file)]

    payload = {
        "model": "siglip",
        "labels": labels,
    }

    # The endpoint's response is returned to the caller unmodified.
    response: Dict[str, Any] = send_inference_request(
        payload=payload,
        endpoint_name="classification",
        files=files,
        v2=True,
        metadata_payload={"function_name": "siglip_classification"},
    )

    return response
|
1888
|
+
|
1889
|
+
|
1848
1890
|
# Utility and visualization functions
|
1849
1891
|
|
1850
1892
|
|
@@ -2362,6 +2404,7 @@ FUNCTION_TOOLS = [
|
|
2362
2404
|
qwen2_vl_images_vqa,
|
2363
2405
|
qwen2_vl_video_vqa,
|
2364
2406
|
video_temporal_localization,
|
2407
|
+
flux_image_inpainting,
|
2365
2408
|
]
|
2366
2409
|
|
2367
2410
|
UTIL_TOOLS = [
|
@@ -16,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
|
|
16
16
|
vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
|
17
17
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
18
18
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
19
|
-
vision_agent/tools/__init__.py,sha256=
|
19
|
+
vision_agent/tools/__init__.py,sha256=uhqE2bo9q1F7andb2zEzp-OYVmYOElPdapIGK9bQ63U,2825
|
20
20
|
vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
|
-
vision_agent/tools/tools.py,sha256=
|
23
|
+
vision_agent/tools/tools.py,sha256=nTeWePteTltKEvDcr2t3nSDtdUL0xdXEIjeolvXNxGg,84945
|
24
24
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
25
25
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
26
26
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
32
|
+
vision_agent-0.2.190.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.190.dist-info/METADATA,sha256=AvlPUByWfCJlMgl2ZvWDfnp9ila0lsCv6x3tZ41DY8k,18328
|
34
|
+
vision_agent-0.2.190.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.190.dist-info/RECORD,,
|
File without changes
|
File without changes
|