vision-agent 0.2.188__py3-none-any.whl → 0.2.190__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/__init__.py +1 -0
- vision_agent/tools/tools.py +43 -0
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/METADATA +1 -1
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/RECORD +6 -6
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.188.dist-info → vision_agent-0.2.190.dist-info}/WHEEL +0 -0
vision_agent/tools/__init__.py
CHANGED
vision_agent/tools/tools.py
CHANGED
@@ -1845,6 +1845,48 @@ def flux_image_inpainting(
|
|
1845
1845
|
return output_image
|
1846
1846
|
|
1847
1847
|
|
1848
|
+
def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any]:
    """'siglip_classification' is a tool that can classify an image or a cropped
    detection given a list of input labels or tags. It returns the same list of the
    input labels along with their probability scores based on image content.

    Parameters:
        image (np.ndarray): The image to classify or tag
        labels (List[str]): The list of labels or tags that is associated with the image

    Returns:
        Dict[str, Any]: A dictionary containing the labels and scores. The "labels"
            key holds the list of given labels and the "scores" key holds the list
            of their scores. Both lists are empty when the image has zero height
            or width, or when no labels are given.

    Example
    -------
        >>> siglip_classification(image, ['dog', 'cat', 'bird'])
        {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]}
    """

    # An image with zero height or width, or an empty label list, leaves nothing
    # to classify, so return the empty result without sending a request.
    if image.shape[0] < 1 or image.shape[1] < 1 or not labels:
        return {"labels": [], "scores": []}

    image_file = numpy_to_bytes(image)

    files = [("image", image_file)]

    payload = {
        "model": "siglip",
        "labels": labels,
    }

    response: Dict[str, Any] = send_inference_request(
        payload=payload,
        endpoint_name="classification",
        files=files,
        v2=True,
        metadata_payload={"function_name": "siglip_classification"},
    )

    return response
|
1888
|
+
|
1889
|
+
|
1848
1890
|
# Utility and visualization functions
|
1849
1891
|
|
1850
1892
|
|
@@ -2362,6 +2404,7 @@ FUNCTION_TOOLS = [
|
|
2362
2404
|
qwen2_vl_images_vqa,
|
2363
2405
|
qwen2_vl_video_vqa,
|
2364
2406
|
video_temporal_localization,
|
2407
|
+
flux_image_inpainting,
|
2365
2408
|
]
|
2366
2409
|
|
2367
2410
|
UTIL_TOOLS = [
|
@@ -16,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
|
|
16
16
|
vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
|
17
17
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
18
18
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
19
|
-
vision_agent/tools/__init__.py,sha256=
|
19
|
+
vision_agent/tools/__init__.py,sha256=uhqE2bo9q1F7andb2zEzp-OYVmYOElPdapIGK9bQ63U,2825
|
20
20
|
vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
|
-
vision_agent/tools/tools.py,sha256=
|
23
|
+
vision_agent/tools/tools.py,sha256=nTeWePteTltKEvDcr2t3nSDtdUL0xdXEIjeolvXNxGg,84945
|
24
24
|
vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
25
25
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
26
26
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
31
|
vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
32
|
+
vision_agent-0.2.190.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.190.dist-info/METADATA,sha256=AvlPUByWfCJlMgl2ZvWDfnp9ila0lsCv6x3tZ41DY8k,18328
|
34
|
+
vision_agent-0.2.190.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.190.dist-info/RECORD,,
|
File without changes
|
File without changes
|