vision-agent 0.2.188__py3-none-any.whl → 0.2.190__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -69,6 +69,7 @@ from .tools import (
69
69
  qwen2_vl_video_vqa,
70
70
  video_temporal_localization,
71
71
  flux_image_inpainting,
72
+ siglip_classification,
72
73
  )
73
74
 
74
75
  __new_tools__ = [
@@ -1845,6 +1845,48 @@ def flux_image_inpainting(
1845
1845
  return output_image
1846
1846
 
1847
1847
 
1848
def siglip_classification(image: np.ndarray, labels: List[str]) -> Dict[str, Any]:
    """'siglip_classification' is a tool that can classify an image or a cropped detection given a list
    of input labels or tags. It returns the same list of the input labels along with
    their probability scores based on image content.

    Parameters:
        image (np.ndarray): The image to classify or tag
        labels (List[str]): The list of candidate labels or tags to score against
            the image

    Returns:
        Dict[str, Any]: A dictionary with two entries: "labels", the list of given
            labels, and "scores", the list of their scores. Both lists are empty
            when the image has zero height or width, or when no labels are given.

    Example
    -------
        >>> siglip_classification(image, ['dog', 'cat', 'bird'])
        {"labels": ["dog", "cat", "bird"], "scores": [0.68, 0.30, 0.02]}
    """

    # A crop with zero height or width has no pixels to classify; skip the
    # remote call and return the empty result shape.
    if image.shape[0] < 1 or image.shape[1] < 1:
        return {"labels": [], "scores": []}

    # With no candidate labels there is nothing to score, so avoid sending a
    # pointless request to the inference service.
    if len(labels) == 0:
        return {"labels": [], "scores": []}

    image_file = numpy_to_bytes(image)

    files = [("image", image_file)]

    payload = {
        "model": "siglip",
        "labels": labels,
    }

    # The response is passed back to the caller unchanged.
    response: Dict[str, Any] = send_inference_request(
        payload=payload,
        endpoint_name="classification",
        files=files,
        v2=True,
        metadata_payload={"function_name": "siglip_classification"},
    )

    return response
1888
+
1889
+
1848
1890
  # Utility and visualization functions
1849
1891
 
1850
1892
 
@@ -2362,6 +2404,7 @@ FUNCTION_TOOLS = [
2362
2404
  qwen2_vl_images_vqa,
2363
2405
  qwen2_vl_video_vqa,
2364
2406
  video_temporal_localization,
2407
+ flux_image_inpainting,
2365
2408
  ]
2366
2409
 
2367
2410
  UTIL_TOOLS = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.188
3
+ Version: 0.2.190
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -16,11 +16,11 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
16
16
  vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
17
17
  vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
18
18
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
19
- vision_agent/tools/__init__.py,sha256=KVP4_6qxOb2lpFdQgQtyDfdkMLL1O6wVZNK19MXp-xo,2798
19
+ vision_agent/tools/__init__.py,sha256=uhqE2bo9q1F7andb2zEzp-OYVmYOElPdapIGK9bQ63U,2825
20
20
  vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB9Kg,32074
21
21
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
22
22
  vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
23
- vision_agent/tools/tools.py,sha256=lIRQals2WLkV01pXDwFwkZdMmEa2xf7Jnv8g3UNrdOQ,83582
23
+ vision_agent/tools/tools.py,sha256=nTeWePteTltKEvDcr2t3nSDtdUL0xdXEIjeolvXNxGg,84945
24
24
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
25
25
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
26
26
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -29,7 +29,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
29
29
  vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
30
30
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
31
31
  vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
32
- vision_agent-0.2.188.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
- vision_agent-0.2.188.dist-info/METADATA,sha256=CoqVIiF9B_0k4j9NQtyR8RLQ6cRPByvP0CWfmALRz4A,18328
34
- vision_agent-0.2.188.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
- vision_agent-0.2.188.dist-info/RECORD,,
32
+ vision_agent-0.2.190.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
+ vision_agent-0.2.190.dist-info/METADATA,sha256=AvlPUByWfCJlMgl2ZvWDfnp9ila0lsCv6x3tZ41DY8k,18328
34
+ vision_agent-0.2.190.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
+ vision_agent-0.2.190.dist-info/RECORD,,