vision-agent 0.2.197__py3-none-any.whl → 0.2.199__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -80,24 +80,6 @@ desc,doc,name
80
80
  {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
81
81
  ]
82
82
  ",ocr
83
- 'clip' is a tool that can classify an image or a cropped detection given a list of input classes or tags. It returns the same list of the input classes along with their probability scores based on image content.,"clip(image: numpy.ndarray, classes: List[str]) -> Dict[str, Any]:
84
- 'clip' is a tool that can classify an image or a cropped detection given a list
85
- of input classes or tags. It returns the same list of the input classes along with
86
- their probability scores based on image content.
87
-
88
- Parameters:
89
- image (np.ndarray): The image to classify or tag
90
- classes (List[str]): The list of classes or tags that is associated with the image
91
-
92
- Returns:
93
- Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
94
- contains a list of given labels and other a list of scores.
95
-
96
- Example
97
- -------
98
- >>> clip(image, ['dog', 'cat', 'bird'])
99
- {""labels"": [""dog"", ""cat"", ""bird""], ""scores"": [0.68, 0.30, 0.02]},
100
- ",clip
101
83
  'vit_image_classification' is a tool that can classify an image. It returns a list of classes and their probability scores based on image content.,"vit_image_classification(image: numpy.ndarray) -> Dict[str, Any]:
102
84
  'vit_image_classification' is a tool that can classify an image. It returns a
103
85
  list of classes and their probability scores based on image content.
@@ -488,6 +470,24 @@ desc,doc,name
488
470
  ... )
489
471
  >>> save_image(result, ""inpainted_room.png"")
490
472
  ",flux_image_inpainting
473
+ 'siglip_classification' is a tool that can classify an image or a cropped detection given a list of input labels or tags. It returns the same list of the input labels along with their probability scores based on image content.,"siglip_classification(image: numpy.ndarray, labels: List[str]) -> Dict[str, Any]:
474
+ 'siglip_classification' is a tool that can classify an image or a cropped detection given a list
475
+ of input labels or tags. It returns the same list of the input labels along with
476
+ their probability scores based on image content.
477
+
478
+ Parameters:
479
+ image (np.ndarray): The image to classify or tag
480
+ labels (List[str]): The list of labels or tags that is associated with the image
481
+
482
+ Returns:
483
+ Dict[str, Any]: A dictionary containing the labels and scores. One dictionary
484
+ contains a list of given labels and other a list of scores.
485
+
486
+ Example
487
+ -------
488
+ >>> siglip_classification(image, ['dog', 'cat', 'bird'])
489
+ {""labels"": [""dog"", ""cat"", ""bird""], ""scores"": [0.68, 0.30, 0.02]},
490
+ ",siglip_classification
491
491
  "'extract_frames_and_timestamps' extracts frames and timestamps from a video which can be a file path, url or youtube link, returns a list of dictionaries with keys ""frame"" and ""timestamp"" where ""frame"" is a numpy array and ""timestamp"" is the relative time in seconds where the frame was captured. The frame is a numpy array.","extract_frames_and_timestamps(video_uri: Union[str, pathlib.Path], fps: float = 1) -> List[Dict[str, Union[numpy.ndarray, float]]]:
492
492
  'extract_frames_and_timestamps' extracts frames and timestamps from a video
493
493
  which can be a file path, url or youtube link, returns a list of dictionaries
Binary file
@@ -2453,7 +2453,6 @@ FUNCTION_TOOLS = [
2453
2453
  owl_v2_image,
2454
2454
  owl_v2_video,
2455
2455
  ocr,
2456
- clip,
2457
2456
  vit_image_classification,
2458
2457
  vit_nsfw_classification,
2459
2458
  countgd_counting,
@@ -2471,6 +2470,7 @@ FUNCTION_TOOLS = [
2471
2470
  qwen2_vl_video_vqa,
2472
2471
  video_temporal_localization,
2473
2472
  flux_image_inpainting,
2473
+ siglip_classification,
2474
2474
  ]
2475
2475
 
2476
2476
  UTIL_TOOLS = [
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.197
3
+ Version: 0.2.199
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -1,5 +1,5 @@
1
- vision_agent/.sim_tools/df.csv,sha256=IPYWrI8W8r7zs2-QRjGsAzlg1O3wqngGRSyz9F-BOpo,34008
2
- vision_agent/.sim_tools/embs.npy,sha256=ItJgcBpT0--0HeZjUV30INzFXNQh-12HoUVevNY38dc,356480
1
+ vision_agent/.sim_tools/df.csv,sha256=0fmLwTDjnRTiqYwamTOdCPjruE6wZz0AVrONIPTHxZY,34086
2
+ vision_agent/.sim_tools/embs.npy,sha256=xF8Cg7Xd09QCTySj831aL1O2_0kRNaaH8XRJIRjgWzQ,356480
3
3
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
4
4
  vision_agent/agent/__init__.py,sha256=j4W3zHXKE96o93ZziY62ZBWgicLYEink1rIU3gPsfwM,548
5
5
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
@@ -27,7 +27,7 @@ vision_agent/tools/meta_tools.py,sha256=by7TIbH7lsLIayX_Pe2mS1iw8aeLn2T8yqAo8SkB
27
27
  vision_agent/tools/planner_tools.py,sha256=FROahw_6Taqvytv6pOjCHUEypOfjsi_f8Vo1c5vz6Mw,8823
28
28
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
29
29
  vision_agent/tools/tool_utils.py,sha256=GDGOmBCo4UfYz-DJ-olREJHPsqs5mzHu0YXiAnpNE8E,10179
30
- vision_agent/tools/tools.py,sha256=Q8QSuOUk0df_XueU856vi21GOolp2TB3_f0WvMayjIA,87835
30
+ vision_agent/tools/tools.py,sha256=wXDs0m_Yb601FQVp5fPYYVtt4lHUeMnuqIbfDZhsE4Q,87852
31
31
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
32
32
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
33
33
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -36,7 +36,7 @@ vision_agent/utils/image_utils.py,sha256=rRWcxKggPXIRXIY_XT9rZt30ECDRq8zq7FDeXRD
36
36
  vision_agent/utils/sim.py,sha256=NZc9QGD6BTY5O29NVbHH7oxDePL_QMnylT1lYcDUn1Y,7437
37
37
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
38
38
  vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
39
- vision_agent-0.2.197.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
40
- vision_agent-0.2.197.dist-info/METADATA,sha256=b_qATdG-j1o9ksVxISHe_-NcUdd0HMgZAVF9r6Y10ek,19026
41
- vision_agent-0.2.197.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
42
- vision_agent-0.2.197.dist-info/RECORD,,
39
+ vision_agent-0.2.199.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
40
+ vision_agent-0.2.199.dist-info/METADATA,sha256=NbaPI49uOha3uZXbfOokpji32pilLujBz7DcmhaXW1M,19026
41
+ vision_agent-0.2.199.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
42
+ vision_agent-0.2.199.dist-info/RECORD,,