vision-agent 0.2.34__py3-none-any.whl → 0.2.36__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -264,15 +264,19 @@ def retrieve_tools(
264
264
  )
265
265
  tool_info = []
266
266
  tool_desc = []
267
+ tool_list: List[Dict[str, str]] = []
267
268
  for task in plan:
268
269
  tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
269
270
  tool_info.extend([e["doc"] for e in tools])
270
271
  tool_desc.extend([e["desc"] for e in tools])
272
+ tool_list.extend(
273
+ {"description": e["desc"], "documentation": e["doc"]} for e in tools
274
+ )
271
275
  log_progress(
272
276
  {
273
277
  "type": "tools",
274
278
  "status": "completed",
275
- "payload": tools,
279
+ "payload": tool_list,
276
280
  }
277
281
  )
278
282
  if verbosity == 2:
@@ -58,9 +58,10 @@ def grounding_dino(
58
58
  box_threshold: float = 0.20,
59
59
  iou_threshold: float = 0.20,
60
60
  ) -> List[Dict[str, Any]]:
61
- """'grounding_dino' is a tool that can detect and count objects given a text prompt
62
- such as category names or referring expressions. It returns a list and count of
63
- bounding boxes, label names and associated probability scores.
61
+ """'grounding_dino' is a tool that can detect and count multiple objects given a text
62
+ prompt such as category names or referring expressions. The categories in text prompt
63
+ are separated by commas or periods. It returns a list and count of bounding boxes,
64
+ label names and associated probability scores.
64
65
 
65
66
  Parameters:
66
67
  prompt (str): The prompt to ground to the image.
@@ -111,9 +112,10 @@ def grounding_sam(
111
112
  box_threshold: float = 0.20,
112
113
  iou_threshold: float = 0.20,
113
114
  ) -> List[Dict[str, Any]]:
114
- """'grounding_sam' is a tool that can detect and segment objects given a text
115
- prompt such as category names or referring expressions. It returns a list of
116
- bounding boxes, label names and masks file names and associated probability scores.
115
+ """'grounding_sam' is a tool that can detect and segment multiple objects given a
116
+ text prompt such as category names or referring expressions. The categories in text
117
+ prompt are separated by commas or periods. It returns a list of bounding boxes,
118
+ label names, mask file names and associated probability scores.
117
119
 
118
120
  Parameters:
119
121
  prompt (str): The prompt to ground to the image.
@@ -343,9 +345,9 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str:
343
345
 
344
346
 
345
347
  def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
346
- """'clip' is a tool that can classify an image given a list of input classes or tags.
347
- It returns the same list of the input classes along with their probability scores
348
- based on image content.
348
+ """'clip' is a tool that can classify an image or a cropped detection given a list
349
+ of input classes or tags. It returns the same list of the input classes along with
350
+ their probability scores based on image content.
349
351
 
350
352
  Parameters:
351
353
  image (np.ndarray): The image to classify or tag
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.34
3
+ Version: 0.2.36
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -11,7 +11,7 @@ vision_agent/agent/easytool_v2.py,sha256=CjY-sSj3abxnSq3ZHZMt-7YvRWDXEZsC6RN8FFI
11
11
  vision_agent/agent/easytool_v2_prompts.py,sha256=MZSIwovYgB-f-kdJ6btaNDVXptJn47bfOL3-Zn6NiC0,8573
12
12
  vision_agent/agent/reflexion.py,sha256=AlM5AvBJvCslXlYQdZiadq4oVHsNBm3IF_03DglTxRo,10506
13
13
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
14
- vision_agent/agent/vision_agent.py,sha256=SAk1-UWVxdpjMbcUsx2afbgQO8VjbwfKUKdM_MUs8Ck,16640
14
+ vision_agent/agent/vision_agent.py,sha256=WMClrV5qhPyqnQscNgek2vVes6-A1jNwaHH0vzgy6Zk,16802
15
15
  vision_agent/agent/vision_agent_prompts.py,sha256=0YbiS59IEWbiE43gCvOqfWrpudIAhTn8FHzXW0Y-Gaw,8201
16
16
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
@@ -23,14 +23,14 @@ vision_agent/tools/__init__.py,sha256=oZa_sslb1UqEgpdWROChDcz5JHdB475ejJX78FMLYv
23
23
  vision_agent/tools/easytool_tools.py,sha256=pZc5dQlYINlV4nYbbzsDi3-wauA-fCeD2iGmJUMoUfE,47373
24
24
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
25
25
  vision_agent/tools/tool_utils.py,sha256=wzRacbUpqk9hhfX_Y08rL8qP0XCN2w-8IZoYLi3Upn4,869
26
- vision_agent/tools/tools.py,sha256=nXjefpW9L-Xuos73ObDqpmJfOyUAJVrzoiHsxEE7O10,23346
26
+ vision_agent/tools/tools.py,sha256=RVBuaP8KZrBVppEPaqP0Tey3Am6O5zoNIaZpBSW125c,23523
27
27
  vision_agent/utils/__init__.py,sha256=xsHFyJSDbLdonB9Dh74cwZnVTiT__2OQF3Brd3Nmglc,116
28
28
  vision_agent/utils/execute.py,sha256=8_SfK-IkHH4lXF0JVyV7sDFszZn9HKsh1bFITKGCJ1g,3881
29
29
  vision_agent/utils/image_utils.py,sha256=_cdiS5YrLzqkq_ZgFUO897m5M4_SCIThwUy4lOklfB8,7700
30
30
  vision_agent/utils/sim.py,sha256=oUZ-6eu8Io-UNt9GXJ0XRKtP-Wc0sPWVzYGVpB2yDFk,3001
31
31
  vision_agent/utils/type_defs.py,sha256=BlI8ywWHAplC7kYWLvt4AOdnKpEW3qWEFm-GEOSkrFQ,1792
32
32
  vision_agent/utils/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
33
- vision_agent-0.2.34.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
- vision_agent-0.2.34.dist-info/METADATA,sha256=G7TLFwGHMZmxNOCXouYlajbIwhIE4YTbyRCOOeBVpPY,6698
35
- vision_agent-0.2.34.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
- vision_agent-0.2.34.dist-info/RECORD,,
33
+ vision_agent-0.2.36.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
34
+ vision_agent-0.2.36.dist-info/METADATA,sha256=aKsyRoGvUDMRtezD5RCoJP3aTn6cky5DFO5t2vVaHx4,6698
35
+ vision_agent-0.2.36.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
36
+ vision_agent-0.2.36.dist-info/RECORD,,