vision-agent 0.2.34__tar.gz → 0.2.36__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.34 → vision_agent-0.2.36}/PKG-INFO +1 -1
- {vision_agent-0.2.34 → vision_agent-0.2.36}/pyproject.toml +1 -1
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/vision_agent.py +5 -1
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/tools/tools.py +11 -9
- {vision_agent-0.2.34 → vision_agent-0.2.36}/LICENSE +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/README.md +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/agent_coder.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/agent_coder_prompts.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/data_interpreter.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/data_interpreter_prompts.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/easytool.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/easytool_prompts.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/easytool_v2.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/easytool_v2_prompts.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/reflexion.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/reflexion_prompts.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/llm/__init__.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/llm/llm.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/tools/__init__.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/tools/easytool_tools.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.34 → vision_agent-0.2.36}/vision_agent/utils/video.py +0 -0
@@ -264,15 +264,19 @@ def retrieve_tools(
|
|
264
264
|
)
|
265
265
|
tool_info = []
|
266
266
|
tool_desc = []
|
267
|
+
tool_list: List[Dict[str, str]] = []
|
267
268
|
for task in plan:
|
268
269
|
tools = tool_recommender.top_k(task["instructions"], k=2, thresh=0.3)
|
269
270
|
tool_info.extend([e["doc"] for e in tools])
|
270
271
|
tool_desc.extend([e["desc"] for e in tools])
|
272
|
+
tool_list.extend(
|
273
|
+
{"description": e["desc"], "documentation": e["doc"]} for e in tools
|
274
|
+
)
|
271
275
|
log_progress(
|
272
276
|
{
|
273
277
|
"type": "tools",
|
274
278
|
"status": "completed",
|
275
|
-
"payload":
|
279
|
+
"payload": tool_list,
|
276
280
|
}
|
277
281
|
)
|
278
282
|
if verbosity == 2:
|
@@ -58,9 +58,10 @@ def grounding_dino(
|
|
58
58
|
box_threshold: float = 0.20,
|
59
59
|
iou_threshold: float = 0.20,
|
60
60
|
) -> List[Dict[str, Any]]:
|
61
|
-
"""'grounding_dino' is a tool that can detect and count objects given a text
|
62
|
-
such as category names or referring expressions.
|
63
|
-
|
61
|
+
"""'grounding_dino' is a tool that can detect and count multiple objects given a text
|
62
|
+
prompt such as category names or referring expressions. The categories in the text prompt
|
63
|
+
are separated by commas or periods. It returns a list and count of bounding boxes,
|
64
|
+
label names and associated probability scores.
|
64
65
|
|
65
66
|
Parameters:
|
66
67
|
prompt (str): The prompt to ground to the image.
|
@@ -111,9 +112,10 @@ def grounding_sam(
|
|
111
112
|
box_threshold: float = 0.20,
|
112
113
|
iou_threshold: float = 0.20,
|
113
114
|
) -> List[Dict[str, Any]]:
|
114
|
-
"""'grounding_sam' is a tool that can detect and segment objects given a
|
115
|
-
prompt such as category names or referring expressions.
|
116
|
-
|
115
|
+
"""'grounding_sam' is a tool that can detect and segment multiple objects given a
|
116
|
+
text prompt such as category names or referring expressions. The categories in the text
|
117
|
+
prompt are separated by commas or periods. It returns a list of bounding boxes,
|
118
|
+
label names, mask file names and associated probability scores.
|
117
119
|
|
118
120
|
Parameters:
|
119
121
|
prompt (str): The prompt to ground to the image.
|
@@ -343,9 +345,9 @@ def image_question_answering(image: np.ndarray, prompt: str) -> str:
|
|
343
345
|
|
344
346
|
|
345
347
|
def clip(image: np.ndarray, classes: List[str]) -> Dict[str, Any]:
|
346
|
-
"""'clip' is a tool that can classify an image
|
347
|
-
It returns the same list of the input classes along with
|
348
|
-
based on image content.
|
348
|
+
"""'clip' is a tool that can classify an image or a cropped detection given a list
|
349
|
+
of input classes or tags. It returns the same list of the input classes along with
|
350
|
+
their probability scores based on image content.
|
349
351
|
|
350
352
|
Parameters:
|
351
353
|
image (np.ndarray): The image to classify or tag
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|