vision-agent 0.2.22__tar.gz → 0.2.23__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.22 → vision_agent-0.2.23}/PKG-INFO +1 -1
- {vision_agent-0.2.22 → vision_agent-0.2.23}/pyproject.toml +1 -1
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/vision_agent_v2.py +5 -3
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/tools/tools_v2.py +52 -1
- {vision_agent-0.2.22 → vision_agent-0.2.23}/LICENSE +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/README.md +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/agent_coder.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/agent_coder_prompts.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/easytool.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/easytool_prompts.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/reflexion.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/reflexion_prompts.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/agent/vision_agent_v2_prompt.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/llm/__init__.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/llm/llm.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/tools/__init__.py +1 -1
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/tools/tools.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.22 → vision_agent-0.2.23}/vision_agent/utils/video.py +0 -0
@@ -235,9 +235,11 @@ def run_plan(
|
|
235
235
|
f"""
|
236
236
|
{tabulate(tabular_data=[task], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
237
237
|
)
|
238
|
-
|
239
|
-
|
240
|
-
|
238
|
+
tools = tool_recommender.top_k(task["instruction"])
|
239
|
+
tool_info = "\n".join([e["doc"] for e in tools])
|
240
|
+
|
241
|
+
if verbosity == 2:
|
242
|
+
_LOGGER.info(f"Tools retrieved: {[e['desc'] for e in tools]}")
|
241
243
|
|
242
244
|
if long_term_memory is not None:
|
243
245
|
retrieved_ltm = "\n".join(
|
@@ -4,12 +4,13 @@ import logging
|
|
4
4
|
import tempfile
|
5
5
|
from importlib import resources
|
6
6
|
from pathlib import Path
|
7
|
-
from typing import Any, Callable, Dict, List, Tuple, Union
|
7
|
+
from typing import Any, Callable, Dict, List, Tuple, Union, cast
|
8
8
|
|
9
9
|
import numpy as np
|
10
10
|
import pandas as pd
|
11
11
|
import requests
|
12
12
|
from PIL import Image, ImageDraw, ImageFont
|
13
|
+
from scipy.spatial import distance # type: ignore
|
13
14
|
|
14
15
|
from vision_agent.tools.tool_utils import _send_inference_request
|
15
16
|
from vision_agent.utils import extract_frames_from_video
|
@@ -233,6 +234,54 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
|
|
233
234
|
return output
|
234
235
|
|
235
236
|
|
237
|
+
def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
    """'closest_mask_distance' calculates the closest distance between two masks.

    Every element of a mask that is greater than zero counts as part of that mask.
    The result is the smallest Euclidean distance, measured in array-index units,
    between any element of the first mask and any element of the second mask.

    Parameters:
        mask1 (np.ndarray): The first mask.
        mask2 (np.ndarray): The second mask.

    Returns:
        float: The closest distance between the two masks. It is 0.0 when the
            masks share at least one position.

    Raises:
        ValueError: If either mask has no element greater than zero, because the
            distance to an empty mask is undefined.

    Example
    -------
        >>> closest_mask_distance(mask1, mask2)
        5.0
    """

    # Clipping to [0, 1] zeroes out negative values so that only strictly
    # positive entries survive the nonzero() selection below.
    mask1 = np.clip(mask1, 0, 1)
    mask2 = np.clip(mask2, 0, 1)

    # One row of indices per selected element.
    mask1_points = np.transpose(np.nonzero(mask1))
    mask2_points = np.transpose(np.nonzero(mask2))

    # Without this guard np.min would fail on a zero-size distance matrix with
    # an unhelpful "zero-size array to reduction operation" message.
    if mask1_points.shape[0] == 0 or mask2_points.shape[0] == 0:
        raise ValueError(
            "closest_mask_distance requires both masks to be non-empty "
            "(each mask must contain at least one value greater than zero)."
        )

    # NOTE: the pairwise matrix has len(mask1_points) * len(mask2_points)
    # entries, so memory use grows with the product of the two mask areas.
    dist_matrix = distance.cdist(mask1_points, mask2_points, "euclidean")

    # Convert the numpy scalar into a built-in float to match the annotation.
    return float(np.min(dist_matrix))
|
259
|
+
|
260
|
+
|
261
|
+
def closest_box_distance(box1: List[float], box2: List[float]) -> float:
    """'closest_box_distance' calculates the closest distance between two bounding boxes.

    Each box is unpacked as four values in the order [x1, y1, x2, y2]. The
    computation presumably expects x1 <= x2 and y1 <= y2 (min corner first);
    the function does not check this.

    Parameters:
        box1 (List[float]): The first bounding box.
        box2 (List[float]): The second bounding box.

    Returns:
        float: The closest distance between the two bounding boxes. It is 0 when
            the boxes overlap or touch.

    Example
    -------
        >>> closest_box_distance([100, 100, 200, 200], [300, 300, 400, 400])
        141.42
    """

    left_a, top_a, right_a, bottom_a = box1
    left_b, top_b, right_b, bottom_b = box2

    # Gap between the boxes along each axis. At most one of the two signed
    # differences can be positive; the 0 entry covers overlapping extents.
    gap_x = np.max([0, left_b - right_a, left_a - right_b])
    gap_y = np.max([0, top_b - bottom_a, top_a - bottom_b])

    # Combine the per-axis gaps into a straight-line distance.
    squared_distance = gap_x**2 + gap_y**2
    return cast(float, np.sqrt(squared_distance))
|
283
|
+
|
284
|
+
|
236
285
|
# Utility and visualization functions
|
237
286
|
|
238
287
|
|
@@ -429,6 +478,8 @@ TOOLS = [
|
|
429
478
|
grounding_sam,
|
430
479
|
extract_frames,
|
431
480
|
ocr,
|
481
|
+
closest_mask_distance,
|
482
|
+
closest_box_distance,
|
432
483
|
load_image,
|
433
484
|
save_image,
|
434
485
|
overlay_bounding_boxes,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|