vision_agent-1.1.16-py3-none-any.whl → vision_agent-1.1.18-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/.sim_tools/df.csv +12 -12
- vision_agent/.sim_tools/embs.npy +0 -0
- vision_agent/agent/__init__.py +1 -0
- vision_agent/agent/vision_agent_prompts_v3.py +372 -0
- vision_agent/agent/vision_agent_v3.py +278 -0
- vision_agent/lmm/lmm.py +219 -57
- vision_agent/tools/__init__.py +3 -3
- vision_agent/tools/planner_v3_tools.py +206 -0
- vision_agent/tools/tools.py +55 -64
- vision_agent/utils/agent.py +24 -8
- vision_agent/utils/tools.py +1 -1
- {vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/METADATA +4 -4
- {vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/RECORD +15 -12
- {vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/WHEEL +0 -0
- {vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/licenses/LICENSE +0 -0
vision_agent/tools/planner_v3_tools.py
ADDED
@@ -0,0 +1,206 @@
+import base64
+import copy
+import io
+from typing import Dict, List, Optional, Tuple, Union, cast
+
+import cv2
+import matplotlib.figure
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+from PIL.Image import Image as PILImageType
+
+from vision_agent.utils.image_utils import (
+    denormalize_bbox,
+    normalize_bbox,
+    numpy_to_bytes,
+    rle_decode_array,
+)
+from vision_agent.utils.tools import send_inference_request
+
+
+def maybe_denormalize_bbox(
+    bbox: List[Union[int, float]], image_size: Tuple[int, ...]
+) -> List[float]:
+    if all([0 <= c <= 1 for c in bbox]):
+        return denormalize_bbox(bbox, image_size)
+    return bbox
+
+
+def maybe_normalize_bbox(
+    bbox: List[Union[int, float]], image_size: Tuple[int, ...]
+) -> List[float]:
+    if any([1 <= c for c in bbox]):
+        return normalize_bbox(bbox, image_size)
+    return bbox
+
+
+def instance_segmentation(
+    prompt: str, image: np.ndarray, threshold: float = 0.23, nms_threshold: float = 0.5
+) -> List[Dict[str, Union[str, float, List[float], np.ndarray]]]:
+    image_bytes = numpy_to_bytes(image)
+    files = [("image", image_bytes)]
+    data = {"prompts": [prompt], "threshold": threshold, "nms_threshold": nms_threshold}
+    results = send_inference_request(
+        data,
+        "glee",
+        files=files,
+        v2=True,
+    )
+    results = results[0]
+    results_formatted = [
+        {
+            "label": elt["label"],
+            "score": elt["score"],
+            "bbox": normalize_bbox(elt["bounding_box"], image.shape[:2]),
+            "mask": np.array(rle_decode_array(elt["mask"])),
+        }
+        for elt in results
+    ]
+    return results_formatted
+
+
+def ocr(image: np.ndarray) -> List[Dict[str, Union[str, float, List[float]]]]:
+    image_bytes = numpy_to_bytes(image)
+    files = [("image", image_bytes)]
+    results = send_inference_request(
+        {},
+        "paddle-ocr",
+        files=files,
+        v2=True,
+    )
+    results_formatted = [
+        {
+            "label": elt["label"],
+            "score": elt["score"],
+            "bbox": normalize_bbox(elt["bbox"], image.shape[:2]),
+        }
+        for elt in results
+    ]
+    return results_formatted
+
+
+def depth_estimation(image: np.ndarray) -> np.ndarray:
+    shape = image.shape[:2]
+    image_bytes = numpy_to_bytes(image)
+    files = [("image", image_bytes)]
+    results = send_inference_request(
+        {},
+        "depth-pro",
+        files=files,
+        v2=True,
+    )
+    depth = np.frombuffer(base64.b64decode(results["depth"]), dtype=np.float32).reshape(
+        shape
+    )
+    return depth
+
+
+def visualize_bounding_boxes(
+    image: np.ndarray, bounding_boxes: List[Dict[str, Union[str, float, List[float]]]]
+) -> np.ndarray:
+    image = image.copy()
+    image_size = image.shape[:2]
+    bounding_boxes = copy.deepcopy(bounding_boxes)
+
+    for bbox in bounding_boxes:
+        bbox["bbox"] = maybe_denormalize_bbox(
+            cast(List[float], bbox["bbox"]), image_size
+        )
+    for bbox in bounding_boxes:
+        x1, y1, x2, y2 = bbox["bbox"]  # type: ignore
+        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
+    return image
+
+
+def visualize_segmentation_masks(
+    image: np.ndarray,
+    segmentation_masks: List[Dict[str, Union[str, float, np.ndarray]]],
+) -> np.ndarray:
+    alpha = 0.5
+    overlay = image.copy()
+    color_mask = np.zeros_like(image)
+    color_mask[:, :] = (0, 100, 255)
+    for elt in segmentation_masks:
+        mask = cast(np.ndarray, elt["mask"])
+        overlay[mask == 1] = (1 - alpha) * overlay[mask == 1] + alpha * color_mask[
+            mask == 1
+        ]
+
+        # draw outline on the mask so it doesn't just think the color of the object changed
+        mask_uint8 = (mask * 255).astype(np.uint8)
+        contours, _ = cv2.findContours(
+            mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+        )
+        cv2.drawContours(overlay, contours, -1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
+    overlay = np.clip(overlay, 0, 255).astype(np.uint8)
+    return overlay
+
+
+def get_crops(
+    image: np.ndarray, bounding_boxes: List[Dict[str, Union[str, float, List[float]]]]
+) -> List[np.ndarray]:
+    image = image.copy()
+    bounding_boxes = copy.deepcopy(bounding_boxes)
+
+    for bbox in bounding_boxes:
+        bbox["bbox"] = maybe_denormalize_bbox(
+            cast(List[float], bbox["bbox"]), image.shape[:2]
+        )
+    crops = []
+    for bbox in bounding_boxes:
+        x1, y1, x2, y2 = bbox["bbox"]  # type: ignore
+        crops.append(image[int(y1) : int(y2), int(x1) : int(x2)])
+    return crops
+
+
+def rotate_90(image: np.ndarray, k: int = 1) -> np.ndarray:
+    return np.rot90(image, k=k, axes=(0, 1))
+
+
+def iou(
+    pred1: Union[List[float], np.ndarray], pred2: Union[List[float], np.ndarray]
+) -> float:
+    if isinstance(pred1, list) and isinstance(pred2, list):
+        x1, y1, x2, y2 = pred1
+        x1_, y1_, x2_, y2_ = pred2
+        intersection = max(0, min(x2, x2_) - max(x1, x1_)) * max(
+            0, min(y2, y2_) - max(y1, y1_)
+        )
+        union = (x2 - x1) * (y2 - y1) + (x2_ - x1_) * (y2_ - y1_) - intersection
+        return intersection / union
+    elif isinstance(pred1, np.ndarray) and isinstance(pred2, np.ndarray):
+        pred1 = np.clip(pred1, 0, 1)
+        pred2 = np.clip(pred2, 0, 1)
+        intersection = np.sum(pred1 * pred2)
+        union = np.sum(pred1) + np.sum(pred2) - intersection
+        return intersection / union
+    raise ValueError("Unsupported input types for IoU calculation.")
+
+
+def display_image(
+    image: Union[np.ndarray, PILImageType, matplotlib.figure.Figure, str],
+) -> None:
+    display_img: Optional[PILImageType] = None
+    if isinstance(image, np.ndarray):
+        display_img = Image.fromarray(image)
+    elif isinstance(image, matplotlib.figure.Figure):
+        # Render the figure to a BytesIO buffer
+        buf = io.BytesIO()
+        image.savefig(buf, format="png")
+        buf.seek(0)
+        # Load the buffer as a PIL Image
+        display_img = Image.open(buf)
+        plt.close(image)  # type: ignore
+    elif isinstance(image, PILImageType):
+        display_img = image  # Already a PIL Image
+    elif isinstance(image, str):
+        display_img = Image.open(image)
+
+    if display_img is not None:
+        plt.imshow(display_img)  # type: ignore
+        plt.axis("off")  # type: ignore
+        plt.show()
+    else:
+        # Handle cases where image type is not supported or conversion failed
+        print("Unsupported image type or conversion failed.")
vision_agent/tools/tools.py
CHANGED
@@ -4,7 +4,7 @@ import logging
 import os
 import tempfile
 import urllib.request
-from base64 import b64encode
+from base64 import b64encode, b64decode
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from importlib import resources
 from pathlib import Path
@@ -15,7 +15,6 @@ import time
 import cv2
 import numpy as np
 import pandas as pd
-import requests
 from IPython.display import display
 from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
@@ -2034,8 +2033,8 @@ def qwen2_vl_video_vqa(prompt: str, frames: List[np.ndarray]) -> str:
     return cast(str, data)
 
 
-def
-"""'
+def paddle_ocr(image: np.ndarray) -> List[Dict[str, Any]]:
+    """'paddle_ocr' extracts text from an image. It returns a list of detected text, bounding
     boxes with normalized coordinates, and confidence scores. The results are sorted
     from top-left to bottom right.
 
@@ -2048,51 +2047,33 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
 
     Example
     -------
-    >>>
+    >>> paddle_ocr(image)
     [
         {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
     ]
     """
 
-
-    image_size = pil_image.size[::-1]
+    image_size = image.shape[:2]
     if image_size[0] < 1 or image_size[1] < 1:
         return []
-
-
-
-
-
-
-
-
-
-        headers={"contentType": "multipart/form-data", "apikey": _API_KEY},
-    )
-
-    if res.status_code != 200:
-        raise ValueError(f"OCR request failed with status code {res.status_code}")
-
-    data = res.json()
-    output = []
-    for det in data[0]:
-        label = det["text"]
-        box = [
-            det["location"][0]["x"],
-            det["location"][0]["y"],
-            det["location"][2]["x"],
-            det["location"][2]["y"],
-        ]
-        box = normalize_bbox(box, image_size)
-        output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
+    buffer_bytes = numpy_to_bytes(image)
+    files = [("image", buffer_bytes)]
+
+    res = send_inference_request(
+        payload={"function_name": "paddle-ocr"},
+        endpoint_name="paddle-ocr",
+        files=files,
+        v2=True,
+    )
 
     _display_tool_trace(
-
+        paddle_ocr.__name__,
         {},
-
-
+        res,
+        files,
     )
-
+
+    return sorted(res, key=lambda x: (x["bbox"][1], x["bbox"][0]))
 
 
 def claude35_text_extraction(image: np.ndarray) -> str:
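Call-wise, `paddle_ocr` is a drop-in replacement for the removed `ocr` tool: it still takes a NumPy image and returns label/bbox/score dictionaries. A minimal usage sketch, assuming the hosted "paddle-ocr" endpoint is reachable with a valid API key; the image path is hypothetical and the output values are illustrative:

```python
import numpy as np
from PIL import Image

from vision_agent.tools.tools import paddle_ocr

# Any RGB image as a NumPy array; the path here is hypothetical.
image = np.array(Image.open("receipt.png").convert("RGB"))

detections = paddle_ocr(image)
# Entries look like {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99}
# and arrive sorted top-left to bottom-right by the normalized bbox.
for det in detections:
    print(det["label"], det["bbox"], det["score"])
```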
@@ -2370,7 +2351,12 @@ def agentic_activity_recognition(
     buffer_bytes = frames_to_bytes(frames, fps=fps)
     files = [("video", buffer_bytes)]
 
-    payload = {
+    payload = {
+        "prompt": prompt,
+        "specificity": specificity,
+        "with_audio": with_audio,
+        "function_name": "agentic_activity_recognition",
+    }
 
     response = send_inference_request(
         payload=payload, endpoint_name="activity-recognition", files=files, v2=True
@@ -2529,48 +2515,53 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
     return return_data
 
 
-def
-
-
-
+def depth_pro(
+    image: np.ndarray,
+) -> np.ndarray:
+    """'depth_pro' is a tool that runs the Apple DepthPro model to generate a
+    depth map from a given RGB image. The returned depth map has the same dimensions
+    as the input image, with each pixel indicating the distance from the camera in meters.
 
     Parameters:
         image (np.ndarray): The image to used to generate depth image
 
     Returns:
-        np.ndarray: A
-
+        np.ndarray: A depth map with float32 pixel values that represent
+        the distance from the camera in meters.
 
     Example
     -------
-    >>>
+    >>> depth_pro(image)
     array([[0, 0, 0, ..., 0, 0, 0],
         [0, 20, 24, ..., 0, 100, 103],
         ...,
         [10, 11, 15, ..., 202, 202, 205],
-        [10, 10, 10, ..., 200, 200, 200]], dtype=
+        [10, 10, 10, ..., 200, 200, 200]], dtype=np.float32),
     """
-    if image.shape[0] < 1 or image.shape[1] < 1:
-        raise ValueError(f"Image is empty, image shape: {image.shape}")
 
-
-
-
-
-
+    image_size = image.shape[:2]
+    if image_size[0] < 1 or image_size[1] < 1:
+        return np.empty(0)
+    buffer_bytes = numpy_to_bytes(image)
+    files = [("image", buffer_bytes)]
 
-
-
-
-
+    detections = send_inference_request(
+        payload={"function_name": "depth-pro"},
+        endpoint_name="depth-pro",
+        files=files,
+        v2=True,
     )
-
+
+    depth_bytes = b64decode(detections["depth"])
+    depth_map_np = np.frombuffer(depth_bytes, dtype=np.float32).reshape(image_size)
+
     _display_tool_trace(
-
+        depth_pro.__name__,
         {},
-
-
+        response=detections,
+        files=files,
     )
+
     return depth_map_np
 
 
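A usage sketch for the new `depth_pro` tool under the same assumptions (hosted endpoint reachable, hypothetical image path). Since the returned array has the input's height and width, per-pixel distances in meters can be indexed directly:

```python
import numpy as np
from PIL import Image

from vision_agent.tools.tools import depth_pro

image = np.array(Image.open("room.jpg").convert("RGB"))  # hypothetical path

depth = depth_pro(image)
# float32 map with the same height and width as the input image.
assert depth.dtype == np.float32 and depth.shape == image.shape[:2]

# Estimated distance from the camera (meters) at the image center.
h, w = depth.shape
print(depth[h // 2, w // 2])
```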
@@ -3564,12 +3555,12 @@ FUNCTION_TOOLS = [
     claude35_text_extraction,
     agentic_document_extraction,
     document_qa,
-
+    paddle_ocr,
     gemini_image_generation,
     qwen25_vl_images_vqa,
     qwen25_vl_video_vqa,
     agentic_activity_recognition,
-
+    depth_pro,
     generate_pose_image,
     vit_nsfw_classification,
     siglip_classification,
vision_agent/utils/agent.py
CHANGED
@@ -247,7 +247,9 @@ def print_table(title: str, columns: List[str], rows: List[List[str]]) -> None:
 
 
 def add_media_to_chat(
-    chat: List[AgentMessage],
+    chat: List[AgentMessage],
+    code_interpreter: Optional[CodeInterpreter] = None,
+    append_to_prompt: bool = True,
 ) -> Tuple[List[AgentMessage], List[AgentMessage], List[Union[str, Path]]]:
     orig_chat = copy.deepcopy(chat)
     int_chat = copy.deepcopy(chat)
@@ -278,6 +280,7 @@ def add_media_to_chat(
         if (
             not str(chat_i.content).endswith(f" Media name {media}")
             and chat_i.role == "user"
+            and append_to_prompt
         ):
             chat_i.content += f" Media name {media}"
             chat_i.media = media_list_i if len(media_list_i) > 0 else None
@@ -304,13 +307,26 @@ def add_media_to_chat(
 def capture_media_from_exec(execution: Execution) -> List[str]:
     images = []
     for result in execution.results:
-
-
-
-
-
-
-
+        if hasattr(result, "formats"):
+            for format in result.formats():
+                if format in ["png", "jpeg"]:
+                    # converts the image to png and then to base64
+                    images.append(
+                        "data:image/png;base64,"
+                        + convert_to_b64(b64_to_pil(result[format]))
+                    )
+        elif hasattr(result, "savefig"):
+            pass
+        elif hasattr(result, "_repr_png_") and result._repr_png_():
+            images.append(
+                "data:image/png;base64,"
+                + convert_to_b64(b64_to_pil(result._repr_png_()))  # type: ignore
+            )
+        elif hasattr(result, "_repr_jpeg_") and result._repr_jpeg_():
+            images.append(
+                "data:image/jpeg;base64,"
+                + convert_to_b64(b64_to_pil(result._repr_jpeg_()))  # type: ignore
+            )
     return images
 
 
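The strings collected by the reworked `capture_media_from_exec` are ordinary `data:image/...;base64,` URIs, so consumers can rebuild the images with the standard library and Pillow. A small self-contained sketch that fabricates one such string the same way and then decodes it:

```python
import base64
import io

from PIL import Image

# Fabricate a stand-in data URI from a tiny in-memory image, purely for illustration.
buf = io.BytesIO()
Image.new("RGB", (8, 8), color="red").save(buf, format="PNG")
data_uri = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()

# Decoding mirrors what a consumer of capture_media_from_exec would do.
_, b64_payload = data_uri.split(",", 1)
decoded = Image.open(io.BytesIO(base64.b64decode(b64_payload)))
print(decoded.size)  # (8, 8)
```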
vision_agent/utils/tools.py
CHANGED
@@ -106,7 +106,7 @@ def send_task_inference_request(
     if metadata is not None and "function_name" in metadata:
         function_name = metadata["function_name"]
     response = _call_post(url, payload, session, files, function_name, is_form)
-    return response["data"]
+    return response["data"] if "data" in response else response
 
 
 def _create_requests_session(
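The one-line change makes the response unwrapping tolerant of endpoints that reply without a "data" envelope. The equivalent logic on plain dicts, for reference:

```python
from typing import Any, Dict


def unwrap(response: Dict[str, Any]) -> Any:
    # Prefer the "data" envelope when present, otherwise pass the response through.
    return response["data"] if "data" in response else response


print(unwrap({"data": [{"label": "person", "score": 0.9}]}))  # [{'label': 'person', 'score': 0.9}]
print(unwrap({"depth": "<base64 bytes>"}))                    # {'depth': '<base64 bytes>'}
```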
{vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vision-agent
-Version: 1.1.16
+Version: 1.1.18
 Summary: Toolset for Vision Agent
 Project-URL: Homepage, https://landing.ai
 Project-URL: repository, https://github.com/landing-ai/vision-agent
@@ -8,7 +8,7 @@ Project-URL: documentation, https://github.com/landing-ai/vision-agent
 Author-email: Landing AI <dev@landing.ai>
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: anthropic
+Requires-Dist: anthropic>=0.54.0
 Requires-Dist: av<12,>=11.0.0
 Requires-Dist: dotenv<0.10,>=0.9.9
 Requires-Dist: flake8<8,>=7.0.0
@@ -20,7 +20,7 @@ Requires-Dist: matplotlib<4,>=3.9.2
 Requires-Dist: nbclient<0.11,>=0.10.0
 Requires-Dist: nbformat<6,>=5.10.4
 Requires-Dist: numpy<2.0.0,>=1.21.0
-Requires-Dist: openai
+Requires-Dist: openai>=1.86.0
 Requires-Dist: opencv-python==4.*
 Requires-Dist: opentelemetry-api<2,>=1.29.0
 Requires-Dist: pandas==2.*
@@ -36,7 +36,7 @@ Requires-Dist: tabulate<0.10,>=0.9.0
 Requires-Dist: tenacity<9,>=8.3.0
 Requires-Dist: tqdm<5.0.0,>=4.64.0
 Requires-Dist: typing-extensions==4.*
-Requires-Dist: yt-dlp>=2025.
+Requires-Dist: yt-dlp>=2025.6.9
 Description-Content-Type: text/markdown
 
 <div align="center">
{vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/RECORD
CHANGED
@@ -1,15 +1,17 @@
 vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
-vision_agent/.sim_tools/df.csv,sha256=
-vision_agent/.sim_tools/embs.npy,sha256=
+vision_agent/.sim_tools/df.csv,sha256=Hus29ljPZV15EmAd1qFTStPuVDi8JDS0ekUcyjCTJ9U,41187
+vision_agent/.sim_tools/embs.npy,sha256=OLj2rt4aBFze2HIf9bQ3yn0-_3RVPecrHWxm2CWvgn0,245888
 vision_agent/agent/README.md,sha256=3XSPG_VO7-6y6P8COvcgSSonWj5uvfgvfmOkBpfKK8Q,5527
-vision_agent/agent/__init__.py,sha256=
+vision_agent/agent/__init__.py,sha256=lhPV1JUJ_Ckp_NHpq9VcwqaBd0wh4-GtyT79aFOWvI0,249
 vision_agent/agent/agent.py,sha256=o1Zuhl6h2R7uVwvUur0Aj38kak8U08plfeFWPst_ErM,1576
 vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=53b_DhQtffX5wxLuCbNQ83AJhB0P_3wEnuKr-v5bx-o,4866
 vision_agent/agent/vision_agent_coder_v2.py,sha256=ELc_J8Q4NKPs7YETu3a9O0Vk1zN3k6QfHBgu0M0IWGk,17450
 vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=O24BpRhMRZx7D_WdaRv-a2K6fLpin0o7oWxlvL70WpM,35944
 vision_agent/agent/vision_agent_planner_v2.py,sha256=Aww_BJhTFKZ5XjYe8FW57z2Gwp2se0vg1t1DKLGRAyQ,22050
 vision_agent/agent/vision_agent_prompts_v2.py,sha256=NG1xnZvZGi4DcqdfqZCkPkS7oka3gr6h42ekUKUKcqY,4231
+vision_agent/agent/vision_agent_prompts_v3.py,sha256=ABFdTe1TMnFBy_VH_AYDSE0IHFiPX0KOB-nNRfLurxM,16548
 vision_agent/agent/vision_agent_v2.py,sha256=iPW6DowH7wCFIA5vb1SdSLfZFWbn_oSC7Xa8uO8KIJI,11675
+vision_agent/agent/vision_agent_v3.py,sha256=tFr9VYSG65R0PRypiNzoW6NzKV1yuBPXIzmE4HO-p0A,10228
 vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
 vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
@@ -19,28 +21,29 @@ vision_agent/configs/openai_config.py,sha256=Bw7ElBYmBcaZttyRBoNpcy3uTkqg5qADk8L
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/lmm/__init__.py,sha256=4qX2lmGnKWHeKftXueEi9xj_ieK2nQh_ipHf72nKGFk,84
-vision_agent/lmm/lmm.py,sha256=
+vision_agent/lmm/lmm.py,sha256=gGUf621irXgQ18W497bMa1vQzbgUsZQsRwLHFNpBSJA,29982
 vision_agent/models/__init__.py,sha256=eIP0pD5dYog8zUA7uuTmUxCF6SIutbLRLRE0cmuCJgQ,326
 vision_agent/models/agent_types.py,sha256=vBZ9-ns5lHDdFMO7ulCGGeZ6OwRo3gK4O3vN0814IWc,3064
 vision_agent/models/lmm_types.py,sha256=v04h-NjbczHOIN8UWa1vvO5-1BDuZ4JQhD2mge1cXmw,305
 vision_agent/models/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/sim/__init__.py,sha256=Aouz6HEPPTYcLxR5_0fTYCL1OvPKAH1RMWAF90QXAlA,135
 vision_agent/sim/sim.py,sha256=WQY_x9A4VT647qGDBScJ3R8_Iv0aoYLHTgwcQSCXwv4,10059
-vision_agent/tools/__init__.py,sha256=
+vision_agent/tools/__init__.py,sha256=USlLNSJ1YZ3UQBAHYu6MXx8Scf639sfL10im1NUuI4k,2490
 vision_agent/tools/meta_tools.py,sha256=9iJilpGYEiXW0nYPTYAWHa7l23wGN8IM5KbE7mWDOT0,6798
 vision_agent/tools/planner_tools.py,sha256=iQWtTgXdomn0IWrbmvXXM-y8Q_RSEOxyP04HIRLrgWI,19576
+vision_agent/tools/planner_v3_tools.py,sha256=9uLKDtdWdpiRm_lVgc2DdeLEo2D4cw2demFTUQ401Zo,6525
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tools.py,sha256=
+vision_agent/tools/tools.py,sha256=lndSG8xrIWcs6Rpe1-Jq44niUDXQnWlYfGP2B1YjpI0,124216
 vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
-vision_agent/utils/agent.py,sha256=
+vision_agent/utils/agent.py,sha256=88axZswX7DibAkckc0mDJWLr0SoVPyam4mqO4zsLRNQ,15827
 vision_agent/utils/exceptions.py,sha256=zis8smCbdEylBVZBTVfEUfAh7Rb7cWV3MSPambu6FsQ,1837
 vision_agent/utils/execute.py,sha256=QAql6KC2uEhX1o_44mMA77lCmMUs0itaaGMFSfJBki8,21520
 vision_agent/utils/image_utils.py,sha256=bJM2mEvB6E__M9pxi74yQYzAiZ7mu3KE2ptyVrp5vzQ,12533
-vision_agent/utils/tools.py,sha256=
+vision_agent/utils/tools.py,sha256=gF5h1QuBCJaC2u_FRxPR32eYPRa78R_DPcmOiPcnb3A,8147
 vision_agent/utils/tools_doc.py,sha256=PKcXXbJktiuPi9q6Q1zXzFx24Dh229SNgWBDtZ2fQSQ,2730
 vision_agent/utils/video.py,sha256=rjsQ1sKKisaQ6AVjJz0zd_G4g-ovRweS_rs4JEhenoI,5340
 vision_agent/utils/video_tracking.py,sha256=DZLFpNCuzuPJQzbQoVNcp-m4dKxgiKdCNM5QTh_zURE,12245
-vision_agent-1.1.
-vision_agent-1.1.
-vision_agent-1.1.
-vision_agent-1.1.
+vision_agent-1.1.18.dist-info/METADATA,sha256=S7WnsgYo0nBT-O4Ca6-rYLG3tjQ9np5Tk1Fv1Z-_0pU,12071
+vision_agent-1.1.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+vision_agent-1.1.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-1.1.18.dist-info/RECORD,,
{vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/WHEEL
File without changes

{vision_agent-1.1.16.dist-info → vision_agent-1.1.18.dist-info}/licenses/LICENSE
File without changes