vision-agent 0.2.237__py3-none-any.whl → 0.2.239__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/configs/anthropic_openai_config.py +17 -3
- vision_agent/sim/__init__.py +1 -0
- vision_agent/tools/planner_tools.py +21 -0
- vision_agent/tools/tools.py +1 -1
- vision_agent/utils/video.py +13 -38
- {vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/METADATA +1 -2
- {vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/RECORD +9 -9
- {vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/WHEEL +0 -0
vision_agent/configs/anthropic_openai_config.py
CHANGED
@@ -96,13 +96,24 @@ class Config(BaseModel):
|
|
96
96
|
}
|
97
97
|
)
|
98
98
|
|
99
|
+
# for get_tool_for_task
|
100
|
+
od_judge: Type[LMM] = Field(default=AnthropicLMM)
|
101
|
+
od_judge_kwargs: dict = Field(
|
102
|
+
default_factory=lambda: {
|
103
|
+
"model_name": "claude-3-5-sonnet-20241022",
|
104
|
+
"temperature": 0.0,
|
105
|
+
"image_size": 512,
|
106
|
+
}
|
107
|
+
)
|
108
|
+
|
99
109
|
# for suggestions module
|
100
|
-
suggester: Type[LMM] = Field(default=
|
110
|
+
suggester: Type[LMM] = Field(default=OpenAILMM)
|
101
111
|
suggester_kwargs: dict = Field(
|
102
112
|
default_factory=lambda: {
|
103
|
-
"model_name": "
|
113
|
+
"model_name": "o1",
|
104
114
|
"temperature": 1.0,
|
105
|
-
"
|
115
|
+
"image_detail": "high",
|
116
|
+
"image_size": 1024,
|
106
117
|
}
|
107
118
|
)
|
108
119
|
|
@@ -143,6 +154,9 @@ class Config(BaseModel):
|
|
143
154
|
def create_tool_chooser(self) -> LMM:
|
144
155
|
return self.tool_chooser(**self.tool_chooser_kwargs)
|
145
156
|
|
157
|
+
def create_od_judge(self) -> LMM:
|
158
|
+
return self.od_judge(**self.od_judge_kwargs)
|
159
|
+
|
146
160
|
def create_suggester(self) -> LMM:
|
147
161
|
return self.suggester(**self.suggester_kwargs)
|
148
162
|
|
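vision_agent/sim/__init__.py
CHANGED
(+1 -0; the hunk for this file is not present in this extract)
vision_agent/tools/planner_tools.py
CHANGED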
@@ -368,6 +368,15 @@ def get_tool_for_task(
|
|
368
368
|
tool_tester = CONFIG.create_tool_tester()
|
369
369
|
tool_chooser = CONFIG.create_tool_chooser()
|
370
370
|
|
371
|
+
if isinstance(images, list):
|
372
|
+
if len(images) > 0 and isinstance(images[0], dict):
|
373
|
+
if all(["frame" in image for image in images]):
|
374
|
+
images = [image["frame"] for image in images]
|
375
|
+
else:
|
376
|
+
raise ValueError(
|
377
|
+
f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
|
378
|
+
)
|
379
|
+
|
371
380
|
if isinstance(images, list):
|
372
381
|
images = {"image": images}
|
373
382
|
|
@@ -410,6 +419,15 @@ def get_tool_for_task_human_reviewer(
|
|
410
419
|
# NOTE: this will have the same documentation as get_tool_for_task
|
411
420
|
tool_tester = CONFIG.create_tool_tester()
|
412
421
|
|
422
|
+
if isinstance(images, list):
|
423
|
+
if len(images) > 0 and isinstance(images[0], dict):
|
424
|
+
if all(["frame" in image for image in images]):
|
425
|
+
images = [image["frame"] for image in images]
|
426
|
+
else:
|
427
|
+
raise ValueError(
|
428
|
+
f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
|
429
|
+
)
|
430
|
+
|
413
431
|
if isinstance(images, list):
|
414
432
|
images = {"image": images}
|
415
433
|
|
@@ -424,6 +442,9 @@ def get_tool_for_task_human_reviewer(
|
|
424
442
|
Image.fromarray(image).save(image_path)
|
425
443
|
image_paths.append(image_path)
|
426
444
|
|
445
|
+
# run no more than 3 images or else it overloads the LLM
|
446
|
+
image_paths = image_paths[:3]
|
447
|
+
|
427
448
|
tools = [
|
428
449
|
t.__name__
|
429
450
|
for t in get_tools()
|
vision_agent/tools/tools.py
CHANGED
@@ -2804,7 +2804,7 @@ def save_video(
|
|
2804
2804
|
else:
|
2805
2805
|
Path(output_video_path).parent.mkdir(parents=True, exist_ok=True)
|
2806
2806
|
|
2807
|
-
output_video_path = video_writer(frames, fps, output_video_path)
|
2807
|
+
output_video_path = video_writer(frames, fps, filename=output_video_path)
|
2808
2808
|
_save_video_to_result(output_video_path)
|
2809
2809
|
return output_video_path
|
2810
2810
|
|
vision_agent/utils/video.py
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
import base64
|
2
1
|
import logging
|
2
|
+
import os
|
3
3
|
import tempfile
|
4
4
|
from functools import lru_cache
|
5
5
|
from typing import List, Optional, Tuple
|
@@ -15,37 +15,6 @@ _DEFAULT_VIDEO_FPS = 24
|
|
15
15
|
_DEFAULT_INPUT_FPS = 1.0
|
16
16
|
|
17
17
|
|
18
|
-
def play_video(video_base64: str) -> None:
|
19
|
-
"""Play a video file"""
|
20
|
-
video_data = base64.b64decode(video_base64)
|
21
|
-
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
|
22
|
-
temp_video.write(video_data)
|
23
|
-
temp_video_path = temp_video.name
|
24
|
-
|
25
|
-
cap = cv2.VideoCapture(temp_video_path)
|
26
|
-
if not cap.isOpened():
|
27
|
-
_LOGGER.error("Error: Could not open video.")
|
28
|
-
return
|
29
|
-
|
30
|
-
# Display the first frame and wait for any key press to start the video
|
31
|
-
ret, frame = cap.read()
|
32
|
-
if ret:
|
33
|
-
cv2.imshow("Video Player", frame)
|
34
|
-
_LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
|
35
|
-
cv2.waitKey(0) # Wait for any key press
|
36
|
-
|
37
|
-
while cap.isOpened():
|
38
|
-
ret, frame = cap.read()
|
39
|
-
if not ret:
|
40
|
-
break
|
41
|
-
cv2.imshow("Video Player", frame)
|
42
|
-
# Press 'q' to exit the video
|
43
|
-
if cv2.waitKey(200) & 0xFF == ord("q"):
|
44
|
-
break
|
45
|
-
cap.release()
|
46
|
-
cv2.destroyAllWindows()
|
47
|
-
|
48
|
-
|
49
18
|
def _resize_frame(frame: np.ndarray) -> np.ndarray:
|
50
19
|
height, width = frame.shape[:2]
|
51
20
|
new_width = width - (width % 2)
|
@@ -57,12 +26,15 @@ def video_writer(
|
|
57
26
|
frames: List[np.ndarray],
|
58
27
|
fps: float = _DEFAULT_INPUT_FPS,
|
59
28
|
filename: Optional[str] = None,
|
29
|
+
file_ext: str = ".mp4",
|
60
30
|
) -> str:
|
31
|
+
tempf = None
|
61
32
|
if isinstance(fps, str):
|
62
33
|
# fps could be a string when it's passed in from a web endpoint deployment
|
63
34
|
fps = float(fps)
|
64
35
|
if filename is None:
|
65
|
-
|
36
|
+
tempf = tempfile.NamedTemporaryFile(delete=False, suffix=file_ext)
|
37
|
+
filename = tempf.name
|
66
38
|
container = av.open(filename, mode="w")
|
67
39
|
stream = container.add_stream("h264", rate=fps)
|
68
40
|
height, width = frames[0].shape[:2]
|
@@ -82,6 +54,9 @@ def video_writer(
|
|
82
54
|
for packet in stream.encode():
|
83
55
|
container.mux(packet)
|
84
56
|
container.close()
|
57
|
+
# for windows need to manually close tempfile, cannot use with NamedTemporaryFile(delete=True)
|
58
|
+
if tempf is not None:
|
59
|
+
tempf.close()
|
85
60
|
return filename
|
86
61
|
|
87
62
|
|
@@ -98,11 +73,11 @@ def frames_to_bytes(
|
|
98
73
|
if isinstance(fps, str):
|
99
74
|
# fps could be a string when it's passed in from a web endpoint deployment
|
100
75
|
fps = float(fps)
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
76
|
+
filename = video_writer(frames, fps, file_ext=file_ext)
|
77
|
+
# TODO: look into memory-mapped files to avoid reading the entire file into memory
|
78
|
+
with open(filename, "rb") as f:
|
79
|
+
buffer_bytes = f.read()
|
80
|
+
os.unlink(filename)
|
106
81
|
return buffer_bytes
|
107
82
|
|
108
83
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.237
|
3
|
+
Version: 0.2.239
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -15,7 +15,6 @@ Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
|
|
15
15
|
Requires-Dist: e2b-code-interpreter (==0.0.11a37)
|
16
16
|
Requires-Dist: flake8 (>=7.0.0,<8.0.0)
|
17
17
|
Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
|
18
|
-
Requires-Dist: langsmith (>=0.1.58,<0.2.0)
|
19
18
|
Requires-Dist: libcst (>=1.5.0,<2.0.0)
|
20
19
|
Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
|
21
20
|
Requires-Dist: nbclient (>=0.10.0,<0.11.0)
|
@@ -21,7 +21,7 @@ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,
|
|
21
21
|
vision_agent/clients/landing_public_api.py,sha256=Vz9lldtNbaJRWzT7T8-uQrC-dMnt47LIsDrxHgoVdEw,1492
|
22
22
|
vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
|
23
23
|
vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
|
24
|
-
vision_agent/configs/anthropic_openai_config.py,sha256=
|
24
|
+
vision_agent/configs/anthropic_openai_config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
|
25
25
|
vision_agent/configs/config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
|
26
26
|
vision_agent/configs/openai_config.py,sha256=v2_AIY89d7LKWn4uqA2G047U2IdmnqZrGH2Iww9gRIw,4498
|
27
27
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -32,13 +32,13 @@ vision_agent/models/__init__.py,sha256=qAdygB-0EsmxMHNzYTPNM6tAF8Fym95gm9bsHJafd
|
|
32
32
|
vision_agent/models/agent_types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
|
33
33
|
vision_agent/models/lmm_types.py,sha256=v04h-NjbczHOIN8UWa1vvO5-1BDuZ4JQhD2mge1cXmw,305
|
34
34
|
vision_agent/models/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
|
35
|
-
vision_agent/sim/__init__.py,sha256=
|
35
|
+
vision_agent/sim/__init__.py,sha256=Aouz6HEPPTYcLxR5_0fTYCL1OvPKAH1RMWAF90QXAlA,135
|
36
36
|
vision_agent/sim/sim.py,sha256=VSU_1rYd4ifvF45xKWBEYugxdeeEQVpj0QL6rjx49i4,9801
|
37
37
|
vision_agent/tools/__init__.py,sha256=T-MPNBVbvWtfo71hobaZsdYzQ52oyymolk_OAb2Pq_g,2463
|
38
38
|
vision_agent/tools/meta_tools.py,sha256=-heMwGkx0hX_9zUp1dgBqsJpVnl6Y6tErMsjFy0dwLM,28652
|
39
|
-
vision_agent/tools/planner_tools.py,sha256=
|
39
|
+
vision_agent/tools/planner_tools.py,sha256=orBTdJQz2NKoLuX9WE6XixaYuG305xz0UBYvZOiuquQ,19474
|
40
40
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
41
|
-
vision_agent/tools/tools.py,sha256
|
41
|
+
vision_agent/tools/tools.py,sha256=hhQYqypvBDfcel1p4bfZHZfOZom3plnxGPHwo2T52Ls,111466
|
42
42
|
vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
|
43
43
|
vision_agent/utils/agent.py,sha256=QGKcbzpAjcVj0958bXYLv07-d2i1GU7-bXVG7bTGRMA,14619
|
44
44
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -47,9 +47,9 @@ vision_agent/utils/image_utils.py,sha256=bJM2mEvB6E__M9pxi74yQYzAiZ7mu3KE2ptyVrp
|
|
47
47
|
vision_agent/utils/tools.py,sha256=USZL0MKsiJgqA8RFiYRTcj_Kn2FVYKLHK4wIk0gP1Ow,7694
|
48
48
|
vision_agent/utils/tools_doc.py,sha256=yFue6KSXoa_Z1ngCdBEc4SdPZOWF1rVLeaHu02I8Wis,2523
|
49
49
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
50
|
-
vision_agent/utils/video.py,sha256=
|
50
|
+
vision_agent/utils/video.py,sha256=0LsmH0sDaBWhvtV15CCJgqKxWzwDDos7Sv2wOd7wyzQ,5610
|
51
51
|
vision_agent/utils/video_tracking.py,sha256=GM9qfeawqhmZVWoKrzw5-NETd4gEo7ImMfWtBnhC3bw,12086
|
52
|
-
vision_agent-0.2.
|
53
|
-
vision_agent-0.2.
|
54
|
-
vision_agent-0.2.
|
55
|
-
vision_agent-0.2.
|
52
|
+
vision_agent-0.2.239.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
53
|
+
vision_agent-0.2.239.dist-info/METADATA,sha256=yC90fdYSDqbLrHHIU6OTm96QhNJ-39buRPoVgIxnDzM,5712
|
54
|
+
vision_agent-0.2.239.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
55
|
+
vision_agent-0.2.239.dist-info/RECORD,,
|
File without changes
|
File without changes
|