vision-agent 0.2.165__py3-none-any.whl → 0.2.167__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/agent/vision_agent.py +14 -1
- vision_agent/tools/meta_tools.py +3 -2
- vision_agent/utils/video.py +23 -3
- {vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/METADATA +1 -1
- {vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/RECORD +7 -7
- {vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/WHEEL +0 -0
@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
|
|
85
85
|
return output
|
86
86
|
|
87
87
|
|
88
|
+
def _clean_response(response: str) -> str:
|
89
|
+
# Sometimes the LLM will hallucinate responses to an <execute_python> tag as if it
|
90
|
+
# had already executed the code. This function removes the hallucinated response.
|
91
|
+
if "<execute_python>" in response:
|
92
|
+
end_execute_python = response.find("</execute_python>")
|
93
|
+
response = response[: end_execute_python + len("</execute_python>")]
|
94
|
+
return response
|
95
|
+
|
96
|
+
|
88
97
|
def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
89
98
|
chat = copy.deepcopy(chat)
|
90
99
|
|
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
|
|
114
123
|
message["media"] = chat[-1]["media"]
|
115
124
|
conv_resp = cast(str, orch([message], stream=False))
|
116
125
|
|
126
|
+
# clean the response first; if we are executing code, do not respond or end
|
127
|
+
# conversation before the code has been executed.
|
128
|
+
conv_resp = _clean_response(conv_resp)
|
129
|
+
|
117
130
|
let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
|
118
131
|
let_user_respond = (
|
119
132
|
"true" in let_user_respond_str.lower() if let_user_respond_str else False
|
@@ -458,7 +471,7 @@ class VisionAgent(Agent):
|
|
458
471
|
self.streaming_message(
|
459
472
|
{
|
460
473
|
"role": "assistant",
|
461
|
-
"content": json.dumps(response),
|
474
|
+
"content": json.dumps(add_step_descriptions(response)),
|
462
475
|
"finished": finished and code_action is None,
|
463
476
|
}
|
464
477
|
)
|
vision_agent/tools/meta_tools.py
CHANGED
@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
|
|
676
676
|
for node in red:
|
677
677
|
# seems to always be atomtrailers not call type
|
678
678
|
if node.type == "atomtrailers":
|
679
|
+
if node.name.value == "generate_vision_code":
|
680
|
+
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
681
|
+
|
679
682
|
if (
|
680
683
|
node.name.value == "generate_vision_code"
|
681
684
|
or node.name.value == "edit_vision_code"
|
682
685
|
):
|
683
|
-
node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
|
684
|
-
|
685
686
|
if custom_tool_names is not None:
|
686
687
|
node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
|
687
688
|
cleaned_code = red.dumps().strip()
|
vision_agent/utils/video.py
CHANGED
@@ -11,6 +11,9 @@ import numpy as np
|
|
11
11
|
_LOGGER = logging.getLogger(__name__)
|
12
12
|
# The maximum length of the clip to extract frames from, in seconds
|
13
13
|
|
14
|
+
_DEFAULT_VIDEO_FPS = 24
|
15
|
+
_DEFAULT_INPUT_FPS = 1.0
|
16
|
+
|
14
17
|
|
15
18
|
def play_video(video_base64: str) -> None:
|
16
19
|
"""Play a video file"""
|
@@ -51,7 +54,9 @@ def _resize_frame(frame: np.ndarray) -> np.ndarray:
|
|
51
54
|
|
52
55
|
|
53
56
|
def video_writer(
|
54
|
-
frames: List[np.ndarray],
|
57
|
+
frames: List[np.ndarray],
|
58
|
+
fps: float = _DEFAULT_INPUT_FPS,
|
59
|
+
filename: Optional[str] = None,
|
55
60
|
) -> str:
|
56
61
|
if filename is None:
|
57
62
|
filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
|
@@ -78,7 +83,7 @@ def video_writer(
|
|
78
83
|
|
79
84
|
|
80
85
|
def frames_to_bytes(
|
81
|
-
frames: List[np.ndarray], fps: float =
|
86
|
+
frames: List[np.ndarray], fps: float = _DEFAULT_INPUT_FPS, file_ext: str = ".mp4"
|
82
87
|
) -> bytes:
|
83
88
|
r"""Convert a list of frames to a video file encoded into a byte string.
|
84
89
|
|
@@ -101,7 +106,7 @@ def frames_to_bytes(
|
|
101
106
|
# same file name and the time savings are very large.
|
102
107
|
@lru_cache(maxsize=8)
|
103
108
|
def extract_frames_from_video(
|
104
|
-
video_uri: str, fps: float =
|
109
|
+
video_uri: str, fps: float = _DEFAULT_INPUT_FPS
|
105
110
|
) -> List[Tuple[np.ndarray, float]]:
|
106
111
|
"""Extract frames from a video along with the timestamp in seconds.
|
107
112
|
|
@@ -118,6 +123,16 @@ def extract_frames_from_video(
|
|
118
123
|
|
119
124
|
cap = cv2.VideoCapture(video_uri)
|
120
125
|
orig_fps = cap.get(cv2.CAP_PROP_FPS)
|
126
|
+
if not orig_fps or orig_fps <= 0:
|
127
|
+
_LOGGER.warning(
|
128
|
+
f"Input video, {video_uri}, has no fps, using the default value {_DEFAULT_VIDEO_FPS}"
|
129
|
+
)
|
130
|
+
orig_fps = _DEFAULT_VIDEO_FPS
|
131
|
+
if not fps or fps <= 0:
|
132
|
+
_LOGGER.warning(
|
133
|
+
f"Input fps, {fps}, is illegal, using the default value: {_DEFAULT_INPUT_FPS}"
|
134
|
+
)
|
135
|
+
fps = _DEFAULT_INPUT_FPS
|
121
136
|
orig_frame_time = 1 / orig_fps
|
122
137
|
targ_frame_time = 1 / fps
|
123
138
|
frames: List[Tuple[np.ndarray, float]] = []
|
@@ -129,10 +144,15 @@ def extract_frames_from_video(
|
|
129
144
|
break
|
130
145
|
|
131
146
|
elapsed_time += orig_frame_time
|
147
|
+
# This is to prevent floating-point precision loss, which can cause
|
148
|
+
# the elapsed time to be slightly less than the target frame time, which
|
149
|
+
# causes the last frame to be skipped
|
150
|
+
elapsed_time = round(elapsed_time, 8)
|
132
151
|
if elapsed_time >= targ_frame_time:
|
133
152
|
frames.append((cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), i / orig_fps))
|
134
153
|
elapsed_time -= targ_frame_time
|
135
154
|
|
136
155
|
i += 1
|
137
156
|
cap.release()
|
157
|
+
_LOGGER.info(f"Extracted {len(frames)} frames from {video_uri}")
|
138
158
|
return frames
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=GIobCJaojOMxdMFtigklvt7RgHk49KAh7zSZoQ7HKXw,24294
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
|
8
8
|
vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
|
@@ -17,7 +17,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
|
|
17
17
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
18
18
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
19
19
|
vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
|
20
|
-
vision_agent/tools/meta_tools.py,sha256=
|
20
|
+
vision_agent/tools/meta_tools.py,sha256=7XM3VP4EW4Dtg_Hvoov_laOAEaZLdSGOeA-iPb7CimU,28315
|
21
21
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
22
22
|
vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
|
23
23
|
vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
|
@@ -28,8 +28,8 @@ vision_agent/utils/execute.py,sha256=FqSOr5gtBeKB1g2hbV6-bhox6qItDQNn2o9efq1w6f4
|
|
28
28
|
vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwdn6sk,11303
|
29
29
|
vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
|
30
30
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
31
|
-
vision_agent/utils/video.py,sha256=
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
34
|
-
vision_agent-0.2.
|
35
|
-
vision_agent-0.2.
|
31
|
+
vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
|
32
|
+
vision_agent-0.2.167.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
33
|
+
vision_agent-0.2.167.dist-info/METADATA,sha256=e80T_Sh_9yt4SDeTGlq9fD4RqF1iY-LL6IHgarXwLc8,18034
|
34
|
+
vision_agent-0.2.167.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
35
|
+
vision_agent-0.2.167.dist-info/RECORD,,
|
File without changes
|
File without changes
|