vision-agent 0.2.165__py3-none-any.whl → 0.2.167__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
85
85
  return output
86
86
 
87
87
 
88
+ def _clean_response(response: str) -> str:
89
+ # Sometimes the LLM will hallucinate responses to an <execute_python> tag as if it
90
+ # had already executed the code. This function removes the hallucinated response.
91
+ if "<execute_python>" in response:
92
+ end_execute_python = response.find("</execute_python>")
93
+ response = response[: end_execute_python + len("</execute_python>")]
94
+ return response
95
+
96
+
88
97
  def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
89
98
  chat = copy.deepcopy(chat)
90
99
 
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
114
123
  message["media"] = chat[-1]["media"]
115
124
  conv_resp = cast(str, orch([message], stream=False))
116
125
 
126
+ # clean the response first, if we are executing code, do not respond or end
127
+ # conversation before the code has been executed.
128
+ conv_resp = _clean_response(conv_resp)
129
+
117
130
  let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
118
131
  let_user_respond = (
119
132
  "true" in let_user_respond_str.lower() if let_user_respond_str else False
@@ -458,7 +471,7 @@ class VisionAgent(Agent):
458
471
  self.streaming_message(
459
472
  {
460
473
  "role": "assistant",
461
- "content": json.dumps(response),
474
+ "content": json.dumps(add_step_descriptions(response)),
462
475
  "finished": finished and code_action is None,
463
476
  }
464
477
  )
@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
676
676
  for node in red:
677
677
  # seems to always be atomtrailers not call type
678
678
  if node.type == "atomtrailers":
679
+ if node.name.value == "generate_vision_code":
680
+ node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
681
+
679
682
  if (
680
683
  node.name.value == "generate_vision_code"
681
684
  or node.name.value == "edit_vision_code"
682
685
  ):
683
- node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
684
-
685
686
  if custom_tool_names is not None:
686
687
  node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
687
688
  cleaned_code = red.dumps().strip()
@@ -11,6 +11,9 @@ import numpy as np
11
11
  _LOGGER = logging.getLogger(__name__)
12
12
  # The maximum length of the clip to extract frames from, in seconds
13
13
 
14
+ _DEFAULT_VIDEO_FPS = 24
15
+ _DEFAULT_INPUT_FPS = 1.0
16
+
14
17
 
15
18
  def play_video(video_base64: str) -> None:
16
19
  """Play a video file"""
@@ -51,7 +54,9 @@ def _resize_frame(frame: np.ndarray) -> np.ndarray:
51
54
 
52
55
 
53
56
  def video_writer(
54
- frames: List[np.ndarray], fps: float = 1.0, filename: Optional[str] = None
57
+ frames: List[np.ndarray],
58
+ fps: float = _DEFAULT_INPUT_FPS,
59
+ filename: Optional[str] = None,
55
60
  ) -> str:
56
61
  if filename is None:
57
62
  filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
@@ -78,7 +83,7 @@ def video_writer(
78
83
 
79
84
 
80
85
  def frames_to_bytes(
81
- frames: List[np.ndarray], fps: float = 1.0, file_ext: str = ".mp4"
86
+ frames: List[np.ndarray], fps: float = _DEFAULT_INPUT_FPS, file_ext: str = ".mp4"
82
87
  ) -> bytes:
83
88
  r"""Convert a list of frames to a video file encoded into a byte string.
84
89
 
@@ -101,7 +106,7 @@ def frames_to_bytes(
101
106
  # same file name and the time savings are very large.
102
107
  @lru_cache(maxsize=8)
103
108
  def extract_frames_from_video(
104
- video_uri: str, fps: float = 1.0
109
+ video_uri: str, fps: float = _DEFAULT_INPUT_FPS
105
110
  ) -> List[Tuple[np.ndarray, float]]:
106
111
  """Extract frames from a video along with the timestamp in seconds.
107
112
 
@@ -118,6 +123,16 @@ def extract_frames_from_video(
118
123
 
119
124
  cap = cv2.VideoCapture(video_uri)
120
125
  orig_fps = cap.get(cv2.CAP_PROP_FPS)
126
+ if not orig_fps or orig_fps <= 0:
127
+ _LOGGER.warning(
128
+ f"Input video, {video_uri}, has no fps, using the default value {_DEFAULT_VIDEO_FPS}"
129
+ )
130
+ orig_fps = _DEFAULT_VIDEO_FPS
131
+ if not fps or fps <= 0:
132
+ _LOGGER.warning(
133
+ f"Input fps, {fps}, is illegal, using the default value: {_DEFAULT_INPUT_FPS}"
134
+ )
135
+ fps = _DEFAULT_INPUT_FPS
121
136
  orig_frame_time = 1 / orig_fps
122
137
  targ_frame_time = 1 / fps
123
138
  frames: List[Tuple[np.ndarray, float]] = []
@@ -129,10 +144,15 @@ def extract_frames_from_video(
129
144
  break
130
145
 
131
146
  elapsed_time += orig_frame_time
147
+ # This is to prevent a floating-point precision loss issue, which can cause
148
+ # the elapsed time to be slightly less than the target frame time, which
149
+ # causes the last frame to be skipped
150
+ elapsed_time = round(elapsed_time, 8)
132
151
  if elapsed_time >= targ_frame_time:
133
152
  frames.append((cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), i / orig_fps))
134
153
  elapsed_time -= targ_frame_time
135
154
 
136
155
  i += 1
137
156
  cap.release()
157
+ _LOGGER.info(f"Extracted {len(frames)} frames from {video_uri}")
138
158
  return frames
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.165
3
+ Version: 0.2.167
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
2
2
  vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
3
3
  vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
4
4
  vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
5
- vision_agent/agent/vision_agent.py,sha256=cbY_V3f85_g8JmASa3m2LBX4G6xgsOKX1n7YtCf-C98,23676
5
+ vision_agent/agent/vision_agent.py,sha256=GIobCJaojOMxdMFtigklvt7RgHk49KAh7zSZoQ7HKXw,24294
6
6
  vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
7
7
  vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
8
8
  vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
@@ -17,7 +17,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
17
17
  vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
18
18
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
19
19
  vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
20
- vision_agent/tools/meta_tools.py,sha256=ZF-7z3KT-Su08MvF5OhSm3Taqeu1Ek-EZjFhpN5w1uU,28257
20
+ vision_agent/tools/meta_tools.py,sha256=7XM3VP4EW4Dtg_Hvoov_laOAEaZLdSGOeA-iPb7CimU,28315
21
21
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
22
22
  vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
23
23
  vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
@@ -28,8 +28,8 @@ vision_agent/utils/execute.py,sha256=FqSOr5gtBeKB1g2hbV6-bhox6qItDQNn2o9efq1w6f4
28
28
  vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwdn6sk,11303
29
29
  vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
30
30
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
31
- vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
32
- vision_agent-0.2.165.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
- vision_agent-0.2.165.dist-info/METADATA,sha256=jvrYb4IyKp79Sqrhyul6pu0EtEZRewumAZCVR6qWZWg,18034
34
- vision_agent-0.2.165.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
- vision_agent-0.2.165.dist-info/RECORD,,
31
+ vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
32
+ vision_agent-0.2.167.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
33
+ vision_agent-0.2.167.dist-info/METADATA,sha256=e80T_Sh_9yt4SDeTGlq9fD4RqF1iY-LL6IHgarXwLc8,18034
34
+ vision_agent-0.2.167.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
+ vision_agent-0.2.167.dist-info/RECORD,,