PyPI - vision-agent - Versions diffs - 0.2.165__py3-none-any.whl → 0.2.167__py3-none-any.whl - Mend

vision-agent 0.2.165__py3-none-any.whl → 0.2.167__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -85,6 +85,15 @@ def format_agent_message(agent_message: str) -> str:
     return output
+def _clean_response(response: str) -> str:
+    # Sometimes the LLM will hallucinate responses to an <execute_python> tag as if it
+    # had already executed the code. This function removes the hallucinated response.
+    if "<execute_python>" in response:
+        end_execute_python = response.find("</execute_python>")
+        response = response[: end_execute_python + len("</execute_python>")]
+    return response
 def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
     chat = copy.deepcopy(chat)
@@ -114,6 +123,10 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
         message["media"] = chat[-1]["media"]
     conv_resp = cast(str, orch([message], stream=False))
+    # clean the response first, if we are executing code, do not resond or end
+    # conversation before the code has been executed.
+    conv_resp = _clean_response(conv_resp)
     let_user_respond_str = extract_tag(conv_resp, "let_user_respond")
     let_user_respond = (
         "true" in let_user_respond_str.lower() if let_user_respond_str else False
@@ -458,7 +471,7 @@ class VisionAgent(Agent):
                     self.streaming_message(
                         {
                             "role": "assistant",
-                            "content": json.dumps(response),
+                            "content": json.dumps(add_step_descriptions(response)),
                             "finished": finished and code_action is None,
                         }
                     )

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -676,12 +676,13 @@ def use_extra_vision_agent_args(
     for node in red:
         # seems to always be atomtrailers not call type
         if node.type == "atomtrailers":
+            if node.name.value == "generate_vision_code":
+                node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
             if (
                 node.name.value == "generate_vision_code"
                 or node.name.value == "edit_vision_code"
             ):
-                node.value[1].value.append(f"test_multi_plan={test_multi_plan}")
                 if custom_tool_names is not None:
                     node.value[1].value.append(f"custom_tool_names={custom_tool_names}")
     cleaned_code = red.dumps().strip()

vision_agent/utils/video.py CHANGED Viewed

@@ -11,6 +11,9 @@ import numpy as np
 _LOGGER = logging.getLogger(__name__)
 # The maximum length of the clip to extract frames from, in seconds
+_DEFAULT_VIDEO_FPS = 24
+_DEFAULT_INPUT_FPS = 1.0
 def play_video(video_base64: str) -> None:
     """Play a video file"""
@@ -51,7 +54,9 @@ def _resize_frame(frame: np.ndarray) -> np.ndarray:
 def video_writer(
-    frames: List[np.ndarray], fps: float = 1.0, filename: Optional[str] = None
+    frames: List[np.ndarray],
+    fps: float = _DEFAULT_INPUT_FPS,
+    filename: Optional[str] = None,
 ) -> str:
     if filename is None:
         filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
@@ -78,7 +83,7 @@ def video_writer(
 def frames_to_bytes(
-    frames: List[np.ndarray], fps: float = 1.0, file_ext: str = ".mp4"
+    frames: List[np.ndarray], fps: float = _DEFAULT_INPUT_FPS, file_ext: str = ".mp4"
 ) -> bytes:
     r"""Convert a list of frames to a video file encoded into a byte string.
@@ -101,7 +106,7 @@ def frames_to_bytes(
 # same file name and the time savings are very large.
 @lru_cache(maxsize=8)
 def extract_frames_from_video(
-    video_uri: str, fps: float = 1.0
+    video_uri: str, fps: float = _DEFAULT_INPUT_FPS
 ) -> List[Tuple[np.ndarray, float]]:
     """Extract frames from a video along with the timestamp in seconds.
@@ -118,6 +123,16 @@ def extract_frames_from_video(
     cap = cv2.VideoCapture(video_uri)
     orig_fps = cap.get(cv2.CAP_PROP_FPS)
+    if not orig_fps or orig_fps <= 0:
+        _LOGGER.warning(
+            f"Input video, {video_uri}, has no fps, using the default value {_DEFAULT_VIDEO_FPS}"
+        )
+        orig_fps = _DEFAULT_VIDEO_FPS
+    if not fps or fps <= 0:
+        _LOGGER.warning(
+            f"Input fps, {fps}, is illegal, using the default value: {_DEFAULT_INPUT_FPS}"
+        )
+        fps = _DEFAULT_INPUT_FPS
     orig_frame_time = 1 / orig_fps
     targ_frame_time = 1 / fps
     frames: List[Tuple[np.ndarray, float]] = []
@@ -129,10 +144,15 @@ def extract_frames_from_video(
             break
         elapsed_time += orig_frame_time
+        # This is to prevent float point precision loss issue, which can cause
+        # the elapsed time to be slightly less than the target frame time, which
+        # causes the last frame to be skipped
+        elapsed_time = round(elapsed_time, 8)
         if elapsed_time >= targ_frame_time:
             frames.append((cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), i / orig_fps))
             elapsed_time -= targ_frame_time
         i += 1
     cap.release()
+    _LOGGER.info(f"Extracted {len(frames)} frames from {video_uri}")
     return frames

{vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.165
+Version: 0.2.167
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/RECORD RENAMED Viewed

@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=RRMPhH8mgm_pCtEKiVFSjJyDi4lCr4F7k05AhK01xlM,436
 vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
 vision_agent/agent/agent_utils.py,sha256=eSgg8CwWylX_erLTqTg2pVhEEgVkMLRrQfYRyJzI3so,5443
-vision_agent/agent/vision_agent.py,sha256=cbY_V3f85_g8JmASa3m2LBX4G6xgsOKX1n7YtCf-C98,23676
+vision_agent/agent/vision_agent.py,sha256=GIobCJaojOMxdMFtigklvt7RgHk49KAh7zSZoQ7HKXw,24294
 vision_agent/agent/vision_agent_coder.py,sha256=aVkl0b9LKvy-auuHGYSag-ixYnue0iRQqD1PYLPBR-s,29312
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=gPLVXQMNSzYnQYpNm0wlH_5FPkOTaFDV24bqzK3jQ40,12221
 vision_agent/agent/vision_agent_planner.py,sha256=mjmnXG9CvYf_ZA7ZJ3ri4H-2U_Km55gF1sZYRSOlxpY,19027
@@ -17,7 +17,7 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
 vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
 vision_agent/tools/__init__.py,sha256=u-vS5iORB4ccvxoAjbtpvhTALDhXGilcATIq1_eZhKo,2332
-vision_agent/tools/meta_tools.py,sha256=ZF-7z3KT-Su08MvF5OhSm3Taqeu1Ek-EZjFhpN5w1uU,28257
+vision_agent/tools/meta_tools.py,sha256=7XM3VP4EW4Dtg_Hvoov_laOAEaZLdSGOeA-iPb7CimU,28315
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=VPGqGJ2ZYEJA6AW7K9X7hQv6vRlMtAQcybE4izdToCw,8196
 vision_agent/tools/tools.py,sha256=iKsBZxJ5--xWK-mqgZ1jbX_bfGS5HmAp-VRZ69m9yPg,77921
@@ -28,8 +28,8 @@ vision_agent/utils/execute.py,sha256=FqSOr5gtBeKB1g2hbV6-bhox6qItDQNn2o9efq1w6f4
 vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwdn6sk,11303
 vision_agent/utils/sim.py,sha256=ZuSS07TUXFGjipmiQoY8TKRmSes7XXCdtU9PI8PC1sw,5609
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
-vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
-vision_agent-0.2.165.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.165.dist-info/METADATA,sha256=jvrYb4IyKp79Sqrhyul6pu0EtEZRewumAZCVR6qWZWg,18034
-vision_agent-0.2.165.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.165.dist-info/RECORD,,
+vision_agent/utils/video.py,sha256=fOPR48-SuwMbE5eB5rc2F7lVo6k1mVHn26eEJ0QCslc,5602
+vision_agent-0.2.167.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.167.dist-info/METADATA,sha256=e80T_Sh_9yt4SDeTGlq9fD4RqF1iY-LL6IHgarXwLc8,18034
+vision_agent-0.2.167.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.167.dist-info/RECORD,,

{vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.165.dist-info → vision_agent-0.2.167.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.165__py3-none-any.whl → 0.2.167__py3-none-any.whl

vision-agent 0.2.165__py3-none-any.whl → 0.2.167__py3-none-any.whl