PyPI - vision-agent - Versions diffs - 0.2.237__py3-none-any.whl → 0.2.239__py3-none-any.whl - Mend

vision-agent 0.2.237__py3-none-any.whl → 0.2.239__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

vision_agent/configs/anthropic_openai_config.py CHANGED Viewed

@@ -96,13 +96,24 @@ class Config(BaseModel):
         }
     )
+    # for get_tool_for_task
+    od_judge: Type[LMM] = Field(default=AnthropicLMM)
+    od_judge_kwargs: dict = Field(
+        default_factory=lambda: {
+            "model_name": "claude-3-5-sonnet-20241022",
+            "temperature": 0.0,
+            "image_size": 512,
+        }
+    )
     # for suggestions module
-    suggester: Type[LMM] = Field(default=AnthropicLMM)
+    suggester: Type[LMM] = Field(default=OpenAILMM)
     suggester_kwargs: dict = Field(
         default_factory=lambda: {
-            "model_name": "claude-3-5-sonnet-20241022",
+            "model_name": "o1",
             "temperature": 1.0,
-            "image_size": 768,
+            "image_detail": "high",
+            "image_size": 1024,
         }
     )
@@ -143,6 +154,9 @@ class Config(BaseModel):
     def create_tool_chooser(self) -> LMM:
         return self.tool_chooser(**self.tool_chooser_kwargs)
+    def create_od_judge(self) -> LMM:
+        return self.od_judge(**self.od_judge_kwargs)
     def create_suggester(self) -> LMM:
         return self.suggester(**self.suggester_kwargs)

vision_agent/sim/__init__.py CHANGED Viewed

@@ -2,6 +2,7 @@ from .sim import (
     AzureSim,
     OllamaSim,
     Sim,
+    StellaSim,
     get_tool_recommender,
     load_cached_sim,
     load_sim,

vision_agent/tools/planner_tools.py CHANGED Viewed

@@ -368,6 +368,15 @@ def get_tool_for_task(
     tool_tester = CONFIG.create_tool_tester()
     tool_chooser = CONFIG.create_tool_chooser()
+    if isinstance(images, list):
+        if len(images) > 0 and isinstance(images[0], dict):
+            if all(["frame" in image for image in images]):
+                images = [image["frame"] for image in images]
+            else:
+                raise ValueError(
+                    f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
+                )
     if isinstance(images, list):
         images = {"image": images}
@@ -410,6 +419,15 @@ def get_tool_for_task_human_reviewer(
     # NOTE: this will have the same documentation as get_tool_for_task
     tool_tester = CONFIG.create_tool_tester()
+    if isinstance(images, list):
+        if len(images) > 0 and isinstance(images[0], dict):
+            if all(["frame" in image for image in images]):
+                images = [image["frame"] for image in images]
+            else:
+                raise ValueError(
+                    f"Expected a list of numpy arrays or a dictionary of strings to lists of numpy arrays, got a list of dictionaries instead: {images}"
+                )
     if isinstance(images, list):
         images = {"image": images}
@@ -424,6 +442,9 @@ def get_tool_for_task_human_reviewer(
                 Image.fromarray(image).save(image_path)
                 image_paths.append(image_path)
+        # run no more than 3 images or else it overloads the LLM
+        image_paths = image_paths[:3]
         tools = [
             t.__name__
             for t in get_tools()

vision_agent/tools/tools.py CHANGED Viewed

@@ -2804,7 +2804,7 @@ def save_video(
     else:
         Path(output_video_path).parent.mkdir(parents=True, exist_ok=True)
-    output_video_path = video_writer(frames, fps, output_video_path)
+    output_video_path = video_writer(frames, fps, filename=output_video_path)
     _save_video_to_result(output_video_path)
     return output_video_path

vision_agent/utils/video.py CHANGED Viewed

@@ -1,5 +1,5 @@
-import base64
 import logging
+import os
 import tempfile
 from functools import lru_cache
 from typing import List, Optional, Tuple
@@ -15,37 +15,6 @@ _DEFAULT_VIDEO_FPS = 24
 _DEFAULT_INPUT_FPS = 1.0
-def play_video(video_base64: str) -> None:
-    """Play a video file"""
-    video_data = base64.b64decode(video_base64)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
-        temp_video.write(video_data)
-        temp_video_path = temp_video.name
-        cap = cv2.VideoCapture(temp_video_path)
-        if not cap.isOpened():
-            _LOGGER.error("Error: Could not open video.")
-            return
-        # Display the first frame and wait for any key press to start the video
-        ret, frame = cap.read()
-        if ret:
-            cv2.imshow("Video Player", frame)
-            _LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
-            cv2.waitKey(0)  # Wait for any key press
-        while cap.isOpened():
-            ret, frame = cap.read()
-            if not ret:
-                break
-            cv2.imshow("Video Player", frame)
-            # Press 'q' to exit the video
-            if cv2.waitKey(200) & 0xFF == ord("q"):
-                break
-        cap.release()
-        cv2.destroyAllWindows()
 def _resize_frame(frame: np.ndarray) -> np.ndarray:
     height, width = frame.shape[:2]
     new_width = width - (width % 2)
@@ -57,12 +26,15 @@ def video_writer(
     frames: List[np.ndarray],
     fps: float = _DEFAULT_INPUT_FPS,
     filename: Optional[str] = None,
+    file_ext: str = ".mp4",
 ) -> str:
+    tempf = None
     if isinstance(fps, str):
         # fps could be a string when it's passed in from a web endpoint deployment
         fps = float(fps)
     if filename is None:
-        filename = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4").name
+        tempf = tempfile.NamedTemporaryFile(delete=False, suffix=file_ext)
+        filename = tempf.name
     container = av.open(filename, mode="w")
     stream = container.add_stream("h264", rate=fps)
     height, width = frames[0].shape[:2]
@@ -82,6 +54,9 @@ def video_writer(
     for packet in stream.encode():
         container.mux(packet)
     container.close()
+    # for windows nee to manually close tempfile, cannot use with NamedTemporaryFile(delete=True)
+    if tempf is not None:
+        tempf.close()
     return filename
@@ -98,11 +73,11 @@ def frames_to_bytes(
     if isinstance(fps, str):
         # fps could be a string when it's passed in from a web endpoint deployment
         fps = float(fps)
-    with tempfile.NamedTemporaryFile(delete=True, suffix=file_ext) as temp_file:
-        video_writer(frames, fps, temp_file.name)
-        with open(temp_file.name, "rb") as f:
-            buffer_bytes = f.read()
+    filename = video_writer(frames, fps, file_ext=file_ext)
+    # TODO: look into memory-mapped files to avoid reading the entire file into memory
+    with open(filename, "rb") as f:
+        buffer_bytes = f.read()
+    os.unlink(filename)
     return buffer_bytes

{vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.237
+Version: 0.2.239
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -15,7 +15,6 @@ Requires-Dist: e2b (>=0.17.2a50,<0.18.0)
 Requires-Dist: e2b-code-interpreter (==0.0.11a37)
 Requires-Dist: flake8 (>=7.0.0,<8.0.0)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
-Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: libcst (>=1.5.0,<2.0.0)
 Requires-Dist: matplotlib (>=3.9.2,<4.0.0)
 Requires-Dist: nbclient (>=0.10.0,<0.11.0)

{vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/RECORD RENAMED Viewed

@@ -21,7 +21,7 @@ vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,
 vision_agent/clients/landing_public_api.py,sha256=Vz9lldtNbaJRWzT7T8-uQrC-dMnt47LIsDrxHgoVdEw,1492
 vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
 vision_agent/configs/anthropic_config.py,sha256=T1UuESgiY8913A6wA42P7-cg8FTk9-LkJpyywo7OnIQ,4298
-vision_agent/configs/anthropic_openai_config.py,sha256=YQjFxmlxppn5L55dJjK_v1myBJQ_V5J4q25pmUtwTOU,4310
+vision_agent/configs/anthropic_openai_config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
 vision_agent/configs/config.py,sha256=rUz5zca4Pn5dTUwJXiJzRDYua5PWizApCKI3y0zOvhc,4699
 vision_agent/configs/openai_config.py,sha256=v2_AIY89d7LKWn4uqA2G047U2IdmnqZrGH2Iww9gRIw,4498
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -32,13 +32,13 @@ vision_agent/models/__init__.py,sha256=qAdygB-0EsmxMHNzYTPNM6tAF8Fym95gm9bsHJafd
 vision_agent/models/agent_types.py,sha256=dIdxATH_PP76pD5Wfo0oofWt6iPQh0vpf48QbEQSzhs,2472
 vision_agent/models/lmm_types.py,sha256=v04h-NjbczHOIN8UWa1vvO5-1BDuZ4JQhD2mge1cXmw,305
 vision_agent/models/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
-vision_agent/sim/__init__.py,sha256=XYL4BKCB-pamJzCR1y2d5lC8FL64WGu0oEzWzLfguAQ,120
+vision_agent/sim/__init__.py,sha256=Aouz6HEPPTYcLxR5_0fTYCL1OvPKAH1RMWAF90QXAlA,135
 vision_agent/sim/sim.py,sha256=VSU_1rYd4ifvF45xKWBEYugxdeeEQVpj0QL6rjx49i4,9801
 vision_agent/tools/__init__.py,sha256=T-MPNBVbvWtfo71hobaZsdYzQ52oyymolk_OAb2Pq_g,2463
 vision_agent/tools/meta_tools.py,sha256=-heMwGkx0hX_9zUp1dgBqsJpVnl6Y6tErMsjFy0dwLM,28652
-vision_agent/tools/planner_tools.py,sha256=iXyHjTBIWeQOCfcdQNufoQXfipHu_H38DIoK375FdnA,18492
+vision_agent/tools/planner_tools.py,sha256=orBTdJQz2NKoLuX9WE6XixaYuG305xz0UBYvZOiuquQ,19474
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tools.py,sha256=-xg5Msq5ZtHgaISpHnbq5rJ5MIERwfH6wPHg6KpaYjg,111457
+vision_agent/tools/tools.py,sha256=hhQYqypvBDfcel1p4bfZHZfOZom3plnxGPHwo2T52Ls,111466
 vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
 vision_agent/utils/agent.py,sha256=QGKcbzpAjcVj0958bXYLv07-d2i1GU7-bXVG7bTGRMA,14619
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -47,9 +47,9 @@ vision_agent/utils/image_utils.py,sha256=bJM2mEvB6E__M9pxi74yQYzAiZ7mu3KE2ptyVrp
 vision_agent/utils/tools.py,sha256=USZL0MKsiJgqA8RFiYRTcj_Kn2FVYKLHK4wIk0gP1Ow,7694
 vision_agent/utils/tools_doc.py,sha256=yFue6KSXoa_Z1ngCdBEc4SdPZOWF1rVLeaHu02I8Wis,2523
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
-vision_agent/utils/video.py,sha256=Dt9_pqGgr63gmpurzisnpF6d9tr65-zxS1CccXdVuxk,6458
+vision_agent/utils/video.py,sha256=0LsmH0sDaBWhvtV15CCJgqKxWzwDDos7Sv2wOd7wyzQ,5610
 vision_agent/utils/video_tracking.py,sha256=GM9qfeawqhmZVWoKrzw5-NETd4gEo7ImMfWtBnhC3bw,12086
-vision_agent-0.2.237.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.237.dist-info/METADATA,sha256=MkwC7kWf1f5E1ArMWdjNx_GGNgFwfWQtHbfyDzHN8EM,5755
-vision_agent-0.2.237.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.237.dist-info/RECORD,,
+vision_agent-0.2.239.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.239.dist-info/METADATA,sha256=yC90fdYSDqbLrHHIU6OTm96QhNJ-39buRPoVgIxnDzM,5712
+vision_agent-0.2.239.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.239.dist-info/RECORD,,

{vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.237.dist-info → vision_agent-0.2.239.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.237__py3-none-any.whl → 0.2.239__py3-none-any.whl

vision-agent 0.2.237__py3-none-any.whl → 0.2.239__py3-none-any.whl