vision-agent 1.1.16__py3-none-any.whl → 1.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,206 @@
+ import base64
+ import copy
+ import io
+ from typing import Dict, List, Optional, Tuple, Union, cast
+
+ import cv2
+ import matplotlib.figure
+ import matplotlib.pyplot as plt
+ import numpy as np
+ from PIL import Image
+ from PIL.Image import Image as PILImageType
+
+ from vision_agent.utils.image_utils import (
+     denormalize_bbox,
+     normalize_bbox,
+     numpy_to_bytes,
+     rle_decode_array,
+ )
+ from vision_agent.utils.tools import send_inference_request
+
+
+ def maybe_denormalize_bbox(
+     bbox: List[Union[int, float]], image_size: Tuple[int, ...]
+ ) -> List[float]:
+     if all([0 <= c <= 1 for c in bbox]):
+         return denormalize_bbox(bbox, image_size)
+     return bbox
+
+
+ def maybe_normalize_bbox(
+     bbox: List[Union[int, float]], image_size: Tuple[int, ...]
+ ) -> List[float]:
+     if any([1 <= c for c in bbox]):
+         return normalize_bbox(bbox, image_size)
+     return bbox
+
+
+ def instance_segmentation(
+     prompt: str, image: np.ndarray, threshold: float = 0.23, nms_threshold: float = 0.5
+ ) -> List[Dict[str, Union[str, float, List[float], np.ndarray]]]:
+     image_bytes = numpy_to_bytes(image)
+     files = [("image", image_bytes)]
+     data = {"prompts": [prompt], "threshold": threshold, "nms_threshold": nms_threshold}
+     results = send_inference_request(
+         data,
+         "glee",
+         files=files,
+         v2=True,
+     )
+     results = results[0]
+     results_formatted = [
+         {
+             "label": elt["label"],
+             "score": elt["score"],
+             "bbox": normalize_bbox(elt["bounding_box"], image.shape[:2]),
+             "mask": np.array(rle_decode_array(elt["mask"])),
+         }
+         for elt in results
+     ]
+     return results_formatted
+
+
+ def ocr(image: np.ndarray) -> List[Dict[str, Union[str, float, List[float]]]]:
+     image_bytes = numpy_to_bytes(image)
+     files = [("image", image_bytes)]
+     results = send_inference_request(
+         {},
+         "paddle-ocr",
+         files=files,
+         v2=True,
+     )
+     results_formatted = [
+         {
+             "label": elt["label"],
+             "score": elt["score"],
+             "bbox": normalize_bbox(elt["bbox"], image.shape[:2]),
+         }
+         for elt in results
+     ]
+     return results_formatted
+
+
+ def depth_estimation(image: np.ndarray) -> np.ndarray:
+     shape = image.shape[:2]
+     image_bytes = numpy_to_bytes(image)
+     files = [("image", image_bytes)]
+     results = send_inference_request(
+         {},
+         "depth-pro",
+         files=files,
+         v2=True,
+     )
+     depth = np.frombuffer(base64.b64decode(results["depth"]), dtype=np.float32).reshape(
+         shape
+     )
+     return depth
+
+
+ def visualize_bounding_boxes(
+     image: np.ndarray, bounding_boxes: List[Dict[str, Union[str, float, List[float]]]]
+ ) -> np.ndarray:
+     image = image.copy()
+     image_size = image.shape[:2]
+     bounding_boxes = copy.deepcopy(bounding_boxes)
+
+     for bbox in bounding_boxes:
+         bbox["bbox"] = maybe_denormalize_bbox(
+             cast(List[float], bbox["bbox"]), image_size
+         )
+     for bbox in bounding_boxes:
+         x1, y1, x2, y2 = bbox["bbox"]  # type: ignore
+         cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
+     return image
+
+
+ def visualize_segmentation_masks(
+     image: np.ndarray,
+     segmentation_masks: List[Dict[str, Union[str, float, np.ndarray]]],
+ ) -> np.ndarray:
+     alpha = 0.5
+     overlay = image.copy()
+     color_mask = np.zeros_like(image)
+     color_mask[:, :] = (0, 100, 255)
+     for elt in segmentation_masks:
+         mask = cast(np.ndarray, elt["mask"])
+         overlay[mask == 1] = (1 - alpha) * overlay[mask == 1] + alpha * color_mask[
+             mask == 1
+         ]
+
+         # draw outline on the mask so it doesn't just think the color of the object changed
+         mask_uint8 = (mask * 255).astype(np.uint8)
+         contours, _ = cv2.findContours(
+             mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+         )
+         cv2.drawContours(overlay, contours, -1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
+     overlay = np.clip(overlay, 0, 255).astype(np.uint8)
+     return overlay
+
+
+ def get_crops(
+     image: np.ndarray, bounding_boxes: List[Dict[str, Union[str, float, List[float]]]]
+ ) -> List[np.ndarray]:
+     image = image.copy()
+     bounding_boxes = copy.deepcopy(bounding_boxes)
+
+     for bbox in bounding_boxes:
+         bbox["bbox"] = maybe_denormalize_bbox(
+             cast(List[float], bbox["bbox"]), image.shape[:2]
+         )
+     crops = []
+     for bbox in bounding_boxes:
+         x1, y1, x2, y2 = bbox["bbox"]  # type: ignore
+         crops.append(image[int(y1) : int(y2), int(x1) : int(x2)])
+     return crops
+
+
+ def rotate_90(image: np.ndarray, k: int = 1) -> np.ndarray:
+     return np.rot90(image, k=k, axes=(0, 1))
+
+
+ def iou(
+     pred1: Union[List[float], np.ndarray], pred2: Union[List[float], np.ndarray]
+ ) -> float:
+     if isinstance(pred1, list) and isinstance(pred2, list):
+         x1, y1, x2, y2 = pred1
+         x1_, y1_, x2_, y2_ = pred2
+         intersection = max(0, min(x2, x2_) - max(x1, x1_)) * max(
+             0, min(y2, y2_) - max(y1, y1_)
+         )
+         union = (x2 - x1) * (y2 - y1) + (x2_ - x1_) * (y2_ - y1_) - intersection
+         return intersection / union
+     elif isinstance(pred1, np.ndarray) and isinstance(pred2, np.ndarray):
+         pred1 = np.clip(pred1, 0, 1)
+         pred2 = np.clip(pred2, 0, 1)
+         intersection = np.sum(pred1 * pred2)
+         union = np.sum(pred1) + np.sum(pred2) - intersection
+         return intersection / union
+     raise ValueError("Unsupported input types for IoU calculation.")
+
+
+ def display_image(
+     image: Union[np.ndarray, PILImageType, matplotlib.figure.Figure, str],
+ ) -> None:
+     display_img: Optional[PILImageType] = None
+     if isinstance(image, np.ndarray):
+         display_img = Image.fromarray(image)
+     elif isinstance(image, matplotlib.figure.Figure):
+         # Render the figure to a BytesIO buffer
+         buf = io.BytesIO()
+         image.savefig(buf, format="png")
+         buf.seek(0)
+         # Load the buffer as a PIL Image
+         display_img = Image.open(buf)
+         plt.close(image)  # type: ignore
+     elif isinstance(image, PILImageType):
+         display_img = image  # Already a PIL Image
+     elif isinstance(image, str):
+         display_img = Image.open(image)
+
+     if display_img is not None:
+         plt.imshow(display_img)  # type: ignore
+         plt.axis("off")  # type: ignore
+         plt.show()
+     else:
+         # Handle cases where image type is not supported or conversion failed
+         print("Unsupported image type or conversion failed.")
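The file added above is a new module of planner helper tools (listed in the RECORD section at the end of this diff as vision_agent/tools/planner_v3_tools.py). Below is a minimal usage sketch, assuming that import path and reachable "glee", "paddle-ocr", and "depth-pro" inference endpoints; the image path is a placeholder.

```python
# Sketch only: the import path is inferred from the RECORD entry
# vision_agent/tools/planner_v3_tools.py, and the endpoints need valid credentials.
import numpy as np
from PIL import Image

from vision_agent.tools.planner_v3_tools import (
    depth_estimation,
    display_image,
    instance_segmentation,
    ocr,
    visualize_segmentation_masks,
)

image = np.array(Image.open("example.jpg").convert("RGB"))  # placeholder path

masks = instance_segmentation("person", image)  # [{label, score, bbox, mask}, ...]
words = ocr(image)                              # [{label, score, bbox}, ...]
depth = depth_estimation(image)                 # float32 metric depth, same H x W

display_image(visualize_segmentation_masks(image, masks))
```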
@@ -4,7 +4,7 @@ import logging
  import os
  import tempfile
  import urllib.request
- from base64 import b64encode
+ from base64 import b64encode, b64decode
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from importlib import resources
  from pathlib import Path
@@ -15,7 +15,6 @@ import time
  import cv2
  import numpy as np
  import pandas as pd
- import requests
  from IPython.display import display
  from PIL import Image, ImageDraw, ImageFont
  from pillow_heif import register_heif_opener  # type: ignore
@@ -2034,8 +2033,8 @@ def qwen2_vl_video_vqa(prompt: str, frames: List[np.ndarray]) -> str:
      return cast(str, data)


- def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
-     """'ocr' extracts text from an image. It returns a list of detected text, bounding
+ def paddle_ocr(image: np.ndarray) -> List[Dict[str, Any]]:
+     """'paddle_ocr' extracts text from an image. It returns a list of detected text, bounding
      boxes with normalized coordinates, and confidence scores. The results are sorted
      from top-left to bottom right.

@@ -2048,51 +2047,33 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:

      Example
      -------
-     >>> ocr(image)
+     >>> paddle_ocr(image)
      [
          {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
      ]
      """

-     pil_image = Image.fromarray(image).convert("RGB")
-     image_size = pil_image.size[::-1]
+     image_size = image.shape[:2]
      if image_size[0] < 1 or image_size[1] < 1:
          return []
-     image_buffer = io.BytesIO()
-     pil_image.save(image_buffer, format="PNG")
-     buffer_bytes = image_buffer.getvalue()
-     image_buffer.close()
-
-     res = requests.post(
-         _OCR_URL,
-         files={"images": buffer_bytes},
-         data={"language": "en"},
-         headers={"contentType": "multipart/form-data", "apikey": _API_KEY},
-     )
-
-     if res.status_code != 200:
-         raise ValueError(f"OCR request failed with status code {res.status_code}")
-
-     data = res.json()
-     output = []
-     for det in data[0]:
-         label = det["text"]
-         box = [
-             det["location"][0]["x"],
-             det["location"][0]["y"],
-             det["location"][2]["x"],
-             det["location"][2]["y"],
-         ]
-         box = normalize_bbox(box, image_size)
-         output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
+     buffer_bytes = numpy_to_bytes(image)
+     files = [("image", buffer_bytes)]
+
+     res = send_inference_request(
+         payload={"function_name": "paddle-ocr"},
+         endpoint_name="paddle-ocr",
+         files=files,
+         v2=True,
+     )

      _display_tool_trace(
-         ocr.__name__,
+         paddle_ocr.__name__,
          {},
-         data,
-         cast(List[Tuple[str, bytes]], [("image", buffer_bytes)]),
+         res,
+         files,
      )
-     return sorted(output, key=lambda x: (x["bbox"][1], x["bbox"][0]))
+
+     return sorted(res, key=lambda x: (x["bbox"][1], x["bbox"][0]))


  def claude35_text_extraction(image: np.ndarray) -> str:
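For callers, the change above renames the `ocr` tool to `paddle_ocr` and routes it through `send_inference_request` instead of a direct `requests.post` call. A minimal sketch of the updated call, assuming `paddle_ocr` is re-exported from `vision_agent.tools` the way `ocr` was; the image path is a placeholder.

```python
# Hedged sketch: assumes vision_agent.tools re-exports paddle_ocr and that the
# paddle-ocr endpoint is reachable; "receipt.jpg" is a placeholder path.
import numpy as np
from PIL import Image

from vision_agent.tools import paddle_ocr

image = np.array(Image.open("receipt.jpg").convert("RGB"))
for det in paddle_ocr(image):
    # bbox is normalized [x1, y1, x2, y2]; results are sorted top-left to bottom-right
    print(det["label"], det["score"], det["bbox"])
```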
@@ -2370,7 +2351,12 @@ def agentic_activity_recognition(
      buffer_bytes = frames_to_bytes(frames, fps=fps)
      files = [("video", buffer_bytes)]

-     payload = {"prompt": prompt, "specificity": specificity, "with_audio": with_audio}
+     payload = {
+         "prompt": prompt,
+         "specificity": specificity,
+         "with_audio": with_audio,
+         "function_name": "agentic_activity_recognition",
+     }

      response = send_inference_request(
          payload=payload, endpoint_name="activity-recognition", files=files, v2=True
@@ -2529,48 +2515,53 @@ def detr_segmentation(image: np.ndarray) -> List[Dict[str, Any]]:
      return return_data


- def depth_anything_v2(image: np.ndarray) -> np.ndarray:
-     """'depth_anything_v2' is a tool that runs depth anything v2 model to generate a
-     depth image from a given RGB image. The returned depth image is monochrome and
-     represents depth values as pixel intensities with pixel values ranging from 0 to 255.
+ def depth_pro(
+     image: np.ndarray,
+ ) -> np.ndarray:
+     """'depth_pro' is a tool that runs the Apple DepthPro model to generate a
+     depth map from a given RGB image. The returned depth map has the same dimensions
+     as the input image, with each pixel indicating the distance from the camera in meters.

      Parameters:
          image (np.ndarray): The image to used to generate depth image

      Returns:
-         np.ndarray: A grayscale depth image with pixel values ranging from 0 to 255
-             where high values represent closer objects and low values further.
+         np.ndarray: A depth map with float32 pixel values that represent
+             the distance from the camera in meters.

      Example
      -------
-     >>> depth_anything_v2(image)
+     >>> depth_pro(image)
      array([[0, 0, 0, ..., 0, 0, 0],
          [0, 20, 24, ..., 0, 100, 103],
          ...,
          [10, 11, 15, ..., 202, 202, 205],
-         [10, 10, 10, ..., 200, 200, 200]], dtype=uint8),
+         [10, 10, 10, ..., 200, 200, 200]], dtype=np.float32),
      """
-     if image.shape[0] < 1 or image.shape[1] < 1:
-         raise ValueError(f"Image is empty, image shape: {image.shape}")

-     image_b64 = convert_to_b64(image)
-     data = {
-         "image": image_b64,
-         "function_name": "depth_anything_v2",
-     }
+     image_size = image.shape[:2]
+     if image_size[0] < 1 or image_size[1] < 1:
+         return np.empty(0)
+     buffer_bytes = numpy_to_bytes(image)
+     files = [("image", buffer_bytes)]

-     depth_map = send_inference_request(data, "depth-anything-v2", v2=True)
-     depth_map_np = np.array(depth_map["map"])
-     depth_map_np = (depth_map_np - depth_map_np.min()) / (
-         depth_map_np.max() - depth_map_np.min()
+     detections = send_inference_request(
+         payload={"function_name": "depth-pro"},
+         endpoint_name="depth-pro",
+         files=files,
+         v2=True,
      )
-     depth_map_np = (255 * depth_map_np).astype(np.uint8)
+
+     depth_bytes = b64decode(detections["depth"])
+     depth_map_np = np.frombuffer(depth_bytes, dtype=np.float32).reshape(image_size)
+
      _display_tool_trace(
-         depth_anything_v2.__name__,
+         depth_pro.__name__,
          {},
-         depth_map,
-         image_b64,
+         response=detections,
+         files=files,
      )
+
      return depth_map_np


@@ -3564,12 +3555,12 @@ FUNCTION_TOOLS = [
      claude35_text_extraction,
      agentic_document_extraction,
      document_qa,
-     ocr,
+     paddle_ocr,
      gemini_image_generation,
      qwen25_vl_images_vqa,
      qwen25_vl_video_vqa,
      agentic_activity_recognition,
-     depth_anything_v2,
+     depth_pro,
      generate_pose_image,
      vit_nsfw_classification,
      siglip_classification,
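Because `depth_pro` replaces `depth_anything_v2` in FUNCTION_TOOLS and returns metric depth (float32, meters) rather than a normalized uint8 image, callers that displayed the old output directly now need to normalize it themselves. A hedged sketch, assuming `depth_pro` is re-exported from `vision_agent.tools` and the endpoint is reachable; the image path is a placeholder.

```python
# Hedged sketch: depth_pro returns float32 distances in meters (same H x W as the
# input), so the old 0-255 visualization has to be reproduced manually. Note the
# meaning also flips: depth_anything_v2 encoded closeness, depth_pro grows with distance.
import numpy as np
from PIL import Image

from vision_agent.tools import depth_pro

image = np.array(Image.open("room.jpg").convert("RGB"))  # placeholder path
depth_m = depth_pro(image)  # float32 metric depth map

# Scale to 0-255 for display
span = max(float(depth_m.max() - depth_m.min()), 1e-6)
depth_u8 = (255 * (depth_m - depth_m.min()) / span).astype(np.uint8)
```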
@@ -247,7 +247,9 @@ def print_table(title: str, columns: List[str], rows: List[List[str]]) -> None:


  def add_media_to_chat(
-     chat: List[AgentMessage], code_interpreter: Optional[CodeInterpreter] = None
+     chat: List[AgentMessage],
+     code_interpreter: Optional[CodeInterpreter] = None,
+     append_to_prompt: bool = True,
  ) -> Tuple[List[AgentMessage], List[AgentMessage], List[Union[str, Path]]]:
      orig_chat = copy.deepcopy(chat)
      int_chat = copy.deepcopy(chat)
@@ -278,6 +280,7 @@ def add_media_to_chat(
                  if (
                      not str(chat_i.content).endswith(f" Media name {media}")
                      and chat_i.role == "user"
+                     and append_to_prompt
                  ):
                      chat_i.content += f" Media name {media}"
              chat_i.media = media_list_i if len(media_list_i) > 0 else None
@@ -304,13 +307,26 @@ def add_media_to_chat(

  def capture_media_from_exec(execution: Execution) -> List[str]:
      images = []
      for result in execution.results:
-         for format in result.formats():
-             if format in ["png", "jpeg"]:
-                 # converts the image to png and then to base64
-                 images.append(
-                     "data:image/png;base64,"
-                     + convert_to_b64(b64_to_pil(result[format]))
-                 )
+         if hasattr(result, "formats"):
+             for format in result.formats():
+                 if format in ["png", "jpeg"]:
+                     # converts the image to png and then to base64
+                     images.append(
+                         "data:image/png;base64,"
+                         + convert_to_b64(b64_to_pil(result[format]))
+                     )
+         elif hasattr(result, "savefig"):
+             pass
+         elif hasattr(result, "_repr_png_") and result._repr_png_():
+             images.append(
+                 "data:image/png;base64,"
+                 + convert_to_b64(b64_to_pil(result._repr_png_()))  # type: ignore
+             )
+         elif hasattr(result, "_repr_jpeg_") and result._repr_jpeg_():
+             images.append(
+                 "data:image/jpeg;base64,"
+                 + convert_to_b64(b64_to_pil(result._repr_jpeg_()))  # type: ignore
+             )
      return images


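The rewritten `capture_media_from_exec` above switches to duck typing over the execution results: notebook-style results with `formats()` keep the old path, figure-like objects exposing `savefig` are skipped, and anything with a non-empty `_repr_png_`/`_repr_jpeg_` is captured. A small self-contained illustration of that dispatch order, with made-up result classes:

```python
# Illustration only; these classes are made up and stand in for objects
# that can appear in execution.results.
class NotebookResult:
    def formats(self) -> list:
        return ["png"]

class FigureLike:
    def savefig(self, *args, **kwargs) -> None:
        pass

class PngObject:
    def _repr_png_(self) -> str:
        return "iVBORw0KGgo="  # placeholder base64 PNG payload

for result in [NotebookResult(), FigureLike(), PngObject(), object()]:
    if hasattr(result, "formats"):
        kind = "notebook result (old path)"
    elif hasattr(result, "savefig"):
        kind = "figure-like (skipped)"
    elif hasattr(result, "_repr_png_") and result._repr_png_():
        kind = "captured via _repr_png_"
    else:
        kind = "ignored"
    print(type(result).__name__, "->", kind)
```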
@@ -106,7 +106,7 @@ def send_task_inference_request(
      if metadata is not None and "function_name" in metadata:
          function_name = metadata["function_name"]
      response = _call_post(url, payload, session, files, function_name, is_form)
-     return response["data"]
+     return response["data"] if "data" in response else response


  def _create_requests_session(
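The one-line change to `send_task_inference_request` means endpoints that return an unwrapped body (no "data" key) are passed through as-is instead of raising a KeyError. A trivial illustration of the fallback:

```python
# Illustration only: an unwrapped response is now returned unchanged.
wrapped = {"data": [{"label": "person", "score": 0.9}]}
unwrapped = {"depth": "aGVsbG8="}  # placeholder base64 payload, e.g. from depth-pro

for response in (wrapped, unwrapped):
    result = response["data"] if "data" in response else response
    print(result)
```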
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: vision-agent
- Version: 1.1.16
+ Version: 1.1.18
  Summary: Toolset for Vision Agent
  Project-URL: Homepage, https://landing.ai
  Project-URL: repository, https://github.com/landing-ai/vision-agent
@@ -8,7 +8,7 @@ Project-URL: documentation, https://github.com/landing-ai/vision-agent
  Author-email: Landing AI <dev@landing.ai>
  License-File: LICENSE
  Requires-Python: <4.0,>=3.9
- Requires-Dist: anthropic<0.32,>=0.31.0
+ Requires-Dist: anthropic>=0.54.0
  Requires-Dist: av<12,>=11.0.0
  Requires-Dist: dotenv<0.10,>=0.9.9
  Requires-Dist: flake8<8,>=7.0.0
@@ -20,7 +20,7 @@ Requires-Dist: matplotlib<4,>=3.9.2
  Requires-Dist: nbclient<0.11,>=0.10.0
  Requires-Dist: nbformat<6,>=5.10.4
  Requires-Dist: numpy<2.0.0,>=1.21.0
- Requires-Dist: openai==1.55.3
+ Requires-Dist: openai>=1.86.0
  Requires-Dist: opencv-python==4.*
  Requires-Dist: opentelemetry-api<2,>=1.29.0
  Requires-Dist: pandas==2.*
@@ -36,7 +36,7 @@ Requires-Dist: tabulate<0.10,>=0.9.0
  Requires-Dist: tenacity<9,>=8.3.0
  Requires-Dist: tqdm<5.0.0,>=4.64.0
  Requires-Dist: typing-extensions==4.*
- Requires-Dist: yt-dlp>=2025.3.31
+ Requires-Dist: yt-dlp>=2025.6.9
  Description-Content-Type: text/markdown

  <div align="center">
@@ -1,15 +1,17 @@
  vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
- vision_agent/.sim_tools/df.csv,sha256=i732_U1KQf55UNhT-9srtZXF91XvDnfWBDdc8EqDmpw,41215
- vision_agent/.sim_tools/embs.npy,sha256=XCu3LnLS10IS3npfPMqX2VHIbDPq9iY_NPDBwq5AEj0,245888
+ vision_agent/.sim_tools/df.csv,sha256=Hus29ljPZV15EmAd1qFTStPuVDi8JDS0ekUcyjCTJ9U,41187
+ vision_agent/.sim_tools/embs.npy,sha256=OLj2rt4aBFze2HIf9bQ3yn0-_3RVPecrHWxm2CWvgn0,245888
  vision_agent/agent/README.md,sha256=3XSPG_VO7-6y6P8COvcgSSonWj5uvfgvfmOkBpfKK8Q,5527
- vision_agent/agent/__init__.py,sha256=_-nGLHhRTLViXxBSb9D4OwLTqk9HXKPEkTBkvK8c7OU,206
+ vision_agent/agent/__init__.py,sha256=lhPV1JUJ_Ckp_NHpq9VcwqaBd0wh4-GtyT79aFOWvI0,249
  vision_agent/agent/agent.py,sha256=o1Zuhl6h2R7uVwvUur0Aj38kak8U08plfeFWPst_ErM,1576
  vision_agent/agent/vision_agent_coder_prompts_v2.py,sha256=53b_DhQtffX5wxLuCbNQ83AJhB0P_3wEnuKr-v5bx-o,4866
  vision_agent/agent/vision_agent_coder_v2.py,sha256=ELc_J8Q4NKPs7YETu3a9O0Vk1zN3k6QfHBgu0M0IWGk,17450
  vision_agent/agent/vision_agent_planner_prompts_v2.py,sha256=O24BpRhMRZx7D_WdaRv-a2K6fLpin0o7oWxlvL70WpM,35944
  vision_agent/agent/vision_agent_planner_v2.py,sha256=Aww_BJhTFKZ5XjYe8FW57z2Gwp2se0vg1t1DKLGRAyQ,22050
  vision_agent/agent/vision_agent_prompts_v2.py,sha256=NG1xnZvZGi4DcqdfqZCkPkS7oka3gr6h42ekUKUKcqY,4231
+ vision_agent/agent/vision_agent_prompts_v3.py,sha256=ABFdTe1TMnFBy_VH_AYDSE0IHFiPX0KOB-nNRfLurxM,16548
  vision_agent/agent/vision_agent_v2.py,sha256=iPW6DowH7wCFIA5vb1SdSLfZFWbn_oSC7Xa8uO8KIJI,11675
+ vision_agent/agent/vision_agent_v3.py,sha256=tFr9VYSG65R0PRypiNzoW6NzKV1yuBPXIzmE4HO-p0A,10228
  vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
  vision_agent/configs/__init__.py,sha256=Iu75-w9_nlPmnB_qKA7nYaaaHf7xtTrDmK8N4v2WV34,27
@@ -19,28 +21,29 @@ vision_agent/configs/openai_config.py,sha256=Bw7ElBYmBcaZttyRBoNpcy3uTkqg5qADk8L
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
  vision_agent/lmm/__init__.py,sha256=4qX2lmGnKWHeKftXueEi9xj_ieK2nQh_ipHf72nKGFk,84
- vision_agent/lmm/lmm.py,sha256=w23nWSmUiW1rxfRC-Td44-UR3-8k0ey80-0SVZraeqA,23681
+ vision_agent/lmm/lmm.py,sha256=gGUf621irXgQ18W497bMa1vQzbgUsZQsRwLHFNpBSJA,29982
  vision_agent/models/__init__.py,sha256=eIP0pD5dYog8zUA7uuTmUxCF6SIutbLRLRE0cmuCJgQ,326
  vision_agent/models/agent_types.py,sha256=vBZ9-ns5lHDdFMO7ulCGGeZ6OwRo3gK4O3vN0814IWc,3064
  vision_agent/models/lmm_types.py,sha256=v04h-NjbczHOIN8UWa1vvO5-1BDuZ4JQhD2mge1cXmw,305
  vision_agent/models/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
  vision_agent/sim/__init__.py,sha256=Aouz6HEPPTYcLxR5_0fTYCL1OvPKAH1RMWAF90QXAlA,135
  vision_agent/sim/sim.py,sha256=WQY_x9A4VT647qGDBScJ3R8_Iv0aoYLHTgwcQSCXwv4,10059
- vision_agent/tools/__init__.py,sha256=zf8HzjcMSgxKhtrxbqYe9hmvsfuweeDMrOc8eVA8Ya8,2477
+ vision_agent/tools/__init__.py,sha256=USlLNSJ1YZ3UQBAHYu6MXx8Scf639sfL10im1NUuI4k,2490
  vision_agent/tools/meta_tools.py,sha256=9iJilpGYEiXW0nYPTYAWHa7l23wGN8IM5KbE7mWDOT0,6798
  vision_agent/tools/planner_tools.py,sha256=iQWtTgXdomn0IWrbmvXXM-y8Q_RSEOxyP04HIRLrgWI,19576
+ vision_agent/tools/planner_v3_tools.py,sha256=9uLKDtdWdpiRm_lVgc2DdeLEo2D4cw2demFTUQ401Zo,6525
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
- vision_agent/tools/tools.py,sha256=i9GGGu8tvo2M6O5fF4UUBTpn_Ul2KEN9mG3ZlJ95qao,124929
+ vision_agent/tools/tools.py,sha256=lndSG8xrIWcs6Rpe1-Jq44niUDXQnWlYfGP2B1YjpI0,124216
  vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
- vision_agent/utils/agent.py,sha256=2ifTP5QElItnr4YHOJR6L5P1PUzV0GhChTTqVxuVyQg,15153
+ vision_agent/utils/agent.py,sha256=88axZswX7DibAkckc0mDJWLr0SoVPyam4mqO4zsLRNQ,15827
  vision_agent/utils/exceptions.py,sha256=zis8smCbdEylBVZBTVfEUfAh7Rb7cWV3MSPambu6FsQ,1837
  vision_agent/utils/execute.py,sha256=QAql6KC2uEhX1o_44mMA77lCmMUs0itaaGMFSfJBki8,21520
  vision_agent/utils/image_utils.py,sha256=bJM2mEvB6E__M9pxi74yQYzAiZ7mu3KE2ptyVrp5vzQ,12533
- vision_agent/utils/tools.py,sha256=Days0dETPRQLSDamMKPnXFsc5g5IKX9QJcPPNmSHNdM,8111
+ vision_agent/utils/tools.py,sha256=gF5h1QuBCJaC2u_FRxPR32eYPRa78R_DPcmOiPcnb3A,8147
  vision_agent/utils/tools_doc.py,sha256=PKcXXbJktiuPi9q6Q1zXzFx24Dh229SNgWBDtZ2fQSQ,2730
  vision_agent/utils/video.py,sha256=rjsQ1sKKisaQ6AVjJz0zd_G4g-ovRweS_rs4JEhenoI,5340
  vision_agent/utils/video_tracking.py,sha256=DZLFpNCuzuPJQzbQoVNcp-m4dKxgiKdCNM5QTh_zURE,12245
- vision_agent-1.1.16.dist-info/METADATA,sha256=JMmL6rIdT1-WO6XTrjNHucAp4S_UlkjDW1dxznQJ994,12078
- vision_agent-1.1.16.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- vision_agent-1.1.16.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- vision_agent-1.1.16.dist-info/RECORD,,
+ vision_agent-1.1.18.dist-info/METADATA,sha256=S7WnsgYo0nBT-O4Ca6-rYLG3tjQ9np5Tk1Fv1Z-_0pU,12071
+ vision_agent-1.1.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ vision_agent-1.1.18.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ vision_agent-1.1.18.dist-info/RECORD,,