PyPI - vision-agent - Versions diffs - 1.1.17__py3-none-any.whl → 1.1.18__py3-none-any.whl - Mend

vision-agent 1.1.17__py3-none-any.whl → 1.1.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

vision_agent/.sim_tools/df.csv +1 -1
vision_agent/agent/__init__.py +1 -0
vision_agent/agent/vision_agent_prompts_v3.py +372 -0
vision_agent/agent/vision_agent_v3.py +278 -0
vision_agent/lmm/lmm.py +219 -57
vision_agent/tools/__init__.py +2 -2
vision_agent/tools/planner_v3_tools.py +206 -0
vision_agent/utils/agent.py +24 -8
vision_agent/utils/tools.py +1 -1
{vision_agent-1.1.17.dist-info → vision_agent-1.1.18.dist-info}/METADATA +4 -4
{vision_agent-1.1.17.dist-info → vision_agent-1.1.18.dist-info}/RECORD +13 -10
{vision_agent-1.1.17.dist-info → vision_agent-1.1.18.dist-info}/WHEEL +0 -0
{vision_agent-1.1.17.dist-info → vision_agent-1.1.18.dist-info}/licenses/LICENSE +0 -0

vision_agent/lmm/lmm.py CHANGED Viewed

@@ -1,19 +1,33 @@
+import base64
 import json
 import os
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import Any, Dict, Iterator, List, Optional, Sequence, Union, cast
-import base64
+from typing import (
+    Any,
+    Dict,
+    Iterator,
+    List,
+    Optional,
+    Sequence,
+    Union,
+    cast,
+)
 import anthropic
 import requests
-from anthropic.types import ImageBlockParam, MessageParam, TextBlockParam
-from openai import AzureOpenAI, OpenAI
+from anthropic.types import (
+    ImageBlockParam,
+    MessageParam,
+    TextBlockParam,
+    ThinkingBlockParam,
+)
 from google import genai  # type: ignore
 from google.genai import types  # type: ignore
+from openai import AzureOpenAI, OpenAI
 from vision_agent.models import Message
+from vision_agent.utils.agent import extract_tag
 from vision_agent.utils.image_utils import encode_media
@@ -99,11 +113,15 @@ class OpenAILMM(LMM):
                 [{"role": "user", "content": "Hello!", "media": ["image1.jpg", ...]}, ...]
         """
         fixed_chat = []
-        for c in chat:
-            fixed_c = {"role": c["role"]}
-            fixed_c["content"] = [{"type": "text", "text": c["content"]}]  # type: ignore
-            if "media" in c and self.model_name != "o3-mini":
-                for media in c["media"]:
+        for msg in chat:
+            fixed_c = {"role": msg["role"]}
+            fixed_c["content"] = [{"type": "text", "text": msg["content"]}]  # type: ignore
+            if (
+                "media" in msg
+                and msg["media"] is not None
+                and self.model_name != "o3-mini"
+            ):
+                for media in msg["media"]:
                     resize = kwargs["resize"] if "resize" in kwargs else self.image_size
                     image_detail = (
                         kwargs["image_detail"]
@@ -297,14 +315,14 @@ class OllamaLMM(LMM):
                 [{"role": "user", "content": "Hello!", "media": ["image1.jpg", ...]}, ...]
         """
         fixed_chat = []
-        for message in chat:
-            if "media" in message:
+        for msg in chat:
+            if "media" in msg and msg["media"] is not None:
                 resize = kwargs["resize"] if "resize" in kwargs else self.image_size
-                message["images"] = [
-                    encode_media(cast(str, m), resize=resize) for m in message["media"]
+                msg["images"] = [
+                    encode_media(cast(str, m), resize=resize) for m in msg["media"]
                 ]
-                del message["media"]
-            fixed_chat.append(message)
+                del msg["media"]
+            fixed_chat.append(msg)
         url = f"{self.url}/chat"
         model = self.model_name
         messages = fixed_chat
@@ -410,63 +428,207 @@ class AnthropicLMM(LMM):
     def __call__(
         self,
-        input: Union[str, Sequence[Dict[str, Any]]],
+        input: Union[str, Sequence[Message]],
         **kwargs: Any,
     ) -> Union[str, Iterator[Optional[str]]]:
         if isinstance(input, str):
             return self.generate(input, **kwargs)
         return self.chat(input, **kwargs)
-    def chat(
+    def create_thinking_assistant_message(
         self,
-        chat: Sequence[Dict[str, Any]],
-        **kwargs: Any,
-    ) -> Union[str, Iterator[Optional[str]]]:
+        msg_content: str,
+    ) -> MessageParam:
+        content: List[Union[TextBlockParam, ThinkingBlockParam]] = []
+        thinking_content = extract_tag(msg_content, "thinking")
+        signature = extract_tag(msg_content, "signature")
+        if thinking_content:
+            content.append(
+                ThinkingBlockParam(
+                    type="thinking",
+                    thinking=thinking_content.strip(),
+                    signature=signature.strip() if signature else "",
+                )
+            )
+        signature_content = extract_tag(msg_content, "signature")
+        if signature_content:
+            text_content = msg_content.replace(
+                f"<thinking>{thinking_content}</thinking>", ""
+            ).replace(f"<signature>{signature_content}</signature>", "")
+        else:
+            text_content = msg_content.replace(
+                f"<thinking>{thinking_content}</thinking>", ""
+            )
+        if text_content.strip():
+            content.append(TextBlockParam(type="text", text=text_content.strip()))
+        return MessageParam(role="assistant", content=content)
+    def _setup_chat_kwargs(self, kwargs: Dict[str, Any]) -> tuple[Dict[str, Any], bool]:
+        """Set up kwargs and determine if thinking mode is enabled."""
+        tmp_kwargs = self.kwargs | kwargs
+        thinking_enabled = (
+            "thinking" in tmp_kwargs
+            and "type" in tmp_kwargs["thinking"]
+            and tmp_kwargs["thinking"]["type"] == "enabled"
+        )
+        if thinking_enabled:
+            tmp_kwargs["temperature"] = 1.0
+        return tmp_kwargs, thinking_enabled
+    def _convert_messages_to_anthropic_format(
+        self, chat: Sequence[Message], thinking_enabled: bool, **kwargs: Any
+    ) -> List[MessageParam]:
+        """Convert chat messages to Anthropic format."""
         messages: List[MessageParam] = []
         for msg in chat:
-            content: List[Union[TextBlockParam, ImageBlockParam]] = [
-                TextBlockParam(type="text", text=msg["content"])
-            ]
-            if "media" in msg:
-                for media_path in msg["media"]:
-                    resize = kwargs["resize"] if "resize" in kwargs else self.image_size
-                    encoded_media = encode_media(media_path, resize=resize)
-                    if encoded_media.startswith("data:image/png;base64,"):
-                        encoded_media = encoded_media[len("data:image/png;base64,") :]
-                    content.append(
-                        ImageBlockParam(
-                            type="image",
-                            source={
-                                "type": "base64",
-                                "media_type": "image/png",
-                                "data": encoded_media,
-                            },
+            if msg["role"] == "user":
+                content: List[Union[TextBlockParam, ImageBlockParam]] = [
+                    TextBlockParam(type="text", text=cast(str, msg["content"]))
+                ]
+                if "media" in msg and msg["media"] is not None:
+                    for media_path in msg["media"]:
+                        resize = (
+                            kwargs["resize"] if "resize" in kwargs else self.image_size
+                        )
+                        encoded_media = encode_media(
+                            cast(str, media_path), resize=resize
+                        )
+                        if encoded_media.startswith("data:image/png;base64,"):
+                            encoded_media = encoded_media[
+                                len("data:image/png;base64,") :
+                            ]
+                        content.append(
+                            ImageBlockParam(
+                                type="image",
+                                source={
+                                    "type": "base64",
+                                    "media_type": "image/png",
+                                    "data": encoded_media,
+                                },
+                            )
+                        )
+                messages.append({"role": "user", "content": content})
+            elif msg["role"] == "assistant":
+                if thinking_enabled:
+                    messages.append(
+                        self.create_thinking_assistant_message(
+                            cast(str, msg["content"]),
+                        )
+                    )
+                else:
+                    messages.append(
+                        MessageParam(
+                            role="assistant",
+                            content=[
+                                {"type": "text", "text": cast(str, msg["content"])}
+                            ],
                         )
                     )
-            messages.append({"role": msg["role"], "content": content})
+            else:
+                raise ValueError(
+                    f"Unsupported role {msg['role']}. Only 'user' and 'assistant' roles are supported."
+                )
-        # prefers kwargs from second dictionary over first
-        tmp_kwargs = self.kwargs | kwargs
-        response = self.client.messages.create(
-            model=self.model_name, messages=messages, **tmp_kwargs
+        return messages
+    def _handle_streaming_response(
+        self, stream_response: anthropic.Stream[anthropic.MessageStreamEvent]
+    ) -> Iterator[Optional[str]]:
+        """Handle streaming response from Anthropic API."""
+        def f() -> Iterator[Optional[str]]:
+            thinking_start = False
+            signature_start = False
+            for chunk in stream_response:
+                if chunk.type == "message_start" or chunk.type == "content_block_start":
+                    continue
+                elif chunk.type == "content_block_delta":
+                    if chunk.delta.type == "text_delta":
+                        if thinking_start:
+                            thinking_start = False
+                            yield f"</thinking>\n{chunk.delta.text}"
+                        elif signature_start:
+                            signature_start = False
+                            yield f"</signature>\n{chunk.delta.text}"
+                        else:
+                            yield chunk.delta.text
+                    elif chunk.delta.type == "thinking_delta":
+                        if not thinking_start:
+                            thinking_start = True
+                            yield f"<thinking>{chunk.delta.thinking}"
+                        else:
+                            yield chunk.delta.thinking
+                    elif chunk.delta.type == "signature_delta":
+                        if not signature_start:
+                            signature_start = True
+                            yield f"<signature>{chunk.delta.signature}"
+                        else:
+                            yield chunk.delta.signature
+                elif chunk.type == "message_stop":
+                    yield None
+        return f()
+    def _format_thinking_response(self, msg_response: anthropic.types.Message) -> str:
+        """Format thinking mode response with proper tags."""
+        thinking = ""
+        signature = ""
+        redacted_thinking = ""
+        text = ""
+        for block in msg_response.content:
+            if block.type == "thinking":
+                thinking += block.thinking
+                if block.signature:
+                    signature = block.signature
+            elif block.type == "text":
+                text += block.text
+            elif block.type == "redacted_thinking":
+                redacted_thinking += block.data
+        return (
+            f"<thinking>{thinking}</thinking>\n"
+            + (
+                f"<redacted_thinking>{redacted_thinking}</redacted_thinking>\n"
+                if redacted_thinking
+                else ""
+            )
+            + (f"<signature>{signature}</signature>\n" if signature else "")
+            + text
         )
-        if "stream" in tmp_kwargs and tmp_kwargs["stream"]:
-            def f() -> Iterator[Optional[str]]:
-                for chunk in response:
-                    if (
-                        chunk.type == "message_start"
-                        or chunk.type == "content_block_start"
-                    ):
-                        continue
-                    elif chunk.type == "content_block_delta":
-                        yield chunk.delta.text
-                    elif chunk.type == "message_stop":
-                        yield None
+    def _handle_non_streaming_response(
+        self, response_untyped: Any, thinking_enabled: bool
+    ) -> str:
+        """Handle non-streaming response from Anthropic API."""
+        msg_response = cast(anthropic.types.Message, response_untyped)
+        if thinking_enabled:
+            return self._format_thinking_response(msg_response)
+        return cast(anthropic.types.TextBlock, msg_response.content[0]).text
-            return f()
+    def chat(
+        self,
+        chat: Sequence[Message],
+        **kwargs: Any,
+    ) -> Union[str, Iterator[Optional[str]]]:
+        tmp_kwargs, thinking_enabled = self._setup_chat_kwargs(kwargs)
+        messages = self._convert_messages_to_anthropic_format(
+            chat, thinking_enabled, **kwargs
+        )
+        response_untyped = self.client.messages.create(
+            model=self.model_name, messages=messages, **tmp_kwargs
+        )
+        is_stream = bool(tmp_kwargs.get("stream", False))
+        if is_stream:
+            stream_response = cast(
+                anthropic.Stream[anthropic.MessageStreamEvent], response_untyped
+            )
+            return self._handle_streaming_response(stream_response)
         else:
-            return cast(str, response.content[0].text)
+            return self._handle_non_streaming_response(
+                response_untyped, thinking_enabled
+            )
     def generate(
         self,

vision_agent/tools/__init__.py CHANGED Viewed

@@ -42,7 +42,6 @@ from .tools import (
     glee_sam2_video_tracking,
     load_image,
     minimum_distance,
-    paddle_ocr,
     od_sam2_video_tracking,
     overlay_bounding_boxes,
     overlay_heat_map,
@@ -50,6 +49,7 @@ from .tools import (
     owlv2_object_detection,
     owlv2_sam2_instance_segmentation,
     owlv2_sam2_video_tracking,
+    paddle_ocr,
     qwen2_vl_images_vqa,
     qwen2_vl_video_vqa,
     qwen25_vl_images_vqa,
@@ -74,7 +74,7 @@ def register_tool(imports: Optional[List] = None) -> Callable:
     def decorator(tool: Callable) -> Callable:
         import inspect
-        global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO
+        global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO  # noqa: F824
         from vision_agent.tools.tools import TOOLS
         if tool not in TOOLS:  # type: ignore

vision_agent/tools/planner_v3_tools.py ADDED Viewed

@@ -0,0 +1,206 @@
+import base64
+import copy
+import io
+from typing import Dict, List, Optional, Tuple, Union, cast
+import cv2
+import matplotlib.figure
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+from PIL.Image import Image as PILImageType
+from vision_agent.utils.image_utils import (
+    denormalize_bbox,
+    normalize_bbox,
+    numpy_to_bytes,
+    rle_decode_array,
+)
+from vision_agent.utils.tools import send_inference_request
+def maybe_denormalize_bbox(
+    bbox: List[Union[int, float]], image_size: Tuple[int, ...]
+) -> List[float]:
+    if all([0 <= c <= 1 for c in bbox]):
+        return denormalize_bbox(bbox, image_size)
+    return bbox
+def maybe_normalize_bbox(
+    bbox: List[Union[int, float]], image_size: Tuple[int, ...]
+) -> List[float]:
+    if any([1 <= c for c in bbox]):
+        return normalize_bbox(bbox, image_size)
+    return bbox
+def instance_segmentation(
+    prompt: str, image: np.ndarray, threshold: float = 0.23, nms_threshold: float = 0.5
+) -> List[Dict[str, Union[str, float, List[float], np.ndarray]]]:
+    image_bytes = numpy_to_bytes(image)
+    files = [("image", image_bytes)]
+    data = {"prompts": [prompt], "threshold": threshold, "nms_threshold": nms_threshold}
+    results = send_inference_request(
+        data,
+        "glee",
+        files=files,
+        v2=True,
+    )
+    results = results[0]
+    results_formatted = [
+        {
+            "label": elt["label"],
+            "score": elt["score"],
+            "bbox": normalize_bbox(elt["bounding_box"], image.shape[:2]),
+            "mask": np.array(rle_decode_array(elt["mask"])),
+        }
+        for elt in results
+    ]
+    return results_formatted
+def ocr(image: np.ndarray) -> List[Dict[str, Union[str, float, List[float]]]]:
+    image_bytes = numpy_to_bytes(image)
+    files = [("image", image_bytes)]
+    results = send_inference_request(
+        {},
+        "paddle-ocr",
+        files=files,
+        v2=True,
+    )
+    results_formatted = [
+        {
+            "label": elt["label"],
+            "score": elt["score"],
+            "bbox": normalize_bbox(elt["bbox"], image.shape[:2]),
+        }
+        for elt in results
+    ]
+    return results_formatted
+def depth_estimation(image: np.ndarray) -> np.ndarray:
+    shape = image.shape[:2]
+    image_bytes = numpy_to_bytes(image)
+    files = [("image", image_bytes)]
+    results = send_inference_request(
+        {},
+        "depth-pro",
+        files=files,
+        v2=True,
+    )
+    depth = np.frombuffer(base64.b64decode(results["depth"]), dtype=np.float32).reshape(
+        shape
+    )
+    return depth
+def visualize_bounding_boxes(
+    image: np.ndarray, bounding_boxes: List[Dict[str, Union[str, float, List[float]]]]
+) -> np.ndarray:
+    image = image.copy()
+    image_size = image.shape[:2]
+    bounding_boxes = copy.deepcopy(bounding_boxes)
+    for bbox in bounding_boxes:
+        bbox["bbox"] = maybe_denormalize_bbox(
+            cast(List[float], bbox["bbox"]), image_size
+        )
+    for bbox in bounding_boxes:
+        x1, y1, x2, y2 = bbox["bbox"]  # type: ignore
+        cv2.rectangle(image, (int(x1), int(y1)), (int(x2), int(y2)), (0, 0, 255), 2)
+    return image
+def visualize_segmentation_masks(
+    image: np.ndarray,
+    segmentation_masks: List[Dict[str, Union[str, float, np.ndarray]]],
+) -> np.ndarray:
+    alpha = 0.5
+    overlay = image.copy()
+    color_mask = np.zeros_like(image)
+    color_mask[:, :] = (0, 100, 255)
+    for elt in segmentation_masks:
+        mask = cast(np.ndarray, elt["mask"])
+        overlay[mask == 1] = (1 - alpha) * overlay[mask == 1] + alpha * color_mask[
+            mask == 1
+        ]
+        # draw outline on the mask so it doesn't just think the color of the object changed
+        mask_uint8 = (mask * 255).astype(np.uint8)
+        contours, _ = cv2.findContours(
+            mask_uint8, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
+        )
+        cv2.drawContours(overlay, contours, -1, (0, 0, 255), 2, lineType=cv2.LINE_AA)
+    overlay = np.clip(overlay, 0, 255).astype(np.uint8)
+    return overlay
+def get_crops(
+    image: np.ndarray, bounding_boxes: List[Dict[str, Union[str, float, List[float]]]]
+) -> List[np.ndarray]:
+    image = image.copy()
+    bounding_boxes = copy.deepcopy(bounding_boxes)
+    for bbox in bounding_boxes:
+        bbox["bbox"] = maybe_denormalize_bbox(
+            cast(List[float], bbox["bbox"]), image.shape[:2]
+        )
+    crops = []
+    for bbox in bounding_boxes:
+        x1, y1, x2, y2 = bbox["bbox"]  # type: ignore
+        crops.append(image[int(y1) : int(y2), int(x1) : int(x2)])
+    return crops
+def rotate_90(image: np.ndarray, k: int = 1) -> np.ndarray:
+    return np.rot90(image, k=k, axes=(0, 1))
+def iou(
+    pred1: Union[List[float], np.ndarray], pred2: Union[List[float], np.ndarray]
+) -> float:
+    if isinstance(pred1, list) and isinstance(pred2, list):
+        x1, y1, x2, y2 = pred1
+        x1_, y1_, x2_, y2_ = pred2
+        intersection = max(0, min(x2, x2_) - max(x1, x1_)) * max(
+            0, min(y2, y2_) - max(y1, y1_)
+        )
+        union = (x2 - x1) * (y2 - y1) + (x2_ - x1_) * (y2_ - y1_) - intersection
+        return intersection / union
+    elif isinstance(pred1, np.ndarray) and isinstance(pred2, np.ndarray):
+        pred1 = np.clip(pred1, 0, 1)
+        pred2 = np.clip(pred2, 0, 1)
+        intersection = np.sum(pred1 * pred2)
+        union = np.sum(pred1) + np.sum(pred2) - intersection
+        return intersection / union
+    raise ValueError("Unsupported input types for IoU calculation.")
+def display_image(
+    image: Union[np.ndarray, PILImageType, matplotlib.figure.Figure, str],
+) -> None:
+    display_img: Optional[PILImageType] = None
+    if isinstance(image, np.ndarray):
+        display_img = Image.fromarray(image)
+    elif isinstance(image, matplotlib.figure.Figure):
+        # Render the figure to a BytesIO buffer
+        buf = io.BytesIO()
+        image.savefig(buf, format="png")
+        buf.seek(0)
+        # Load the buffer as a PIL Image
+        display_img = Image.open(buf)
+        plt.close(image)  # type: ignore
+    elif isinstance(image, PILImageType):
+        display_img = image  # Already a PIL Image
+    elif isinstance(image, str):
+        display_img = Image.open(image)
+    if display_img is not None:
+        plt.imshow(display_img)  # type: ignore
+        plt.axis("off")  # type: ignore
+        plt.show()
+    else:
+        # Handle cases where image type is not supported or conversion failed
+        print("Unsupported image type or conversion failed.")

vision_agent/utils/agent.py CHANGED Viewed

@@ -247,7 +247,9 @@ def print_table(title: str, columns: List[str], rows: List[List[str]]) -> None:
 def add_media_to_chat(
-    chat: List[AgentMessage], code_interpreter: Optional[CodeInterpreter] = None
+    chat: List[AgentMessage],
+    code_interpreter: Optional[CodeInterpreter] = None,
+    append_to_prompt: bool = True,
 ) -> Tuple[List[AgentMessage], List[AgentMessage], List[Union[str, Path]]]:
     orig_chat = copy.deepcopy(chat)
     int_chat = copy.deepcopy(chat)
@@ -278,6 +280,7 @@ def add_media_to_chat(
                 if (
                     not str(chat_i.content).endswith(f" Media name {media}")
                     and chat_i.role == "user"
+                    and append_to_prompt
                 ):
                     chat_i.content += f" Media name {media}"
             chat_i.media = media_list_i if len(media_list_i) > 0 else None
@@ -304,13 +307,26 @@ def add_media_to_chat(
 def capture_media_from_exec(execution: Execution) -> List[str]:
     images = []
     for result in execution.results:
-        for format in result.formats():
-            if format in ["png", "jpeg"]:
-                # converts the image to png and then to base64
-                images.append(
-                    "data:image/png;base64,"
-                    + convert_to_b64(b64_to_pil(result[format]))
-                )
+        if hasattr(result, "formats"):
+            for format in result.formats():
+                if format in ["png", "jpeg"]:
+                    # converts the image to png and then to base64
+                    images.append(
+                        "data:image/png;base64,"
+                        + convert_to_b64(b64_to_pil(result[format]))
+                    )
+        elif hasattr(result, "savefig"):
+            pass
+        elif hasattr(result, "_repr_png_") and result._repr_png_():
+            images.append(
+                "data:image/png;base64,"
+                + convert_to_b64(b64_to_pil(result._repr_png_()))  # type: ignore
+            )
+        elif hasattr(result, "_repr_jpeg_") and result._repr_jpeg_():
+            images.append(
+                "data:image/jpeg;base64,"
+                + convert_to_b64(b64_to_pil(result._repr_jpeg_()))  # type: ignore
+            )
     return images

vision_agent/utils/tools.py CHANGED Viewed

@@ -106,7 +106,7 @@ def send_task_inference_request(
     if metadata is not None and "function_name" in metadata:
         function_name = metadata["function_name"]
     response = _call_post(url, payload, session, files, function_name, is_form)
-    return response["data"]
+    return response["data"] if "data" in response else response
 def _create_requests_session(

{vision_agent-1.1.17.dist-info → vision_agent-1.1.18.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: vision-agent
-Version: 1.1.17
+Version: 1.1.18
 Summary: Toolset for Vision Agent
 Project-URL: Homepage, https://landing.ai
 Project-URL: repository, https://github.com/landing-ai/vision-agent
@@ -8,7 +8,7 @@ Project-URL: documentation, https://github.com/landing-ai/vision-agent
 Author-email: Landing AI <dev@landing.ai>
 License-File: LICENSE
 Requires-Python: <4.0,>=3.9
-Requires-Dist: anthropic<0.32,>=0.31.0
+Requires-Dist: anthropic>=0.54.0
 Requires-Dist: av<12,>=11.0.0
 Requires-Dist: dotenv<0.10,>=0.9.9
 Requires-Dist: flake8<8,>=7.0.0
@@ -20,7 +20,7 @@ Requires-Dist: matplotlib<4,>=3.9.2
 Requires-Dist: nbclient<0.11,>=0.10.0
 Requires-Dist: nbformat<6,>=5.10.4
 Requires-Dist: numpy<2.0.0,>=1.21.0
-Requires-Dist: openai==1.55.3
+Requires-Dist: openai>=1.86.0
 Requires-Dist: opencv-python==4.*
 Requires-Dist: opentelemetry-api<2,>=1.29.0
 Requires-Dist: pandas==2.*
@@ -36,7 +36,7 @@ Requires-Dist: tabulate<0.10,>=0.9.0
 Requires-Dist: tenacity<9,>=8.3.0
 Requires-Dist: tqdm<5.0.0,>=4.64.0
 Requires-Dist: typing-extensions==4.*
-Requires-Dist: yt-dlp>=2025.3.31
+Requires-Dist: yt-dlp>=2025.6.9
 Description-Content-Type: text/markdown
 <div align="center">

vision-agent 1.1.17__py3-none-any.whl → 1.1.18__py3-none-any.whl

vision-agent 1.1.17__py3-none-any.whl → 1.1.18__py3-none-any.whl