vision-agent 0.2.98__py3-none-any.whl → 0.2.100__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +1 -1
- vision_agent/clients/__init__.py +0 -0
- vision_agent/clients/http.py +46 -0
- vision_agent/clients/landing_public_api.py +26 -0
- vision_agent/tools/__init__.py +1 -1
- vision_agent/tools/meta_tools.py +45 -0
- vision_agent/tools/meta_tools_types.py +30 -0
- vision_agent/tools/tool_utils.py +4 -3
- vision_agent/tools/tools.py +55 -62
- vision_agent/utils/execute.py +2 -2
- vision_agent/utils/type_defs.py +1 -1
- {vision_agent-0.2.98.dist-info → vision_agent-0.2.100.dist-info}/METADATA +1 -1
- {vision_agent-0.2.98.dist-info → vision_agent-0.2.100.dist-info}/RECORD +15 -11
- {vision_agent-0.2.98.dist-info → vision_agent-0.2.100.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.98.dist-info → vision_agent-0.2.100.dist-info}/WHEEL +0 -0
vision_agent/agent/vision_agent.py
CHANGED
@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
     ]
 
     @staticmethod
vision_agent/clients/__init__.py
File without changes
vision_agent/clients/http.py
ADDED
@@ -0,0 +1,46 @@
+import json
+import logging
+from typing import Any, Dict, Optional
+
+from requests import Session
+from requests.adapters import HTTPAdapter
+from requests.exceptions import ConnectionError, RequestException, Timeout
+
+_LOGGER = logging.getLogger(__name__)
+
+
+class BaseHTTP:
+    _TIMEOUT = 30  # seconds
+    _MAX_RETRIES = 3
+
+    def __init__(
+        self, base_endpoint: str, *, headers: Optional[Dict[str, Any]] = None
+    ) -> None:
+        self._headers = headers
+        if headers is None:
+            self._headers = {
+                "Content-Type": "application/json",
+            }
+        self._base_endpoint = base_endpoint
+        self._session = Session()
+        self._session.headers.update(self._headers)  # type: ignore
+        self._session.mount(
+            self._base_endpoint, HTTPAdapter(max_retries=self._MAX_RETRIES)
+        )
+
+    def post(self, url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+        formatted_url = f"{self._base_endpoint}/{url}"
+        _LOGGER.info(f"Sending data to {formatted_url}")
+        try:
+            response = self._session.post(
+                url=formatted_url, json=payload, timeout=self._TIMEOUT
+            )
+            response.raise_for_status()
+            result: Dict[str, Any] = response.json()
+            _LOGGER.info(json.dumps(result))
+        except (ConnectionError, Timeout, RequestException) as err:
+            _LOGGER.warning(f"Error: {err}.")
+        except json.JSONDecodeError:
+            resp_text = response.text
+            _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+        return result
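For orientation, a minimal usage sketch of the new BaseHTTP client; the endpoint, path and payload below are placeholders, not part of the package:

    from vision_agent.clients.http import BaseHTTP

    # Placeholder endpoint, path and payload, for illustration only.
    client = BaseHTTP("https://api.example.com", headers={"apikey": "my-key"})
    result = client.post("v1/echo", payload={"message": "hello"})
    print(result)  # decoded JSON body as a dict

Note that post() logs and swallows request errors; in that case `result` is never bound and the final `return result` raises an error anyway, so callers should treat a failed request as fatal.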
vision_agent/clients/landing_public_api.py
ADDED
@@ -0,0 +1,26 @@
+import os
+from uuid import UUID
+from typing import List
+
+from vision_agent.clients.http import BaseHTTP
+from vision_agent.utils.type_defs import LandingaiAPIKey
+from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
+
+
+class LandingPublicAPI(BaseHTTP):
+    def __init__(self) -> None:
+        landing_url = os.environ.get("LANDINGAI_URL", "https://api.dev.landing.ai")
+        landing_api_key = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
+        headers = {"Content-Type": "application/json", "apikey": landing_api_key}
+        super().__init__(base_endpoint=landing_url, headers=headers)
+
+    def launch_fine_tuning_job(
+        self, model_name: str, task: PromptTask, bboxes: List[BboxInputBase64]
+    ) -> UUID:
+        url = "v1/agent/jobs/fine-tuning"
+        data = {
+            "model": {"name": model_name, "task": task.value},
+            "bboxes": [bbox.model_dump(by_alias=True) for bbox in bboxes],
+        }
+        response = self.post(url, payload=data)
+        return UUID(response["jobId"])
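A sketch of driving the new client directly; the base64 string is a placeholder (in the package, florencev2_fine_tuning in meta_tools.py does the conversion via convert_to_b64):

    from vision_agent.clients.landing_public_api import LandingPublicAPI
    from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask

    bbox = BboxInputBase64(
        image="<base64-encoded image>",  # placeholder
        filename="filename.png",
        labels=["screw"],
        bboxes=[(370, 30, 560, 290)],
    )
    # Reads LANDINGAI_URL / LANDINGAI_API_KEY from the environment, with defaults.
    api = LandingPublicAPI()
    job_id = api.launch_fine_tuning_job("florencev2", PromptTask.OBJECT_DETECTION, [bbox])
    print(job_id)  # a UUID parsed from the response's "jobId" field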
vision_agent/tools/__init__.py
CHANGED
vision_agent/tools/meta_tools.py
CHANGED
@@ -1,5 +1,6 @@
 import os
 import subprocess
+from uuid import UUID
 from pathlib import Path
 from typing import Any, Dict, List, Union
 
@@ -7,6 +8,9 @@ import vision_agent as va
 from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
+from vision_agent.utils.image_utils import convert_to_b64
+from vision_agent.clients.landing_public_api import LandingPublicAPI
+from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
 
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
 
@@ -385,6 +389,46 @@ def get_tool_descriptions() -> str:
     return TOOL_DESCRIPTIONS
 
 
+def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
+    """'florencev2_fine_tuning' is a tool that fine-tunes florencev2 to be able
+    to detect objects in an image based on a given dataset. It returns the
+    fine-tuning job id.
+
+    Parameters:
+        bboxes (List[BboxInput]): A list of BboxInput containing the
+            image path, labels and bounding boxes.
+        task (PromptTask): The florencev2 fine-tuning task. The options are
+            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
+
+    Returns:
+        UUID: The fine-tuning job id; this id will be used to retrieve the
+            fine-tuned model.
+
+    Example
+    -------
+    >>> fine_tuning_job_id = florencev2_fine_tuning(
+        [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
+         {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
+        "OBJECT_DETECTION"
+    )
+    """
+    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
+    task_input = PromptTask[task]
+    fine_tuning_request = [
+        BboxInputBase64(
+            image=convert_to_b64(bbox_input.image_path),
+            filename=bbox_input.image_path.split("/")[-1],
+            labels=bbox_input.labels,
+            bboxes=bbox_input.bboxes,
+        )
+        for bbox_input in bboxes_input
+    ]
+    landing_api = LandingPublicAPI()
+    return landing_api.launch_fine_tuning_job(
+        "florencev2", task_input, fine_tuning_request
+    )
+
+
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -398,5 +442,6 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         search_dir,
         search_file,
         find_file,
+        florencev2_fine_tuning,
     ]
 )
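Putting the pieces together, a call matching the docstring above would look like this sketch ('filename.png' is a placeholder path):

    from vision_agent.tools.meta_tools import florencev2_fine_tuning

    # The task string is resolved by enum member name (PromptTask[task]), so it
    # must be "CAPTION", "CAPTION_TO_PHRASE_GROUNDING" or "OBJECT_DETECTION".
    job_id = florencev2_fine_tuning(
        [
            {"image_path": "filename.png", "labels": ["screw"], "bboxes": [[370, 30, 560, 290]]},
            {"image_path": "filename.png", "labels": ["screw"], "bboxes": [[120, 0, 300, 170]]},
        ],
        "OBJECT_DETECTION",
    )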
vision_agent/tools/meta_tools_types.py
ADDED
@@ -0,0 +1,30 @@
+from enum import Enum
+from typing import List, Tuple
+
+from pydantic import BaseModel
+
+
+class BboxInput(BaseModel):
+    image_path: str
+    labels: List[str]
+    bboxes: List[Tuple[int, int, int, int]]
+
+
+class BboxInputBase64(BaseModel):
+    image: str
+    filename: str
+    labels: List[str]
+    bboxes: List[Tuple[int, int, int, int]]
+
+
+class PromptTask(str, Enum):
+    """
+    Valid task prompt options for the Florencev2 model.
+    """
+
+    CAPTION = "<CAPTION>"
+    """"""
+    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
+    """"""
+    OBJECT_DETECTION = "<OD>"
+    """"""
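A short sketch of how these types are meant to be used together; note that PromptTask is indexed by member name in meta_tools.py, while the wire format uses the Florence-2 task token stored in the value:

    from vision_agent.tools.meta_tools_types import BboxInput, PromptTask

    raw = {"image_path": "filename.png", "labels": ["screw"], "bboxes": [[370, 30, 560, 290]]}
    validated = BboxInput.model_validate(raw)  # pydantic v2 validation
    task = PromptTask["OBJECT_DETECTION"]      # lookup by member name
    print(task.value)                          # "<OD>", the token sent to the API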
vision_agent/tools/tool_utils.py
CHANGED
@@ -16,7 +16,8 @@ from vision_agent.utils.type_defs import LandingaiAPIKey
 
 _LOGGER = logging.getLogger(__name__)
 _LND_API_KEY = LandingaiAPIKey().api_key
-_LND_API_URL = "https://api.landing.ai/v1/agent"
+_LND_API_URL = "https://api.landing.ai/v1/agent/model"
+_LND_API_URL_v2 = "https://api.landing.ai/v1/tools"
 
 
 class ToolCallTrace(BaseModel):
@@ -27,13 +28,13 @@ class ToolCallTrace(BaseModel):
 
 
 def send_inference_request(
-    payload: Dict[str, Any], endpoint_name: str
+    payload: Dict[str, Any], endpoint_name: str, v2: bool = False
 ) -> Dict[str, Any]:
     try:
         if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
             payload["runtime_tag"] = runtime_tag
 
-        url = f"{_LND_API_URL}/{endpoint_name}"
+        url = f"{_LND_API_URL_v2 if v2 else _LND_API_URL}/{endpoint_name}"
         if "TOOL_ENDPOINT_URL" in os.environ:
             url = os.environ["TOOL_ENDPOINT_URL"]
 
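The new v2 flag only changes which base URL is used; a sketch of the resulting routing (build_url is a hypothetical helper mirroring the line inside send_inference_request; TOOL_ENDPOINT_URL still overrides both):

    _LND_API_URL = "https://api.landing.ai/v1/agent/model"
    _LND_API_URL_v2 = "https://api.landing.ai/v1/tools"

    def build_url(endpoint_name: str, v2: bool = False) -> str:
        # Mirrors the URL selection inside send_inference_request.
        return f"{_LND_API_URL_v2 if v2 else _LND_API_URL}/{endpoint_name}"

    assert build_url("owlv2", v2=True) == "https://api.landing.ai/v1/tools/owlv2"
    assert build_url("legacy-tool") == "https://api.landing.ai/v1/agent/model/legacy-tool"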
vision_agent/tools/tools.py
CHANGED
@@ -2,23 +2,23 @@ import io
 import json
 import logging
 import tempfile
-from importlib import resources
 from pathlib import Path
+from importlib import resources
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 
 import cv2
-import numpy as np
 import requests
+import numpy as np
+from pytube import YouTube  # type: ignore
 from moviepy.editor import ImageSequenceClip
 from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
-from pytube import YouTube  # type: ignore
 
 from vision_agent.tools.tool_utils import (
+    send_inference_request,
     get_tool_descriptions,
     get_tool_documentation,
     get_tools_df,
-    send_inference_request,
 )
 from vision_agent.utils import extract_frames_from_video
 from vision_agent.utils.execute import FileSerializer, MimeType
@@ -126,7 +126,6 @@ def owl_v2(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.10,
-    iou_threshold: float = 0.10,
 ) -> List[Dict[str, Any]]:
     """'owl_v2' is a tool that can detect and count multiple objects given a text
     prompt such as category names or referring expressions. The categories in text prompt
@@ -138,8 +137,6 @@ def owl_v2(
         image (np.ndarray): The image to ground the prompt to.
         box_threshold (float, optional): The threshold for the box detection. Defaults
             to 0.10.
-        iou_threshold (float, optional): The threshold for the Intersection over Union
-            (IoU). Defaults to 0.10.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
@@ -159,22 +156,22 @@
     image_size = image.shape[:2]
     image_b64 = convert_to_b64(image)
     request_data = {
-        "
+        "prompts": prompt.split("."),
         "image": image_b64,
-        "
-        "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
+        "confidence": box_threshold,
         "function_name": "owl_v2",
     }
-    data: Dict[str, Any] = send_inference_request(request_data, "
+    data: Dict[str, Any] = send_inference_request(request_data, "owlv2", v2=True)
     return_data = []
-
-
-
-
-
-
-
-
+    if data is not None:
+        for elt in data:
+            return_data.append(
+                {
+                    "bbox": normalize_bbox(elt["bbox"], image_size),  # type: ignore
+                    "label": elt["label"],  # type: ignore
+                    "score": round(elt["score"], 2),  # type: ignore
+                }
+            )
     return return_data
 
 
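From the caller's side the change is invisible apart from the dropped iou_threshold parameter; a usage sketch (placeholder image path, and assuming owl_v2 is still re-exported from vision_agent.tools):

    import cv2
    from vision_agent.tools import owl_v2

    image = cv2.imread("image.jpg")  # placeholder path
    detections = owl_v2("car, dinosaur", image, box_threshold=0.10)
    # Each entry looks like {"score": 0.99, "label": "car", "bbox": [x1, y1, x2, y2]}
    # with coordinates normalized to the image size by normalize_bbox.
    for det in detections:
        print(det["label"], det["score"], det["bbox"])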
@@ -367,11 +364,10 @@ def loca_zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "zero_shot_counting",
         "function_name": "loca_zero_shot_counting",
     }
-    resp_data = send_inference_request(data, "
-    resp_data["heat_map"] = np.array(
+    resp_data = send_inference_request(data, "loca", v2=True)
+    resp_data["heat_map"] = np.array(resp_data["heat_map"][0]).astype(np.uint8)
     return resp_data
 
 
@@ -397,17 +393,15 @@ def loca_visual_prompt_counting(
 
     image_size = get_image_size(image)
     bbox = visual_prompt["bbox"]
-    bbox_str = ", ".join(map(str, denormalize_bbox(bbox, image_size)))
     image_b64 = convert_to_b64(image)
 
     data = {
         "image": image_b64,
-        "
-        "tool": "few_shot_counting",
+        "bbox": list(map(int, denormalize_bbox(bbox, image_size))),
         "function_name": "loca_visual_prompt_counting",
     }
-    resp_data = send_inference_request(data, "
-    resp_data["heat_map"] = np.array(
+    resp_data = send_inference_request(data, "loca", v2=True)
+    resp_data["heat_map"] = np.array(resp_data["heat_map"][0]).astype(np.uint8)
     return resp_data
 
 
@@ -432,13 +426,12 @@ def florencev2_roberta_vqa(prompt: str, image: np.ndarray) -> str:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "
-        "tool": "image_question_answering_with_context",
+        "question": prompt,
         "function_name": "florencev2_roberta_vqa",
     }
 
-    answer = send_inference_request(data, "
-    return answer
+    answer = send_inference_request(data, "florence2-qa", v2=True)
+    return answer  # type: ignore
 
 
 def git_vqa_v2(prompt: str, image: np.ndarray) -> str:
@@ -544,17 +537,16 @@ def vit_nsfw_classification(image: np.ndarray) -> Dict[str, Any]:
     Example
     -------
     >>> vit_nsfw_classification(image)
-    {"
+    {"label": "normal", "scores": 0.68},
     """
 
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "nsfw_image_classification",
         "function_name": "vit_nsfw_classification",
     }
-    resp_data = send_inference_request(data, "
-    resp_data["
+    resp_data = send_inference_request(data, "nsfw-classification", v2=True)
+    resp_data["score"] = round(resp_data["score"], 4)
     return resp_data
 
 
@@ -603,21 +595,21 @@ def florencev2_image_caption(image: np.ndarray, detail_caption: bool = True) ->
     'This image contains a cat sitting on a table with a bowl of milk.'
     """
     image_b64 = convert_to_b64(image)
+    task = "<MORE_DETAILED_CAPTION>" if detail_caption else "<DETAILED_CAPTION>"
     data = {
         "image": image_b64,
-        "
-        "detail_caption": detail_caption,
+        "task": task,
         "function_name": "florencev2_image_caption",
     }
 
-    answer = send_inference_request(data, "
-    return answer[
+    answer = send_inference_request(data, "florence2", v2=True)
+    return answer[task]  # type: ignore
 
 
-def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
-    """'florencev2_object_detection' is a tool that can detect
-
-    as labels and their location as bounding boxes.
+def florencev2_object_detection(image: np.ndarray, prompt: str) -> List[Dict[str, Any]]:
+    """'florencev2_object_detection' is a tool that can detect objects given a text
+    prompt such as a phrase or class names separated by commas. It returns a list of
+    detected objects as labels and their location as bounding boxes with score of 1.0.
 
     Parameters:
         image (np.ndarray): The image to used to detect objects
@@ -631,29 +623,30 @@ def florencev2_object_detection(image: np.ndarray) -> List[Dict[str, Any]]:
 
     Example
     -------
-    >>> florencev2_object_detection(image)
+    >>> florencev2_object_detection(image, 'person looking at a coyote')
     [
-        {'score': 1.0, 'label': '
-        {'score': 1.0, 'label': '
-        {'score': 1.0, 'label': 'person', 'bbox': [0.34, 0.21, 0.85, 0.5},
+        {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+        {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
     ]
     """
     image_size = image.shape[:2]
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "
+        "task": "<CAPTION_TO_PHRASE_GROUNDING>",
+        "prompt": prompt,
         "function_name": "florencev2_object_detection",
     }
 
-
+    detections = send_inference_request(data, "florence2", v2=True)
+    detections = detections["<CAPTION_TO_PHRASE_GROUNDING>"]
     return_data = []
-    for i in range(len(
+    for i in range(len(detections["bboxes"])):
         return_data.append(
             {
-                "score":
-                "label":
-                "bbox": normalize_bbox(
+                "score": 1.0,
+                "label": detections["labels"][i],
+                "bbox": normalize_bbox(detections["bboxes"][i], image_size),
             }
         )
     return return_data
@@ -742,13 +735,16 @@ def depth_anything_v2(image: np.ndarray) -> np.ndarray:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "generate_depth",
         "function_name": "depth_anything_v2",
     }
 
-
-
-
+    depth_map = send_inference_request(data, "depth-anything-v2", v2=True)
+    depth_map_np = np.array(depth_map["map"])
+    depth_map_np = (depth_map_np - depth_map_np.min()) / (
+        depth_map_np.max() - depth_map_np.min()
+    )
+    depth_map_np = (255 * depth_map_np).astype(np.uint8)
+    return depth_map_np
 
 
 def generate_soft_edge_image(image: np.ndarray) -> np.ndarray:
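The new post-processing is a plain min-max rescale to 8-bit; the same arithmetic in isolation (the input values are made up):

    import numpy as np

    depth = np.array([[0.5, 1.2], [2.0, 3.5]])  # hypothetical raw depth values
    rescaled = (depth - depth.min()) / (depth.max() - depth.min())  # now in [0, 1]
    depth_u8 = (255 * rescaled).astype(np.uint8)                    # now in [0, 255]
    print(depth_u8)  # [[  0  59] [127 255]]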
@@ -839,12 +835,11 @@ def generate_pose_image(image: np.ndarray) -> np.ndarray:
     image_b64 = convert_to_b64(image)
     data = {
         "image": image_b64,
-        "tool": "generate_pose",
         "function_name": "generate_pose_image",
     }
 
-
-    return_data = np.array(b64_to_pil(
+    pos_img = send_inference_request(data, "pose-detector", v2=True)
+    return_data = np.array(b64_to_pil(pos_img["data"]).convert("RGB"))
     return return_data
 
 
@@ -1063,7 +1058,6 @@ def save_video(
     if fps <= 0:
         _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
         fps = 4
-
     with ImageSequenceClip(frames, fps=fps) as video:
         if output_video_path:
             f = open(output_video_path, "wb")
@@ -1254,7 +1248,6 @@ TOOLS = [
     loca_visual_prompt_counting,
     florencev2_roberta_vqa,
     florencev2_image_caption,
-    florencev2_object_detection,
     detr_segmentation,
     depth_anything_v2,
     generate_soft_edge_image,
vision_agent/utils/execute.py
CHANGED
@@ -209,7 +209,7 @@ class Result:
         return formats
 
     @staticmethod
-    def from_e2b_result(result: E2BResult) -> "Result":
+    def from_e2b_result(result: E2BResult) -> "Result":
         """
         Creates a Result object from an E2BResult object.
         """
@@ -361,7 +361,7 @@ class Execution(BaseModel):
     )
 
     @staticmethod
-    def from_e2b_execution(exec: E2BExecution) -> "Execution":
+    def from_e2b_execution(exec: E2BExecution) -> "Execution":
         """Creates an Execution object from an E2BResult object."""
         return Execution(
             results=[Result.from_e2b_result(res) for res in exec.results],
vision_agent/utils/type_defs.py
CHANGED
@@ -14,7 +14,7 @@ class LandingaiAPIKey(BaseSettings):
     """
 
     api_key: str = Field(
-        default="
+        default="land_sk_zKvyPcPV2bVoq7q87KwduoerAxuQpx33DnqP8M1BliOCiZOSoI",
        alias="LANDINGAI_API_KEY",
         description="The API key of LandingAI.",
     )
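Since LandingaiAPIKey is a pydantic-settings class, the key baked into the default above is only a fallback; a sketch of the override order (placeholder key):

    import os
    from vision_agent.utils.type_defs import LandingaiAPIKey

    # The LANDINGAI_API_KEY environment variable (the field's alias) takes
    # precedence over the default in the Field definition.
    os.environ["LANDINGAI_API_KEY"] = "land_sk_my_own_key"  # placeholder
    assert LandingaiAPIKey().api_key == "land_sk_my_own_key"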
{vision_agent-0.2.98.dist-info → vision_agent-0.2.100.dist-info}/RECORD
CHANGED
@@ -2,28 +2,32 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
 vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
 vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
-vision_agent/agent/vision_agent.py,sha256=
+vision_agent/agent/vision_agent.py,sha256=U7VqUR-Io0xkGHpcF03Kq87Y0YQIdZQGqxuXdwjQzgk,8441
 vision_agent/agent/vision_agent_coder.py,sha256=N8oVwfxrz6emHlucJC5hGQvkA9cQWW2sMLFtshwLdI8,30309
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
 vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
+vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
+vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
 vision_agent/lmm/lmm.py,sha256=KcS6h-8whGFmwt7t4LNlj0hZ4U-rBojYBLKLmrMsF48,15075
 vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
-vision_agent/tools/__init__.py,sha256=
-vision_agent/tools/meta_tools.py,sha256=
+vision_agent/tools/__init__.py,sha256=e8q4lYD3acyX1ikMKLz4nlaAR_WZpBAIyq2CGYOYnvM,1906
+vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
+vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tool_utils.py,sha256=
-vision_agent/tools/tools.py,sha256=
+vision_agent/tools/tool_utils.py,sha256=Zg2aP58UqVRUlEtekWwSwGK5Z5c0eyNrKOvAfEyY4Ik,4694
+vision_agent/tools/tools.py,sha256=jWWioqBNsoNaGa8WKVldKBk_y9ZD1shO52kSE-26MFc,43111
 vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
 vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
-vision_agent/utils/execute.py,sha256=
+vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
 vision_agent/utils/image_utils.py,sha256=y69wtNla0xHZ1h1x0-vv7nOyKUq69jtjSJBiDCn6EM0,7703
 vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
-vision_agent/utils/type_defs.py,sha256=
+vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
-vision_agent-0.2.
+vision_agent-0.2.100.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.100.dist-info/METADATA,sha256=aoWhGb6-cKJpae77m_JsrUP7ljLz1LHVnmYLHSA7-U0,10729
+vision_agent-0.2.100.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.100.dist-info/RECORD,,
{vision_agent-0.2.98.dist-info → vision_agent-0.2.100.dist-info}/LICENSE
File without changes

{vision_agent-0.2.98.dist-info → vision_agent-0.2.100.dist-info}/WHEEL
File without changes