PyPI - vision-agent - Versions diffs - 0.2.109__py3-none-any.whl → 0.2.111__py3-none-any.whl - Mend

vision-agent 0.2.109__py3-none-any.whl → 0.2.111__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

vision_agent/agent/vision_agent.py +1 -1
vision_agent/clients/http.py +15 -3
vision_agent/clients/landing_public_api.py +13 -1
vision_agent/tools/__init__.py +7 -3
vision_agent/tools/meta_tools.py +2 -46
vision_agent/tools/tool_utils.py +30 -7
vision_agent/tools/tools.py +126 -0
vision_agent/tools/tools_types.py +84 -0
vision_agent/utils/exceptions.py +13 -0
{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/METADATA +1 -1
{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/RECORD +13 -13
vision_agent/tools/meta_tools_types.py +0 -30
{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/LICENSE +0 -0
{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/WHEEL +0 -0

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
     ]
     @staticmethod

vision_agent/clients/http.py CHANGED Viewed

@@ -4,7 +4,6 @@ from typing import Any, Dict, Optional
 from requests import Session
 from requests.adapters import HTTPAdapter
-from requests.exceptions import ConnectionError, RequestException, Timeout
 _LOGGER = logging.getLogger(__name__)
@@ -38,9 +37,22 @@ class BaseHTTP:
             response.raise_for_status()
             result: Dict[str, Any] = response.json()
             _LOGGER.info(json.dumps(result))
-        except (ConnectionError, Timeout, RequestException) as err:
-            _LOGGER.warning(f"Error: {err}.")
         except json.JSONDecodeError:
             resp_text = response.text
             _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
+        return result
+    def get(self, url: str) -> Dict[str, Any]:
+        """Issue a GET request against the base endpoint and return the JSON body.
+
+        Args:
+            url: Path appended to the base endpoint to form the request URL.
+
+        Returns:
+            The decoded JSON response.
+
+        Raises:
+            requests.HTTPError: Propagated from `raise_for_status()` when the
+                response carries an error status code.
+            json.JSONDecodeError: Re-raised after logging when the response body
+                cannot be decoded as JSON.
+        """
+        formatted_url = f"{self._base_endpoint}/{url}"
+        # A GET carries no payload, so do not log it as "sending data".
+        _LOGGER.info(f"Sending GET request to {formatted_url}")
+        try:
+            response = self._session.get(url=formatted_url, timeout=self._TIMEOUT)
+            response.raise_for_status()
+            result: Dict[str, Any] = response.json()
+            _LOGGER.info(json.dumps(result))
+        except json.JSONDecodeError:
+            resp_text = response.text
+            _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
         return result

vision_agent/clients/landing_public_api.py CHANGED Viewed

@@ -2,9 +2,12 @@ import os
 from uuid import UUID
 from typing import List
+from requests.exceptions import HTTPError
 from vision_agent.clients.http import BaseHTTP
 from vision_agent.utils.type_defs import LandingaiAPIKey
-from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
+from vision_agent.utils.exceptions import FineTuneModelNotFound
+from vision_agent.tools.tools_types import BboxInputBase64, PromptTask, JobStatus
 class LandingPublicAPI(BaseHTTP):
@@ -24,3 +27,12 @@ class LandingPublicAPI(BaseHTTP):
         }
         response = self.post(url, payload=data)
         return UUID(response["jobId"])
+    def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
+        """Return the current status of a fine-tuning job.
+
+        Args:
+            job_id: Identifier of the fine-tuning job to look up.
+
+        Returns:
+            The status reported by the service, as a `JobStatus` member.
+
+        Raises:
+            FineTuneModelNotFound: If the service answers with HTTP 404.
+            HTTPError: For any other HTTP error status.
+        """
+        url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
+        try:
+            get_job = self.get(url)
+        except HTTPError as err:
+            if err.response is not None and err.response.status_code == 404:
+                raise FineTuneModelNotFound() from err
+            # Any other HTTP failure must propagate: swallowing it would leave
+            # `get_job` unbound and fail below with an UnboundLocalError.
+            raise
+        return JobStatus(get_job["status"])

vision_agent/tools/__init__.py CHANGED Viewed

@@ -1,12 +1,15 @@
 from typing import Callable, List, Optional
-from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
+from .meta_tools import (
+    META_TOOL_DOCSTRING,
+)
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
     TOOL_DESCRIPTIONS,
     TOOL_DOCSTRING,
     TOOLS,
     TOOLS_DF,
+    TOOLS_INFO,
     UTILITIES_DOCSTRING,
     blip_image_caption,
     clip,
@@ -52,15 +55,16 @@ def register_tool(imports: Optional[List] = None) -> Callable:
     def decorator(tool: Callable) -> Callable:
         import inspect
-        from .tools import get_tool_descriptions, get_tools_df
+        from .tools import get_tool_descriptions, get_tools_df, get_tools_info
-        global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING
+        global TOOLS, TOOLS_DF, TOOL_DESCRIPTIONS, TOOL_DOCSTRING, TOOLS_INFO
         if tool not in TOOLS:
             TOOLS.append(tool)
             TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
             TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
             TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
+            TOOLS_INFO = get_tools_info(TOOLS)  # type: ignore
             globals()[tool.__name__] = tool
             if imports is not None:

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 import subprocess
-from uuid import UUID
 from pathlib import Path
 from typing import Any, Dict, List, Union
@@ -8,9 +7,7 @@ import vision_agent as va
 from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
-from vision_agent.utils.image_utils import convert_to_b64
-from vision_agent.clients.landing_public_api import LandingPublicAPI
-from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
@@ -384,51 +381,11 @@ def edit_file(file_path: str, start: int, end: int, content: str) -> str:
 def get_tool_descriptions() -> str:
     """Returns a description of all the tools that `generate_vision_code` has access to.
-    Helpful for answerings questions about what types of vision tasks you can do with
+    Helpful for answering questions about what types of vision tasks you can do with
     `generate_vision_code`."""
     return TOOL_DESCRIPTIONS
-def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
-    """'florencev2_fine_tuning' is a tool that fine-tune florencev2 to be able
-    to detect objects in an image based on a given dataset. It returns the fine
-    tuning job id.
-    Parameters:
-        bboxes (List[BboxInput]): A list of BboxInput containing the
-            image path, labels and bounding boxes.
-        task (PromptTask): The florencev2 fine-tuning task. The options are
-            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
-    Returns:
-        UUID: The fine tuning job id, this id will used to retrieve the fine
-            tuned model.
-    Example
-    -------
-        >>> fine_tuning_job_id = florencev2_fine_tuning(
-            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
-             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
-             "OBJECT_DETECTION"
-        )
-    """
-    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
-    task_input = PromptTask[task]
-    fine_tuning_request = [
-        BboxInputBase64(
-            image=convert_to_b64(bbox_input.image_path),
-            filename=bbox_input.image_path.split("/")[-1],
-            labels=bbox_input.labels,
-            bboxes=bbox_input.bboxes,
-        )
-        for bbox_input in bboxes_input
-    ]
-    landing_api = LandingPublicAPI()
-    return landing_api.launch_fine_tuning_job(
-        "florencev2", task_input, fine_tuning_request
-    )
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -442,6 +399,5 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         search_dir,
         search_file,
         find_file,
-        florencev2_fine_tuning,
     ]
 )

vision_agent/tools/tool_utils.py CHANGED Viewed

@@ -15,9 +15,10 @@ from vision_agent.utils.execute import Error, MimeType
 from vision_agent.utils.type_defs import LandingaiAPIKey
 _LOGGER = logging.getLogger(__name__)
-_LND_API_KEY = LandingaiAPIKey().api_key
-_LND_API_URL = "https://api.landing.ai/v1/agent/model"
-_LND_API_URL_v2 = "https://api.landing.ai/v1/tools"
+_LND_API_KEY = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
+_LND_BASE_URL = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
+_LND_API_URL = f"{_LND_BASE_URL}/v1/agent/model"
+_LND_API_URL_v2 = f"{_LND_BASE_URL}/v1/tools"
 class ToolCallTrace(BaseModel):
@@ -28,8 +29,13 @@ class ToolCallTrace(BaseModel):
 def send_inference_request(
-    payload: Dict[str, Any], endpoint_name: str, v2: bool = False
+    payload: Dict[str, Any],
+    endpoint_name: str,
+    v2: bool = False,
+    metadata_payload: Optional[Dict[str, Any]] = None,
 ) -> Dict[str, Any]:
+    # TODO: runtime_tag and function_name should live in metadata_payload; for now
+    # they are still carried in the service payload
     try:
         if runtime_tag := os.environ.get("RUNTIME_TAG", ""):
             payload["runtime_tag"] = runtime_tag
@@ -62,9 +68,13 @@ def send_inference_request(
                 traceback_raw=[],
             )
             _LOGGER.error(f"Request failed: {res.status_code} {res.text}")
-            raise RemoteToolCallFailed(
-                payload["function_name"], res.status_code, res.text
-            )
+            # TODO: function_name should be in metadata_payload
+            function_name = "unknown"
+            if "function_name" in payload:
+                function_name = payload["function_name"]
+            elif metadata_payload is not None and "function_name" in metadata_payload:
+                function_name = metadata_payload["function_name"]
+            raise RemoteToolCallFailed(function_name, res.status_code, res.text)
         resp = res.json()
         tool_call_trace.response = resp
@@ -142,3 +152,16 @@ def get_tools_df(funcs: List[Callable[..., Any]]) -> pd.DataFrame:
         data["doc"].append(doc)
     return pd.DataFrame(data)  # type: ignore
+def get_tools_info(funcs: List[Callable[..., Any]]) -> Dict[str, str]:
+    """Map each tool's name to its signature followed by its docstring.
+
+    The value has the form "<name><signature>:\\n<docstring>"; a tool without a
+    docstring contributes an empty string after the newline.
+    """
+    return {
+        tool.__name__: f"{tool.__name__}{inspect.signature(tool)}:\n{tool.__doc__ or ''}"
+        for tool in funcs
+    }

vision_agent/tools/tools.py CHANGED Viewed

@@ -2,6 +2,7 @@ import io
 import json
 import logging
 import tempfile
+from uuid import UUID
 from pathlib import Path
 from importlib import resources
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
@@ -19,7 +20,9 @@ from vision_agent.tools.tool_utils import (
     get_tool_descriptions,
     get_tool_documentation,
     get_tools_df,
+    get_tools_info,
 )
+from vision_agent.utils.exceptions import FineTuneModelIsNotReady
 from vision_agent.utils import extract_frames_from_video
 from vision_agent.utils.execute import FileSerializer, MimeType
 from vision_agent.utils.image_utils import (
@@ -31,6 +34,15 @@ from vision_agent.utils.image_utils import (
     convert_quad_box_to_bbox,
     rle_decode,
 )
+from vision_agent.tools.tools_types import (
+    BboxInput,
+    BboxInputBase64,
+    PromptTask,
+    Florencev2FtRequest,
+    FineTuning,
+    JobStatus,
+)
+from vision_agent.clients.landing_public_api import LandingPublicAPI
 register_heif_opener()
@@ -1285,6 +1297,119 @@ def overlay_heat_map(
     return np.array(combined)
+# TODO: add this function to the imports so that it is picked up by the agent
+def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
+    """'florencev2_fine_tuning' is a tool that fine-tunes florencev2 to be able
+    to detect objects in an image based on a given dataset. It returns the fine
+    tuning job id.
+    Parameters:
+        bboxes (List[Dict[str, Any]]): A list of dictionaries, each one validated
+            as a BboxInput and containing the image path, labels and bounding
+            boxes.
+        task (str): The name of the florencev2 fine-tuning task. The options are
+            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
+    Returns:
+        UUID: The fine tuning job id, this id will be used to retrieve the fine
+            tuned model.
+    Raises:
+        KeyError: If task is not one of the listed option names.
+    Example
+    -------
+        >>> fine_tuning_job_id = florencev2_fine_tuning(
+            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
+             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
+             "OBJECT_DETECTION"
+        )
+    """
+    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
+    task_input = PromptTask[task]
+    fine_tuning_request = [
+        BboxInputBase64(
+            image=convert_to_b64(bbox_input.image_path),
+            # Path.name honours the platform's path separator, unlike a manual
+            # split on "/".
+            filename=Path(bbox_input.image_path).name,
+            labels=bbox_input.labels,
+            bboxes=bbox_input.bboxes,
+        )
+        for bbox_input in bboxes_input
+    ]
+    landing_api = LandingPublicAPI()
+    return landing_api.launch_fine_tuning_job(
+        "florencev2", task_input, fine_tuning_request
+    )
+# TODO: add this function to the imports so that it is picked up by the agent
+def florencev2_fine_tuned_object_detection(
+    image: np.ndarray, prompt: str, model_id: UUID, task: str
+) -> List[Dict[str, Any]]:
+    """'florencev2_fine_tuned_object_detection' is a tool that uses a fine tuned model
+    to detect objects given a text prompt such as a phrase or class names separated by
+    commas. It returns a list of detected objects as labels and their location as
+    bounding boxes with score of 1.0.
+    Parameters:
+        image (np.ndarray): The image used to detect objects.
+        prompt (str): The prompt to help find objects in the image. It is ignored
+            when the task is OBJECT_DETECTION.
+        model_id (UUID): The fine-tuned model id.
+        task (str): The name of the florencev2 fine-tuning task. The options are
+            CAPTION, CAPTION_TO_PHRASE_GROUNDING and OBJECT_DETECTION.
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
+            bounding box of the detected objects with normalized coordinates between 0
+            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
+            top-left and xmax and ymax are the coordinates of the bottom-right of the
+            bounding box. The scores are always 1.0 and cannot be thresholded
+    Raises:
+        FineTuneModelIsNotReady: If the fine-tuning job status is not SUCCEEDED.
+        KeyError: If task is not one of the listed option names.
+    Example
+    -------
+        >>> florencev2_fine_tuned_object_detection(
+            image,
+            'person looking at a coyote',
+            UUID("381cd5f9-5dc4-472d-9260-f3bb89d31f83"),
+            "CAPTION_TO_PHRASE_GROUNDING"
+        )
+        [
+            {'score': 1.0, 'label': 'person', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+            {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5]},
+        ]
+    """
+    # check if job succeeded first
+    landing_api = LandingPublicAPI()
+    status = landing_api.check_fine_tuning_job(model_id)
+    if status is not JobStatus.SUCCEEDED:
+        raise FineTuneModelIsNotReady(
+            f"Fine-tuning job {model_id} has status {status.value}, not SUCCEEDED."
+        )
+    # Keep the parsed enum under its own name instead of rebinding the `task`
+    # string argument to a different type.
+    task_input = PromptTask[task]
+    if task_input is PromptTask.OBJECT_DETECTION:
+        prompt = ""
+    data_obj = Florencev2FtRequest(
+        image=convert_to_b64(image),
+        task=task_input,
+        tool="florencev2_fine_tuning",
+        prompt=prompt,
+        fine_tuning=FineTuning(job_id=model_id),
+    )
+    data = data_obj.model_dump(by_alias=True)
+    metadata_payload = {"function_name": "florencev2_fine_tuned_object_detection"}
+    detections = send_inference_request(
+        data, "tools", v2=False, metadata_payload=metadata_payload
+    )
+    # The response is keyed by the task's string value.
+    detections = detections[task_input.value]
+    image_size = image.shape[:2]
+    # Labels are still looked up by index so a response with fewer labels than
+    # boxes fails loudly instead of being silently truncated.
+    return [
+        {
+            "score": 1.0,
+            "label": detections["labels"][i],
+            "bbox": normalize_bbox(bbox, image_size),
+        }
+        for i, bbox in enumerate(detections["bboxes"])
+    ]
 TOOLS = [
     owl_v2,
     grounding_sam,
@@ -1317,6 +1442,7 @@ TOOLS = [
 TOOLS_DF = get_tools_df(TOOLS)  # type: ignore
 TOOL_DESCRIPTIONS = get_tool_descriptions(TOOLS)  # type: ignore
 TOOL_DOCSTRING = get_tool_documentation(TOOLS)  # type: ignore
+TOOLS_INFO = get_tools_info(TOOLS)  # type: ignore
 UTILITIES_DOCSTRING = get_tool_documentation(
     [
         save_json,

vision_agent/tools/tools_types.py ADDED Viewed

@@ -0,0 +1,84 @@
+from uuid import UUID
+from enum import Enum
+from typing import List, Tuple, Optional
+from pydantic import BaseModel, ConfigDict, Field, field_serializer, SerializationInfo
+class BboxInput(BaseModel):
+    """One annotated image, referenced by path, used as fine-tuning input."""
+    # Path of the image file.
+    image_path: str
+    # Label names for the image's boxes; presumably one per entry in `bboxes` —
+    # TODO confirm.
+    labels: List[str]
+    # Boxes as four integers each; coordinate order and units are not stated
+    # here — presumably pixel (xmin, ymin, xmax, ymax), TODO confirm.
+    bboxes: List[Tuple[int, int, int, int]]
+class BboxInputBase64(BaseModel):
+    """One annotated image carried inline as an encoded string plus its file name."""
+    # Encoded image content (base64, going by the class name).
+    image: str
+    # File name associated with the image.
+    filename: str
+    # Label names for the image's boxes; presumably one per entry in `bboxes` —
+    # TODO confirm.
+    labels: List[str]
+    # Boxes as four integers each; coordinate order and units are not stated
+    # here — presumably pixel (xmin, ymin, xmax, ymax), TODO confirm.
+    bboxes: List[Tuple[int, int, int, int]]
+class PromptTask(str, Enum):
+    """
+    Valid task prompt options for the Florencev2 model.
+
+    Each member's value is the angle-bracketed task string, e.g. "<OD>".
+    """
+    # NOTE: the empty string literals after each member are placeholder
+    # attribute docstrings; they are plain expression statements with no effect.
+    CAPTION = "<CAPTION>"
+    """"""
+    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
+    """"""
+    OBJECT_DETECTION = "<OD>"
+    """"""
+class FineTuning(BaseModel):
+    """Reference to a fine-tuning job, exposed under the `jobId` alias."""
+    # Accept the field name `job_id` as well as its alias when populating.
+    model_config = ConfigDict(populate_by_name=True)
+    # Fine-tuning job identifier; named `jobId` when dumped by alias.
+    job_id: UUID = Field(alias="jobId")
+    @field_serializer("job_id")
+    def serialize_job_id(self, job_id: UUID, _info: SerializationInfo) -> str:
+        """Serialize the job id as its string form."""
+        return str(job_id)
+class Florencev2FtRequest(BaseModel):
+    """Request payload for running Florencev2 with an optional fine-tuned model."""
+    # Accept field names as well as aliases when populating.
+    model_config = ConfigDict(populate_by_name=True)
+    # Image content as a string; presumably base64-encoded — TODO confirm.
+    image: str
+    # Florencev2 task to run.
+    task: PromptTask
+    # Name of the tool making the request.
+    tool: str
+    # Text prompt; defaults to an empty string.
+    prompt: Optional[str] = ""
+    # Fine-tuning job reference, named `fineTuning` when dumped by alias;
+    # defaults to None.
+    fine_tuning: Optional[FineTuning] = Field(None, alias="fineTuning")
+class JobStatus(str, Enum):
+    """The status of a fine-tuning job.
+    CREATED:
+        The job has been created and is waiting to be scheduled to run.
+    STARTING:
+        The job has started running, but has not yet entered the training phase.
+    TRAINING:
+        The job is training a model.
+    EVALUATING:
+        The job is evaluating the model and computing metrics.
+    PUBLISHING:
+        The job is exporting the artifact(s) to an external directory (s3 or local).
+    SUCCEEDED:
+        The job has finished, including training, evaluation and publishing the
+        artifact(s).
+    FAILED:
+        The job has failed for some internal reason; it can be due to resource
+        issues or the code itself.
+    STOPPED:
+        The job has been stopped by the user locally or in the cloud.
+    """
+    CREATED = "CREATED"
+    STARTING = "STARTING"
+    TRAINING = "TRAINING"
+    EVALUATING = "EVALUATING"
+    PUBLISHING = "PUBLISHING"
+    SUCCEEDED = "SUCCEEDED"
+    FAILED = "FAILED"
+    STOPPED = "STOPPED"

vision_agent/utils/exceptions.py CHANGED Viewed

@@ -49,3 +49,16 @@ class RemoteSandboxClosedError(RemoteSandboxError):
     """
     is_retryable = True
+class FineTuneModelIsNotReady(Exception):
+    """Exception raised when the fine-tuned model is not ready.
+    If this is raised, it's recommended to wait 5 seconds before trying to use
+    the model again.
+    """
+class FineTuneModelNotFound(Exception):
+    """Exception raised when the fine-tuned model is not found.
+    If this is raised, it's recommended to try another model id.
+    """

{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.109
+Version: 0.2.111
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/RECORD RENAMED Viewed

@@ -2,32 +2,32 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
 vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
 vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
-vision_agent/agent/vision_agent.py,sha256=4vzKYNoScv_sOZiqefo46iKJNZOtqSFvSJif0zZIdLI,8471
+vision_agent/agent/vision_agent.py,sha256=5rgO-pScVOS3t4sWnLBnGYYkGftGgF4U0FpZzFVrDAY,8447
 vision_agent/agent/vision_agent_coder.py,sha256=oo3IoRrc-cVdjKq_YsjzkBZNTBtiCTIctGfeC5C7MXM,30926
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
 vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
 vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
-vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
+vision_agent/clients/http.py,sha256=k883i6M_4nl7zwwHSI-yP5sAgQZIDPM1nrKD6YFJ3Xs,2009
+vision_agent/clients/landing_public_api.py,sha256=ImMzR6qVvkwgiMMmQRGl91E4xktKjoctun0hWn9PxfE,1507
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
 vision_agent/lmm/lmm.py,sha256=TgEwrtQqpnWlBYEvsSU6DbkY3Y7MM8wRb4lMQgSiM0k,19435
 vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
-vision_agent/tools/__init__.py,sha256=MK0D8NtIChwGHwqsTz3LeV5BGuQecNVrNzUsyaEwuGA,1926
-vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
-vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
+vision_agent/tools/__init__.py,sha256=ibjymNE7QqtZLgAm3oytYDANNhGLovQsjFqVZZCQWEU,2018
+vision_agent/tools/meta_tools.py,sha256=Bm_sIeorVRW_aWA-htA0G-BbbN4yZm91Tam90s90cnA,13366
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tool_utils.py,sha256=ZhZ9oEcOvRSuWPy-gV0rx3pvaaXzBW-ZC3YQanXrq1g,4733
-vision_agent/tools/tools.py,sha256=6sheMaBfuJUxDboN1GA0L4bTeuKoljrGlGNdwXn_dq8,44805
+vision_agent/tools/tool_utils.py,sha256=FU6DCMB3hk9e8p4nAkAv7mHQDIhH8fssyxAYE1bmGK4,5628
+vision_agent/tools/tools.py,sha256=RSKzMcEUNQwcKnQLSH4Go284QgBl3pXqIqmCGMY4shY,49616
+vision_agent/tools/tools_types.py,sha256=z6_XtUhWgh201yM7Z0CYtiLBEGdHPc_QUydMDHZ84EA,2216
 vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
-vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
+vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
 vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
 vision_agent/utils/image_utils.py,sha256=c_g5i_cFC0C-Yw9gU_NaVgQdmBlyumw3bLIDtCU42xo,8200
 vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.109.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.109.dist-info/METADATA,sha256=ILl3GV0gpeSUgDP5QlGQP4r12nsbyxovYCxn3EtPkx4,10732
-vision_agent-0.2.109.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.109.dist-info/RECORD,,
+vision_agent-0.2.111.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.111.dist-info/METADATA,sha256=Rqo5Hv-b8GnmZloGTELvU5lzbEZAY6cz96KUGKM7WR8,10732
+vision_agent-0.2.111.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.111.dist-info/RECORD,,

vision_agent/tools/meta_tools_types.py DELETED Viewed

@@ -1,30 +0,0 @@
-from enum import Enum
-from typing import List, Tuple
-from pydantic import BaseModel
-class BboxInput(BaseModel):
-    image_path: str
-    labels: List[str]
-    bboxes: List[Tuple[int, int, int, int]]
-class BboxInputBase64(BaseModel):
-    image: str
-    filename: str
-    labels: List[str]
-    bboxes: List[Tuple[int, int, int, int]]
-class PromptTask(str, Enum):
-    """
-    Valid task prompts options for the Florencev2 model.
-    """
-    CAPTION = "<CAPTION>"
-    """"""
-    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
-    """"""
-    OBJECT_DETECTION = "<OD>"
-    """"""

{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.109.dist-info → vision_agent-0.2.111.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.109__py3-none-any.whl → 0.2.111__py3-none-any.whl

vision-agent 0.2.109__py3-none-any.whl → 0.2.111__py3-none-any.whl