PyPI - vision-agent - Versions diffs - 0.2.98__py3-none-any.whl → 0.2.99__py3-none-any.whl - Mend

vision-agent 0.2.98__py3-none-any.whl → 0.2.99__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

vision_agent/agent/vision_agent.py +1 -1
vision_agent/clients/__init__.py +0 -0
vision_agent/clients/http.py +46 -0
vision_agent/clients/landing_public_api.py +26 -0
vision_agent/tools/__init__.py +1 -1
vision_agent/tools/meta_tools.py +45 -0
vision_agent/tools/meta_tools_types.py +30 -0
vision_agent/tools/tools.py +4 -5
vision_agent/utils/execute.py +2 -2
{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/METADATA +1 -1
{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/RECORD +13 -9
{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/LICENSE +0 -0
{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/WHEEL +0 -0

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
     ]
     @staticmethod

vision_agent/clients/__init__.py ADDED Viewed

File without changes

vision_agent/clients/http.py ADDED Viewed

@@ -0,0 +1,46 @@
+import json
+import logging
+from typing import Any, Dict, Optional
+from requests import Session
+from requests.adapters import HTTPAdapter
+from requests.exceptions import ConnectionError, RequestException, Timeout
+_LOGGER = logging.getLogger(__name__)
+class BaseHTTP:
+    """Minimal JSON client that wraps a ``requests`` session for one base endpoint.
+    URLs that start with ``base_endpoint`` are served by an ``HTTPAdapter``
+    configured with ``_MAX_RETRIES``; every POST is sent with ``_TIMEOUT``.
+    """
+    _TIMEOUT = 30  # seconds
+    _MAX_RETRIES = 3
+    def __init__(
+        self, base_endpoint: str, *, headers: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Create the session and mount the retrying adapter on the endpoint.
+        Parameters:
+            base_endpoint (str): URL prefix that request paths are appended to.
+            headers (Optional[Dict[str, Any]]): Session-wide headers. When
+                omitted, only a JSON ``Content-Type`` header is set.
+        """
+        self._headers = headers
+        if headers is None:
+            self._headers = {
+                "Content-Type": "application/json",
+            }
+        self._base_endpoint = base_endpoint
+        self._session = Session()
+        self._session.headers.update(self._headers)  # type: ignore
+        self._session.mount(
+            self._base_endpoint, HTTPAdapter(max_retries=self._MAX_RETRIES)
+        )
+    def post(self, url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """POST ``payload`` as JSON to ``{base_endpoint}/{url}`` and return the decoded body.
+        Parameters:
+            url (str): Path appended to the base endpoint after a "/".
+            payload (Dict[str, Any]): Body sent through the ``json=`` argument.
+        Returns:
+            Dict[str, Any]: The JSON-decoded response body.
+        Raises:
+            RequestException: The request failed, timed out, or the server
+                answered with an error status (``raise_for_status``).
+            ValueError: The response body is not valid JSON.
+        """
+        formatted_url = f"{self._base_endpoint}/{url}"
+        _LOGGER.info(f"Sending data to {formatted_url}")
+        try:
+            response = self._session.post(
+                url=formatted_url, json=payload, timeout=self._TIMEOUT
+            )
+            response.raise_for_status()
+        except (ConnectionError, Timeout, RequestException) as err:
+            _LOGGER.warning(f"Error: {err}.")
+            # Re-raise the real failure; falling through used to end in an
+            # UnboundLocalError on the return statement, hiding the cause.
+            raise
+        try:
+            result: Dict[str, Any] = response.json()
+        except ValueError:
+            # ValueError is the common base of the JSON decode errors that
+            # response.json() raises; `response` is guaranteed bound here.
+            resp_text = response.text
+            _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
+        _LOGGER.info(json.dumps(result))
+        return result

vision_agent/clients/landing_public_api.py ADDED Viewed

@@ -0,0 +1,26 @@
+import os
+from uuid import UUID
+from typing import List
+from vision_agent.clients.http import BaseHTTP
+from vision_agent.utils.type_defs import LandingaiAPIKey
+from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
+class LandingPublicAPI(BaseHTTP):
+    """Client for the Landing AI public API that sends the key in an ``apikey`` header."""
+    def __init__(self) -> None:
+        """Resolve the endpoint and API key, then initialise the base HTTP client.
+        ``LANDINGAI_URL`` and ``LANDINGAI_API_KEY`` are used when present in the
+        environment; otherwise the built-in URL and ``LandingaiAPIKey().api_key``
+        apply.
+        """
+        base_url = os.environ.get("LANDINGAI_URL", "https://api.dev.landing.ai")
+        # The fallback key is always built, even when the variable is set.
+        fallback_key = LandingaiAPIKey().api_key
+        api_key = os.environ.get("LANDINGAI_API_KEY", fallback_key)
+        super().__init__(
+            base_endpoint=base_url,
+            headers={"Content-Type": "application/json", "apikey": api_key},
+        )
+    def launch_fine_tuning_job(
+        self, model_name: str, task: PromptTask, bboxes: List[BboxInputBase64]
+    ) -> UUID:
+        """Submit a fine-tuning job and return its id.
+        Parameters:
+            model_name (str): Sent as the ``name`` of the ``model`` object.
+            task (PromptTask): Its ``value`` is sent as the model ``task``.
+            bboxes (List[BboxInputBase64]): Dumped with ``by_alias=True`` and
+                sent as the ``bboxes`` list.
+        Returns:
+            UUID: Parsed from the ``jobId`` field of the response.
+        """
+        model_spec = {"name": model_name, "task": task.value}
+        serialized_bboxes = []
+        for bbox in bboxes:
+            serialized_bboxes.append(bbox.model_dump(by_alias=True))
+        payload = {"model": model_spec, "bboxes": serialized_bboxes}
+        job = self.post("v1/agent/jobs/fine-tuning", payload=payload)
+        return UUID(job["jobId"])

vision_agent/tools/__init__.py CHANGED Viewed

@@ -1,6 +1,6 @@
 from typing import Callable, List, Optional
-from .meta_tools import META_TOOL_DOCSTRING
+from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
     TOOL_DESCRIPTIONS,

vision_agent/tools/meta_tools.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import os
 import subprocess
+from uuid import UUID
 from pathlib import Path
 from typing import Any, Dict, List, Union
@@ -7,6 +8,9 @@ import vision_agent as va
 from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
+from vision_agent.utils.image_utils import convert_to_b64
+from vision_agent.clients.landing_public_api import LandingPublicAPI
+from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
@@ -385,6 +389,46 @@ def get_tool_descriptions() -> str:
     return TOOL_DESCRIPTIONS
+def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
+    """'florencev2_fine_tuning' is a tool that fine-tunes florencev2 to be able
+    to detect objects in an image based on a given dataset. It returns the fine
+    tuning job id.
+    Parameters:
+        bboxes (List[Dict[str, Any]]): A list of dictionaries, each containing
+            the 'image_path', 'labels' and 'bboxes' of one image.
+        task (str): The name of the florencev2 fine-tuning task. The options are
+            "CAPTION", "CAPTION_TO_PHRASE_GROUNDING" and "OBJECT_DETECTION".
+    Returns:
+        UUID: The fine tuning job id, this id will be used to retrieve the fine
+            tuned model.
+    Example
+    -------
+        >>> fine_tuning_job_id = florencev2_fine_tuning(
+            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
+             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
+             "OBJECT_DETECTION"
+        )
+    """
+    # Validate every raw dictionary up front so bad input fails before any
+    # image is converted.
+    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
+    # Lookup is by member name (e.g. "OBJECT_DETECTION"); an unknown name
+    # raises KeyError.
+    task_input = PromptTask[task]
+    fine_tuning_request = [
+        BboxInputBase64(
+            image=convert_to_b64(bbox_input.image_path),
+            # Path.name honours the platform's path separators, whereas
+            # splitting on "/" kept the whole string for Windows-style paths.
+            filename=Path(bbox_input.image_path).name,
+            labels=bbox_input.labels,
+            bboxes=bbox_input.bboxes,
+        )
+        for bbox_input in bboxes_input
+    ]
+    landing_api = LandingPublicAPI()
+    return landing_api.launch_fine_tuning_job(
+        "florencev2", task_input, fine_tuning_request
+    )
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -398,5 +442,6 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         search_dir,
         search_file,
         find_file,
+        florencev2_fine_tuning,
     ]
 )

vision_agent/tools/meta_tools_types.py ADDED Viewed

@@ -0,0 +1,30 @@
+from enum import Enum
+from typing import List, Tuple
+from pydantic import BaseModel
+class BboxInput(BaseModel):
+    # One annotated image as supplied by the caller; florencev2_fine_tuning
+    # validates each input dictionary against this model.
+    # Location of the image file; its last path component becomes the upload filename.
+    image_path: str
+    # Label names for the image (presumably one per entry in `bboxes` — TODO confirm).
+    labels: List[str]
+    # Boxes as 4-tuples of ints; coordinate order is not defined here
+    # (NOTE(review): looks like pixel corner coordinates — confirm).
+    bboxes: List[Tuple[int, int, int, int]]
+class BboxInputBase64(BaseModel):
+    # Upload-ready counterpart of BboxInput: the image travels inline instead of by path.
+    # Output of convert_to_b64 for the image (presumably base64 text — TODO confirm).
+    image: str
+    # File name of the image, without its directory part.
+    filename: str
+    # Label names copied unchanged from the matching BboxInput.
+    labels: List[str]
+    # Boxes copied unchanged from the matching BboxInput.
+    bboxes: List[Tuple[int, int, int, int]]
+class PromptTask(str, Enum):
+    """
+    Task prompt options accepted for the Florencev2 model.
+    """
+    # Each value is the prompt string sent as the model "task".
+    CAPTION = "<CAPTION>"
+    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
+    OBJECT_DETECTION = "<OD>"

vision_agent/tools/tools.py CHANGED Viewed

@@ -2,23 +2,23 @@ import io
 import json
 import logging
 import tempfile
-from importlib import resources
 from pathlib import Path
+from importlib import resources
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 import cv2
-import numpy as np
 import requests
+import numpy as np
+from pytube import YouTube  # type: ignore
 from moviepy.editor import ImageSequenceClip
 from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
-from pytube import YouTube  # type: ignore
 from vision_agent.tools.tool_utils import (
+    send_inference_request,
     get_tool_descriptions,
     get_tool_documentation,
     get_tools_df,
-    send_inference_request,
 )
 from vision_agent.utils import extract_frames_from_video
 from vision_agent.utils.execute import FileSerializer, MimeType
@@ -1063,7 +1063,6 @@ def save_video(
     if fps <= 0:
         _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
         fps = 4
     with ImageSequenceClip(frames, fps=fps) as video:
         if output_video_path:
             f = open(output_video_path, "wb")

vision_agent/utils/execute.py CHANGED Viewed

@@ -209,7 +209,7 @@ class Result:
         return formats
     @staticmethod
-    def from_e2b_result(result: E2BResult) -> "Result":  # type: ignore
+    def from_e2b_result(result: E2BResult) -> "Result":
         """
         Creates a Result object from an E2BResult object.
         """
@@ -361,7 +361,7 @@ class Execution(BaseModel):
         )
     @staticmethod
-    def from_e2b_execution(exec: E2BExecution) -> "Execution":  # type: ignore
+    def from_e2b_execution(exec: E2BExecution) -> "Execution":
         """Creates an Execution object from an E2BResult object."""
         return Execution(
             results=[Result.from_e2b_result(res) for res in exec.results],

{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.98
+Version: 0.2.99
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/RECORD RENAMED Viewed

@@ -2,28 +2,32 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
 vision_agent/agent/__init__.py,sha256=qpduQ9YufJQfMmG6jwKC2xmlbtR2qK8_1eQC1sGA9Ks,135
 vision_agent/agent/agent.py,sha256=Bt8yhjCFXuRdZaHxKEesG40V09nWRt45sZluri1R3AA,575
 vision_agent/agent/agent_utils.py,sha256=JXdl2xz14LKQAmScY-MIW23AD2WBFCsnI0JS6dAyj3Q,1412
-vision_agent/agent/vision_agent.py,sha256=i_rNpc7faqHTifp2c9sQE4Js3qYUKuJeiqauTp90OlE,8417
+vision_agent/agent/vision_agent.py,sha256=U7VqUR-Io0xkGHpcF03Kq87Y0YQIdZQGqxuXdwjQzgk,8441
 vision_agent/agent/vision_agent_coder.py,sha256=N8oVwfxrz6emHlucJC5hGQvkA9cQWW2sMLFtshwLdI8,30309
 vision_agent/agent/vision_agent_coder_prompts.py,sha256=a3R_vHlT2FW3-DSn4OWgzF9zEAx-uKM4ZaTi9Kn-K54,11116
 vision_agent/agent/vision_agent_prompts.py,sha256=hjs-m4ZHR7HE1HtOeX_1rOvTQA2FMEAqEkaBbGPBYDo,6072
+vision_agent/clients/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+vision_agent/clients/http.py,sha256=1WMt29F12YFfPH03AttKxnUNXx5sNOD9ZuH4etbB054,1598
+vision_agent/clients/landing_public_api.py,sha256=Tjl8uBZWc3dvrCOKg-PCYjw3RC3X5Y6B50kaKn_QzL0,1050
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
 vision_agent/lmm/__init__.py,sha256=YuUZRsMHdn8cMOv6iBU8yUqlIOLrbZQqZl9KPnofsHQ,103
 vision_agent/lmm/lmm.py,sha256=KcS6h-8whGFmwt7t4LNlj0hZ4U-rBojYBLKLmrMsF48,15075
 vision_agent/lmm/types.py,sha256=8TSRoTbXyCKVJiH-wHXI2OiGOMSkYv1vLGYeAXtNpOQ,153
-vision_agent/tools/__init__.py,sha256=UNiaJAOt1C709gaJ-a9h9BzKnY5JmoEUpgKftsOnyPQ,1882
-vision_agent/tools/meta_tools.py,sha256=rmxgVzj-vJKeewHbue3qHru4sYsFLxlSZV-YH-eyH5w,13366
+vision_agent/tools/__init__.py,sha256=e8q4lYD3acyX1ikMKLz4nlaAR_WZpBAIyq2CGYOYnvM,1906
+vision_agent/tools/meta_tools.py,sha256=v2FrLl0YwM7JwsVRfgfnryd9qorbPRiObestexbnNBs,15170
+vision_agent/tools/meta_tools_types.py,sha256=aU4knXEhm0AnDYW958T6Q6qPwN4yq8pQzQOxqFaOjzg,596
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=XoB-iae8hHrBQgJd3fV6-UjZAkClysobUaOM17IcHuE,4597
-vision_agent/tools/tools.py,sha256=fHD4qhn7cGG1O77J_BHfaRfW6LMQuj1OIu9xqYu6AG8,43220
+vision_agent/tools/tools.py,sha256=aYo0xSbdr-Q4gq_dKxa8yLyczmXoKv_vYYrZ7dM38bw,43219
 vision_agent/utils/__init__.py,sha256=CW84HnhqI6XQVuxf2KifkLnSuO7EOhmuL09-gAymAak,219
 vision_agent/utils/exceptions.py,sha256=isVH-SVL4vHj3q5kK4z7cy5_aOapAqHXWkpibfSNbUs,1659
-vision_agent/utils/execute.py,sha256=s43aUtuq7ZNjil2mxrddiz8EvvqlJwttkYlIiZouXqM,25125
+vision_agent/utils/execute.py,sha256=ZRxztUfZwvMvPnFbKx5W_LZzTuKl8Zf5dP3Y8P2-3nk,25093
 vision_agent/utils/image_utils.py,sha256=y69wtNla0xHZ1h1x0-vv7nOyKUq69jtjSJBiDCn6EM0,7703
 vision_agent/utils/sim.py,sha256=7JvtWGN0Ik5ife3qQYWs7Fm3T8AnAXGFd5HnvDC15mQ,4433
 vision_agent/utils/type_defs.py,sha256=oVFJcicB-s_09lqvn61u0A5ncZsTqZArZledXWbrrg0,1384
 vision_agent/utils/video.py,sha256=rNmU9KEIkZB5-EztZNlUiKYN0mm_55A_2VGUM0QpqLA,8779
-vision_agent-0.2.98.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.98.dist-info/METADATA,sha256=ANK0JJR0vAu0Tq9W07O6UM4XvUTnoKVqrqwm9gK-DuU,10728
-vision_agent-0.2.98.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.98.dist-info/RECORD,,
+vision_agent-0.2.99.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.99.dist-info/METADATA,sha256=QDiN7-jSVTpGtrwJLhvSUM1A7aj1baWhZ9eFf1GVn2E,10728
+vision_agent-0.2.99.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.99.dist-info/RECORD,,

{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.98.dist-info → vision_agent-0.2.99.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.98__py3-none-any.whl → 0.2.99__py3-none-any.whl

vision-agent 0.2.98__py3-none-any.whl → 0.2.99__py3-none-any.whl