PyPI - vision-agent - Versions diffs - 0.2.97__tar.gz → 0.2.99__tar.gz - Mend

vision-agent 0.2.97.tar.gz → 0.2.99.tar.gz

Files changed (33) hide show

{vision_agent-0.2.97 → vision_agent-0.2.99}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.97
+Version: 0.2.99
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.97 → vision_agent-0.2.99}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.97"
+version = "0.2.99"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -17,6 +17,7 @@ packages = [{include = "vision_agent"}]
 [tool.poetry.dependencies]  # main dependency group
 python = ">=3.9,<4.0"
 numpy = ">=1.21.0,<2.0.0"
 pillow = "10.*"
 requests = "2.*"
@@ -60,6 +61,7 @@ mkdocstrings = {extras = ["python"], version = "^0.23.0"}
 mkdocs-material = "^9.4.2"
 types-tabulate = "^0.9.0.20240106"
 scikit-image = "<0.23.1"
+pre-commit = "^3.8.0"
 [tool.pytest.ini_options]
 log_cli = true
@@ -90,7 +92,6 @@ warn_unused_configs = true
 warn_unused_ignores = true
 warn_return_any = true
 show_error_codes = true
-disallow_any_unimported = true
 [[tool.mypy.overrides]]
 ignore_missing_imports = true
@@ -101,5 +102,5 @@ module = [
     "sentence_transformers.*",
     "moviepy.*",
     "e2b_code_interpreter.*",
-    "e2b.*",
+    "e2b.*"
 ]

{vision_agent-0.2.97 → vision_agent-0.2.99}/vision_agent/agent/vision_agent.py RENAMED Viewed

@@ -28,7 +28,7 @@ class DefaultImports:
     code = [
         "from typing import *",
         "from vision_agent.utils.execute import CodeInterpreter",
-        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions",
+        "from vision_agent.tools.meta_tools import generate_vision_code, edit_vision_code, open_file, create_file, scroll_up, scroll_down, edit_file, get_tool_descriptions, florencev2_fine_tuning",
     ]
     @staticmethod

{vision_agent-0.2.97 → vision_agent-0.2.99}/vision_agent/agent/vision_agent_coder.py RENAMED Viewed

@@ -93,7 +93,7 @@ def format_plans(plans: Dict[str, Any]) -> str:
 def extract_image(
-    media: Optional[Sequence[Union[str, Path]]]
+    media: Optional[Sequence[Union[str, Path]]],
 ) -> Optional[Sequence[Union[str, Path]]]:
     if media is None:
         return None
@@ -186,7 +186,8 @@ def pick_plan(
                 if tool_output.success
                 else "Code execution failed"
             ),
-            "payload": tool_output.to_json(),
+            "code": DefaultImports.prepend_imports(code),
+            # "payload": tool_output.to_json(),
             "status": "completed" if tool_output.success else "failed",
         }
     )
@@ -211,6 +212,9 @@ def pick_plan(
             }
         )
         code = extract_code(model(prompt))
+        tool_output = code_interpreter.exec_isolation(
+            DefaultImports.prepend_imports(code)
+        )
         log_progress(
             {
                 "type": "log",
@@ -220,13 +224,10 @@ def pick_plan(
                     else "Code execution failed"
                 ),
                 "code": DefaultImports.prepend_imports(code),
-                "payload": tool_output.to_json(),
+                # "payload": tool_output.to_json(),
                 "status": "completed" if tool_output.success else "failed",
             }
         )
-        tool_output = code_interpreter.exec_isolation(
-            DefaultImports.prepend_imports(code)
-        )
         tool_output_str = ""
         if len(tool_output.logs.stdout) > 0:
             tool_output_str = tool_output.logs.stdout[0]

vision_agent-0.2.99/vision_agent/clients/http.py ADDED Viewed

@@ -0,0 +1,46 @@
+import json
+import logging
+from typing import Any, Dict, Optional
+from requests import Session
+from requests.adapters import HTTPAdapter
+from requests.exceptions import ConnectionError, RequestException, Timeout
+_LOGGER = logging.getLogger(__name__)
+class BaseHTTP:
+    """Small JSON-over-HTTP client built on a shared ``requests`` session.
+    The session carries the default headers and has an ``HTTPAdapter`` with
+    ``_MAX_RETRIES`` retries mounted for URLs under the base endpoint.
+    """
+    _TIMEOUT = 30  # seconds
+    _MAX_RETRIES = 3
+    def __init__(
+        self, base_endpoint: str, *, headers: Optional[Dict[str, Any]] = None
+    ) -> None:
+        """Create the session used for every request to ``base_endpoint``.
+        Parameters:
+            base_endpoint (str): URL prefix that request paths are appended to.
+            headers (Optional[Dict[str, Any]]): Headers sent with every
+                request. Defaults to a JSON ``Content-Type`` header.
+        """
+        self._headers = headers
+        if headers is None:
+            self._headers = {
+                "Content-Type": "application/json",
+            }
+        self._base_endpoint = base_endpoint
+        self._session = Session()
+        self._session.headers.update(self._headers)  # type: ignore
+        self._session.mount(
+            self._base_endpoint, HTTPAdapter(max_retries=self._MAX_RETRIES)
+        )
+    def post(self, url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """POST ``payload`` as JSON and return the decoded JSON response.
+        Parameters:
+            url (str): Path appended to the base endpoint after a "/".
+            payload (Dict[str, Any]): JSON-serializable request body.
+        Returns:
+            Dict[str, Any]: The decoded JSON body of the response.
+        Raises:
+            RequestException: If the request fails, times out, or the server
+                answers with an error status.
+            ValueError: If the response body is not valid JSON.
+        """
+        formatted_url = f"{self._base_endpoint}/{url}"
+        _LOGGER.info(f"Sending data to {formatted_url}")
+        try:
+            response = self._session.post(
+                url=formatted_url, json=payload, timeout=self._TIMEOUT
+            )
+            response.raise_for_status()
+        except (ConnectionError, Timeout, RequestException) as err:
+            _LOGGER.warning(f"Error: {err}.")
+            # Re-raise the real failure: falling through would hit
+            # `return result` with `result` unbound and mask it behind an
+            # UnboundLocalError.
+            raise
+        # Decode in a separate step so `response` is always bound when the
+        # handler below reads its text.
+        try:
+            result: Dict[str, Any] = response.json()
+        except ValueError:
+            # The JSON decode errors raised by `Response.json()` are
+            # ValueError subclasses.
+            resp_text = response.text
+            _LOGGER.warning(f"Response seems incorrect: '{resp_text}'.")
+            raise
+        _LOGGER.info(json.dumps(result))
+        return result

vision_agent-0.2.99/vision_agent/clients/landing_public_api.py ADDED Viewed

@@ -0,0 +1,26 @@
+import os
+from uuid import UUID
+from typing import List
+from vision_agent.clients.http import BaseHTTP
+from vision_agent.utils.type_defs import LandingaiAPIKey
+from vision_agent.tools.meta_tools_types import BboxInputBase64, PromptTask
+class LandingPublicAPI(BaseHTTP):
+    """HTTP client for the LandingAI public API, configured from the environment."""
+    def __init__(self) -> None:
+        """Read the endpoint and API key, then initialize the base client.
+        The endpoint comes from ``LANDINGAI_URL`` (default
+        "https://api.dev.landing.ai"). The key comes from
+        ``LANDINGAI_API_KEY``, falling back to ``LandingaiAPIKey().api_key``;
+        that fallback object is constructed even when the variable is set.
+        """
+        endpoint = os.environ.get("LANDINGAI_URL", "https://api.dev.landing.ai")
+        fallback_key = LandingaiAPIKey().api_key
+        api_key = os.environ.get("LANDINGAI_API_KEY", fallback_key)
+        super().__init__(
+            base_endpoint=endpoint,
+            headers={"Content-Type": "application/json", "apikey": api_key},
+        )
+    def launch_fine_tuning_job(
+        self, model_name: str, task: PromptTask, bboxes: List[BboxInputBase64]
+    ) -> UUID:
+        """Submit a fine-tuning job and return its id.
+        Parameters:
+            model_name (str): Name of the model to fine-tune.
+            task (PromptTask): Task whose ``value`` is sent with the model.
+            bboxes (List[BboxInputBase64]): Annotated images, serialized with
+                ``model_dump(by_alias=True)``.
+        Returns:
+            UUID: The id parsed from the "jobId" field of the response.
+        """
+        model_spec = {"name": model_name, "task": task.value}
+        serialized_bboxes = [bbox.model_dump(by_alias=True) for bbox in bboxes]
+        reply = self.post(
+            "v1/agent/jobs/fine-tuning",
+            payload={"model": model_spec, "bboxes": serialized_bboxes},
+        )
+        job_id = reply["jobId"]
+        return UUID(job_id)

vision_agent-0.2.99/vision_agent/fonts/__init__.py ADDED Viewed

File without changes

{vision_agent-0.2.97 → vision_agent-0.2.99}/vision_agent/tools/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 from typing import Callable, List, Optional
-from .meta_tools import META_TOOL_DOCSTRING
+from .meta_tools import META_TOOL_DOCSTRING, florencev2_fine_tuning
 from .prompts import CHOOSE_PARAMS, SYSTEM_PROMPT
 from .tools import (
     TOOL_DESCRIPTIONS,

{vision_agent-0.2.97 → vision_agent-0.2.99}/vision_agent/tools/meta_tools.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import os
 import subprocess
+from uuid import UUID
 from pathlib import Path
 from typing import Any, Dict, List, Union
@@ -7,6 +8,9 @@ import vision_agent as va
 from vision_agent.lmm.types import Message
 from vision_agent.tools.tool_utils import get_tool_documentation
 from vision_agent.tools.tools import TOOL_DESCRIPTIONS
+from vision_agent.utils.image_utils import convert_to_b64
+from vision_agent.clients.landing_public_api import LandingPublicAPI
+from vision_agent.tools.meta_tools_types import BboxInput, BboxInputBase64, PromptTask
 # These tools are adapted from SWE-Agent https://github.com/princeton-nlp/SWE-agent
@@ -385,6 +389,46 @@ def get_tool_descriptions() -> str:
     return TOOL_DESCRIPTIONS
+def florencev2_fine_tuning(bboxes: List[Dict[str, Any]], task: str) -> UUID:
+    """'florencev2_fine_tuning' is a tool that fine-tunes florencev2 to be able
+    to detect objects in an image based on a given dataset. It returns the fine
+    tuning job id.
+    Parameters:
+        bboxes (List[Dict[str, Any]]): A list of dictionaries, each containing
+            the image path ('image_path'), the labels ('labels') and the
+            bounding boxes ('bboxes') for one image.
+        task (str): The name of the florencev2 fine-tuning task. The options are
+            "CAPTION", "CAPTION_TO_PHRASE_GROUNDING" and "OBJECT_DETECTION".
+    Returns:
+        UUID: The fine tuning job id, this id will be used to retrieve the fine
+            tuned model.
+    Example
+    -------
+        >>> fine_tuning_job_id = florencev2_fine_tuning(
+            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
+             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
+             "OBJECT_DETECTION"
+        )
+    """
+    # Validate every raw dictionary first so malformed input fails before any
+    # image is encoded or any request is sent.
+    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
+    # Lookup is by member name; an unknown name raises KeyError.
+    task_input = PromptTask[task]
+    fine_tuning_request = [
+        BboxInputBase64(
+            image=convert_to_b64(bbox_input.image_path),
+            # Path.name follows the platform's separators, unlike split("/").
+            filename=Path(bbox_input.image_path).name,
+            labels=bbox_input.labels,
+            bboxes=bbox_input.bboxes,
+        )
+        for bbox_input in bboxes_input
+    ]
+    landing_api = LandingPublicAPI()
+    return landing_api.launch_fine_tuning_job(
+        "florencev2", task_input, fine_tuning_request
+    )
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,
@@ -398,5 +442,6 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         search_dir,
         search_file,
         find_file,
+        florencev2_fine_tuning,
     ]
 )

vision_agent-0.2.99/vision_agent/tools/meta_tools_types.py ADDED Viewed

@@ -0,0 +1,30 @@
+from enum import Enum
+from typing import List, Tuple
+from pydantic import BaseModel
+class BboxInput(BaseModel):
+    """Annotations for one image that is referenced by its file path."""
+    # Path of the image; florencev2_fine_tuning passes it to convert_to_b64.
+    image_path: str
+    # Label names — presumably one per entry in `bboxes`; TODO confirm.
+    labels: List[str]
+    # Boxes as 4-int tuples; the coordinate order is not fixed here — TODO confirm.
+    bboxes: List[Tuple[int, int, int, int]]
+class BboxInputBase64(BaseModel):
+    """Annotations for one image whose content is carried inline as a string."""
+    # Encoded image; florencev2_fine_tuning fills it from convert_to_b64.
+    image: str
+    # File name of the image, without its directory part.
+    filename: str
+    # Label names — presumably one per entry in `bboxes`; TODO confirm.
+    labels: List[str]
+    # Boxes as 4-int tuples; the coordinate order is not fixed here — TODO confirm.
+    bboxes: List[Tuple[int, int, int, int]]
+class PromptTask(str, Enum):
+    """Valid task prompt options for the Florencev2 model."""
+    # Each value is the literal prompt token sent as the job's task.
+    CAPTION = "<CAPTION>"
+    CAPTION_TO_PHRASE_GROUNDING = "<CAPTION_TO_PHRASE_GROUNDING>"
+    OBJECT_DETECTION = "<OD>"

{vision_agent-0.2.97 → vision_agent-0.2.99}/vision_agent/tools/tools.py RENAMED Viewed

@@ -2,23 +2,23 @@ import io
 import json
 import logging
 import tempfile
-from importlib import resources
 from pathlib import Path
+from importlib import resources
 from typing import Any, Dict, List, Optional, Tuple, Union, cast
 import cv2
-import numpy as np
 import requests
+import numpy as np
+from pytube import YouTube  # type: ignore
 from moviepy.editor import ImageSequenceClip
 from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
-from pytube import YouTube  # type: ignore
 from vision_agent.tools.tool_utils import (
+    send_inference_request,
     get_tool_descriptions,
     get_tool_documentation,
     get_tools_df,
-    send_inference_request,
 )
 from vision_agent.utils import extract_frames_from_video
 from vision_agent.utils.execute import FileSerializer, MimeType
@@ -1063,7 +1063,6 @@ def save_video(
     if fps <= 0:
         _LOGGER.warning(f"Invalid fps value: {fps}. Setting fps to 4 (default value).")
         fps = 4
     with ImageSequenceClip(frames, fps=fps) as video:
         if output_video_path:
             f = open(output_video_path, "wb")

{vision_agent-0.2.97 → vision_agent-0.2.99}/vision_agent/utils/execute.py RENAMED Viewed

@@ -209,7 +209,7 @@ class Result:
         return formats
     @staticmethod
-    def from_e2b_result(result: E2BResult) -> "Result":  # type: ignore
+    def from_e2b_result(result: E2BResult) -> "Result":
         """
         Creates a Result object from an E2BResult object.
         """
@@ -361,7 +361,7 @@ class Execution(BaseModel):
         )
     @staticmethod
-    def from_e2b_execution(exec: E2BExecution) -> "Execution":  # type: ignore
+    def from_e2b_execution(exec: E2BExecution) -> "Execution":
         """Creates an Execution object from an E2BResult object."""
         return Execution(
             results=[Result.from_e2b_result(res) for res in exec.results],