vision-agent 0.2.239.tar.gz → 0.2.241.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.239 → vision_agent-0.2.241}/PKG-INFO +1 -1
- {vision_agent-0.2.239 → vision_agent-0.2.241}/pyproject.toml +1 -1
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/tools/__init__.py +0 -2
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/tools/meta_tools.py +1 -124
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/tools/tools.py +23 -111
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/exceptions.py +0 -7
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/video.py +25 -34
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/video_tracking.py +8 -3
- vision_agent-0.2.239/vision_agent/clients/landing_public_api.py +0 -38
- {vision_agent-0.2.239 → vision_agent-0.2.241}/LICENSE +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/README.md +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/.sim_tools/df.csv +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/.sim_tools/embs.npy +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/README.md +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/configs/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/configs/anthropic_config.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/configs/anthropic_openai_config.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/configs/config.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/configs/openai_config.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/models/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/models/agent_types.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/models/lmm_types.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/models/tools_types.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/sim/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/sim/sim.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/tools/planner_tools.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/agent.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/tools.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/tools_doc.py +0 -0
- {vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/type_defs.py +0 -0
{vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/tools/meta_tools.py

@@ -11,11 +11,9 @@ import libcst as cst
 from IPython.display import display
 
 import vision_agent as va
-from vision_agent.clients.landing_public_api import LandingPublicAPI
-from vision_agent.models import BboxInput, BboxInputBase64, Message, PromptTask
+from vision_agent.models import Message
 from vision_agent.tools.tools import get_tools_descriptions as _get_tool_descriptions
 from vision_agent.utils.execute import Execution, MimeType
-from vision_agent.utils.image_utils import convert_to_b64
 from vision_agent.utils.tools_doc import get_tool_documentation
 
 CURRENT_FILE = None

@@ -573,48 +571,6 @@ def get_tool_descriptions() -> str:
     return _get_tool_descriptions()
 
 
-def object_detection_fine_tuning(bboxes: List[Dict[str, Any]]) -> str:
-    """DO NOT use this function unless the user has supplied you with bboxes.
-    'object_detection_fine_tuning' is a tool that fine-tunes object detection models to
-    be able to detect objects in an image based on a given dataset. It returns the fine
-    tuning job id.
-
-    Parameters:
-        bboxes (List[BboxInput]): A list of BboxInput containing the image path, labels
-            and bounding boxes. The coordinates are unnormalized.
-
-    Returns:
-        str: The fine tuning job id, this id will used to retrieve the fine tuned
-            model.
-
-    Example
-    -------
-        >>> fine_tuning_job_id = object_detection_fine_tuning(
-            [{'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[370, 30, 560, 290]]},
-             {'image_path': 'filename.png', 'labels': ['screw'], 'bboxes': [[120, 0, 300, 170]]}],
-            "phrase_grounding"
-        )
-    """
-    task = "phrase_grounding"
-    bboxes_input = [BboxInput.model_validate(bbox) for bbox in bboxes]
-    task_type = PromptTask[task.upper()]
-    fine_tuning_request = [
-        BboxInputBase64(
-            image=convert_to_b64(bbox_input.image_path),
-            filename=Path(bbox_input.image_path).name,
-            labels=bbox_input.labels,
-            bboxes=bbox_input.bboxes,
-        )
-        for bbox_input in bboxes_input
-    ]
-    landing_api = LandingPublicAPI()
-    fine_tune_id = str(
-        landing_api.launch_fine_tuning_job("florencev2", task_type, fine_tuning_request)
-    )
-    print(f"[Fine tuning id: {fine_tune_id}]")
-    return fine_tune_id
-
-
 def get_diff(before: str, after: str) -> str:
     return "".join(
         difflib.unified_diff(

@@ -721,83 +677,6 @@ def use_extra_vision_agent_args(
     return modified_tree.code
 
 
-def use_object_detection_fine_tuning(
-    artifacts: Artifacts, name: str, fine_tune_id: str
-) -> str:
-    """Replaces calls to 'owl_v2_image', 'florence2_phrase_detection' and
-    'florence2_sam2_image' with the fine tuning id. This ensures that the code utilizes
-    the fined tuned florence2 model. Returns the diff between the original code and the
-    new code.
-
-    Parameters:
-        artifacts (Artifacts): The artifacts object to edit the code from.
-        name (str): The name of the artifact to edit.
-        fine_tune_id (str): The fine tuning job id.
-
-    Examples
-    --------
-        >>> diff = use_object_detection_fine_tuning(artifacts, "code.py", "23b3b022-5ebf-4798-9373-20ef36429abf")
-    """
-
-    if name not in artifacts:
-        output_str = f"[Artifact {name} does not exist]"
-        print(output_str)
-        return output_str
-
-    code = artifacts[name]
-
-    patterns_with_fine_tune_id = [
-        (
-            r'florence2_phrase_grounding\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'florence2_phrase_grounding("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-        (
-            r'florence2_phrase_grounding_video\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'florence2_phrase_grounding_video("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-        (
-            r'owl_v2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'owl_v2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-        (
-            r'florence2_sam2_image\(\s*["\']([^"\']+)["\']\s*,\s*([^,]+)(?:,\s*["\'][^"\']+["\'])?\s*\)',
-            lambda match: f'florence2_sam2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
-        ),
-    ]
-
-    new_code = code
-    for (
-        pattern_with_fine_tune_id,
-        replacer_with_fine_tune_id,
-    ) in patterns_with_fine_tune_id:
-        if re.search(pattern_with_fine_tune_id, new_code):
-            new_code = re.sub(
-                pattern_with_fine_tune_id, replacer_with_fine_tune_id, new_code
-            )
-
-    if new_code == code:
-        output_str = (
-            f"[No function calls to replace with fine tuning id in artifact {name}]"
-        )
-        print(output_str)
-        return output_str
-
-    artifacts[name] = new_code
-
-    diff = get_diff_with_prompts(name, code, new_code)
-    print(diff)
-
-    display(
-        {
-            MimeType.APPLICATION_ARTIFACT: json.dumps(
-                {"name": name, "content": new_code, "action": "edit"}
-            )
-        },
-        raw=True,
-    )
-    return diff
-
-
 META_TOOL_DOCSTRING = get_tool_documentation(
     [
         get_tool_descriptions,

@@ -807,8 +686,6 @@ META_TOOL_DOCSTRING = get_tool_documentation(
         generate_vision_code,
         edit_vision_code,
         view_media_artifact,
-        object_detection_fine_tuning,
-        use_object_detection_fine_tuning,
         list_artifacts,
     ]
 )
{vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/tools/tools.py

@@ -8,8 +8,7 @@ from base64 import b64encode
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from importlib import resources
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union, cast
-from uuid import UUID
+from typing import IO, Any, Callable, Dict, List, Optional, Tuple, Union, cast
 
 import cv2
 import numpy as np
@@ -20,10 +19,7 @@ from PIL import Image, ImageDraw, ImageFont
 from pillow_heif import register_heif_opener  # type: ignore
 from pytube import YouTube  # type: ignore
 
-from vision_agent.clients.landing_public_api import LandingPublicAPI
 from vision_agent.lmm.lmm import LMM, AnthropicLMM, OpenAILMM
-from vision_agent.models import JobStatus
-from vision_agent.utils.exceptions import FineTuneModelIsNotReady
 from vision_agent.utils.execute import FileSerializer, MimeType
 from vision_agent.utils.image_utils import (
     b64_to_pil,
@@ -239,7 +235,7 @@ def od_sam2_video_tracking(
     frames: List[np.ndarray],
     box_threshold: float = 0.30,
     chunk_length: Optional[int] = 50,
-    fine_tune_id: Optional[str] = None,
+    deployment_id: Optional[str] = None,
 ) -> Dict[str, Any]:
     chunk_length = 50 if chunk_length is None else chunk_length
     segment_size = chunk_length

@@ -262,7 +258,7 @@ def od_sam2_video_tracking(
         prompt: str,
         segment_index: int,
         frame_number: int,
-        fine_tune_id: str,
+        deployment_id: str,
         segment_frames: list,
     ) -> tuple:
         """

@@ -273,7 +269,7 @@ def od_sam2_video_tracking(
             prompt: The prompt for the object detection model.
             segment_index: The index of the current segment.
            frame_number: The number of the current frame.
-            fine_tune_id: Optional fine-tune model ID.
+            deployment_id: Optional The Model deployment ID.
             segment_frames: List of frames for the current segment.
 
         Returns:

@@ -293,7 +289,6 @@ def od_sam2_video_tracking(
                 prompt=prompt,
                 image=segment_frames[frame_number],
                 box_threshold=box_threshold,
-                fine_tune_id=fine_tune_id,
             )
             function_name = "owlv2_object_detection"
 
@@ -301,7 +296,6 @@ def od_sam2_video_tracking(
             segment_results = florence2_object_detection(
                 prompt=prompt,
                 image=segment_frames[frame_number],
-                fine_tune_id=fine_tune_id,
             )
             function_name = "florence2_object_detection"
 
@@ -309,13 +303,12 @@ def od_sam2_video_tracking(
             segment_results = agentic_object_detection(
                 prompt=prompt,
                 image=segment_frames[frame_number],
-                fine_tune_id=fine_tune_id,
             )
             function_name = "agentic_object_detection"
 
         elif od_model == ODModels.CUSTOM:
             segment_results = custom_object_detection(
-                deployment_id=fine_tune_id,
+                deployment_id=deployment_id,
                 image=segment_frames[frame_number],
                 box_threshold=box_threshold,
             )

@@ -337,7 +330,7 @@ def od_sam2_video_tracking(
             segment_frames=segment,
             od_model=od_model,
             prompt=prompt,
-            fine_tune_id=fine_tune_id,
+            deployment_id=deployment_id,
             chunk_length=chunk_length,
             image_size=image_size,
             segment_index=segment_index,
@@ -376,7 +369,6 @@ def _owlv2_object_detection(
     box_threshold: float,
     image_size: Tuple[int, ...],
     image_bytes: Optional[bytes] = None,
-    fine_tune_id: Optional[str] = None,
 ) -> Dict[str, Any]:
     if image_bytes is None:
         image_bytes = numpy_to_bytes(image)

@@ -389,21 +381,6 @@ def _owlv2_object_detection(
     }
     metadata = {"function_name": "owlv2_object_detection"}
 
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-
-        # we can only execute fine-tuned models with florence2
-        payload = {
-            "prompts": payload["prompts"],
-            "jobId": fine_tune_id,
-            "model": "florence2",
-        }
-
     detections = send_task_inference_request(
         payload,
         "text-to-object-detection",
@@ -440,7 +417,6 @@ def owlv2_object_detection(
     prompt: str,
     image: np.ndarray,
     box_threshold: float = 0.10,
-    fine_tune_id: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """'owlv2_object_detection' is a tool that can detect and count multiple objects
     given a text prompt such as category names or referring expressions on images. The

@@ -452,8 +428,6 @@ def owlv2_object_detection(
         image (np.ndarray): The image to ground the prompt to.
         box_threshold (float, optional): The threshold for the box detection. Defaults
             to 0.10.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and

@@ -475,9 +449,7 @@ def owlv2_object_detection(
     if image_size[0] < 1 or image_size[1] < 1:
         return []
 
-    ret = _owlv2_object_detection(
-        prompt, image, box_threshold, image_size, fine_tune_id=fine_tune_id
-    )
+    ret = _owlv2_object_detection(prompt, image, box_threshold, image_size)
 
     _display_tool_trace(
         owlv2_object_detection.__name__,
@@ -556,7 +528,6 @@ def owlv2_sam2_video_tracking(
     frames: List[np.ndarray],
     box_threshold: float = 0.10,
     chunk_length: Optional[int] = 25,
-    fine_tune_id: Optional[str] = None,
 ) -> List[List[Dict[str, Any]]]:
     """'owlv2_sam2_video_tracking' is a tool that can track and segment multiple
     objects in a video given a text prompt such as category names or referring

@@ -571,8 +542,6 @@ def owlv2_sam2_video_tracking(
             to 0.10.
         chunk_length (Optional[int]): The number of frames to re-run owlv2 to find
             new objects.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
 
     Returns:
         List[List[Dict[str, Any]]]: A list of list of dictionaries containing the

@@ -609,7 +578,6 @@ def owlv2_sam2_video_tracking(
         frames=frames,
         box_threshold=box_threshold,
         chunk_length=chunk_length,
-        fine_tune_id=fine_tune_id,
     )
     _display_tool_trace(
         owlv2_sam2_video_tracking.__name__,
@@ -624,7 +592,8 @@ def owlv2_sam2_video_tracking(
 
 
 def florence2_object_detection(
-    prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
+    prompt: str,
+    image: np.ndarray,
 ) -> List[Dict[str, Any]]:
     """'florence2_object_detection' is a tool that can detect multiple objects given a
     text prompt which can be object names or caption. You can optionally separate the

@@ -635,8 +604,6 @@ def florence2_object_detection(
         prompt (str): The prompt to ground to the image. Use exclusive categories that
             do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to used to detect objects
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and

@@ -653,6 +620,7 @@ def florence2_object_detection(
             {'score': 1.0, 'label': 'coyote', 'bbox': [0.34, 0.21, 0.85, 0.5},
         ]
     """
+
     image_size = image.shape[:2]
     if image_size[0] < 1 or image_size[1] < 1:
         return []

@@ -665,16 +633,6 @@ def florence2_object_detection(
     }
     metadata = {"function_name": "florence2_object_detection"}
 
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-
-        payload["jobId"] = fine_tune_id
-
     detections = send_task_inference_request(
         payload,
         "text-to-object-detection",
@@ -703,7 +661,8 @@ def florence2_object_detection(
 
 
 def florence2_sam2_instance_segmentation(
-    prompt: str, image: np.ndarray, fine_tune_id: Optional[str] = None
+    prompt: str,
+    image: np.ndarray,
 ) -> List[Dict[str, Any]]:
     """'florence2_sam2_instance_segmentation' is a tool that can segment multiple
     objects given a text prompt such as category names or referring expressions. The

@@ -715,8 +674,6 @@ def florence2_sam2_instance_segmentation(
         prompt (str): The prompt to ground to the image. Use exclusive categories that
             do not overlap such as 'person, car' and NOT 'person, athlete'.
         image (np.ndarray): The image to ground the prompt to.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label,

@@ -742,6 +699,7 @@ def florence2_sam2_instance_segmentation(
         },
     ]
     """
+
     if image.shape[0] < 1 or image.shape[1] < 1:
         return []
 
@@ -753,16 +711,6 @@ def florence2_sam2_instance_segmentation(
     }
     metadata = {"function_name": "florence2_sam2_instance_segmentation"}
 
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-
-        payload["jobId"] = fine_tune_id
-
     detections = send_task_inference_request(
         payload,
         "text-to-instance-segmentation",
@@ -792,7 +740,6 @@ def florence2_sam2_video_tracking(
     prompt: str,
     frames: List[np.ndarray],
     chunk_length: Optional[int] = 25,
-    fine_tune_id: Optional[str] = None,
 ) -> List[List[Dict[str, Any]]]:
     """'florence2_sam2_video_tracking' is a tool that can track and segment multiple
     objects in a video given a text prompt such as category names or referring

@@ -806,8 +753,6 @@ def florence2_sam2_video_tracking(
         frames (List[np.ndarray]): The list of frames to ground the prompt to.
         chunk_length (Optional[int]): The number of frames to re-run florence2 to find
             new objects.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
 
     Returns:
         List[List[Dict[str, Any]]]: A list of list of dictionaries containing the

@@ -837,6 +782,7 @@ def florence2_sam2_video_tracking(
         ...
     ]
     """
+
     if len(frames) == 0 or not isinstance(frames, List):
         raise ValueError("Must provide a list of numpy arrays for frames")
 
@@ -851,16 +797,6 @@ def florence2_sam2_video_tracking(
     if chunk_length is not None:
         payload["chunk_length_frames"] = chunk_length  # type: ignore
 
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-
-        payload["jobId"] = fine_tune_id
-
     detections = send_task_inference_request(
         payload,
         "text-to-instance-segmentation",
@@ -1397,7 +1333,7 @@ def custom_od_sam2_video_tracking(
         prompt="",
         frames=frames,
         chunk_length=chunk_length,
-        fine_tune_id=deployment_id,
+        deployment_id=deployment_id,
     )
     _display_tool_trace(
         custom_od_sam2_video_tracking.__name__,
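The hunk above finishes the rename inside the video-tracking path: custom_od_sam2_video_tracking now forwards deployment_id under its own name instead of smuggling it through the old fine_tune_id parameter. A minimal usage sketch, assuming the keyword signature implied by the call above (the frames and the deployment UUID are illustrative, not taken from this diff):

    import numpy as np
    from vision_agent.tools.tools import custom_od_sam2_video_tracking

    # Dummy 10-frame clip; a real caller would decode a video into RGB arrays.
    frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(10)]

    tracks = custom_od_sam2_video_tracking(
        deployment_id="23b3b022-5ebf-4798-9373-20ef36429abf",  # hypothetical deployment ID
        frames=frames,
        chunk_length=25,  # re-run detection every 25 frames
    )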
@@ -1416,7 +1352,6 @@ def _agentic_object_detection(
     image: np.ndarray,
     image_size: Tuple[int, ...],
     image_bytes: Optional[bytes] = None,
-    fine_tune_id: Optional[str] = None,
 ) -> Dict[str, Any]:
     if image_bytes is None:
         image_bytes = numpy_to_bytes(image)

@@ -1428,21 +1363,6 @@ def _agentic_object_detection(
     }
     metadata = {"function_name": "agentic_object_detection"}
 
-    if fine_tune_id is not None:
-        landing_api = LandingPublicAPI()
-        status = landing_api.check_fine_tuning_job(UUID(fine_tune_id))
-        if status is not JobStatus.SUCCEEDED:
-            raise FineTuneModelIsNotReady(
-                f"Fine-tuned model {fine_tune_id} is not ready yet"
-            )
-
-        # we can only execute fine-tuned models with florence2
-        payload = {
-            "prompts": payload["prompts"],
-            "jobId": fine_tune_id,
-            "model": "florence2",
-        }
-
     detections = send_task_inference_request(
         payload,
         "text-to-object-detection",

@@ -1478,7 +1398,6 @@ def _agentic_object_detection(
 def agentic_object_detection(
     prompt: str,
     image: np.ndarray,
-    fine_tune_id: Optional[str] = None,
 ) -> List[Dict[str, Any]]:
     """'agentic_object_detection' is a tool that can detect multiple objects given a
     text prompt such as object names or referring expressions on images. It's

@@ -1490,8 +1409,6 @@ def agentic_object_detection(
         prompt (str): The prompt to ground to the image, only supports a single prompt
             with no commas or periods.
         image (np.ndarray): The image to ground the prompt to.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
 
     Returns:
         List[Dict[str, Any]]: A list of dictionaries containing the score, label, and

@@ -1513,9 +1430,7 @@ def agentic_object_detection(
     if image_size[0] < 1 or image_size[1] < 1:
         return []
 
-    ret = _agentic_object_detection(
-        prompt, image, image_size, fine_tune_id=fine_tune_id
-    )
+    ret = _agentic_object_detection(prompt, image, image_size)
 
     _display_tool_trace(
         agentic_object_detection.__name__,
@@ -1586,7 +1501,6 @@ def agentic_sam2_video_tracking(
     prompt: str,
     frames: List[np.ndarray],
     chunk_length: Optional[int] = 25,
-    fine_tune_id: Optional[str] = None,
 ) -> List[List[Dict[str, Any]]]:
     """'agentic_sam2_video_tracking' is a tool that can track and segment multiple
     objects in a video given a text prompt such as object names or referring

@@ -1601,8 +1515,6 @@ def agentic_sam2_video_tracking(
         frames (List[np.ndarray]): The list of frames to ground the prompt to.
         chunk_length (Optional[int]): The number of frames to re-run agentic object detection to
             to find new objects.
-        fine_tune_id (Optional[str]): If you have a fine-tuned model, you can pass the
-            fine-tuned model ID here to use it.
 
     Returns:
         List[List[Dict[str, Any]]]: A list of list of dictionaries containing the

@@ -1638,7 +1550,6 @@ def agentic_sam2_video_tracking(
         prompt=prompt,
         frames=frames,
         chunk_length=chunk_length,
-        fine_tune_id=fine_tune_id,
     )
     _display_tool_trace(
         agentic_sam2_video_tracking.__name__,
@@ -2797,16 +2708,17 @@ def save_video(
     ):
         raise ValueError("A frame is not a valid NumPy array with shape (H, W, C)")
 
+    output_file: IO[bytes]
     if output_video_path is None:
-        output_video_path = tempfile.NamedTemporaryFile(
-            delete=False, suffix=".mp4"
-        ).name
+        output_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
     else:
         Path(output_video_path).parent.mkdir(parents=True, exist_ok=True)
+        output_file = open(output_video_path, "wb")
 
-    output_video_path = video_writer(frames, fps, output_video_path)
-    _save_video_to_result(output_video_path)
-    return output_video_path
+    with output_file as file:
+        video_writer(frames, fps, file=file)
+    _save_video_to_result(output_file.name)
+    return output_file.name
 
 
 def _save_video_to_result(video_uri: str) -> None:
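save_video now funnels both branches through a single IO[bytes] handle: a NamedTemporaryFile when no path is given, otherwise a freshly opened file at output_video_path. A sketch of both call shapes, assuming the keyword names shown in the hunk (frames and paths are illustrative):

    import numpy as np
    from vision_agent.tools.tools import save_video

    frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(30)]

    tmp_path = save_video(frames)  # no path given: encodes into a temp .mp4 and returns its path
    out_path = save_video(frames, output_video_path="out/clip.mp4")  # parent dirs created, file opened "wb"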
{vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/exceptions.py

@@ -51,13 +51,6 @@ class RemoteSandboxClosedError(RemoteSandboxError):
     is_retryable = True
 
 
-class FineTuneModelIsNotReady(Exception):
-    """Exception raised when the fine-tune model is not ready.
-    If this is raised, it's recommended to wait 5 seconds before trying to use
-    the model again.
-    """
-
-
 class FineTuneModelNotFound(Exception):
     """Exception raised when the fine-tune model is not found.
     If this is raised, it's recommended to try another model id.
{vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/video.py

@@ -1,8 +1,7 @@
 import logging
-import os
 import tempfile
 from functools import lru_cache
-from typing import List, Optional, Tuple
+from typing import IO, List, Optional, Tuple
 
 import av  # type: ignore
 import cv2
@@ -25,39 +24,32 @@ def _resize_frame(frame: np.ndarray) -> np.ndarray:
 def video_writer(
     frames: List[np.ndarray],
     fps: float = _DEFAULT_INPUT_FPS,
-    filename: Optional[str] = None,
-    file_ext: str = ".mp4",
+    file: Optional[IO[bytes]] = None,
 ) -> str:
-    tempf = None
     if isinstance(fps, str):
         # fps could be a string when it's passed in from a web endpoint deployment
         fps = float(fps)
-    if filename is None:
-        tempf = tempfile.NamedTemporaryFile(delete=False, suffix=file_ext)
-        filename = tempf.name
-    container = av.open(filename, "w")
-    stream = container.add_stream("h264", rate=fps)
-    height, width = frames[0].shape[:2]
-    stream.height = height - (height % 2)
-    stream.width = width - (width % 2)
-    stream.pix_fmt = "yuv420p"
-    stream.options = {"crf": "10"}
-    for frame in frames:
-        # Remove the alpha channel (convert RGBA to RGB)
-        frame_rgb = frame[:, :, :3]
-        # Resize the frame to make dimensions divisible by 2
-        frame_rgb = _resize_frame(frame_rgb)
-        av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
-        for packet in stream.encode(av_frame):
+    if file is None:
+        file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
+    with av.open(file, "w") as container:
+        stream = container.add_stream("h264", rate=fps)
+        height, width = frames[0].shape[:2]
+        stream.height = height - (height % 2)
+        stream.width = width - (width % 2)
+        stream.pix_fmt = "yuv420p"
+        stream.options = {"crf": "10"}
+        for frame in frames:
+            # Remove the alpha channel (convert RGBA to RGB)
+            frame_rgb = frame[:, :, :3]
+            # Resize the frame to make dimensions divisible by 2
+            frame_rgb = _resize_frame(frame_rgb)
+            av_frame = av.VideoFrame.from_ndarray(frame_rgb, format="rgb24")
+            for packet in stream.encode(av_frame):
+                container.mux(packet)
+
+        for packet in stream.encode():
             container.mux(packet)
-
-    for packet in stream.encode():
-        container.mux(packet)
-    container.close()
-    # for windows nee to manually close tempfile, cannot use with NamedTemporaryFile(delete=True)
-    if tempf is not None:
-        tempf.close()
-    return filename
+    return file.name
 
 
 def frames_to_bytes(
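After this rewrite video_writer encodes into any writable binary handle via av.open(file, "w") and only creates its own NamedTemporaryFile as a fallback. A sketch of the two call shapes, assuming fps is passed by keyword (frame data is dummy):

    import numpy as np
    from vision_agent.utils.video import video_writer

    frames = [np.zeros((480, 640, 3), dtype=np.uint8) for _ in range(30)]

    tmp_path = video_writer(frames, fps=24.0)  # no file: a temp .mp4 is created and its path returned
    with open("clip.mp4", "wb") as f:  # any IO[bytes] with a .name attribute works
        path = video_writer(frames, fps=24.0, file=f)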
@@ -73,11 +65,10 @@ def frames_to_bytes(
     if isinstance(fps, str):
         # fps could be a string when it's passed in from a web endpoint deployment
         fps = float(fps)
-
-    filename = video_writer(frames, fps, file_ext=file_ext)
-    with open(filename, "rb") as f:
+    with tempfile.NamedTemporaryFile(delete=True, suffix=file_ext) as f:
+        video_writer(frames, fps, f)
+        f.seek(0)
         buffer_bytes = f.read()
-    os.unlink(filename)
     return buffer_bytes
 
 
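frames_to_bytes now reuses that file-object path: it hands the temporary handle to video_writer, seeks back to the start, and reads the encoded bytes before the context manager deletes the file. Usage sketch (dummy frames):

    import numpy as np
    from vision_agent.utils.video import frames_to_bytes

    frames = [np.zeros((240, 320, 3), dtype=np.uint8) for _ in range(5)]
    data = frames_to_bytes(frames, fps=5.0)  # MP4-encoded bytes; no leftover temp file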
{vision_agent-0.2.239 → vision_agent-0.2.241}/vision_agent/utils/video_tracking.py

@@ -54,7 +54,7 @@ def process_segment(
     segment_frames: List[np.ndarray],
     od_model: ODModels,
     prompt: str,
-    fine_tune_id: Optional[str],
+    deployment_id: Optional[str],
     chunk_length: Optional[int],
     image_size: Tuple[int, ...],
     segment_index: int,

@@ -67,7 +67,7 @@ def process_segment(
         segment_frames (List[np.ndarray]): Frames in the segment.
         od_model (ODModels): Object detection model to use.
         prompt (str): Prompt for the model.
-        fine_tune_id (Optional[str]): The model fine-tune ID.
+        deployment_id (Optional[str]): The model deployment ID.
         chunk_length (Optional[int]): Chunk length for processing.
         image_size (Tuple[int, int]): Size of the images.
         segment_index (int): Index of the segment.

@@ -90,7 +90,12 @@ def process_segment(
     for idx in range(0, len(segment_frames), step):
         frame_number = idx
         segment_results[idx], function_name = object_detection_tool(
-            od_model, prompt, segment_index, frame_number, fine_tune_id, segment_frames
+            deployment_id=deployment_id,
+            frame_number=frame_number,
+            od_model=od_model,
+            prompt=prompt,
+            segment_frames=segment_frames,
+            segment_index=segment_index,
         )
 
         transformed_detections = transform_detections(
vision_agent-0.2.239/vision_agent/clients/landing_public_api.py (deleted)

@@ -1,38 +0,0 @@
-import os
-from typing import List
-from uuid import UUID
-
-from requests.exceptions import HTTPError
-
-from vision_agent.clients.http import BaseHTTP
-from vision_agent.models import BboxInputBase64, JobStatus, PromptTask
-from vision_agent.utils.exceptions import FineTuneModelNotFound
-from vision_agent.utils.type_defs import LandingaiAPIKey
-
-
-class LandingPublicAPI(BaseHTTP):
-    def __init__(self) -> None:
-        landing_url = os.environ.get("LANDINGAI_URL", "https://api.landing.ai")
-        landing_api_key = os.environ.get("LANDINGAI_API_KEY", LandingaiAPIKey().api_key)
-        headers = {"Content-Type": "application/json", "apikey": landing_api_key}
-        super().__init__(base_endpoint=landing_url, headers=headers)
-
-    def launch_fine_tuning_job(
-        self, model_name: str, task: PromptTask, bboxes: List[BboxInputBase64]
-    ) -> UUID:
-        url = "v1/agent/jobs/fine-tuning"
-        data = {
-            "model": {"name": model_name, "task": task.value},
-            "bboxes": [bbox.model_dump(by_alias=True) for bbox in bboxes],
-        }
-        response = self.post(url, payload=data)
-        return UUID(response["jobId"])
-
-    def check_fine_tuning_job(self, job_id: UUID) -> JobStatus:
-        url = f"v1/agent/jobs/fine-tuning/{job_id}/status"
-        try:
-            get_job = self.get(url)
-        except HTTPError as err:
-            if err.response.status_code == 404:
-                raise FineTuneModelNotFound()
-        return JobStatus(get_job["status"])
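Net effect of 0.2.241: the LandingPublicAPI client and every fine_tune_id code path are gone, so the detection tools either run their base model or, for custom-trained models, take a deployment_id. A hedged before/after sketch (the ID is the example UUID from the removed docstrings; the 0.2.239 line is reconstructed from the removed signatures above):

    import numpy as np
    from vision_agent.tools.tools import owlv2_object_detection, custom_object_detection

    image = np.zeros((480, 640, 3), dtype=np.uint8)  # dummy image

    # 0.2.239 (no longer possible): owlv2_object_detection("screw", image, fine_tune_id="23b3b022-...")
    detections = owlv2_object_detection("screw", image)  # 0.2.241: base model only

    custom = custom_object_detection(
        deployment_id="23b3b022-5ebf-4798-9373-20ef36429abf",  # hypothetical deployment ID
        image=image,
    )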