PyPI - vision-agent - Versions diffs - 0.2.226__tar.gz → 0.2.227__tar.gz - Mend

vision-agent 0.2.226tar.gz → 0.2.227tar.gz

Files changed (47) hide show

{vision_agent-0.2.226 → vision_agent-0.2.227}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.226
+Version: 0.2.227
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.226 → vision_agent-0.2.227}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.226"
+version = "0.2.227"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"

{vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/__init__.py RENAMED Viewed

@@ -63,6 +63,7 @@ from .tools import (
     video_temporal_localization,
     vit_image_classification,
     vit_nsfw_classification,
+    custom_object_detection,
 )
 __new_tools__ = [

{vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/tools/tools.py RENAMED Viewed

@@ -290,6 +290,13 @@ def od_sam2_video_tracking(
             )
             function_name = "florence2_object_detection"
+        elif od_model == ODModels.CUSTOM:
+            segment_results = custom_object_detection(
+                deployment_id=fine_tune_id,
+                image=segment_frames[frame_number],
+            )
+            function_name = "custom_object_detection"
         else:
             raise NotImplementedError(
                 f"Object detection model '{od_model}' is not implemented."
@@ -1217,6 +1224,139 @@ def countgd_visual_prompt_object_detection(
     return bboxes_formatted
+def custom_object_detection(
+    deployment_id: str,
+    image: np.ndarray,
+    box_threshold: float = 0.1,
+) -> List[Dict[str, Any]]:
+    """'custom_object_detection' is a tool that can detect instances of an
+    object given a deployment_id of a previously finetuned object detection model.
+    It is particularly useful when trying to detect objects that are not well detected by generalist models.
+    It returns a list of bounding boxes with normalized
+    coordinates, label names and associated confidence scores.
+    Parameters:
+        deployment_id (str): The id of the finetuned model.
+        image (np.ndarray): The image that contains instances of the object.
+        box_threshold (float, optional): The threshold for detection. Defaults
+            to 0.1.
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
+            bounding box of the detected objects with normalized coordinates between 0
+            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
+            top-left and xmax and ymax are the coordinates of the bottom-right of the
+            bounding box.
+    Example
+    -------
+        >>> custom_object_detection("abcd1234-5678efg", image)
+        [
+            {'score': 0.49, 'label': 'flower', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+            {'score': 0.68, 'label': 'flower', 'bbox': [0.2, 0.21, 0.45, 0.5]},
+            {'score': 0.78, 'label': 'flower', 'bbox': [0.3, 0.35, 0.48, 0.52]},
+            {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
+        ]
+    """
+    image_size = image.shape[:2]
+    if image_size[0] < 1 or image_size[1] < 1:
+        return []
+    files = [("image", numpy_to_bytes(image))]
+    payload = {
+        "deployment_id": deployment_id,
+        "confidence": box_threshold,
+    }
+    detections: List[List[Dict[str, Any]]] = send_inference_request(
+        payload, "custom-object-detection", files=files, v2=True
+    )
+    bboxes = detections[0]
+    bboxes_formatted = [
+        {
+            "label": bbox["label"],
+            "bbox": normalize_bbox(bbox["bounding_box"], image_size),
+            "score": bbox["score"],
+        }
+        for bbox in bboxes
+    ]
+    display_data = [
+        {
+            "label": bbox["label"],
+            "bbox": bbox["bounding_box"],
+            "score": bbox["score"],
+        }
+        for bbox in bboxes
+    ]
+    _display_tool_trace(
+        custom_object_detection.__name__,
+        payload,
+        display_data,
+        files,
+    )
+    return bboxes_formatted
+def custom_od_sam2_video_tracking(
+    deployment_id: str,
+    frames: List[np.ndarray],
+    chunk_length: Optional[int] = 10,
+) -> List[List[Dict[str, Any]]]:
+    """'custom_od_sam2_video_tracking' is a tool that can segment multiple objects given a
+    custom model with predefined category names.
+    It returns a list of bounding boxes, label names,
+    mask file names and associated probability scores.
+    Parameters:
+        deployment_id (str): The id of the deployed custom model.
+        image (np.ndarray): The image to ground the prompt to.
+        chunk_length (Optional[int]): The number of frames to re-run florence2 to find
+            new objects.
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries containing the score, label,
+            bounding box, and mask of the detected objects with normalized coordinates
+            (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left
+            and xmax and ymax are the coordinates of the bottom-right of the bounding box.
+            The mask is binary 2D numpy array where 1 indicates the object and 0 indicates
+            the background.
+    Example
+    -------
+        >>> custom_od_sam2_video_tracking("abcd1234-5678efg", frames)
+        [
+            [
+                {
+                    'label': '0: dinosaur',
+                    'bbox': [0.1, 0.11, 0.35, 0.4],
+                    'mask': array([[0, 0, 0, ..., 0, 0, 0],
+                        [0, 0, 0, ..., 0, 0, 0],
+                        ...,
+                        [0, 0, 0, ..., 0, 0, 0],
+                        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
+                },
+            ],
+            ...
+        ]
+    """
+    ret = od_sam2_video_tracking(
+        ODModels.CUSTOM,
+        prompt="",
+        frames=frames,
+        chunk_length=chunk_length,
+        fine_tune_id=deployment_id,
+    )
+    _display_tool_trace(
+        custom_od_sam2_video_tracking.__name__,
+        {},
+        ret["display_data"],
+        ret["files"],
+    )
+    return ret["return_data"]  # type: ignore
 def qwen2_vl_images_vqa(prompt: str, images: List[np.ndarray]) -> str:
     """'qwen2_vl_images_vqa' is a tool that can answer any questions about arbitrary
     images including regular images or images of documents or presentations. It can be

{vision_agent-0.2.226 → vision_agent-0.2.227}/vision_agent/utils/video_tracking.py RENAMED Viewed

@@ -17,6 +17,7 @@ class ODModels(str, Enum):
     COUNTGD = "countgd"
     FLORENCE2 = "florence2"
     OWLV2 = "owlv2"
+    CUSTOM = "custom"
 def split_frames_into_segments(