PyPI - vision-agent - Versions diffs - 0.2.226__py3-none-any.whl → 0.2.227__py3-none-any.whl - Mend

vision-agent 0.2.226__py3-none-any.whl → 0.2.227__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

vision_agent/tools/__init__.py CHANGED Viewed

@@ -63,6 +63,7 @@ from .tools import (
     video_temporal_localization,
     vit_image_classification,
     vit_nsfw_classification,
+    custom_object_detection,
 )
 __new_tools__ = [

vision_agent/tools/tools.py CHANGED Viewed

@@ -290,6 +290,13 @@ def od_sam2_video_tracking(
             )
             function_name = "florence2_object_detection"
+        elif od_model == ODModels.CUSTOM:
+            segment_results = custom_object_detection(
+                deployment_id=fine_tune_id,
+                image=segment_frames[frame_number],
+            )
+            function_name = "custom_object_detection"
         else:
             raise NotImplementedError(
                 f"Object detection model '{od_model}' is not implemented."
@@ -1217,6 +1224,139 @@ def countgd_visual_prompt_object_detection(
     return bboxes_formatted
+def custom_object_detection(
+    deployment_id: str,
+    image: np.ndarray,
+    box_threshold: float = 0.1,
+) -> List[Dict[str, Any]]:
+    """'custom_object_detection' is a tool that can detect instances of an
+    object given a deployment_id of a previously finetuned object detection model.
+    It is particularly useful when trying to detect objects that are not well detected by generalist models.
+    It returns a list of bounding boxes with normalized
+    coordinates, label names and associated confidence scores.
+    Parameters:
+        deployment_id (str): The id of the finetuned model.
+        image (np.ndarray): The image that contains instances of the object.
+        box_threshold (float, optional): The threshold for detection. Defaults
+            to 0.1.
+    Returns:
+        List[Dict[str, Any]]: A list of dictionaries containing the score, label, and
+            bounding box of the detected objects with normalized coordinates between 0
+            and 1 (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the
+            top-left and xmax and ymax are the coordinates of the bottom-right of the
+            bounding box.
+    Example
+    -------
+        >>> custom_object_detection("abcd1234-5678efg", image)
+        [
+            {'score': 0.49, 'label': 'flower', 'bbox': [0.1, 0.11, 0.35, 0.4]},
+            {'score': 0.68, 'label': 'flower', 'bbox': [0.2, 0.21, 0.45, 0.5]},
+            {'score': 0.78, 'label': 'flower', 'bbox': [0.3, 0.35, 0.48, 0.52]},
+            {'score': 0.98, 'label': 'flower', 'bbox': [0.44, 0.24, 0.49, 0.58]},
+        ]
+    """
+    image_size = image.shape[:2]
+    if image_size[0] < 1 or image_size[1] < 1:
+        return []
+    files = [("image", numpy_to_bytes(image))]
+    payload = {
+        "deployment_id": deployment_id,
+        "confidence": box_threshold,
+    }
+    detections: List[List[Dict[str, Any]]] = send_inference_request(
+        payload, "custom-object-detection", files=files, v2=True
+    )
+    bboxes = detections[0]
+    bboxes_formatted = [
+        {
+            "label": bbox["label"],
+            "bbox": normalize_bbox(bbox["bounding_box"], image_size),
+            "score": bbox["score"],
+        }
+        for bbox in bboxes
+    ]
+    display_data = [
+        {
+            "label": bbox["label"],
+            "bbox": bbox["bounding_box"],
+            "score": bbox["score"],
+        }
+        for bbox in bboxes
+    ]
+    _display_tool_trace(
+        custom_object_detection.__name__,
+        payload,
+        display_data,
+        files,
+    )
+    return bboxes_formatted
+def custom_od_sam2_video_tracking(
+    deployment_id: str,
+    frames: List[np.ndarray],
+    chunk_length: Optional[int] = 10,
+) -> List[List[Dict[str, Any]]]:
+    """'custom_od_sam2_video_tracking' is a tool that can segment multiple objects given a
+    custom model with predefined category names.
+    It returns a list of bounding boxes, label names,
+    mask file names and associated probability scores.
+    Parameters:
+        deployment_id (str): The id of the deployed custom model.
+        frames (List[np.ndarray]): The list of video frames to run the custom model on.
+        chunk_length (Optional[int]): The number of frames after which the custom
+            object detection model is re-run to find new objects. Defaults to 10.
+    Returns:
+        List[List[Dict[str, Any]]]: A list of lists of dictionaries containing the score, label,
+            bounding box, and mask of the detected objects with normalized coordinates
+            (xmin, ymin, xmax, ymax). xmin and ymin are the coordinates of the top-left
+            and xmax and ymax are the coordinates of the bottom-right of the bounding box.
+            The mask is binary 2D numpy array where 1 indicates the object and 0 indicates
+            the background.
+    Example
+    -------
+        >>> custom_od_sam2_video_tracking("abcd1234-5678efg", frames)
+        [
+            [
+                {
+                    'label': '0: dinosaur',
+                    'bbox': [0.1, 0.11, 0.35, 0.4],
+                    'mask': array([[0, 0, 0, ..., 0, 0, 0],
+                        [0, 0, 0, ..., 0, 0, 0],
+                        ...,
+                        [0, 0, 0, ..., 0, 0, 0],
+                        [0, 0, 0, ..., 0, 0, 0]], dtype=uint8),
+                },
+            ],
+            ...
+        ]
+    """
+    ret = od_sam2_video_tracking(
+        ODModels.CUSTOM,
+        prompt="",
+        frames=frames,
+        chunk_length=chunk_length,
+        fine_tune_id=deployment_id,
+    )
+    _display_tool_trace(
+        custom_od_sam2_video_tracking.__name__,
+        {},
+        ret["display_data"],
+        ret["files"],
+    )
+    return ret["return_data"]  # type: ignore
 def qwen2_vl_images_vqa(prompt: str, images: List[np.ndarray]) -> str:
     """'qwen2_vl_images_vqa' is a tool that can answer any questions about arbitrary
     images including regular images or images of documents or presentations. It can be

vision_agent/utils/video_tracking.py CHANGED Viewed

@@ -17,6 +17,7 @@ class ODModels(str, Enum):
     COUNTGD = "countgd"
     FLORENCE2 = "florence2"
     OWLV2 = "owlv2"
+    CUSTOM = "custom"
 def split_frames_into_segments(

{vision_agent-0.2.226.dist-info → vision_agent-0.2.227.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.226
+Version: 0.2.227
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.226.dist-info → vision_agent-0.2.227.dist-info}/RECORD RENAMED Viewed

@@ -26,12 +26,12 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
 vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
 vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
-vision_agent/tools/__init__.py,sha256=15O7eQVn0bitmzUO5OxKdA618PoiLt6Z02gmKsSNMFM,2765
+vision_agent/tools/__init__.py,sha256=fcucnAzr5Hue9xSqpBgA7RcRJP2CgAgQJ31p_R5lg-I,2794
 vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
 vision_agent/tools/planner_tools.py,sha256=qQvPuCif-KbFi7KsXKkTCfpgEQEJJ6oq6WB3gOuG2Xg,13686
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=q9cqXO2AvigUdO1krjnOy8o0goYhgS6eILl6-F5Kxyk,10211
-vision_agent/tools/tools.py,sha256=zqoo4ml9ZS99kOeOIN6Zplq7pxOwBrVZKKFUVIzsjfw,91712
+vision_agent/tools/tools.py,sha256=36f0qAhQfA5lDhYv5BKpHfHgBVEBgOD-XNVHG5K4HLY,96619
 vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/utils/__init__.py,sha256=QKk4zVjMwGxQI0MQ-aZZA50N-qItxRY4EB9CwQkZ2HY,185
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -40,8 +40,8 @@ vision_agent/utils/image_utils.py,sha256=z_ONgcza125B10NkoGwPOzXnL470bpTWZbkB16N
 vision_agent/utils/sim.py,sha256=qr-6UWAxxGwtwIAKZjZCY_pu9VwBI_TTB8bfrGsaABg,9282
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
-vision_agent/utils/video_tracking.py,sha256=EeOiSY8gjvvneuAnv-BO7yOyMBF_-1Irk_lLLOt3bDM,9452
-vision_agent-0.2.226.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.226.dist-info/METADATA,sha256=_7jZokNbQLK6Ups2psyRKbPDjUIzU3daxCpfrHZ6gSU,20039
-vision_agent-0.2.226.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.226.dist-info/RECORD,,
+vision_agent/utils/video_tracking.py,sha256=7ZiFBqQRTid5ytPmkrAGQUiVMr-twzib8Ha2hN3JsR0,9474
+vision_agent-0.2.227.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.227.dist-info/METADATA,sha256=qFefkLzCo7G98LyhIPqYzPOUv5nyvOK84DJvUWmeqcc,20039
+vision_agent-0.2.227.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.227.dist-info/RECORD,,

{vision_agent-0.2.226.dist-info → vision_agent-0.2.227.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.226.dist-info → vision_agent-0.2.227.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.226__py3-none-any.whl → 0.2.227__py3-none-any.whl

vision-agent 0.2.226__py3-none-any.whl → 0.2.227__py3-none-any.whl