PyPI - vision-agent - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

vision-agent 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -33,6 +33,7 @@ from .vision_agent_prompts import (
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
+_MAX_TABULATE_COL_WIDTH = 80
 def parse_json(s: str) -> Any:
@@ -614,7 +615,7 @@ class VisionAgent(Agent):
         self.log_progress(
             f"""Going to run the following tool(s) in sequence:
-{tabulate([tool_results], headers="keys", tablefmt="mixed_grid")}"""
+{tabulate(tabular_data=[tool_results], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
         )
         def parse_tool_results(result: Dict[str, Union[Dict, List]]) -> Any:
@@ -660,6 +661,6 @@ class VisionAgent(Agent):
             task_list = []
         self.log_progress(
             f"""Planned tasks:
-{tabulate(task_list, headers="keys", tablefmt="mixed_grid")}"""
+{tabulate(task_list, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
         )
         return task_list

vision_agent/tools/tools.py CHANGED Viewed

@@ -250,7 +250,7 @@ class GroundingDINO(Tool):
             iou_threshold: the threshold for intersection over union used in nms algorithm. It will suppress the boxes which have iou greater than this threshold.
         Returns:
-            A list of dictionaries containing the labels, scores, and bboxes. Each dictionary contains the detection result for an image.
+            A dictionary containing the labels, scores, and bboxes, which is the detection result for the input image.
         """
         image_size = get_image_size(image)
         image_b64 = convert_to_b64(image)
@@ -346,7 +346,7 @@ class GroundingSAM(Tool):
             iou_threshold: the threshold for intersection over union used in nms algorithm. It will suppress the boxes which have iou greater than this threshold.
         Returns:
-            A list of dictionaries containing the labels, scores, bboxes and masks. Each dictionary contains the segmentation result for an image.
+            A dictionary containing the labels, scores, bboxes and masks for the input image.
         """
         image_size = get_image_size(image)
         image_b64 = convert_to_b64(image)
@@ -357,19 +357,15 @@ class GroundingSAM(Tool):
             "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
         }
         data: Dict[str, Any] = _send_inference_request(request_data, "tools")
-        ret_pred: Dict[str, List] = {"labels": [], "bboxes": [], "masks": []}
         if "bboxes" in data:
-            ret_pred["bboxes"] = [
-                normalize_bbox(box, image_size) for box in data["bboxes"]
-            ]
+            data["bboxes"] = [normalize_bbox(box, image_size) for box in data["bboxes"]]
         if "masks" in data:
-            ret_pred["masks"] = [
+            data["masks"] = [
                 rle_decode(mask_rle=mask, shape=data["mask_shape"])
                 for mask in data["masks"]
             ]
-        ret_pred["labels"] = data["labels"]
-        ret_pred["scores"] = data["scores"]
-        return ret_pred
+        data.pop("mask_shape", None)
+        return data
 class DINOv(Tool):
@@ -643,6 +639,58 @@ class SegIoU(Tool):
         return cast(float, round(iou, 2))
+class BboxContains(Tool):
+    name = "bbox_contains_"
+    description = "Given two bounding boxes, a target bounding box and a region bounding box, 'bbox_contains_' returns the intersection of the two bounding boxes over the target bounding box, reflects the percentage area of the target bounding box overlaps with the region bounding box. This is a good tool for determining if the region object contains the target object."
+    usage = {
+        "required_parameters": [
+            {"name": "target", "type": "List[int]"},
+            {"name": "target_class", "type": "str"},
+            {"name": "region", "type": "List[int]"},
+            {"name": "region_class", "type": "str"},
+        ],
+        "examples": [
+            {
+                "scenario": "Determine if the dog on the couch, bounding box of the dog: [0.2, 0.21, 0.34, 0.42], bounding box of the couch: [0.3, 0.31, 0.44, 0.52]",
+                "parameters": {
+                    "target": [0.2, 0.21, 0.34, 0.42],
+                    "target_class": "dog",
+                    "region": [0.3, 0.31, 0.44, 0.52],
+                    "region_class": "couch",
+                },
+            },
+            {
+                "scenario": "Check if the kid is in the pool? bounding box of the kid: [0.2, 0.21, 0.34, 0.42], bounding box of the pool: [0.3, 0.31, 0.44, 0.52]",
+                "parameters": {
+                    "target": [0.2, 0.21, 0.34, 0.42],
+                    "target_class": "kid",
+                    "region": [0.3, 0.31, 0.44, 0.52],
+                    "region_class": "pool",
+                },
+            },
+        ],
+    }
+    def __call__(
+        self, target: List[int], target_class: str, region: List[int], region_class: str
+    ) -> Dict[str, Union[str, float]]:
+        x1, y1, x2, y2 = target
+        x3, y3, x4, y4 = region
+        xA = max(x1, x3)
+        yA = max(y1, y3)
+        xB = min(x2, x4)
+        yB = min(y2, y4)
+        inter_area = max(0, xB - xA) * max(0, yB - yA)
+        boxa_area = (x2 - x1) * (y2 - y1)
+        iou = inter_area / float(boxa_area)
+        area = round(iou, 2)
+        return {
+            "target_class": target_class,
+            "region_class": region_class,
+            "intersection": area,
+        }
 class BoxDistance(Tool):
     name = "box_distance_"
     description = (
@@ -757,6 +805,7 @@ TOOLS = {
             SegArea,
             BboxIoU,
             SegIoU,
+            BboxContains,
             BoxDistance,
             Calculator,
         ]

{vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.1.4
+Version: 0.1.5
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/RECORD RENAMED Viewed

@@ -5,7 +5,7 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
 vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
 vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=QWIirRBB3ZPg3figWcf8-g9ltFydM1BDn75LbXWbep0,22735
+vision_agent/agent/vision_agent.py,sha256=Deuj28hqRq4wHnD08pU_7fok_EicvlGnDoINYh5hw1k,22853
 vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
 vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
 vision_agent/data/data.py,sha256=Z2l76OrT0GgyuN52OeJqDitUcP0q1rhfdXd1of3GsVo,5128
@@ -20,10 +20,10 @@ vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,
 vision_agent/lmm/lmm.py,sha256=1E7e_S_0fOKnf6mSsEdkXvsIjGmhBGl5XW4By2jvhbY,10045
 vision_agent/tools/__init__.py,sha256=dkzk9amNzTEKULMB1xRJspqEGpzNPGuccWeXrv1xI0U,280
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
-vision_agent/tools/tools.py,sha256=ybhCyutEGzHPKuR0Cu--Nb--KubjYvyzLEzVQYzIMTw,29148
+vision_agent/tools/tools.py,sha256=WIodfggPkz_2LSWn_Kqm9uvQUtCgKy3jmMoPVTwf1bA,31181
 vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
 vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
-vision_agent-0.1.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.1.4.dist-info/METADATA,sha256=FyBYGPHgC0uV7uy7wph8yvdQpEWSACnGR96y6Jt-E6A,6233
-vision_agent-0.1.4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.1.4.dist-info/RECORD,,
+vision_agent-0.1.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.1.5.dist-info/METADATA,sha256=ubzhbZW7oT9sIaIkuM6QObXINZGz5Zcvgjdp7sUcsJE,6233
+vision_agent-0.1.5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.1.5.dist-info/RECORD,,

{vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

vision-agent 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl