vision-agent 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +3 -2
- vision_agent/tools/tools.py +59 -10
- {vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/METADATA +1 -1
- {vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/RECORD +6 -6
- {vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/LICENSE +0 -0
- {vision_agent-0.1.4.dist-info → vision_agent-0.1.5.dist-info}/WHEEL +0 -0
@@ -33,6 +33,7 @@ from .vision_agent_prompts import (
|
|
33
33
|
|
34
34
|
logging.basicConfig(stream=sys.stdout)
|
35
35
|
_LOGGER = logging.getLogger(__name__)
|
36
|
+
_MAX_TABULATE_COL_WIDTH = 80
|
36
37
|
|
37
38
|
|
38
39
|
def parse_json(s: str) -> Any:
|
@@ -614,7 +615,7 @@ class VisionAgent(Agent):
|
|
614
615
|
|
615
616
|
self.log_progress(
|
616
617
|
f"""Going to run the following tool(s) in sequence:
|
617
|
-
{tabulate([tool_results], headers="keys", tablefmt="mixed_grid")}"""
|
618
|
+
{tabulate(tabular_data=[tool_results], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
618
619
|
)
|
619
620
|
|
620
621
|
def parse_tool_results(result: Dict[str, Union[Dict, List]]) -> Any:
|
@@ -660,6 +661,6 @@ class VisionAgent(Agent):
|
|
660
661
|
task_list = []
|
661
662
|
self.log_progress(
|
662
663
|
f"""Planned tasks:
|
663
|
-
{tabulate(task_list, headers="keys", tablefmt="mixed_grid")}"""
|
664
|
+
{tabulate(task_list, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
|
664
665
|
)
|
665
666
|
return task_list
|
vision_agent/tools/tools.py
CHANGED
@@ -250,7 +250,7 @@ class GroundingDINO(Tool):
|
|
250
250
|
iou_threshold: the threshold for intersection over union used in nms algorithm. It will suppress the boxes which have iou greater than this threshold.
|
251
251
|
|
252
252
|
Returns:
|
253
|
-
A
|
253
|
+
A dictionary containing the labels, scores, and bboxes, which is the detection result for the input image.
|
254
254
|
"""
|
255
255
|
image_size = get_image_size(image)
|
256
256
|
image_b64 = convert_to_b64(image)
|
@@ -346,7 +346,7 @@ class GroundingSAM(Tool):
|
|
346
346
|
iou_threshold: the threshold for intersection over union used in nms algorithm. It will suppress the boxes which have iou greater than this threshold.
|
347
347
|
|
348
348
|
Returns:
|
349
|
-
A
|
349
|
+
A dictionary containing the labels, scores, bboxes and masks for the input image.
|
350
350
|
"""
|
351
351
|
image_size = get_image_size(image)
|
352
352
|
image_b64 = convert_to_b64(image)
|
@@ -357,19 +357,15 @@ class GroundingSAM(Tool):
|
|
357
357
|
"kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
|
358
358
|
}
|
359
359
|
data: Dict[str, Any] = _send_inference_request(request_data, "tools")
|
360
|
-
ret_pred: Dict[str, List] = {"labels": [], "bboxes": [], "masks": []}
|
361
360
|
if "bboxes" in data:
|
362
|
-
|
363
|
-
normalize_bbox(box, image_size) for box in data["bboxes"]
|
364
|
-
]
|
361
|
+
data["bboxes"] = [normalize_bbox(box, image_size) for box in data["bboxes"]]
|
365
362
|
if "masks" in data:
|
366
|
-
|
363
|
+
data["masks"] = [
|
367
364
|
rle_decode(mask_rle=mask, shape=data["mask_shape"])
|
368
365
|
for mask in data["masks"]
|
369
366
|
]
|
370
|
-
|
371
|
-
|
372
|
-
return ret_pred
|
367
|
+
data.pop("mask_shape", None)
|
368
|
+
return data
|
373
369
|
|
374
370
|
|
375
371
|
class DINOv(Tool):
|
@@ -643,6 +639,58 @@ class SegIoU(Tool):
|
|
643
639
|
return cast(float, round(iou, 2))
|
644
640
|
|
645
641
|
|
642
|
+
class BboxContains(Tool):
|
643
|
+
name = "bbox_contains_"
|
644
|
+
description = "Given two bounding boxes, a target bounding box and a region bounding box, 'bbox_contains_' returns the intersection of the two bounding boxes over the target bounding box, reflects the percentage area of the target bounding box overlaps with the region bounding box. This is a good tool for determining if the region object contains the target object."
|
645
|
+
usage = {
|
646
|
+
"required_parameters": [
|
647
|
+
{"name": "target", "type": "List[int]"},
|
648
|
+
{"name": "target_class", "type": "str"},
|
649
|
+
{"name": "region", "type": "List[int]"},
|
650
|
+
{"name": "region_class", "type": "str"},
|
651
|
+
],
|
652
|
+
"examples": [
|
653
|
+
{
|
654
|
+
"scenario": "Determine if the dog on the couch, bounding box of the dog: [0.2, 0.21, 0.34, 0.42], bounding box of the couch: [0.3, 0.31, 0.44, 0.52]",
|
655
|
+
"parameters": {
|
656
|
+
"target": [0.2, 0.21, 0.34, 0.42],
|
657
|
+
"target_class": "dog",
|
658
|
+
"region": [0.3, 0.31, 0.44, 0.52],
|
659
|
+
"region_class": "couch",
|
660
|
+
},
|
661
|
+
},
|
662
|
+
{
|
663
|
+
"scenario": "Check if the kid is in the pool? bounding box of the kid: [0.2, 0.21, 0.34, 0.42], bounding box of the pool: [0.3, 0.31, 0.44, 0.52]",
|
664
|
+
"parameters": {
|
665
|
+
"target": [0.2, 0.21, 0.34, 0.42],
|
666
|
+
"target_class": "kid",
|
667
|
+
"region": [0.3, 0.31, 0.44, 0.52],
|
668
|
+
"region_class": "pool",
|
669
|
+
},
|
670
|
+
},
|
671
|
+
],
|
672
|
+
}
|
673
|
+
|
674
|
+
def __call__(
|
675
|
+
self, target: List[int], target_class: str, region: List[int], region_class: str
|
676
|
+
) -> Dict[str, Union[str, float]]:
|
677
|
+
x1, y1, x2, y2 = target
|
678
|
+
x3, y3, x4, y4 = region
|
679
|
+
xA = max(x1, x3)
|
680
|
+
yA = max(y1, y3)
|
681
|
+
xB = min(x2, x4)
|
682
|
+
yB = min(y2, y4)
|
683
|
+
inter_area = max(0, xB - xA) * max(0, yB - yA)
|
684
|
+
boxa_area = (x2 - x1) * (y2 - y1)
|
685
|
+
iou = inter_area / float(boxa_area)
|
686
|
+
area = round(iou, 2)
|
687
|
+
return {
|
688
|
+
"target_class": target_class,
|
689
|
+
"region_class": region_class,
|
690
|
+
"intersection": area,
|
691
|
+
}
|
692
|
+
|
693
|
+
|
646
694
|
class BoxDistance(Tool):
|
647
695
|
name = "box_distance_"
|
648
696
|
description = (
|
@@ -757,6 +805,7 @@ TOOLS = {
|
|
757
805
|
SegArea,
|
758
806
|
BboxIoU,
|
759
807
|
SegIoU,
|
808
|
+
BboxContains,
|
760
809
|
BoxDistance,
|
761
810
|
Calculator,
|
762
811
|
]
|
@@ -5,7 +5,7 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
|
|
5
5
|
vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
|
6
6
|
vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
|
7
7
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
8
|
-
vision_agent/agent/vision_agent.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent.py,sha256=Deuj28hqRq4wHnD08pU_7fok_EicvlGnDoINYh5hw1k,22853
|
9
9
|
vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
|
10
10
|
vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
|
11
11
|
vision_agent/data/data.py,sha256=Z2l76OrT0GgyuN52OeJqDitUcP0q1rhfdXd1of3GsVo,5128
|
@@ -20,10 +20,10 @@ vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,
|
|
20
20
|
vision_agent/lmm/lmm.py,sha256=1E7e_S_0fOKnf6mSsEdkXvsIjGmhBGl5XW4By2jvhbY,10045
|
21
21
|
vision_agent/tools/__init__.py,sha256=dkzk9amNzTEKULMB1xRJspqEGpzNPGuccWeXrv1xI0U,280
|
22
22
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
23
|
-
vision_agent/tools/tools.py,sha256=
|
23
|
+
vision_agent/tools/tools.py,sha256=WIodfggPkz_2LSWn_Kqm9uvQUtCgKy3jmMoPVTwf1bA,31181
|
24
24
|
vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
25
25
|
vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
|
26
|
-
vision_agent-0.1.
|
27
|
-
vision_agent-0.1.
|
28
|
-
vision_agent-0.1.
|
29
|
-
vision_agent-0.1.
|
26
|
+
vision_agent-0.1.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
27
|
+
vision_agent-0.1.5.dist-info/METADATA,sha256=ubzhbZW7oT9sIaIkuM6QObXINZGz5Zcvgjdp7sUcsJE,6233
|
28
|
+
vision_agent-0.1.5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
29
|
+
vision_agent-0.1.5.dist-info/RECORD,,
|
File without changes
|
File without changes
|