vision-agent 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,7 @@ from .vision_agent_prompts import (
33
33
 
34
34
  logging.basicConfig(stream=sys.stdout)
35
35
  _LOGGER = logging.getLogger(__name__)
36
+ _MAX_TABULATE_COL_WIDTH = 80
36
37
 
37
38
 
38
39
  def parse_json(s: str) -> Any:
@@ -614,7 +615,7 @@ class VisionAgent(Agent):
614
615
 
615
616
  self.log_progress(
616
617
  f"""Going to run the following tool(s) in sequence:
617
- {tabulate([tool_results], headers="keys", tablefmt="mixed_grid")}"""
618
+ {tabulate(tabular_data=[tool_results], headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
618
619
  )
619
620
 
620
621
  def parse_tool_results(result: Dict[str, Union[Dict, List]]) -> Any:
@@ -660,6 +661,6 @@ class VisionAgent(Agent):
660
661
  task_list = []
661
662
  self.log_progress(
662
663
  f"""Planned tasks:
663
- {tabulate(task_list, headers="keys", tablefmt="mixed_grid")}"""
664
+ {tabulate(task_list, headers="keys", tablefmt="mixed_grid", maxcolwidths=_MAX_TABULATE_COL_WIDTH)}"""
664
665
  )
665
666
  return task_list
@@ -250,7 +250,7 @@ class GroundingDINO(Tool):
250
250
  iou_threshold: the threshold for intersection over union used in nms algorithm. It will suppress the boxes which have iou greater than this threshold.
251
251
 
252
252
  Returns:
253
- A list of dictionaries containing the labels, scores, and bboxes. Each dictionary contains the detection result for an image.
253
+ A dictionary containing the labels, scores, and bboxes, which is the detection result for the input image.
254
254
  """
255
255
  image_size = get_image_size(image)
256
256
  image_b64 = convert_to_b64(image)
@@ -346,7 +346,7 @@ class GroundingSAM(Tool):
346
346
  iou_threshold: the threshold for intersection over union used in nms algorithm. It will suppress the boxes which have iou greater than this threshold.
347
347
 
348
348
  Returns:
349
- A list of dictionaries containing the labels, scores, bboxes and masks. Each dictionary contains the segmentation result for an image.
349
+ A dictionary containing the labels, scores, bboxes and masks for the input image.
350
350
  """
351
351
  image_size = get_image_size(image)
352
352
  image_b64 = convert_to_b64(image)
@@ -357,19 +357,15 @@ class GroundingSAM(Tool):
357
357
  "kwargs": {"box_threshold": box_threshold, "iou_threshold": iou_threshold},
358
358
  }
359
359
  data: Dict[str, Any] = _send_inference_request(request_data, "tools")
360
- ret_pred: Dict[str, List] = {"labels": [], "bboxes": [], "masks": []}
361
360
  if "bboxes" in data:
362
- ret_pred["bboxes"] = [
363
- normalize_bbox(box, image_size) for box in data["bboxes"]
364
- ]
361
+ data["bboxes"] = [normalize_bbox(box, image_size) for box in data["bboxes"]]
365
362
  if "masks" in data:
366
- ret_pred["masks"] = [
363
+ data["masks"] = [
367
364
  rle_decode(mask_rle=mask, shape=data["mask_shape"])
368
365
  for mask in data["masks"]
369
366
  ]
370
- ret_pred["labels"] = data["labels"]
371
- ret_pred["scores"] = data["scores"]
372
- return ret_pred
367
+ data.pop("mask_shape", None)
368
+ return data
373
369
 
374
370
 
375
371
  class DINOv(Tool):
@@ -643,6 +639,58 @@ class SegIoU(Tool):
643
639
  return cast(float, round(iou, 2))
644
640
 
645
641
 
642
+ class BboxContains(Tool):
643
+ name = "bbox_contains_"
644
+ description = "Given two bounding boxes, a target bounding box and a region bounding box, 'bbox_contains_' returns the intersection of the two bounding boxes over the target bounding box, reflects the percentage area of the target bounding box overlaps with the region bounding box. This is a good tool for determining if the region object contains the target object."
645
+ usage = {
646
+ "required_parameters": [
647
+ {"name": "target", "type": "List[int]"},
648
+ {"name": "target_class", "type": "str"},
649
+ {"name": "region", "type": "List[int]"},
650
+ {"name": "region_class", "type": "str"},
651
+ ],
652
+ "examples": [
653
+ {
654
+ "scenario": "Determine if the dog on the couch, bounding box of the dog: [0.2, 0.21, 0.34, 0.42], bounding box of the couch: [0.3, 0.31, 0.44, 0.52]",
655
+ "parameters": {
656
+ "target": [0.2, 0.21, 0.34, 0.42],
657
+ "target_class": "dog",
658
+ "region": [0.3, 0.31, 0.44, 0.52],
659
+ "region_class": "couch",
660
+ },
661
+ },
662
+ {
663
+ "scenario": "Check if the kid is in the pool? bounding box of the kid: [0.2, 0.21, 0.34, 0.42], bounding box of the pool: [0.3, 0.31, 0.44, 0.52]",
664
+ "parameters": {
665
+ "target": [0.2, 0.21, 0.34, 0.42],
666
+ "target_class": "kid",
667
+ "region": [0.3, 0.31, 0.44, 0.52],
668
+ "region_class": "pool",
669
+ },
670
+ },
671
+ ],
672
+ }
673
+
674
+ def __call__(
675
+ self, target: List[int], target_class: str, region: List[int], region_class: str
676
+ ) -> Dict[str, Union[str, float]]:
677
+ x1, y1, x2, y2 = target
678
+ x3, y3, x4, y4 = region
679
+ xA = max(x1, x3)
680
+ yA = max(y1, y3)
681
+ xB = min(x2, x4)
682
+ yB = min(y2, y4)
683
+ inter_area = max(0, xB - xA) * max(0, yB - yA)
684
+ boxa_area = (x2 - x1) * (y2 - y1)
685
+ iou = inter_area / float(boxa_area)
686
+ area = round(iou, 2)
687
+ return {
688
+ "target_class": target_class,
689
+ "region_class": region_class,
690
+ "intersection": area,
691
+ }
692
+
693
+
646
694
  class BoxDistance(Tool):
647
695
  name = "box_distance_"
648
696
  description = (
@@ -757,6 +805,7 @@ TOOLS = {
757
805
  SegArea,
758
806
  BboxIoU,
759
807
  SegIoU,
808
+ BboxContains,
760
809
  BoxDistance,
761
810
  Calculator,
762
811
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.1.4
3
+ Version: 0.1.5
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -5,7 +5,7 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
5
5
  vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
6
6
  vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
7
7
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
8
- vision_agent/agent/vision_agent.py,sha256=QWIirRBB3ZPg3figWcf8-g9ltFydM1BDn75LbXWbep0,22735
8
+ vision_agent/agent/vision_agent.py,sha256=Deuj28hqRq4wHnD08pU_7fok_EicvlGnDoINYh5hw1k,22853
9
9
  vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
10
10
  vision_agent/data/__init__.py,sha256=YU-5g3LbEQ6a4drz0RLGTagXMVU2Z4Xr3RlfWE-R0jU,46
11
11
  vision_agent/data/data.py,sha256=Z2l76OrT0GgyuN52OeJqDitUcP0q1rhfdXd1of3GsVo,5128
@@ -20,10 +20,10 @@ vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,
20
20
  vision_agent/lmm/lmm.py,sha256=1E7e_S_0fOKnf6mSsEdkXvsIjGmhBGl5XW4By2jvhbY,10045
21
21
  vision_agent/tools/__init__.py,sha256=dkzk9amNzTEKULMB1xRJspqEGpzNPGuccWeXrv1xI0U,280
22
22
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
23
- vision_agent/tools/tools.py,sha256=ybhCyutEGzHPKuR0Cu--Nb--KubjYvyzLEzVQYzIMTw,29148
23
+ vision_agent/tools/tools.py,sha256=WIodfggPkz_2LSWn_Kqm9uvQUtCgKy3jmMoPVTwf1bA,31181
24
24
  vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
25
25
  vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
26
- vision_agent-0.1.4.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
- vision_agent-0.1.4.dist-info/METADATA,sha256=FyBYGPHgC0uV7uy7wph8yvdQpEWSACnGR96y6Jt-E6A,6233
28
- vision_agent-0.1.4.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
29
- vision_agent-0.1.4.dist-info/RECORD,,
26
+ vision_agent-0.1.5.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
27
+ vision_agent-0.1.5.dist-info/METADATA,sha256=ubzhbZW7oT9sIaIkuM6QObXINZGz5Zcvgjdp7sUcsJE,6233
28
+ vision_agent-0.1.5.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
29
+ vision_agent-0.1.5.dist-info/RECORD,,