vision-agent 0.2.145__py3-none-any.whl → 0.2.146__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +48 -31
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.146.dist-info}/METADATA +1 -1
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.146.dist-info}/RECORD +5 -5
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.146.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.146.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -1759,14 +1759,17 @@ def _save_video_to_result(video_uri: str) -> None:
|
|
1759
1759
|
|
1760
1760
|
|
1761
1761
|
def overlay_bounding_boxes(
|
1762
|
-
|
1763
|
-
|
1762
|
+
medias: Union[np.ndarray, List[np.ndarray]],
|
1763
|
+
bboxes: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
|
1764
|
+
) -> Union[np.ndarray, List[np.ndarray]]:
|
1764
1765
|
"""'overlay_bounding_boxes' is a utility function that displays bounding boxes on
|
1765
1766
|
an image.
|
1766
1767
|
|
1767
1768
|
Parameters:
|
1768
|
-
|
1769
|
-
|
1769
|
+
medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display the
|
1770
|
+
bounding boxes on.
|
1771
|
+
bboxes (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
|
1772
|
+
dictionaries or a list of lists of dictionaries containing the bounding
|
1770
1773
|
boxes.
|
1771
1774
|
|
1772
1775
|
Returns:
|
@@ -1778,41 +1781,54 @@ def overlay_bounding_boxes(
|
|
1778
1781
|
image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
|
1779
1782
|
)
|
1780
1783
|
"""
|
1781
|
-
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
|
1782
1784
|
|
1783
|
-
|
1785
|
+
medias_int: List[np.ndarray] = (
|
1786
|
+
[medias] if isinstance(medias, np.ndarray) else medias
|
1787
|
+
)
|
1788
|
+
bbox_int = [bboxes] if isinstance(bboxes[0], dict) else bboxes
|
1789
|
+
bbox_int = cast(List[List[Dict[str, Any]]], bbox_int)
|
1790
|
+
labels = set([bb["label"] for b in bbox_int for bb in b])
|
1791
|
+
|
1792
|
+
if len(labels) > len(COLORS):
|
1784
1793
|
_LOGGER.warning(
|
1785
1794
|
"Number of unique labels exceeds the number of available colors. Some labels may have the same color."
|
1786
1795
|
)
|
1787
1796
|
|
1788
|
-
color = {
|
1789
|
-
label: COLORS[i % len(COLORS)]
|
1790
|
-
for i, label in enumerate(set([box["label"] for box in bboxes]))
|
1791
|
-
}
|
1792
|
-
bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
|
1797
|
+
color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(labels)}
|
1793
1798
|
|
1794
|
-
|
1795
|
-
|
1796
|
-
|
1797
|
-
font = ImageFont.truetype(
|
1798
|
-
str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
|
1799
|
-
fontsize,
|
1800
|
-
)
|
1799
|
+
frame_out = []
|
1800
|
+
for i, frame in enumerate(medias_int):
|
1801
|
+
pil_image = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
|
1801
1802
|
|
1802
|
-
|
1803
|
-
|
1804
|
-
box = elt["bbox"]
|
1805
|
-
scores = elt["score"]
|
1803
|
+
bboxes = bbox_int[i]
|
1804
|
+
bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
|
1806
1805
|
|
1807
|
-
|
1808
|
-
|
1806
|
+
width, height = pil_image.size
|
1807
|
+
fontsize = max(12, int(min(width, height) / 40))
|
1808
|
+
draw = ImageDraw.Draw(pil_image)
|
1809
|
+
font = ImageFont.truetype(
|
1810
|
+
str(
|
1811
|
+
resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
|
1812
|
+
),
|
1813
|
+
fontsize,
|
1814
|
+
)
|
1809
1815
|
|
1810
|
-
|
1811
|
-
|
1812
|
-
|
1813
|
-
|
1814
|
-
|
1815
|
-
|
1816
|
+
for elt in bboxes:
|
1817
|
+
label = elt["label"]
|
1818
|
+
box = elt["bbox"]
|
1819
|
+
scores = elt["score"]
|
1820
|
+
|
1821
|
+
# denormalize the box if it is normalized
|
1822
|
+
box = denormalize_bbox(box, (height, width))
|
1823
|
+
draw.rectangle(box, outline=color[label], width=4)
|
1824
|
+
text = f"{label}: {scores:.2f}"
|
1825
|
+
text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
|
1826
|
+
draw.rectangle(
|
1827
|
+
(box[0], box[1], text_box[2], text_box[3]), fill=color[label]
|
1828
|
+
)
|
1829
|
+
draw.text((box[0], box[1]), text, fill="black", font=font)
|
1830
|
+
frame_out.append(np.array(pil_image))
|
1831
|
+
return frame_out[0] if len(frame_out) == 1 else frame_out
|
1816
1832
|
|
1817
1833
|
|
1818
1834
|
def _get_text_coords_from_mask(
|
@@ -1852,7 +1868,8 @@ def overlay_segmentation_masks(
|
|
1852
1868
|
medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
|
1853
1869
|
the masks on.
|
1854
1870
|
masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
|
1855
|
-
dictionaries containing the masks, labels
|
1871
|
+
dictionaries or a list of lists of dictionaries containing the masks, labels
|
1872
|
+
and scores.
|
1856
1873
|
draw_label (bool, optional): If True, the labels will be displayed on the image.
|
1857
1874
|
secondary_label_key (str, optional): The key to use for the secondary
|
1858
1875
|
tracking label which is needed in videos to display tracking information.
|
@@ -18,7 +18,7 @@ vision_agent/tools/__init__.py,sha256=zUv3aVPN1MXfyQiQi5To4rkQGtG7mxLQ1NjLI3pxM8
|
|
18
18
|
vision_agent/tools/meta_tools.py,sha256=iHvMeBktWcVi-0DOrSMak1gsZrM_VKJlAq1mAFbBemE,23477
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=5ukuDMxbEH4iKetYR9I7twzsA8ECyP4tVwYXQq54mxI,8020
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=c7SjtZD7YfxhEAGYYe-ExVCBA4NDXmRwerBIbd-XEH8,74557
|
22
22
|
vision_agent/tools/tools_types.py,sha256=JUOZWGW2q-dlJ85CHr9gvo9KQk_rXyjJhi-iwPNn4eM,2397
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.146.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.146.dist-info/METADATA,sha256=0xtIEPRJgc8ifV8z9C6OoLyi9w7wqQV7mpUmrWsLSVM,13758
|
32
|
+
vision_agent-0.2.146.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.146.dist-info/RECORD,,
|
File without changes
|
File without changes
|