vision-agent 0.2.145__py3-none-any.whl → 0.2.147__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- vision_agent/tools/meta_tools.py +32 -3
- vision_agent/tools/tools.py +48 -31
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.147.dist-info}/METADATA +1 -1
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.147.dist-info}/RECORD +6 -6
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.147.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.145.dist-info → vision_agent-0.2.147.dist-info}/WHEEL +0 -0
vision_agent/tools/meta_tools.py
CHANGED
@@ -643,7 +643,23 @@ def use_object_detection_fine_tuning(
|
|
643
643
|
return output_str
|
644
644
|
|
645
645
|
code = artifacts[name]
|
646
|
-
|
646
|
+
|
647
|
+
patterns_with_fine_tune_id = [
|
648
|
+
(
|
649
|
+
r'florence2_phrase_grounding\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
|
650
|
+
lambda match: f'florence2_phrase_grounding("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
|
651
|
+
),
|
652
|
+
(
|
653
|
+
r'owl_v2_image\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
|
654
|
+
lambda match: f'owl_v2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
|
655
|
+
),
|
656
|
+
(
|
657
|
+
r'florence2_sam2_image\(\s*"([^"]+)"\s*,\s*([^,]+)(?:,\s*"[^"]+")?\s*\)',
|
658
|
+
lambda match: f'florence2_sam2_image("{match.group(1)}", {match.group(2)}, "{fine_tune_id}")',
|
659
|
+
),
|
660
|
+
]
|
661
|
+
|
662
|
+
patterns_without_fine_tune_id = [
|
647
663
|
(
|
648
664
|
r"florence2_phrase_grounding\(\s*([^\)]+)\s*\)",
|
649
665
|
lambda match: f'florence2_phrase_grounding({match.group(1)}, "{fine_tune_id}")',
|
@@ -659,8 +675,21 @@ def use_object_detection_fine_tuning(
|
|
659
675
|
]
|
660
676
|
|
661
677
|
new_code = code
|
662
|
-
|
663
|
-
|
678
|
+
|
679
|
+
for index, (pattern_with_fine_tune_id, replacer_with_fine_tune_id) in enumerate(
|
680
|
+
patterns_with_fine_tune_id
|
681
|
+
):
|
682
|
+
if re.search(pattern_with_fine_tune_id, new_code):
|
683
|
+
new_code = re.sub(
|
684
|
+
pattern_with_fine_tune_id, replacer_with_fine_tune_id, new_code
|
685
|
+
)
|
686
|
+
else:
|
687
|
+
(pattern_without_fine_tune_id, replacer_without_fine_tune_id) = (
|
688
|
+
patterns_without_fine_tune_id[index]
|
689
|
+
)
|
690
|
+
new_code = re.sub(
|
691
|
+
pattern_without_fine_tune_id, replacer_without_fine_tune_id, new_code
|
692
|
+
)
|
664
693
|
|
665
694
|
if new_code == code:
|
666
695
|
output_str = (
|
vision_agent/tools/tools.py
CHANGED
@@ -1759,14 +1759,17 @@ def _save_video_to_result(video_uri: str) -> None:
|
|
1759
1759
|
|
1760
1760
|
|
1761
1761
|
def overlay_bounding_boxes(
|
1762
|
-
|
1763
|
-
|
1762
|
+
medias: Union[np.ndarray, List[np.ndarray]],
|
1763
|
+
bboxes: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
|
1764
|
+
) -> Union[np.ndarray, List[np.ndarray]]:
|
1764
1765
|
"""'overlay_bounding_boxes' is a utility function that displays bounding boxes on
|
1765
1766
|
an image.
|
1766
1767
|
|
1767
1768
|
Parameters:
|
1768
|
-
|
1769
|
-
|
1769
|
+
medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display the
|
1770
|
+
bounding boxes on.
|
1771
|
+
bboxes (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
|
1772
|
+
dictionaries or a list of lists of dictionaries containing the bounding
|
1770
1773
|
boxes.
|
1771
1774
|
|
1772
1775
|
Returns:
|
@@ -1778,41 +1781,54 @@ def overlay_bounding_boxes(
|
|
1778
1781
|
image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
|
1779
1782
|
)
|
1780
1783
|
"""
|
1781
|
-
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
|
1782
1784
|
|
1783
|
-
|
1785
|
+
medias_int: List[np.ndarray] = (
|
1786
|
+
[medias] if isinstance(medias, np.ndarray) else medias
|
1787
|
+
)
|
1788
|
+
bbox_int = [bboxes] if isinstance(bboxes[0], dict) else bboxes
|
1789
|
+
bbox_int = cast(List[List[Dict[str, Any]]], bbox_int)
|
1790
|
+
labels = set([bb["label"] for b in bbox_int for bb in b])
|
1791
|
+
|
1792
|
+
if len(labels) > len(COLORS):
|
1784
1793
|
_LOGGER.warning(
|
1785
1794
|
"Number of unique labels exceeds the number of available colors. Some labels may have the same color."
|
1786
1795
|
)
|
1787
1796
|
|
1788
|
-
color = {
|
1789
|
-
label: COLORS[i % len(COLORS)]
|
1790
|
-
for i, label in enumerate(set([box["label"] for box in bboxes]))
|
1791
|
-
}
|
1792
|
-
bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
|
1797
|
+
color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(labels)}
|
1793
1798
|
|
1794
|
-
|
1795
|
-
|
1796
|
-
|
1797
|
-
font = ImageFont.truetype(
|
1798
|
-
str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
|
1799
|
-
fontsize,
|
1800
|
-
)
|
1799
|
+
frame_out = []
|
1800
|
+
for i, frame in enumerate(medias_int):
|
1801
|
+
pil_image = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
|
1801
1802
|
|
1802
|
-
|
1803
|
-
|
1804
|
-
box = elt["bbox"]
|
1805
|
-
scores = elt["score"]
|
1803
|
+
bboxes = bbox_int[i]
|
1804
|
+
bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
|
1806
1805
|
|
1807
|
-
|
1808
|
-
|
1806
|
+
width, height = pil_image.size
|
1807
|
+
fontsize = max(12, int(min(width, height) / 40))
|
1808
|
+
draw = ImageDraw.Draw(pil_image)
|
1809
|
+
font = ImageFont.truetype(
|
1810
|
+
str(
|
1811
|
+
resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
|
1812
|
+
),
|
1813
|
+
fontsize,
|
1814
|
+
)
|
1809
1815
|
|
1810
|
-
|
1811
|
-
|
1812
|
-
|
1813
|
-
|
1814
|
-
|
1815
|
-
|
1816
|
+
for elt in bboxes:
|
1817
|
+
label = elt["label"]
|
1818
|
+
box = elt["bbox"]
|
1819
|
+
scores = elt["score"]
|
1820
|
+
|
1821
|
+
# denormalize the box if it is normalized
|
1822
|
+
box = denormalize_bbox(box, (height, width))
|
1823
|
+
draw.rectangle(box, outline=color[label], width=4)
|
1824
|
+
text = f"{label}: {scores:.2f}"
|
1825
|
+
text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
|
1826
|
+
draw.rectangle(
|
1827
|
+
(box[0], box[1], text_box[2], text_box[3]), fill=color[label]
|
1828
|
+
)
|
1829
|
+
draw.text((box[0], box[1]), text, fill="black", font=font)
|
1830
|
+
frame_out.append(np.array(pil_image))
|
1831
|
+
return frame_out[0] if len(frame_out) == 1 else frame_out
|
1816
1832
|
|
1817
1833
|
|
1818
1834
|
def _get_text_coords_from_mask(
|
@@ -1852,7 +1868,8 @@ def overlay_segmentation_masks(
|
|
1852
1868
|
medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
|
1853
1869
|
the masks on.
|
1854
1870
|
masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
|
1855
|
-
dictionaries containing the masks, labels
|
1871
|
+
dictionaries or a list of lists of dictionaries containing the masks, labels
|
1872
|
+
and scores.
|
1856
1873
|
draw_label (bool, optional): If True, the labels will be displayed on the image.
|
1857
1874
|
secondary_label_key (str, optional): The key to use for the secondary
|
1858
1875
|
tracking label which is needed in videos to display tracking information.
|
@@ -15,10 +15,10 @@ vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,
|
|
15
15
|
vision_agent/lmm/lmm.py,sha256=B5ClgwvbybVCWkf9opDMLjTtJZemUU4KUkQoRxGh43I,16787
|
16
16
|
vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
|
17
17
|
vision_agent/tools/__init__.py,sha256=zUv3aVPN1MXfyQiQi5To4rkQGtG7mxLQ1NjLI3pxM80,2412
|
18
|
-
vision_agent/tools/meta_tools.py,sha256=
|
18
|
+
vision_agent/tools/meta_tools.py,sha256=skY4nHSH7PFXNBAEKGYI6XZskv8zJ2UX1KbMY2gd_bo,24687
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=5ukuDMxbEH4iKetYR9I7twzsA8ECyP4tVwYXQq54mxI,8020
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=c7SjtZD7YfxhEAGYYe-ExVCBA4NDXmRwerBIbd-XEH8,74557
|
22
22
|
vision_agent/tools/tools_types.py,sha256=JUOZWGW2q-dlJ85CHr9gvo9KQk_rXyjJhi-iwPNn4eM,2397
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.147.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.147.dist-info/METADATA,sha256=juNWGPjUPW3pxyGKPEBV_CwDllMnTj8B6c9Z7hevCyM,13758
|
32
|
+
vision_agent-0.2.147.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.147.dist-info/RECORD,,
|
File without changes
|
File without changes
|