vision-agent 0.2.144__py3-none-any.whl → 0.2.146__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +5 -14
- vision_agent/tools/tools.py +54 -32
- {vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/METADATA +1 -1
- {vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/RECORD +6 -6
- {vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.144.dist-info → vision_agent-0.2.146.dist-info}/WHEEL +0 -0
@@ -30,12 +30,6 @@ WORKSPACE.mkdir(parents=True, exist_ok=True)
|
|
30
30
|
if str(WORKSPACE) != "":
|
31
31
|
os.environ["PYTHONPATH"] = f"{WORKSPACE}:{os.getenv('PYTHONPATH', '')}"
|
32
32
|
|
33
|
-
STUCK_IN_LOOP_ERROR_MESSAGE = {
|
34
|
-
"name": "Error when running conversation agent",
|
35
|
-
"value": "Agent is stuck in conversation loop, exited",
|
36
|
-
"traceback_raw": [],
|
37
|
-
}
|
38
|
-
|
39
33
|
|
40
34
|
class BoilerplateCode:
|
41
35
|
pre_code = [
|
@@ -298,13 +292,6 @@ class VisionAgent(Agent):
|
|
298
292
|
# sometimes it gets stuck in a loop, so we force it to exit
|
299
293
|
if last_response == response:
|
300
294
|
response["let_user_respond"] = True
|
301
|
-
self.streaming_message(
|
302
|
-
{
|
303
|
-
"role": "assistant",
|
304
|
-
"content": "{}",
|
305
|
-
"error": STUCK_IN_LOOP_ERROR_MESSAGE,
|
306
|
-
}
|
307
|
-
)
|
308
295
|
|
309
296
|
finished = response["let_user_respond"]
|
310
297
|
|
@@ -317,7 +304,11 @@ class VisionAgent(Agent):
|
|
317
304
|
{
|
318
305
|
"role": "assistant",
|
319
306
|
"content": "{}",
|
320
|
-
"error":
|
307
|
+
"error": {
|
308
|
+
"name": "Error when running conversation agent",
|
309
|
+
"value": "Agent is stuck in conversation loop, exited",
|
310
|
+
"traceback_raw": [],
|
311
|
+
},
|
321
312
|
"finished": finished and code_action is None,
|
322
313
|
}
|
323
314
|
)
|
vision_agent/tools/tools.py
CHANGED
@@ -1181,7 +1181,12 @@ def florence2_phrase_grounding(
|
|
1181
1181
|
fine_tuning=FineTuning(job_id=UUID(fine_tune_id)),
|
1182
1182
|
)
|
1183
1183
|
data = data_obj.model_dump(by_alias=True)
|
1184
|
-
detections = send_inference_request(
|
1184
|
+
detections = send_inference_request(
|
1185
|
+
data,
|
1186
|
+
"tools",
|
1187
|
+
v2=False,
|
1188
|
+
metadata_payload={"function_name": "florence2_phrase_grounding"},
|
1189
|
+
)
|
1185
1190
|
else:
|
1186
1191
|
data = {
|
1187
1192
|
"image": image_b64,
|
@@ -1754,14 +1759,17 @@ def _save_video_to_result(video_uri: str) -> None:
|
|
1754
1759
|
|
1755
1760
|
|
1756
1761
|
def overlay_bounding_boxes(
|
1757
|
-
|
1758
|
-
|
1762
|
+
medias: Union[np.ndarray, List[np.ndarray]],
|
1763
|
+
bboxes: Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]],
|
1764
|
+
) -> Union[np.ndarray, List[np.ndarray]]:
|
1759
1765
|
"""'overlay_bounding_boxes' is a utility function that displays bounding boxes on
|
1760
1766
|
an image.
|
1761
1767
|
|
1762
1768
|
Parameters:
|
1763
|
-
|
1764
|
-
|
1769
|
+
medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display the
|
1770
|
+
bounding boxes on.
|
1771
|
+
bboxes (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
|
1772
|
+
dictionaries or a list of lists of dictionaries containing the bounding
|
1765
1773
|
boxes.
|
1766
1774
|
|
1767
1775
|
Returns:
|
@@ -1773,41 +1781,54 @@ def overlay_bounding_boxes(
|
|
1773
1781
|
image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
|
1774
1782
|
)
|
1775
1783
|
"""
|
1776
|
-
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
|
1777
1784
|
|
1778
|
-
|
1785
|
+
medias_int: List[np.ndarray] = (
|
1786
|
+
[medias] if isinstance(medias, np.ndarray) else medias
|
1787
|
+
)
|
1788
|
+
bbox_int = [bboxes] if isinstance(bboxes[0], dict) else bboxes
|
1789
|
+
bbox_int = cast(List[List[Dict[str, Any]]], bbox_int)
|
1790
|
+
labels = set([bb["label"] for b in bbox_int for bb in b])
|
1791
|
+
|
1792
|
+
if len(labels) > len(COLORS):
|
1779
1793
|
_LOGGER.warning(
|
1780
1794
|
"Number of unique labels exceeds the number of available colors. Some labels may have the same color."
|
1781
1795
|
)
|
1782
1796
|
|
1783
|
-
color = {
|
1784
|
-
label: COLORS[i % len(COLORS)]
|
1785
|
-
for i, label in enumerate(set([box["label"] for box in bboxes]))
|
1786
|
-
}
|
1787
|
-
bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
|
1797
|
+
color = {label: COLORS[i % len(COLORS)] for i, label in enumerate(labels)}
|
1788
1798
|
|
1789
|
-
|
1790
|
-
|
1791
|
-
|
1792
|
-
font = ImageFont.truetype(
|
1793
|
-
str(resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")),
|
1794
|
-
fontsize,
|
1795
|
-
)
|
1799
|
+
frame_out = []
|
1800
|
+
for i, frame in enumerate(medias_int):
|
1801
|
+
pil_image = Image.fromarray(frame.astype(np.uint8)).convert("RGB")
|
1796
1802
|
|
1797
|
-
|
1798
|
-
|
1799
|
-
box = elt["bbox"]
|
1800
|
-
scores = elt["score"]
|
1803
|
+
bboxes = bbox_int[i]
|
1804
|
+
bboxes = sorted(bboxes, key=lambda x: x["label"], reverse=True)
|
1801
1805
|
|
1802
|
-
|
1803
|
-
|
1806
|
+
width, height = pil_image.size
|
1807
|
+
fontsize = max(12, int(min(width, height) / 40))
|
1808
|
+
draw = ImageDraw.Draw(pil_image)
|
1809
|
+
font = ImageFont.truetype(
|
1810
|
+
str(
|
1811
|
+
resources.files("vision_agent.fonts").joinpath("default_font_ch_en.ttf")
|
1812
|
+
),
|
1813
|
+
fontsize,
|
1814
|
+
)
|
1804
1815
|
|
1805
|
-
|
1806
|
-
|
1807
|
-
|
1808
|
-
|
1809
|
-
|
1810
|
-
|
1816
|
+
for elt in bboxes:
|
1817
|
+
label = elt["label"]
|
1818
|
+
box = elt["bbox"]
|
1819
|
+
scores = elt["score"]
|
1820
|
+
|
1821
|
+
# denormalize the box if it is normalized
|
1822
|
+
box = denormalize_bbox(box, (height, width))
|
1823
|
+
draw.rectangle(box, outline=color[label], width=4)
|
1824
|
+
text = f"{label}: {scores:.2f}"
|
1825
|
+
text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
|
1826
|
+
draw.rectangle(
|
1827
|
+
(box[0], box[1], text_box[2], text_box[3]), fill=color[label]
|
1828
|
+
)
|
1829
|
+
draw.text((box[0], box[1]), text, fill="black", font=font)
|
1830
|
+
frame_out.append(np.array(pil_image))
|
1831
|
+
return frame_out[0] if len(frame_out) == 1 else frame_out
|
1811
1832
|
|
1812
1833
|
|
1813
1834
|
def _get_text_coords_from_mask(
|
@@ -1847,7 +1868,8 @@ def overlay_segmentation_masks(
|
|
1847
1868
|
medias (Union[np.ndarray, List[np.ndarray]]): The image or frames to display
|
1848
1869
|
the masks on.
|
1849
1870
|
masks (Union[List[Dict[str, Any]], List[List[Dict[str, Any]]]]): A list of
|
1850
|
-
dictionaries containing the masks, labels
|
1871
|
+
dictionaries or a list of lists of dictionaries containing the masks, labels
|
1872
|
+
and scores.
|
1851
1873
|
draw_label (bool, optional): If True, the labels will be displayed on the image.
|
1852
1874
|
secondary_label_key (str, optional): The key to use for the secondary
|
1853
1875
|
tracking label which is needed in videos to display tracking information.
|
@@ -2,7 +2,7 @@ vision_agent/__init__.py,sha256=EAb4-f9iyuEYkBrX4ag1syM8Syx8118_t0R6_C34M9w,57
|
|
2
2
|
vision_agent/agent/__init__.py,sha256=NF2LABqHixLvbsOIO-fe-VKZ7awvShLtcT0oQT4eWtI,235
|
3
3
|
vision_agent/agent/agent.py,sha256=2cjIOxEuSJrqbfPXYoV0qER5ihXsPFCoEFJa4jpqan0,597
|
4
4
|
vision_agent/agent/agent_utils.py,sha256=PEUHqvnHmFL4np_TeFmKMwr5s_dWfdfJz6TF_ogd1dU,2353
|
5
|
-
vision_agent/agent/vision_agent.py,sha256=
|
5
|
+
vision_agent/agent/vision_agent.py,sha256=Fp2uSbroRzGrxEwbb9srGdl0h31awkzDFm2tTfn28GI,17587
|
6
6
|
vision_agent/agent/vision_agent_coder.py,sha256=4bbebV1sKE10vsxcZR-R8P54X2HjLeU9lDt7ylIZAT4,38429
|
7
7
|
vision_agent/agent/vision_agent_coder_prompts.py,sha256=YWK4C--YRS1Kuab11Gn-AXBzar1j_GNnTnxi_nnaPRY,14901
|
8
8
|
vision_agent/agent/vision_agent_prompts.py,sha256=e_ASPeRFU1yZsQhCkK_bIBG-eyIWyWXmN64lFk-r7e0,10897
|
@@ -18,7 +18,7 @@ vision_agent/tools/__init__.py,sha256=zUv3aVPN1MXfyQiQi5To4rkQGtG7mxLQ1NjLI3pxM8
|
|
18
18
|
vision_agent/tools/meta_tools.py,sha256=iHvMeBktWcVi-0DOrSMak1gsZrM_VKJlAq1mAFbBemE,23477
|
19
19
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
20
20
|
vision_agent/tools/tool_utils.py,sha256=5ukuDMxbEH4iKetYR9I7twzsA8ECyP4tVwYXQq54mxI,8020
|
21
|
-
vision_agent/tools/tools.py,sha256=
|
21
|
+
vision_agent/tools/tools.py,sha256=c7SjtZD7YfxhEAGYYe-ExVCBA4NDXmRwerBIbd-XEH8,74557
|
22
22
|
vision_agent/tools/tools_types.py,sha256=JUOZWGW2q-dlJ85CHr9gvo9KQk_rXyjJhi-iwPNn4eM,2397
|
23
23
|
vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
|
24
24
|
vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
|
@@ -27,7 +27,7 @@ vision_agent/utils/image_utils.py,sha256=rm9GfXvD4JrjnqKrP_f2gfq4SzmqYC0IdC1kKwd
|
|
27
27
|
vision_agent/utils/sim.py,sha256=ebE9Cs00pVEDI1HMjAzUBk88tQQmc2U-yAzIDinnekU,5572
|
28
28
|
vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
|
29
29
|
vision_agent/utils/video.py,sha256=xbMEoRk13l4fHeQlbvMQhLCn8RNndYmsDhUf01TUeR8,4781
|
30
|
-
vision_agent-0.2.
|
31
|
-
vision_agent-0.2.
|
32
|
-
vision_agent-0.2.
|
33
|
-
vision_agent-0.2.
|
30
|
+
vision_agent-0.2.146.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
31
|
+
vision_agent-0.2.146.dist-info/METADATA,sha256=0xtIEPRJgc8ifV8z9C6OoLyi9w7wqQV7mpUmrWsLSVM,13758
|
32
|
+
vision_agent-0.2.146.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
33
|
+
vision_agent-0.2.146.dist-info/RECORD,,
|
File without changes
|
File without changes
|