vision-agent 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/agent/vision_agent.py +23 -4
- vision_agent/image_utils.py +2 -2
- {vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/METADATA +1 -1
- {vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/RECORD +6 -6
- {vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/LICENSE +0 -0
- {vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/WHEEL +0 -0
@@ -8,7 +8,12 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
|
|
8
8
|
from PIL import Image
|
9
9
|
from tabulate import tabulate
|
10
10
|
|
11
|
-
from vision_agent.image_utils import
|
11
|
+
from vision_agent.image_utils import (
|
12
|
+
convert_to_b64,
|
13
|
+
overlay_bboxes,
|
14
|
+
overlay_heat_map,
|
15
|
+
overlay_masks,
|
16
|
+
)
|
12
17
|
from vision_agent.llm import LLM, OpenAILLM
|
13
18
|
from vision_agent.lmm import LMM, OpenAILMM
|
14
19
|
from vision_agent.tools import TOOLS
|
@@ -481,6 +486,17 @@ class VisionAgent(Agent):
|
|
481
486
|
if self.report_progress_callback:
|
482
487
|
self.report_progress_callback(description)
|
483
488
|
|
489
|
+
def _report_visualization_via_callback(
|
490
|
+
self, images: Sequence[Union[str, Path]]
|
491
|
+
) -> None:
|
492
|
+
"""This is intended for streaming the visualization images via the callback to the client side."""
|
493
|
+
if self.report_progress_callback:
|
494
|
+
self.report_progress_callback("<VIZ>")
|
495
|
+
if images:
|
496
|
+
for img in images:
|
497
|
+
self.report_progress_callback(f"<IMG>{convert_to_b64(img)}</IMG>")
|
498
|
+
self.report_progress_callback("</VIZ>")
|
499
|
+
|
484
500
|
def chat_with_workflow(
|
485
501
|
self,
|
486
502
|
chat: List[Dict[str, str]],
|
@@ -577,9 +593,12 @@ class VisionAgent(Agent):
|
|
577
593
|
)
|
578
594
|
|
579
595
|
if visualize_output:
|
580
|
-
|
581
|
-
|
582
|
-
|
596
|
+
viz_images: Sequence[Union[str, Path]] = all_tool_results[-1][
|
597
|
+
"visualized_output"
|
598
|
+
]
|
599
|
+
self._report_visualization_via_callback(viz_images)
|
600
|
+
for img in viz_images:
|
601
|
+
Image.open(img).show()
|
583
602
|
|
584
603
|
return final_answer, all_tool_results
|
585
604
|
|
vision_agent/image_utils.py
CHANGED
@@ -4,7 +4,7 @@ import base64
|
|
4
4
|
from importlib import resources
|
5
5
|
from io import BytesIO
|
6
6
|
from pathlib import Path
|
7
|
-
from typing import Dict, Tuple, Union
|
7
|
+
from typing import Dict, List, Tuple, Union
|
8
8
|
|
9
9
|
import numpy as np
|
10
10
|
from PIL import Image, ImageDraw, ImageFont
|
@@ -108,7 +108,7 @@ def convert_to_b64(data: Union[str, Path, np.ndarray, ImageType]) -> str:
|
|
108
108
|
data = Image.open(data)
|
109
109
|
if isinstance(data, Image.Image):
|
110
110
|
buffer = BytesIO()
|
111
|
-
data.convert("RGB").save(buffer, format="
|
111
|
+
data.convert("RGB").save(buffer, format="PNG")
|
112
112
|
return base64.b64encode(buffer.getvalue()).decode("utf-8")
|
113
113
|
else:
|
114
114
|
arr_bytes = data.tobytes()
|
@@ -5,11 +5,11 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
|
|
5
5
|
vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
|
6
6
|
vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
|
7
7
|
vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
|
8
|
-
vision_agent/agent/vision_agent.py,sha256=
|
8
|
+
vision_agent/agent/vision_agent.py,sha256=2VUMRVI6KAnmaUK-34wrgyfSQ2DAUm4g4QQcpqa2zao,24235
|
9
9
|
vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
|
10
10
|
vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
|
12
|
-
vision_agent/image_utils.py,sha256=
|
12
|
+
vision_agent/image_utils.py,sha256=YvP5KE9NrWdgJKuHW2NR1glzfObkxtcXBknpmj3Gsbs,7554
|
13
13
|
vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
|
14
14
|
vision_agent/llm/llm.py,sha256=gwDQ9-p9wEn24xi1019e5jzTGQg4xWDSqBCsqIqGcU4,5168
|
15
15
|
vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
|
@@ -19,7 +19,7 @@ vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E
|
|
19
19
|
vision_agent/tools/tools.py,sha256=gCjHs5vJuGNBFsnJWFT7PX3wTyfHgtrgX1Eq9vqknN0,34979
|
20
20
|
vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
|
21
21
|
vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
|
22
|
-
vision_agent-0.2.
|
23
|
-
vision_agent-0.2.
|
24
|
-
vision_agent-0.2.
|
25
|
-
vision_agent-0.2.
|
22
|
+
vision_agent-0.2.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
23
|
+
vision_agent-0.2.2.dist-info/METADATA,sha256=dOZ9KWmhuVb5wvschxYBis8x79HwgOD3MmTKqyupggg,6434
|
24
|
+
vision_agent-0.2.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
|
25
|
+
vision_agent-0.2.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|