PyPI - vision-agent - Versions diffs - 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl - Mend

vision-agent 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

vision_agent/agent/vision_agent.py CHANGED Viewed

@@ -8,7 +8,12 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
 from PIL import Image
 from tabulate import tabulate
-from vision_agent.image_utils import overlay_bboxes, overlay_masks, overlay_heat_map
+from vision_agent.image_utils import (
+    convert_to_b64,
+    overlay_bboxes,
+    overlay_heat_map,
+    overlay_masks,
+)
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM, OpenAILMM
 from vision_agent.tools import TOOLS
@@ -481,6 +486,17 @@ class VisionAgent(Agent):
         if self.report_progress_callback:
             self.report_progress_callback(description)
+    def _report_visualization_via_callback(
+        self, images: Sequence[Union[str, Path]]
+    ) -> None:
+        """This is intended for streaming the visualization images via the callback to the client side."""
+        if self.report_progress_callback:
+            self.report_progress_callback("<VIZ>")
+            if images:
+                for img in images:
+                    self.report_progress_callback(f"<IMG>{convert_to_b64(img)}</IMG>")
+            self.report_progress_callback("</VIZ>")
     def chat_with_workflow(
         self,
         chat: List[Dict[str, str]],
@@ -577,9 +593,12 @@ class VisionAgent(Agent):
         )
         if visualize_output:
-            visualized_output = all_tool_results[-1]["visualized_output"]
-            for image in visualized_output:
-                Image.open(image).show()
+            viz_images: Sequence[Union[str, Path]] = all_tool_results[-1][
+                "visualized_output"
+            ]
+            self._report_visualization_via_callback(viz_images)
+            for img in viz_images:
+                Image.open(img).show()
         return final_answer, all_tool_results

vision_agent/image_utils.py CHANGED Viewed

@@ -4,7 +4,7 @@ import base64
 from importlib import resources
 from io import BytesIO
 from pathlib import Path
-from typing import Dict, Tuple, Union, List
+from typing import Dict, List, Tuple, Union
 import numpy as np
 from PIL import Image, ImageDraw, ImageFont
@@ -108,7 +108,7 @@ def convert_to_b64(data: Union[str, Path, np.ndarray, ImageType]) -> str:
         data = Image.open(data)
     if isinstance(data, Image.Image):
         buffer = BytesIO()
-        data.convert("RGB").save(buffer, format="JPEG")
+        data.convert("RGB").save(buffer, format="PNG")
         return base64.b64encode(buffer.getvalue()).decode("utf-8")
     else:
         arr_bytes = data.tobytes()

{vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.1
+Version: 0.2.2
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/RECORD RENAMED Viewed

@@ -5,11 +5,11 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
 vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
 vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
 vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
-vision_agent/agent/vision_agent.py,sha256=MTxeV5_Sghqoe2aOW9EbNgiq61sVCcF3ZndJ7BZl6x0,23588
+vision_agent/agent/vision_agent.py,sha256=2VUMRVI6KAnmaUK-34wrgyfSQ2DAUm4g4QQcpqa2zao,24235
 vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
 vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
-vision_agent/image_utils.py,sha256=Cg4aKO1tQiETT1gdsZ50XzORBtJnBFfMG2cKJyjaY6Q,7555
+vision_agent/image_utils.py,sha256=YvP5KE9NrWdgJKuHW2NR1glzfObkxtcXBknpmj3Gsbs,7554
 vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
 vision_agent/llm/llm.py,sha256=gwDQ9-p9wEn24xi1019e5jzTGQg4xWDSqBCsqIqGcU4,5168
 vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
@@ -19,7 +19,7 @@ vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E
 vision_agent/tools/tools.py,sha256=gCjHs5vJuGNBFsnJWFT7PX3wTyfHgtrgX1Eq9vqknN0,34979
 vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
 vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
-vision_agent-0.2.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.1.dist-info/METADATA,sha256=RAD8NCAo5N12sccgSC5Q0j4hKwU_rVKg5p_eLE-Njdc,6434
-vision_agent-0.2.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.1.dist-info/RECORD,,
+vision_agent-0.2.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.2.dist-info/METADATA,sha256=dOZ9KWmhuVb5wvschxYBis8x79HwgOD3MmTKqyupggg,6434
+vision_agent-0.2.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.2.dist-info/RECORD,,

{vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.1.dist-info → vision_agent-0.2.2.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

vision-agent 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl