vision-agent 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,12 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
8
8
  from PIL import Image
9
9
  from tabulate import tabulate
10
10
 
11
- from vision_agent.image_utils import overlay_bboxes, overlay_masks, overlay_heat_map
11
+ from vision_agent.image_utils import (
12
+ convert_to_b64,
13
+ overlay_bboxes,
14
+ overlay_heat_map,
15
+ overlay_masks,
16
+ )
12
17
  from vision_agent.llm import LLM, OpenAILLM
13
18
  from vision_agent.lmm import LMM, OpenAILMM
14
19
  from vision_agent.tools import TOOLS
@@ -481,6 +486,17 @@ class VisionAgent(Agent):
481
486
  if self.report_progress_callback:
482
487
  self.report_progress_callback(description)
483
488
 
489
+ def _report_visualization_via_callback(
490
+ self, images: Sequence[Union[str, Path]]
491
+ ) -> None:
492
+ """This is intended for streaming the visualization images via the callback to the client side."""
493
+ if self.report_progress_callback:
494
+ self.report_progress_callback("<VIZ>")
495
+ if images:
496
+ for img in images:
497
+ self.report_progress_callback(f"<IMG>{convert_to_b64(img)}</IMG>")
498
+ self.report_progress_callback("</VIZ>")
499
+
484
500
  def chat_with_workflow(
485
501
  self,
486
502
  chat: List[Dict[str, str]],
@@ -577,9 +593,12 @@ class VisionAgent(Agent):
577
593
  )
578
594
 
579
595
  if visualize_output:
580
- visualized_output = all_tool_results[-1]["visualized_output"]
581
- for image in visualized_output:
582
- Image.open(image).show()
596
+ viz_images: Sequence[Union[str, Path]] = all_tool_results[-1][
597
+ "visualized_output"
598
+ ]
599
+ self._report_visualization_via_callback(viz_images)
600
+ for img in viz_images:
601
+ Image.open(img).show()
583
602
 
584
603
  return final_answer, all_tool_results
585
604
 
@@ -4,7 +4,7 @@ import base64
4
4
  from importlib import resources
5
5
  from io import BytesIO
6
6
  from pathlib import Path
7
- from typing import Dict, Tuple, Union, List
7
+ from typing import Dict, List, Tuple, Union
8
8
 
9
9
  import numpy as np
10
10
  from PIL import Image, ImageDraw, ImageFont
@@ -108,7 +108,7 @@ def convert_to_b64(data: Union[str, Path, np.ndarray, ImageType]) -> str:
108
108
  data = Image.open(data)
109
109
  if isinstance(data, Image.Image):
110
110
  buffer = BytesIO()
111
- data.convert("RGB").save(buffer, format="JPEG")
111
+ data.convert("RGB").save(buffer, format="PNG")
112
112
  return base64.b64encode(buffer.getvalue()).decode("utf-8")
113
113
  else:
114
114
  arr_bytes = data.tobytes()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.1
3
+ Version: 0.2.2
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -5,11 +5,11 @@ vision_agent/agent/easytool.py,sha256=oMHnBg7YBtIPgqQUNcZgq7uMgpPThs99_UnO7ERkMV
5
5
  vision_agent/agent/easytool_prompts.py,sha256=zdQQw6WpXOmvwOMtlBlNKY5a3WNlr65dbUvMIGiqdeo,4526
6
6
  vision_agent/agent/reflexion.py,sha256=4gz30BuFMeGxSsTzoDV4p91yE0R8LISXp28IaOI6wdM,10506
7
7
  vision_agent/agent/reflexion_prompts.py,sha256=G7UAeNz_g2qCb2yN6OaIC7bQVUkda4m3z42EG8wAyfE,9342
8
- vision_agent/agent/vision_agent.py,sha256=MTxeV5_Sghqoe2aOW9EbNgiq61sVCcF3ZndJ7BZl6x0,23588
8
+ vision_agent/agent/vision_agent.py,sha256=2VUMRVI6KAnmaUK-34wrgyfSQ2DAUm4g4QQcpqa2zao,24235
9
9
  vision_agent/agent/vision_agent_prompts.py,sha256=W3Z72FpUt71UIJSkjAcgtQqxeMqkYuATqHAN5fYY26c,7342
10
10
  vision_agent/fonts/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1rtHiNC_6QosTE,1594400
12
- vision_agent/image_utils.py,sha256=Cg4aKO1tQiETT1gdsZ50XzORBtJnBFfMG2cKJyjaY6Q,7555
12
+ vision_agent/image_utils.py,sha256=YvP5KE9NrWdgJKuHW2NR1glzfObkxtcXBknpmj3Gsbs,7554
13
13
  vision_agent/llm/__init__.py,sha256=BoUm_zSAKnLlE8s-gKTSQugXDqVZKPqYlWwlTLdhcz4,48
14
14
  vision_agent/llm/llm.py,sha256=gwDQ9-p9wEn24xi1019e5jzTGQg4xWDSqBCsqIqGcU4,5168
15
15
  vision_agent/lmm/__init__.py,sha256=nnNeKD1k7q_4vLb1x51O_EUTYaBgGfeiCx5F433gr3M,67
@@ -19,7 +19,7 @@ vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E
19
19
  vision_agent/tools/tools.py,sha256=gCjHs5vJuGNBFsnJWFT7PX3wTyfHgtrgX1Eq9vqknN0,34979
20
20
  vision_agent/tools/video.py,sha256=xTElFSFp1Jw4ulOMnk81Vxsh-9dTxcWUO6P9fzEi3AM,7653
21
21
  vision_agent/type_defs.py,sha256=4LTnTL4HNsfYqCrDn9Ppjg9bSG2ZGcoKSSd9YeQf4Bw,1792
22
- vision_agent-0.2.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
- vision_agent-0.2.1.dist-info/METADATA,sha256=RAD8NCAo5N12sccgSC5Q0j4hKwU_rVKg5p_eLE-Njdc,6434
24
- vision_agent-0.2.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
25
- vision_agent-0.2.1.dist-info/RECORD,,
22
+ vision_agent-0.2.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
23
+ vision_agent-0.2.2.dist-info/METADATA,sha256=dOZ9KWmhuVb5wvschxYBis8x79HwgOD3MmTKqyupggg,6434
24
+ vision_agent-0.2.2.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
25
+ vision_agent-0.2.2.dist-info/RECORD,,