vision-agent 0.2.42__tar.gz → 0.2.44__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {vision_agent-0.2.42 → vision_agent-0.2.44}/PKG-INFO +2 -2
- {vision_agent-0.2.42 → vision_agent-0.2.44}/pyproject.toml +2 -2
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/vision_agent.py +4 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/tools/tools.py +78 -8
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/utils/execute.py +24 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/utils/video.py +35 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/LICENSE +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/README.md +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/agent_coder.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/agent_coder_prompts.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/data_interpreter.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/data_interpreter_prompts.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/easytool.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/easytool_prompts.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/easytool_v2.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/easytool_v2_prompts.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/reflexion.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/reflexion_prompts.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/llm/__init__.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/llm/llm.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/tools/easytool_tools.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.42 → vision_agent-0.2.44}/vision_agent/utils/type_defs.py +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 0.2.42
|
3
|
+
Version: 0.2.44
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -18,7 +18,7 @@ Requires-Dist: nbclient (>=0.10.0,<0.11.0)
|
|
18
18
|
Requires-Dist: nbformat (>=5.10.4,<6.0.0)
|
19
19
|
Requires-Dist: numpy (>=1.21.0,<2.0.0)
|
20
20
|
Requires-Dist: openai (>=1.0.0,<2.0.0)
|
21
|
-
Requires-Dist: opencv-python
|
21
|
+
Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
|
22
22
|
Requires-Dist: pandas (>=2.0.0,<3.0.0)
|
23
23
|
Requires-Dist: pillow (>=10.0.0,<11.0.0)
|
24
24
|
Requires-Dist: pydantic-settings (>=2.2.1,<3.0.0)
|
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
|
|
4
4
|
|
5
5
|
[tool.poetry]
|
6
6
|
name = "vision-agent"
|
7
|
-
version = "0.2.42"
|
7
|
+
version = "0.2.44"
|
8
8
|
description = "Toolset for Vision Agent"
|
9
9
|
authors = ["Landing AI <dev@landing.ai>"]
|
10
10
|
readme = "README.md"
|
@@ -25,7 +25,7 @@ pandas = "2.*"
|
|
25
25
|
openai = "1.*"
|
26
26
|
typing_extensions = "4.*"
|
27
27
|
moviepy = "1.*"
|
28
|
-
opencv-python
|
28
|
+
opencv-python = "4.*"
|
29
29
|
tabulate = "^0.9.0"
|
30
30
|
pydantic-settings = "^2.2.1"
|
31
31
|
scipy = "1.13.*"
|
@@ -28,6 +28,7 @@ from vision_agent.utils import CodeInterpreterFactory, Execution
|
|
28
28
|
from vision_agent.utils.execute import CodeInterpreter
|
29
29
|
from vision_agent.utils.image_utils import b64_to_pil
|
30
30
|
from vision_agent.utils.sim import Sim
|
31
|
+
from vision_agent.utils.video import play_video
|
31
32
|
|
32
33
|
logging.basicConfig(stream=sys.stdout)
|
33
34
|
_LOGGER = logging.getLogger(__name__)
|
@@ -522,6 +523,9 @@ class VisionAgent(Agent):
|
|
522
523
|
for res in execution_result.results:
|
523
524
|
if res.png:
|
524
525
|
b64_to_pil(res.png).show()
|
526
|
+
if res.mp4:
|
527
|
+
play_video(res.mp4)
|
528
|
+
|
525
529
|
return {
|
526
530
|
"code": code,
|
527
531
|
"test": test,
|
@@ -7,14 +7,15 @@ from importlib import resources
|
|
7
7
|
from pathlib import Path
|
8
8
|
from typing import Any, Callable, Dict, List, Tuple, Union, cast
|
9
9
|
|
10
|
+
import cv2
|
10
11
|
import numpy as np
|
11
12
|
import pandas as pd
|
12
13
|
import requests
|
13
14
|
from PIL import Image, ImageDraw, ImageFont
|
14
|
-
from scipy.spatial import distance # type: ignore
|
15
15
|
|
16
16
|
from vision_agent.tools.tool_utils import _send_inference_request
|
17
17
|
from vision_agent.utils import extract_frames_from_video
|
18
|
+
from vision_agent.utils.execute import FileSerializer, MimeType
|
18
19
|
from vision_agent.utils.image_utils import (
|
19
20
|
b64_to_pil,
|
20
21
|
convert_to_b64,
|
@@ -421,10 +422,39 @@ def closest_mask_distance(mask1: np.ndarray, mask2: np.ndarray) -> float:
|
|
421
422
|
|
422
423
|
mask1 = np.clip(mask1, 0, 1)
|
423
424
|
mask2 = np.clip(mask2, 0, 1)
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
425
|
+
contours1, _ = cv2.findContours(mask1, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
426
|
+
contours2, _ = cv2.findContours(mask2, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
|
427
|
+
largest_contour1 = max(contours1, key=cv2.contourArea)
|
428
|
+
largest_contour2 = max(contours2, key=cv2.contourArea)
|
429
|
+
polygon1 = cv2.approxPolyDP(largest_contour1, 1.0, True)
|
430
|
+
polygon2 = cv2.approxPolyDP(largest_contour2, 1.0, True)
|
431
|
+
min_distance = np.inf
|
432
|
+
|
433
|
+
small_polygon, larger_contour = (
|
434
|
+
(polygon1, largest_contour2)
|
435
|
+
if len(largest_contour1) < len(largest_contour2)
|
436
|
+
else (polygon2, largest_contour1)
|
437
|
+
)
|
438
|
+
|
439
|
+
# For each point in the first polygon
|
440
|
+
for point in small_polygon:
|
441
|
+
# Calculate the distance to the second polygon, -1 is to invert result as point inside the polygon is positive
|
442
|
+
|
443
|
+
distance = (
|
444
|
+
cv2.pointPolygonTest(
|
445
|
+
larger_contour, (point[0, 0].item(), point[0, 1].item()), True
|
446
|
+
)
|
447
|
+
* -1
|
448
|
+
)
|
449
|
+
|
450
|
+
# If the distance is negative, the point is inside the polygon, so the distance is 0
|
451
|
+
if distance < 0:
|
452
|
+
continue
|
453
|
+
else:
|
454
|
+
# Update the minimum distance if the point is outside the polygon
|
455
|
+
min_distance = min(min_distance, distance)
|
456
|
+
|
457
|
+
return min_distance if min_distance != np.inf else 0.0
|
428
458
|
|
429
459
|
|
430
460
|
def closest_box_distance(
|
@@ -521,6 +551,29 @@ def save_image(image: np.ndarray) -> str:
|
|
521
551
|
return f.name
|
522
552
|
|
523
553
|
|
554
|
+
def save_video_to_result(video_uri: str) -> None:
|
555
|
+
"""'save_video_to_result' a utility function that saves a video into the result of the code execution (as an intermediate output).
|
556
|
+
This function is required to run if user wants to visualize the video generated by the code.
|
557
|
+
|
558
|
+
Parameters:
|
559
|
+
video_uri (str): The URI to the video file. Currently only local file paths are supported.
|
560
|
+
|
561
|
+
Example
|
562
|
+
-------
|
563
|
+
>>> save_video_to_result("path/to/video.mp4")
|
564
|
+
"""
|
565
|
+
from IPython.display import display
|
566
|
+
|
567
|
+
serializer = FileSerializer(video_uri)
|
568
|
+
display(
|
569
|
+
{
|
570
|
+
MimeType.VIDEO_MP4_B64: serializer.base64(),
|
571
|
+
MimeType.TEXT_PLAIN: str(serializer),
|
572
|
+
},
|
573
|
+
raw=True,
|
574
|
+
)
|
575
|
+
|
576
|
+
|
524
577
|
def overlay_bounding_boxes(
|
525
578
|
image: np.ndarray, bboxes: List[Dict[str, Any]]
|
526
579
|
) -> np.ndarray:
|
@@ -541,6 +594,8 @@ def overlay_bounding_boxes(
|
|
541
594
|
image, [{'score': 0.99, 'label': 'dinosaur', 'bbox': [0.1, 0.11, 0.35, 0.4]}],
|
542
595
|
)
|
543
596
|
"""
|
597
|
+
from IPython.display import display
|
598
|
+
|
544
599
|
pil_image = Image.fromarray(image.astype(np.uint8))
|
545
600
|
|
546
601
|
if len(set([box["label"] for box in bboxes])) > len(COLORS):
|
@@ -577,7 +632,10 @@ def overlay_bounding_boxes(
|
|
577
632
|
text_box = draw.textbbox((box[0], box[1]), text=text, font=font)
|
578
633
|
draw.rectangle((box[0], box[1], text_box[2], text_box[3]), fill=color[label])
|
579
634
|
draw.text((box[0], box[1]), text, fill="black", font=font)
|
580
|
-
|
635
|
+
|
636
|
+
pil_image = pil_image.convert("RGB")
|
637
|
+
display(pil_image)
|
638
|
+
return np.array(pil_image)
|
581
639
|
|
582
640
|
|
583
641
|
def overlay_segmentation_masks(
|
@@ -608,6 +666,8 @@ def overlay_segmentation_masks(
|
|
608
666
|
}],
|
609
667
|
)
|
610
668
|
"""
|
669
|
+
from IPython.display import display
|
670
|
+
|
611
671
|
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGBA")
|
612
672
|
|
613
673
|
if len(set([mask["label"] for mask in masks])) > len(COLORS):
|
@@ -627,7 +687,10 @@ def overlay_segmentation_masks(
|
|
627
687
|
np_mask[mask > 0, :] = color[label] + (255 * 0.5,)
|
628
688
|
mask_img = Image.fromarray(np_mask.astype(np.uint8))
|
629
689
|
pil_image = Image.alpha_composite(pil_image, mask_img)
|
630
|
-
|
690
|
+
|
691
|
+
pil_image = pil_image.convert("RGB")
|
692
|
+
display(pil_image)
|
693
|
+
return np.array(pil_image)
|
631
694
|
|
632
695
|
|
633
696
|
def overlay_heat_map(
|
@@ -657,6 +720,8 @@ def overlay_heat_map(
|
|
657
720
|
},
|
658
721
|
)
|
659
722
|
"""
|
723
|
+
from IPython.display import display
|
724
|
+
|
660
725
|
pil_image = Image.fromarray(image.astype(np.uint8)).convert("RGB")
|
661
726
|
|
662
727
|
if "heat_map" not in heat_map or len(heat_map["heat_map"]) == 0:
|
@@ -672,7 +737,10 @@ def overlay_heat_map(
|
|
672
737
|
combined = Image.alpha_composite(
|
673
738
|
pil_image.convert("RGBA"), overlay.resize(pil_image.size)
|
674
739
|
)
|
675
|
-
|
740
|
+
|
741
|
+
pil_image = combined.convert("RGB")
|
742
|
+
display(pil_image)
|
743
|
+
return np.array(pil_image)
|
676
744
|
|
677
745
|
|
678
746
|
def get_tool_documentation(funcs: List[Callable[..., Any]]) -> str:
|
@@ -734,6 +802,7 @@ TOOLS = [
|
|
734
802
|
save_json,
|
735
803
|
load_image,
|
736
804
|
save_image,
|
805
|
+
save_video_to_result,
|
737
806
|
overlay_bounding_boxes,
|
738
807
|
overlay_segmentation_masks,
|
739
808
|
overlay_heat_map,
|
@@ -746,6 +815,7 @@ UTILITIES_DOCSTRING = get_tool_documentation(
|
|
746
815
|
save_json,
|
747
816
|
load_image,
|
748
817
|
save_image,
|
818
|
+
save_video_to_result,
|
749
819
|
overlay_bounding_boxes,
|
750
820
|
overlay_segmentation_masks,
|
751
821
|
overlay_heat_map,
|
@@ -1,5 +1,6 @@
|
|
1
1
|
import abc
|
2
2
|
import atexit
|
3
|
+
import base64
|
3
4
|
import copy
|
4
5
|
import logging
|
5
6
|
import os
|
@@ -45,12 +46,31 @@ class MimeType(str, Enum):
|
|
45
46
|
IMAGE_SVG = "image/svg+xml"
|
46
47
|
IMAGE_PNG = "image/png"
|
47
48
|
IMAGE_JPEG = "image/jpeg"
|
49
|
+
VIDEO_MP4_B64 = "video/mp4/base64"
|
48
50
|
APPLICATION_PDF = "application/pdf"
|
49
51
|
TEXT_LATEX = "text/latex"
|
50
52
|
APPLICATION_JSON = "application/json"
|
51
53
|
APPLICATION_JAVASCRIPT = "application/javascript"
|
52
54
|
|
53
55
|
|
56
|
+
class FileSerializer:
|
57
|
+
"""Adaptor class that allows IPython.display.display() to serialize a file to a base64 string representation."""
|
58
|
+
|
59
|
+
def __init__(self, file_uri: str):
|
60
|
+
self.video_uri = file_uri
|
61
|
+
assert os.path.isfile(
|
62
|
+
file_uri
|
63
|
+
), f"Only support local files currently: {file_uri}"
|
64
|
+
assert Path(file_uri).exists(), f"File not found: {file_uri}"
|
65
|
+
|
66
|
+
def __repr__(self) -> str:
|
67
|
+
return f"FileSerializer({self.video_uri})"
|
68
|
+
|
69
|
+
def base64(self) -> str:
|
70
|
+
with open(self.video_uri, "rb") as file:
|
71
|
+
return base64.b64encode(file.read()).decode("utf-8")
|
72
|
+
|
73
|
+
|
54
74
|
class Result:
|
55
75
|
"""
|
56
76
|
Represents the data to be displayed as a result of executing a cell in a Jupyter notebook.
|
@@ -70,6 +90,7 @@ class Result:
|
|
70
90
|
png: Optional[str] = None
|
71
91
|
jpeg: Optional[str] = None
|
72
92
|
pdf: Optional[str] = None
|
93
|
+
mp4: Optional[str] = None
|
73
94
|
latex: Optional[str] = None
|
74
95
|
json: Optional[Dict[str, Any]] = None
|
75
96
|
javascript: Optional[str] = None
|
@@ -93,6 +114,7 @@ class Result:
|
|
93
114
|
self.png = data.pop(MimeType.IMAGE_PNG, None)
|
94
115
|
self.jpeg = data.pop(MimeType.IMAGE_JPEG, None)
|
95
116
|
self.pdf = data.pop(MimeType.APPLICATION_PDF, None)
|
117
|
+
self.mp4 = data.pop(MimeType.VIDEO_MP4_B64, None)
|
96
118
|
self.latex = data.pop(MimeType.TEXT_LATEX, None)
|
97
119
|
self.json = data.pop(MimeType.APPLICATION_JSON, None)
|
98
120
|
self.javascript = data.pop(MimeType.APPLICATION_JAVASCRIPT, None)
|
@@ -190,6 +212,8 @@ class Result:
|
|
190
212
|
formats.append("json")
|
191
213
|
if self.javascript:
|
192
214
|
formats.append("javascript")
|
215
|
+
if self.mp4:
|
216
|
+
formats.append("mp4")
|
193
217
|
if self.extra:
|
194
218
|
formats.extend(iter(self.extra))
|
195
219
|
return formats
|
@@ -1,7 +1,9 @@
|
|
1
|
+
import base64
|
1
2
|
import logging
|
2
3
|
import math
|
3
4
|
import os
|
4
5
|
from concurrent.futures import ProcessPoolExecutor, as_completed
|
6
|
+
import tempfile
|
5
7
|
from typing import List, Tuple, cast
|
6
8
|
|
7
9
|
import cv2
|
@@ -14,6 +16,39 @@ _LOGGER = logging.getLogger(__name__)
|
|
14
16
|
_CLIP_LENGTH = 30.0
|
15
17
|
|
16
18
|
|
19
|
+
def play_video(video_base64: str) -> None:
|
20
|
+
"""Play a video file"""
|
21
|
+
video_data = base64.b64decode(video_base64)
|
22
|
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_video:
|
23
|
+
temp_video.write(video_data)
|
24
|
+
temp_video_path = temp_video.name
|
25
|
+
|
26
|
+
cap = cv2.VideoCapture(temp_video_path)
|
27
|
+
if not cap.isOpened():
|
28
|
+
_LOGGER.error("Error: Could not open video.")
|
29
|
+
return
|
30
|
+
|
31
|
+
# Display the first frame and wait for any key press to start the video
|
32
|
+
ret, frame = cap.read()
|
33
|
+
if ret:
|
34
|
+
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
35
|
+
cv2.imshow("Video Player", frame)
|
36
|
+
_LOGGER.info(f"Press any key to start playing the video: {temp_video_path}")
|
37
|
+
cv2.waitKey(0) # Wait for any key press
|
38
|
+
|
39
|
+
while cap.isOpened():
|
40
|
+
ret, frame = cap.read()
|
41
|
+
if not ret:
|
42
|
+
break
|
43
|
+
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
|
44
|
+
cv2.imshow("Video Player", frame)
|
45
|
+
# Press 'q' to exit the video
|
46
|
+
if cv2.waitKey(200) & 0xFF == ord("q"):
|
47
|
+
break
|
48
|
+
cap.release()
|
49
|
+
cv2.destroyAllWindows()
|
50
|
+
|
51
|
+
|
17
52
|
def extract_frames_from_video(
|
18
53
|
video_uri: str, fps: float = 0.5, motion_detection_threshold: float = 0.0
|
19
54
|
) -> List[Tuple[np.ndarray, float]]:
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|