vision-agent 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vision_agent/tools/tools.py +51 -0
- {vision_agent-1.1.1.dist-info → vision_agent-1.1.2.dist-info}/METADATA +2 -1
- {vision_agent-1.1.1.dist-info → vision_agent-1.1.2.dist-info}/RECORD +5 -5
- {vision_agent-1.1.1.dist-info → vision_agent-1.1.2.dist-info}/LICENSE +0 -0
- {vision_agent-1.1.1.dist-info → vision_agent-1.1.2.dist-info}/WHEEL +0 -0
vision_agent/tools/tools.py
CHANGED
@@ -19,6 +19,7 @@ from IPython.display import display
|
|
19
19
|
from PIL import Image, ImageDraw, ImageFont
|
20
20
|
from pillow_heif import register_heif_opener # type: ignore
|
21
21
|
from pytube import YouTube # type: ignore
|
22
|
+
import pymupdf # type: ignore
|
22
23
|
|
23
24
|
from vision_agent.lmm.lmm import LMM, AnthropicLMM, OpenAILMM
|
24
25
|
from vision_agent.utils.execute import FileSerializer, MimeType
|
@@ -3147,6 +3148,56 @@ def save_image(image: np.ndarray, file_path: str) -> None:
|
|
3147
3148
|
pil_image.save(file_path)
|
3148
3149
|
|
3149
3150
|
|
3151
|
+
def load_pdf(pdf_path: str) -> List[np.ndarray]:
    """'load_pdf' is a utility function that loads a PDF from the given file path
    string or URL and converts each page to an image.

    Parameters:
        pdf_path (str): The path to the PDF file, or an http(s) URL pointing to one.
            URLs are downloaded to a temporary file that is removed before returning.

    Returns:
        List[np.ndarray]: A list of images as NumPy arrays, one for each page of the PDF.

    Example
    -------
        >>> load_pdf("path/to/document.pdf")
    """
    # Local import so this block does not depend on a new file-level import.
    from urllib.parse import urlparse

    # Track the downloaded copy separately from `pdf_path`: once `pdf_path` is
    # rebound to the temp file, it no longer looks like a URL, so cleanup must
    # not rely on re-testing its prefix.
    tmp_path: Optional[str] = None
    try:
        # Require the full scheme separator so local paths that merely begin
        # with "http" are not mistaken for URLs.
        if pdf_path.startswith(("http://", "https://")):
            # Take the suffix from the URL path only, so query strings and
            # fragments do not leak into the temporary file name.
            pdf_suffix = os.path.splitext(urlparse(pdf_path).path)[1] or ".pdf"
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=pdf_suffix
            ) as tmp_file:
                # Record the name first so the file is removed even if the
                # download below fails.
                tmp_path = tmp_file.name
                with urllib.request.urlopen(pdf_path) as response:
                    tmp_file.write(response.read())
            pdf_path = tmp_path

        doc = pymupdf.open(pdf_path)
        try:
            images = []
            for page in doc:
                # Render at 2x zoom on both axes for a higher-resolution image.
                # get_pixmap defaults to an RGB pixmap without alpha, which is
                # what the "RGB" mode passed to Image.frombytes expects.
                pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                images.append(np.array(img))
        finally:
            # Always release the document handle, even if rendering fails.
            doc.close()

        return images
    finally:
        # Remove the downloaded copy, if any, after the document is closed.
        if tmp_path is not None and os.path.exists(tmp_path):
            os.unlink(tmp_path)
|
3199
|
+
|
3200
|
+
|
3150
3201
|
def save_video(
|
3151
3202
|
frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 5
|
3152
3203
|
) -> str:
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: vision-agent
|
3
|
-
Version: 1.1.1
|
3
|
+
Version: 1.1.2
|
4
4
|
Summary: Toolset for Vision Agent
|
5
5
|
Author: Landing AI
|
6
6
|
Author-email: dev@landing.ai
|
@@ -28,6 +28,7 @@ Requires-Dist: pandas (==2.*)
|
|
28
28
|
Requires-Dist: pillow (==10.*)
|
29
29
|
Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
|
30
30
|
Requires-Dist: pydantic (>=2.0.0,<3.0.0)
|
31
|
+
Requires-Dist: pymupdf (>=1.23.0,<2.0.0)
|
31
32
|
Requires-Dist: pytube (==15.0.0)
|
32
33
|
Requires-Dist: requests (==2.*)
|
33
34
|
Requires-Dist: rich (>=13.7.1,<14.0.0)
|
@@ -30,7 +30,7 @@ vision_agent/tools/__init__.py,sha256=o9lfWBVopT_qSoSi26WcgQJTKQYNgbXv7r4z_o5j2E
|
|
30
30
|
vision_agent/tools/meta_tools.py,sha256=9iJilpGYEiXW0nYPTYAWHa7l23wGN8IM5KbE7mWDOT0,6798
|
31
31
|
vision_agent/tools/planner_tools.py,sha256=iQWtTgXdomn0IWrbmvXXM-y8Q_RSEOxyP04HIRLrgWI,19576
|
32
32
|
vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
|
33
|
-
vision_agent/tools/tools.py,sha256=
|
33
|
+
vision_agent/tools/tools.py,sha256=dKKrfKxqQYVDFRsLjMMpp1z4_5k68pkaoZUMf1BMc_Q,125694
|
34
34
|
vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
|
35
35
|
vision_agent/utils/agent.py,sha256=2ifTP5QElItnr4YHOJR6L5P1PUzV0GhChTTqVxuVyQg,15153
|
36
36
|
vision_agent/utils/exceptions.py,sha256=zis8smCbdEylBVZBTVfEUfAh7Rb7cWV3MSPambu6FsQ,1837
|
@@ -40,7 +40,7 @@ vision_agent/utils/tools.py,sha256=XbH5wuD1nlaKe6zBeVPqUbQDsK6D-eCskKND3rRHOzo,8
|
|
40
40
|
vision_agent/utils/tools_doc.py,sha256=yFue6KSXoa_Z1ngCdBEc4SdPZOWF1rVLeaHu02I8Wis,2523
|
41
41
|
vision_agent/utils/video.py,sha256=rjsQ1sKKisaQ6AVjJz0zd_G4g-ovRweS_rs4JEhenoI,5340
|
42
42
|
vision_agent/utils/video_tracking.py,sha256=DZLFpNCuzuPJQzbQoVNcp-m4dKxgiKdCNM5QTh_zURE,12245
|
43
|
-
vision_agent-1.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
44
|
-
vision_agent-1.1.
|
45
|
-
vision_agent-1.1.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
46
|
-
vision_agent-1.1.1.dist-info/RECORD,,
|
43
|
+
vision_agent-1.1.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
|
44
|
+
vision_agent-1.1.2.dist-info/METADATA,sha256=JxWPwfrAwtWx0Fpqq9b9Se7LZi22Ddqiw-YxX6nHe0A,12573
|
45
|
+
vision_agent-1.1.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
|
46
|
+
vision_agent-1.1.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|