vision-agent 1.1.1__py3-none-any.whl → 1.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -19,6 +19,7 @@ from IPython.display import display
19
19
  from PIL import Image, ImageDraw, ImageFont
20
20
  from pillow_heif import register_heif_opener # type: ignore
21
21
  from pytube import YouTube # type: ignore
22
+ import pymupdf # type: ignore
22
23
 
23
24
  from vision_agent.lmm.lmm import LMM, AnthropicLMM, OpenAILMM
24
25
  from vision_agent.utils.execute import FileSerializer, MimeType
@@ -3147,6 +3148,56 @@ def save_image(image: np.ndarray, file_path: str) -> None:
3147
3148
  pil_image.save(file_path)
3148
3149
 
3149
3150
 
3151
def load_pdf(pdf_path: str) -> List[np.ndarray]:
    """'load_pdf' is a utility function that loads a PDF from the given file path
    string or URL and converts each page to an image.

    Parameters:
        pdf_path (str): The path to the PDF file, or an ``http://`` / ``https://``
            URL. A URL is downloaded to a temporary file, which is removed again
            before the function returns (also when an error occurs).

    Returns:
        List[np.ndarray]: A list of images as NumPy arrays, one for each page of
            the PDF, rendered with a 2x zoom matrix.

    Example
    -------
        >>> load_pdf("path/to/document.pdf")
    """
    # Local import: only `urllib.request` is known to be imported by this module.
    from urllib.parse import urlparse

    # Path of the downloaded copy, if any. Tracked separately because `pdf_path`
    # is rebound to the local file below and can no longer tell us whether the
    # input was a URL.
    downloaded_path: Optional[str] = None

    try:
        # Handle URL case. Match the full scheme separator so that local paths
        # which merely begin with "http" are not mistaken for URLs.
        if pdf_path.startswith(("http://", "https://")):
            # Take the suffix from the URL *path* only, so query strings and
            # fragments do not end up in the temporary file name.
            _, pdf_suffix = os.path.splitext(urlparse(pdf_path).path)
            with tempfile.NamedTemporaryFile(
                delete=False, suffix=pdf_suffix
            ) as tmp_file:
                # Record the name first so a failed download is still cleaned up.
                downloaded_path = tmp_file.name
                with urllib.request.urlopen(pdf_path) as response:
                    tmp_file.write(response.read())
            pdf_path = downloaded_path

        # Open the PDF
        doc = pymupdf.open(pdf_path)
        try:
            images: List[np.ndarray] = []

            # Convert each page to an image
            for page_num in range(len(doc)):
                page = doc.load_page(page_num)

                # Render page to an image at 2x zoom in both directions
                pix = page.get_pixmap(matrix=pymupdf.Matrix(2, 2))

                # Convert to PIL Image, then to a numpy array
                img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
                images.append(np.array(img))
        finally:
            # Close the document even if rendering a page failed
            doc.close()

        return images
    finally:
        # Clean up the temporary file if the input was a URL
        if downloaded_path is not None:
            os.unlink(downloaded_path)
3199
+
3200
+
3150
3201
  def save_video(
3151
3202
  frames: List[np.ndarray], output_video_path: Optional[str] = None, fps: float = 5
3152
3203
  ) -> str:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: vision-agent
3
- Version: 1.1.1
3
+ Version: 1.1.2
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -28,6 +28,7 @@ Requires-Dist: pandas (==2.*)
28
28
  Requires-Dist: pillow (==10.*)
29
29
  Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
30
30
  Requires-Dist: pydantic (>=2.0.0,<3.0.0)
31
+ Requires-Dist: pymupdf (>=1.23.0,<2.0.0)
31
32
  Requires-Dist: pytube (==15.0.0)
32
33
  Requires-Dist: requests (==2.*)
33
34
  Requires-Dist: rich (>=13.7.1,<14.0.0)
@@ -30,7 +30,7 @@ vision_agent/tools/__init__.py,sha256=o9lfWBVopT_qSoSi26WcgQJTKQYNgbXv7r4z_o5j2E
30
30
  vision_agent/tools/meta_tools.py,sha256=9iJilpGYEiXW0nYPTYAWHa7l23wGN8IM5KbE7mWDOT0,6798
31
31
  vision_agent/tools/planner_tools.py,sha256=iQWtTgXdomn0IWrbmvXXM-y8Q_RSEOxyP04HIRLrgWI,19576
32
32
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
33
- vision_agent/tools/tools.py,sha256=yr45nu0Hr9_KT5sQn6ggaI0FP65XP0dlAmBtHnKihPU,124180
33
+ vision_agent/tools/tools.py,sha256=dKKrfKxqQYVDFRsLjMMpp1z4_5k68pkaoZUMf1BMc_Q,125694
34
34
  vision_agent/utils/__init__.py,sha256=mANUs_84VL-3gpZbXryvV2mWU623eWnRlJCSUHtMjuw,122
35
35
  vision_agent/utils/agent.py,sha256=2ifTP5QElItnr4YHOJR6L5P1PUzV0GhChTTqVxuVyQg,15153
36
36
  vision_agent/utils/exceptions.py,sha256=zis8smCbdEylBVZBTVfEUfAh7Rb7cWV3MSPambu6FsQ,1837
@@ -40,7 +40,7 @@ vision_agent/utils/tools.py,sha256=XbH5wuD1nlaKe6zBeVPqUbQDsK6D-eCskKND3rRHOzo,8
40
40
  vision_agent/utils/tools_doc.py,sha256=yFue6KSXoa_Z1ngCdBEc4SdPZOWF1rVLeaHu02I8Wis,2523
41
41
  vision_agent/utils/video.py,sha256=rjsQ1sKKisaQ6AVjJz0zd_G4g-ovRweS_rs4JEhenoI,5340
42
42
  vision_agent/utils/video_tracking.py,sha256=DZLFpNCuzuPJQzbQoVNcp-m4dKxgiKdCNM5QTh_zURE,12245
43
- vision_agent-1.1.1.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
- vision_agent-1.1.1.dist-info/METADATA,sha256=T9CSlGabaZwR1u2ZQlV2wTkXNpQZi5Nn1KwJMSo7s2o,12532
45
- vision_agent-1.1.1.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
46
- vision_agent-1.1.1.dist-info/RECORD,,
43
+ vision_agent-1.1.2.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ vision_agent-1.1.2.dist-info/METADATA,sha256=JxWPwfrAwtWx0Fpqq9b9Se7LZi22Ddqiw-YxX6nHe0A,12573
45
+ vision_agent-1.1.2.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
46
+ vision_agent-1.1.2.dist-info/RECORD,,