PyPI - vision-agent - Versions diffs - 0.2.213__py3-none-any.whl → 0.2.215__py3-none-any.whl - Mend

vision-agent 0.2.213__py3-none-any.whl → 0.2.215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

vision_agent/tools/__init__.py CHANGED Viewed

@@ -32,6 +32,7 @@ from .tools import (
     countgd_sam2_video_tracking,
     depth_anything_v2,
     detr_segmentation,
+    document_analysis,
     extract_frames_and_timestamps,
     florence2_ocr,
     florence2_phrase_grounding,

vision_agent/tools/tools.py CHANGED Viewed

@@ -1879,6 +1879,64 @@ def closest_box_distance(
     return cast(float, np.sqrt(horizontal_distance**2 + vertical_distance**2))
+def document_analysis(image: np.ndarray) -> Dict[str, Any]:
+    """'document_analysis' is an understanding tool that can handle various
+    types of document image layouts. It returns a structured output containing the text,
+    tables, pictures, charts and information caption, summary, labels, bounding boxes, etc
+    avoiding information loss.
+    Parameters:
+        image (np.ndarray): The document image to analyze
+    Returns:
+        Dict[str, Any]: A dictionary containing the extracted information.
+    Example
+    -------
+        >>> document_analysis(image)
+        {'pages': [{'bbox': [left_0, top_0, right_0, bottom_0],
+                    'chunks': [{'bbox': [left_1, top_1, right_1, bottom_1],
+                                'caption': 'TITLE',
+                                'label': 'page_header',
+                                'summary': 'The image contains a single word ...' },
+                               {'bbox': [left_2, top_2, right_2, bottom_2],
+                                'caption': {'data': [{'value': 200, 'year': '2024' ...},
+                                    'title': 'Total CapEx Spending',
+                                    'type': 'bar chart',
+                                    'unit': 'Billion USD',
+                                    'xAxis': 'Year',
+                                    'yAxis': 'Total CapEx Spending'},
+                                'label': 'picture',
+                                'summary': 'This bar chart illustrates the trend of ...'},
+                    ],
+    """
+    image_file = numpy_to_bytes(image)
+    files = [("image", image_file)]
+    payload = {
+        "model": "document-analysis",
+    }
+    response: dict[str, Any] = send_inference_request(
+        payload=payload,
+        endpoint_name="document-analysis",
+        files=files,
+        v2=True,
+        metadata_payload={"function_name": "document_analysis"},
+    )
+    _display_tool_trace(
+        document_analysis.__name__,
+        payload,
+        response,
+        files,
+    )
+    return response
 # Utility and visualization functions

vision_agent/utils/video.py CHANGED Viewed

@@ -106,9 +106,9 @@ def frames_to_bytes(
     return buffer_bytes
-# WARNING: this cache is cache is a little dangerous because if the underlying video
-# contents change but the filename remains the same it will return the old file contents
-# but for vision agent it's unlikely to change the file contents while keeping the
+# WARNING: This cache is a little dangerous because if the underlying video
+# contents change but the filename remains the same it will return the old file contents.
+# For vision agent it's unlikely to change the file contents while keeping the
 # same file name and the time savings are very large.
 @lru_cache(maxsize=8)
 def extract_frames_from_video(

{vision_agent-0.2.213.dist-info → vision_agent-0.2.215.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.213
+Version: 0.2.215
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai

{vision_agent-0.2.213.dist-info → vision_agent-0.2.215.dist-info}/RECORD RENAMED Viewed

@@ -26,12 +26,12 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
 vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
 vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
-vision_agent/tools/__init__.py,sha256=InL8zUTRN8i_9J6r2wAtYdtNrVkElqdO_p-e2OA8q5A,2770
+vision_agent/tools/__init__.py,sha256=Ny522Y4h1xDQTW6kBP_ceUM4jc0Y14dRhcHdtMDdr24,2793
 vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
 vision_agent/tools/planner_tools.py,sha256=k7PPu-HhwDwusQgFSPTCWKRVVHBzPMeYB6h2xSEjdUo,13273
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=LAnrb_nY6PNVamqJahRN-J0cuOy4gsKvCtSuXJf0RsI,10075
-vision_agent/tools/tools.py,sha256=ZcXEI0Pb54OGXnLWi690SFx22k7JlEmQ-N16LzRLHlk,90627
+vision_agent/tools/tools.py,sha256=xzN1uOkVQ9l1MaXsJxT_VlDp6nLQfdBX04kex_jE0fc,92692
 vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -39,8 +39,8 @@ vision_agent/utils/execute.py,sha256=ktJX1gWBk4D_tXeWV5olGUMC4dU_Z6m5oSv-6Yu1O0w
 vision_agent/utils/image_utils.py,sha256=5uoYgXa6E0-lVrXR7K2XE7fe6r_n7pvK64HYQ50vG3w,12182
 vision_agent/utils/sim.py,sha256=f1emBQM8SmyVKSrhj0NHItnfMHSeTw-Nk2pw-0eBZ5c,7462
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
-vision_agent/utils/video.py,sha256=tRcGp4vEnaDycigL1hBO9k0FBPtDH35fCQciVr9GqYI,6013
-vision_agent-0.2.213.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.213.dist-info/METADATA,sha256=iXy6vkFwSXz6UQW1LjuZMCj6YT8YwmjGklhmulFOoIc,19071
-vision_agent-0.2.213.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.213.dist-info/RECORD,,
+vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
+vision_agent-0.2.215.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.215.dist-info/METADATA,sha256=nSGpnpDpzJmWmGYDSShBvfjD5dbB6ZWSgOXGQ2Ci_yM,19071
+vision_agent-0.2.215.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.215.dist-info/RECORD,,

{vision_agent-0.2.213.dist-info → vision_agent-0.2.215.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.213.dist-info → vision_agent-0.2.215.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.213__py3-none-any.whl → 0.2.215__py3-none-any.whl

vision-agent 0.2.213__py3-none-any.whl → 0.2.215__py3-none-any.whl