vision-agent 0.2.214__py3-none-any.whl → 0.2.215__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -32,6 +32,7 @@ from .tools import (
32
32
  countgd_sam2_video_tracking,
33
33
  depth_anything_v2,
34
34
  detr_segmentation,
35
+ document_analysis,
35
36
  extract_frames_and_timestamps,
36
37
  florence2_ocr,
37
38
  florence2_phrase_grounding,
@@ -1879,6 +1879,64 @@ def closest_box_distance(
1879
1879
  return cast(float, np.sqrt(horizontal_distance**2 + vertical_distance**2))
1880
1880
 
1881
1881
 
1882
def document_analysis(image: np.ndarray) -> Dict[str, Any]:
    """'document_analysis' is a document understanding tool that can handle various
    types of document image layouts. It returns a structured output containing the
    text, tables, pictures and charts found in the document, together with
    information such as captions, summaries, labels and bounding boxes, avoiding
    information loss.

    Parameters:
        image (np.ndarray): The document image to analyze

    Returns:
        Dict[str, Any]: A dictionary containing the extracted information.

    Example
    -------
        >>> document_analysis(image)
        {'pages': [{'bbox': [left_0, top_0, right_0, bottom_0],
                    'chunks': [{'bbox': [left_1, top_1, right_1, bottom_1],
                                'caption': 'TITLE',
                                'label': 'page_header',
                                'summary': 'The image contains a single word ...'},
                               {'bbox': [left_2, top_2, right_2, bottom_2],
                                'caption': {'data': [{'value': 200, 'year': '2024'}, ...],
                                            'title': 'Total CapEx Spending',
                                            'type': 'bar chart',
                                            'unit': 'Billion USD',
                                            'xAxis': 'Year',
                                            'yAxis': 'Total CapEx Spending'},
                                'label': 'picture',
                                'summary': 'This bar chart illustrates the trend of ...'},
                               ...]}]}
    """

    # Convert the array into the form uploaded under the "image" field.
    # NOTE(review): presumably an encoded image byte string -- confirm against
    # numpy_to_bytes.
    image_file = numpy_to_bytes(image)
    files = [("image", image_file)]

    payload = {"model": "document-analysis"}

    # The response is returned to the caller exactly as received; no
    # post-processing or validation of its structure happens here.
    response: Dict[str, Any] = send_inference_request(
        payload=payload,
        endpoint_name="document-analysis",
        files=files,
        v2=True,
        metadata_payload={"function_name": "document_analysis"},
    )

    # Record the call (tool name, request payload, response and uploaded files)
    # through the shared tool-trace helper before handing the result back.
    _display_tool_trace(
        document_analysis.__name__,
        payload,
        response,
        files,
    )

    return response
1938
+
1939
+
1882
1940
  # Utility and visualization functions
1883
1941
 
1884
1942
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.214
3
+ Version: 0.2.215
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -26,12 +26,12 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
26
26
  vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
27
27
  vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
28
28
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
29
- vision_agent/tools/__init__.py,sha256=InL8zUTRN8i_9J6r2wAtYdtNrVkElqdO_p-e2OA8q5A,2770
29
+ vision_agent/tools/__init__.py,sha256=Ny522Y4h1xDQTW6kBP_ceUM4jc0Y14dRhcHdtMDdr24,2793
30
30
  vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
31
31
  vision_agent/tools/planner_tools.py,sha256=k7PPu-HhwDwusQgFSPTCWKRVVHBzPMeYB6h2xSEjdUo,13273
32
32
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
33
33
  vision_agent/tools/tool_utils.py,sha256=LAnrb_nY6PNVamqJahRN-J0cuOy4gsKvCtSuXJf0RsI,10075
34
- vision_agent/tools/tools.py,sha256=ZcXEI0Pb54OGXnLWi690SFx22k7JlEmQ-N16LzRLHlk,90627
34
+ vision_agent/tools/tools.py,sha256=xzN1uOkVQ9l1MaXsJxT_VlDp6nLQfdBX04kex_jE0fc,92692
35
35
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
36
36
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
37
37
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
@@ -40,7 +40,7 @@ vision_agent/utils/image_utils.py,sha256=5uoYgXa6E0-lVrXR7K2XE7fe6r_n7pvK64HYQ50
40
40
  vision_agent/utils/sim.py,sha256=f1emBQM8SmyVKSrhj0NHItnfMHSeTw-Nk2pw-0eBZ5c,7462
41
41
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
42
42
  vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
43
- vision_agent-0.2.214.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
- vision_agent-0.2.214.dist-info/METADATA,sha256=H-NMLaCs8bVHlxWxDQzqixEQjbqMOwYk4aYGkE13BqM,19071
45
- vision_agent-0.2.214.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
46
- vision_agent-0.2.214.dist-info/RECORD,,
43
+ vision_agent-0.2.215.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ vision_agent-0.2.215.dist-info/METADATA,sha256=nSGpnpDpzJmWmGYDSShBvfjD5dbB6ZWSgOXGQ2Ci_yM,19071
45
+ vision_agent-0.2.215.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
46
+ vision_agent-0.2.215.dist-info/RECORD,,