vision-agent 0.2.213__tar.gz → 0.2.215__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. {vision_agent-0.2.213 → vision_agent-0.2.215}/PKG-INFO +1 -1
  2. {vision_agent-0.2.213 → vision_agent-0.2.215}/pyproject.toml +1 -1
  3. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/tools/__init__.py +1 -0
  4. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/tools/tools.py +58 -0
  5. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/utils/video.py +3 -3
  6. {vision_agent-0.2.213 → vision_agent-0.2.215}/LICENSE +0 -0
  7. {vision_agent-0.2.213 → vision_agent-0.2.215}/README.md +0 -0
  8. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/.sim_tools/df.csv +0 -0
  9. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/.sim_tools/embs.npy +0 -0
  10. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/__init__.py +0 -0
  11. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/README.md +0 -0
  12. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/__init__.py +0 -0
  13. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/agent.py +0 -0
  14. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/agent_utils.py +0 -0
  15. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/types.py +0 -0
  16. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent.py +0 -0
  17. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder.py +0 -0
  18. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  19. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
  20. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
  21. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner.py +0 -0
  22. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  23. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
  24. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
  25. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_prompts.py +0 -0
  26. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
  27. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_v2.py +0 -0
  28. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/clients/__init__.py +0 -0
  29. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/clients/http.py +0 -0
  30. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/clients/landing_public_api.py +0 -0
  31. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/fonts/__init__.py +0 -0
  32. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  33. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/lmm/__init__.py +0 -0
  34. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/lmm/lmm.py +0 -0
  35. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/lmm/types.py +0 -0
  36. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/tools/meta_tools.py +0 -0
  37. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/tools/planner_tools.py +0 -0
  38. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/tools/prompts.py +0 -0
  39. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/tools/tool_utils.py +0 -0
  40. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/tools/tools_types.py +0 -0
  41. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/utils/__init__.py +0 -0
  42. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/utils/exceptions.py +0 -0
  43. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/utils/execute.py +0 -0
  44. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/utils/image_utils.py +0 -0
  45. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/utils/sim.py +0 -0
  46. {vision_agent-0.2.213 → vision_agent-0.2.215}/vision_agent/utils/type_defs.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.213
3
+ Version: 0.2.215
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.213"
7
+ version = "0.2.215"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -32,6 +32,7 @@ from .tools import (
32
32
  countgd_sam2_video_tracking,
33
33
  depth_anything_v2,
34
34
  detr_segmentation,
35
+ document_analysis,
35
36
  extract_frames_and_timestamps,
36
37
  florence2_ocr,
37
38
  florence2_phrase_grounding,
@@ -1879,6 +1879,64 @@ def closest_box_distance(
1879
1879
  return cast(float, np.sqrt(horizontal_distance**2 + vertical_distance**2))
1880
1880
 
1881
1881
 
1882
+ def document_analysis(image: np.ndarray) -> Dict[str, Any]:
1883
+ """'document_analysis' is an understanding tool that can handle various
1884
+ types of document image layouts. It returns a structured output containing the text,
1885
+ tables, pictures and charts, along with each element's caption, summary, label and
1886
+ bounding box, so that no information is lost.
1887
+
1888
+ Parameters:
1889
+ image (np.ndarray): The document image to analyze
1890
+
1891
+ Returns:
1892
+ Dict[str, Any]: A dictionary containing the extracted information.
1893
+
1894
+ Example
1895
+ -------
1896
+ >>> document_analysis(image)
1897
+ {'pages': [{'bbox': [left_0, top_0, right_0, bottom_0],
1898
+ 'chunks': [{'bbox': [left_1, top_1, right_1, bottom_1],
1899
+ 'caption': 'TITLE',
1900
+ 'label': 'page_header',
1901
+ 'summary': 'The image contains a single word ...' },
1902
+ {'bbox': [left_2, top_2, right_2, bottom_2],
1903
+ 'caption': {'data': [{'value': 200, 'year': '2024', ...}, ...],
1904
+ 'title': 'Total CapEx Spending',
1905
+ 'type': 'bar chart',
1906
+ 'unit': 'Billion USD',
1907
+ 'xAxis': 'Year',
1908
+ 'yAxis': 'Total CapEx Spending'},
1909
+ 'label': 'picture',
1910
+ 'summary': 'This bar chart illustrates the trend of ...'},
1911
+ ]}]}
1912
+ """
1913
+
1914
+ image_file = numpy_to_bytes(image)
1915
+
1916
+ files = [("image", image_file)]
1917
+
1918
+ payload = {
1919
+ "model": "document-analysis",
1920
+ }
1921
+
1922
+ response: dict[str, Any] = send_inference_request(
1923
+ payload=payload,
1924
+ endpoint_name="document-analysis",
1925
+ files=files,
1926
+ v2=True,
1927
+ metadata_payload={"function_name": "document_analysis"},
1928
+ )
1929
+
1930
+ _display_tool_trace(
1931
+ document_analysis.__name__,
1932
+ payload,
1933
+ response,
1934
+ files,
1935
+ )
1936
+
1937
+ return response
1938
+
1939
+
1882
1940
  # Utility and visualization functions
1883
1941
 
1884
1942
 
@@ -106,9 +106,9 @@ def frames_to_bytes(
106
106
  return buffer_bytes
107
107
 
108
108
 
109
- # WARNING: this cache is cache is a little dangerous because if the underlying video
110
- # contents change but the filename remains the same it will return the old file contents
111
- # but for vision agent it's unlikely to change the file contents while keeping the
109
+ # WARNING: This cache is a little dangerous because if the underlying video
110
+ # contents change but the filename remains the same it will return the old file contents.
111
+ # For vision agent it's unlikely to change the file contents while keeping the
112
112
  # same file name and the time savings are very large.
113
113
  @lru_cache(maxsize=8)
114
114
  def extract_frames_from_video(
File without changes
File without changes