vision-agent 0.2.214__tar.gz → 0.2.215__tar.gz
Sign up to get free protection for your applications and to get access to all the features.
- {vision_agent-0.2.214 → vision_agent-0.2.215}/PKG-INFO +1 -1
- {vision_agent-0.2.214 → vision_agent-0.2.215}/pyproject.toml +1 -1
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/tools/__init__.py +1 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/tools/tools.py +58 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/LICENSE +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/README.md +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/.sim_tools/df.csv +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/.sim_tools/embs.npy +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/__init__.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/README.md +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/__init__.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/agent.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/agent_utils.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/types.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_prompts.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_v2.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/clients/__init__.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/clients/http.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/clients/landing_public_api.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/fonts/__init__.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/lmm/__init__.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/lmm/lmm.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/lmm/types.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/tools/meta_tools.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/tools/planner_tools.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/tools/prompts.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/tools/tool_utils.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/tools/tools_types.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/utils/__init__.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/utils/exceptions.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/utils/execute.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/utils/image_utils.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/utils/sim.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/utils/type_defs.py +0 -0
- {vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/utils/video.py +0 -0
@@ -1879,6 +1879,64 @@ def closest_box_distance(
|
|
1879
1879
|
return cast(float, np.sqrt(horizontal_distance**2 + vertical_distance**2))
|
1880
1880
|
|
1881
1881
|
|
1882
|
+
def document_analysis(image: np.ndarray) -> Dict[str, Any]:
    """'document_analysis' is a document understanding tool that can handle various
    types of document image layouts. It returns a structured output containing the
    text, tables, pictures and charts found in the document, together with their
    captions, summaries, labels and bounding boxes, so that no information is lost.

    Parameters:
        image (np.ndarray): The document image to analyze

    Returns:
        Dict[str, Any]: A dictionary containing the extracted information.

    Example
    -------
        >>> document_analysis(image)
        {'pages': [{'bbox': [left_0, top_0, right_0, bottom_0],
                    'chunks': [{'bbox': [left_1, top_1, right_1, bottom_1],
                                'caption': 'TITLE',
                                'label': 'page_header',
                                'summary': 'The image contains a single word ...'},
                               {'bbox': [left_2, top_2, right_2, bottom_2],
                                'caption': {'data': [{'value': 200, 'year': '2024' ...}, ...],
                                            'title': 'Total CapEx Spending',
                                            'type': 'bar chart',
                                            'unit': 'Billion USD',
                                            'xAxis': 'Year',
                                            'yAxis': 'Total CapEx Spending'},
                                'label': 'picture',
                                'summary': 'This bar chart illustrates the trend of ...'},
                               ...]}]}
    """

    # The image is serialized to bytes and sent as a single multipart file
    # under the "image" field.
    image_file = numpy_to_bytes(image)
    files = [("image", image_file)]

    payload = {
        "model": "document-analysis",
    }

    # NOTE(review): `v2=True` presumably selects the newer inference API route
    # in send_inference_request; confirm against that helper's definition.
    response: Dict[str, Any] = send_inference_request(
        payload=payload,
        endpoint_name="document-analysis",
        files=files,
        v2=True,
        metadata_payload={"function_name": "document_analysis"},
    )

    # Record the request/response pair for this tool call before returning.
    _display_tool_trace(
        document_analysis.__name__,
        payload,
        response,
        files,
    )

    return response
|
1938
|
+
|
1939
|
+
|
1882
1940
|
# Utility and visualization functions
|
1883
1941
|
|
1884
1942
|
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_coder_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
{vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_prompts.py
RENAMED
File without changes
|
{vision_agent-0.2.214 → vision_agent-0.2.215}/vision_agent/agent/vision_agent_planner_prompts_v2.py
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|