vision-agent 0.2.216__tar.gz → 0.2.217__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. {vision_agent-0.2.216 → vision_agent-0.2.217}/PKG-INFO +1 -1
  2. {vision_agent-0.2.216 → vision_agent-0.2.217}/pyproject.toml +1 -1
  3. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/tools/__init__.py +1 -0
  4. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/tools/tools.py +71 -0
  5. {vision_agent-0.2.216 → vision_agent-0.2.217}/LICENSE +0 -0
  6. {vision_agent-0.2.216 → vision_agent-0.2.217}/README.md +0 -0
  7. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/.sim_tools/df.csv +0 -0
  8. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/.sim_tools/embs.npy +0 -0
  9. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/__init__.py +0 -0
  10. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/README.md +0 -0
  11. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/__init__.py +0 -0
  12. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/agent.py +0 -0
  13. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/agent_utils.py +0 -0
  14. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/types.py +0 -0
  15. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent.py +0 -0
  16. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_coder.py +0 -0
  17. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  18. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
  19. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
  20. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_planner.py +0 -0
  21. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  22. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
  23. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
  24. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_prompts.py +0 -0
  25. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
  26. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/agent/vision_agent_v2.py +0 -0
  27. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/clients/__init__.py +0 -0
  28. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/clients/http.py +0 -0
  29. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/clients/landing_public_api.py +0 -0
  30. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/fonts/__init__.py +0 -0
  31. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  32. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/lmm/__init__.py +0 -0
  33. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/lmm/lmm.py +0 -0
  34. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/lmm/types.py +0 -0
  35. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/tools/meta_tools.py +0 -0
  36. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/tools/planner_tools.py +0 -0
  37. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/tools/prompts.py +0 -0
  38. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/tools/tool_utils.py +0 -0
  39. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/tools/tools_types.py +0 -0
  40. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/utils/__init__.py +0 -0
  41. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/utils/exceptions.py +0 -0
  42. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/utils/execute.py +0 -0
  43. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/utils/image_utils.py +0 -0
  44. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/utils/sim.py +0 -0
  45. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/utils/type_defs.py +0 -0
  46. {vision_agent-0.2.216 → vision_agent-0.2.217}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.216
3
+ Version: 0.2.217
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.216"
7
+ version = "0.2.217"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -33,6 +33,7 @@ from .tools import (
33
33
  depth_anything_v2,
34
34
  detr_segmentation,
35
35
  document_extraction,
36
+ document_qa,
36
37
  extract_frames_and_timestamps,
37
38
  florence2_ocr,
38
39
  florence2_phrase_grounding,
@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
2174
2174
  return data
2175
2175
 
2176
2176
 
2177
+ def document_qa(
2178
+ prompt: str,
2179
+ image: np.ndarray,
2180
+ ) -> str:
2181
+ """'document_qa' is a tool that can answer any questions about arbitrary
2182
+ images of documents or presentations. It answers by analyzing the contextual document data
2183
+ and then using a model to answer specific questions. It returns text as an answer to the question.
2184
+
2185
+ Parameters:
2186
+ prompt (str): The question to be answered about the document image
2187
+ image (np.ndarray): The document image to analyze
2188
+
2189
+ Returns:
2190
+ str: The answer to the question based on the document's context.
2191
+
2192
+ Example
2193
+ -------
2194
+ >>> document_qa(question, image)
2195
+ 'The answer to the question ...'
2196
+ """
2197
+
2198
+ image_file = numpy_to_bytes(image)
2199
+
2200
+ files = [("image", image_file)]
2201
+
2202
+ payload = {
2203
+ "model": "document-analysis",
2204
+ }
2205
+
2206
+ data: dict[str, Any] = send_inference_request(
2207
+ payload=payload,
2208
+ endpoint_name="document-analysis",
2209
+ files=files,
2210
+ v2=True,
2211
+ metadata_payload={"function_name": "document_qa"},
2212
+ )
2213
+
2214
+ def normalize(data: Any) -> Dict[str, Any]:
2215
+ if isinstance(data, Dict):
2216
+ if "bbox" in data:
2217
+ data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
2218
+ for key in data:
2219
+ data[key] = normalize(data[key])
2220
+ elif isinstance(data, List):
2221
+ for i in range(len(data)):
2222
+ data[i] = normalize(data[i])
2223
+ return data # type: ignore
2224
+
2225
+ data = normalize(data)
2226
+
2227
+ prompt = f"""
2228
+ Document Context:
2229
+ {data}\n
2230
+ Question: {prompt}\n
2231
+ Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
2232
+ """
2233
+
2234
+ lmm = AnthropicLMM()
2235
+ llm_output = lmm.generate(prompt=prompt)
2236
+ llm_output = cast(str, llm_output)
2237
+
2238
+ _display_tool_trace(
2239
+ document_qa.__name__,
2240
+ payload,
2241
+ llm_output,
2242
+ files,
2243
+ )
2244
+
2245
+ return llm_output
2246
+
2247
+
2177
2248
  # Utility and visualization functions
2178
2249
 
2179
2250
 
File without changes
File without changes