vision-agent 0.2.216__tar.gz → 0.2.218__tar.gz

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46)
  1. {vision_agent-0.2.216 → vision_agent-0.2.218}/PKG-INFO +2 -1
  2. {vision_agent-0.2.216 → vision_agent-0.2.218}/pyproject.toml +2 -1
  3. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/__init__.py +1 -0
  4. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/tools.py +71 -0
  5. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/execute.py +40 -17
  6. {vision_agent-0.2.216 → vision_agent-0.2.218}/LICENSE +0 -0
  7. {vision_agent-0.2.216 → vision_agent-0.2.218}/README.md +0 -0
  8. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/.sim_tools/df.csv +0 -0
  9. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/.sim_tools/embs.npy +0 -0
  10. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/__init__.py +0 -0
  11. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/README.md +0 -0
  12. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/__init__.py +0 -0
  13. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/agent.py +0 -0
  14. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/agent_utils.py +0 -0
  15. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/types.py +0 -0
  16. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent.py +0 -0
  17. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder.py +0 -0
  18. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_prompts.py +0 -0
  19. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_prompts_v2.py +0 -0
  20. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_coder_v2.py +0 -0
  21. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner.py +0 -0
  22. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_prompts.py +0 -0
  23. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_prompts_v2.py +0 -0
  24. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_planner_v2.py +0 -0
  25. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_prompts.py +0 -0
  26. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_prompts_v2.py +0 -0
  27. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/agent/vision_agent_v2.py +0 -0
  28. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/clients/__init__.py +0 -0
  29. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/clients/http.py +0 -0
  30. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/clients/landing_public_api.py +0 -0
  31. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/fonts/__init__.py +0 -0
  32. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/fonts/default_font_ch_en.ttf +0 -0
  33. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/lmm/__init__.py +0 -0
  34. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/lmm/lmm.py +0 -0
  35. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/lmm/types.py +0 -0
  36. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/meta_tools.py +0 -0
  37. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/planner_tools.py +0 -0
  38. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/prompts.py +0 -0
  39. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/tool_utils.py +0 -0
  40. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/tools_types.py +0 -0
  41. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/__init__.py +0 -0
  42. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/exceptions.py +0 -0
  43. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/image_utils.py +0 -0
  44. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/sim.py +0 -0
  45. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/type_defs.py +0 -0
  46. {vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/video.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.216
3
+ Version: 0.2.218
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
23
23
  Requires-Dist: numpy (>=1.21.0,<2.0.0)
24
24
  Requires-Dist: openai (>=1.0.0,<2.0.0)
25
25
  Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
26
+ Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
26
27
  Requires-Dist: pandas (>=2.0.0,<3.0.0)
27
28
  Requires-Dist: pillow (>=10.0.0,<11.0.0)
28
29
  Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
4
4
 
5
5
  [tool.poetry]
6
6
  name = "vision-agent"
7
- version = "0.2.216"
7
+ version = "0.2.218"
8
8
  description = "Toolset for Vision Agent"
9
9
  authors = ["Landing AI <dev@landing.ai>"]
10
10
  readme = "README.md"
@@ -47,6 +47,7 @@ av = "^11.0.0"
47
47
  libcst = "^1.5.0"
48
48
  matplotlib = "^3.9.2"
49
49
  scikit-learn = "^1.5.2"
50
+ opentelemetry-api = "^1.29.0"
50
51
 
51
52
  [tool.poetry.group.dev.dependencies]
52
53
  autoflake = "1.*"
@@ -33,6 +33,7 @@ from .tools import (
33
33
  depth_anything_v2,
34
34
  detr_segmentation,
35
35
  document_extraction,
36
+ document_qa,
36
37
  extract_frames_and_timestamps,
37
38
  florence2_ocr,
38
39
  florence2_phrase_grounding,
@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
2174
2174
  return data
2175
2175
 
2176
2176
 
2177
+ def document_qa(
2178
+ prompt: str,
2179
+ image: np.ndarray,
2180
+ ) -> str:
2181
+ """'document_qa' is a tool that can answer any questions about arbitrary
2182
+ images of documents or presentations. It answers by analyzing the contextual document data
2183
+ and then using a model to answer specific questions. It returns text as an answer to the question.
2184
+
2185
+ Parameters:
2186
+ prompt (str): The question to be answered about the document image
2187
+ image (np.ndarray): The document image to analyze
2188
+
2189
+ Returns:
2190
+ str: The answer to the question based on the document's context.
2191
+
2192
+ Example
2193
+ -------
2194
+ >>> document_qa(question, image)
2195
+ 'The answer to the question ...'
2196
+ """
2197
+
2198
+ image_file = numpy_to_bytes(image)
2199
+
2200
+ files = [("image", image_file)]
2201
+
2202
+ payload = {
2203
+ "model": "document-analysis",
2204
+ }
2205
+
2206
+ data: dict[str, Any] = send_inference_request(
2207
+ payload=payload,
2208
+ endpoint_name="document-analysis",
2209
+ files=files,
2210
+ v2=True,
2211
+ metadata_payload={"function_name": "document_qa"},
2212
+ )
2213
+
2214
+ def normalize(data: Any) -> Dict[str, Any]:
2215
+ if isinstance(data, Dict):
2216
+ if "bbox" in data:
2217
+ data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
2218
+ for key in data:
2219
+ data[key] = normalize(data[key])
2220
+ elif isinstance(data, List):
2221
+ for i in range(len(data)):
2222
+ data[i] = normalize(data[i])
2223
+ return data # type: ignore
2224
+
2225
+ data = normalize(data)
2226
+
2227
+ prompt = f"""
2228
+ Document Context:
2229
+ {data}\n
2230
+ Question: {prompt}\n
2231
+ Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
2232
+ """
2233
+
2234
+ lmm = AnthropicLMM()
2235
+ llm_output = lmm.generate(prompt=prompt)
2236
+ llm_output = cast(str, llm_output)
2237
+
2238
+ _display_tool_trace(
2239
+ document_qa.__name__,
2240
+ payload,
2241
+ llm_output,
2242
+ files,
2243
+ )
2244
+
2245
+ return llm_output
2246
+
2247
+
2177
2248
  # Utility and visualization functions
2178
2249
 
2179
2250
 
@@ -30,6 +30,8 @@ from nbclient.util import run_sync
30
30
  from nbformat.v4 import new_code_cell
31
31
  from pydantic import BaseModel, field_serializer
32
32
  from typing_extensions import Self
33
+ from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
34
+ from opentelemetry.context import get_current
33
35
 
34
36
  from vision_agent.utils.exceptions import (
35
37
  RemoteSandboxCreationError,
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
633
635
  self._new_kernel()
634
636
 
635
637
  def exec_cell(self, code: str) -> Execution:
636
- try:
637
- self.nb.cells.append(new_code_cell(code))
638
- cell = self.nb.cells[-1]
639
- self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
640
- return _parse_local_code_interpreter_outputs(self.nb.cells[-1].outputs)
641
- except CellTimeoutError as e:
642
- run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
643
- sleep(1)
644
- traceback_raw = traceback.format_exc().splitlines()
645
- return Execution.from_exception(e, traceback_raw)
646
- except DeadKernelError as e:
647
- self.restart_kernel()
648
- traceback_raw = traceback.format_exc().splitlines()
649
- return Execution.from_exception(e, traceback_raw)
650
- except Exception as e:
651
- traceback_raw = traceback.format_exc().splitlines()
652
- return Execution.from_exception(e, traceback_raw)
638
+ # Trace each exec_cell call with an OpenTelemetry span
639
+ tracer = get_tracer(__name__)
640
+ context = get_current()
641
+ with tracer.start_as_current_span(
642
+ "notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
643
+ ) as span:
644
+ try:
645
+ # Add code as span attribute
646
+ span.set_attribute("code", code)
647
+ span.set_attribute("cell_index", len(self.nb.cells))
648
+
649
+ self.nb.cells.append(new_code_cell(code))
650
+ cell = self.nb.cells[-1]
651
+ self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
652
+
653
+ result = _parse_local_code_interpreter_outputs(
654
+ self.nb.cells[-1].outputs
655
+ )
656
+ span.set_status(Status(StatusCode.OK))
657
+ return result
658
+ except CellTimeoutError as e:
659
+ run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
660
+ sleep(1)
661
+ span.set_status(Status(StatusCode.ERROR, str(e)))
662
+ span.record_exception(e)
663
+ traceback_raw = traceback.format_exc().splitlines()
664
+ return Execution.from_exception(e, traceback_raw)
665
+ except DeadKernelError as e:
666
+ self.restart_kernel()
667
+ span.set_status(Status(StatusCode.ERROR, str(e)))
668
+ span.record_exception(e)
669
+ traceback_raw = traceback.format_exc().splitlines()
670
+ return Execution.from_exception(e, traceback_raw)
671
+ except Exception as e:
672
+ span.set_status(Status(StatusCode.ERROR, str(e)))
673
+ span.record_exception(e)
674
+ traceback_raw = traceback.format_exc().splitlines()
675
+ return Execution.from_exception(e, traceback_raw)
653
676
 
654
677
  def upload_file(self, file_path: Union[str, Path]) -> Path:
655
678
  with open(file_path, "rb") as f:
File without changes
File without changes