vision-agent 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,6 +33,7 @@ from .tools import (
33
33
  depth_anything_v2,
34
34
  detr_segmentation,
35
35
  document_extraction,
36
+ document_qa,
36
37
  extract_frames_and_timestamps,
37
38
  florence2_ocr,
38
39
  florence2_phrase_grounding,
@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
2174
2174
  return data
2175
2175
 
2176
2176
 
2177
+ def document_qa(
2178
+ prompt: str,
2179
+ image: np.ndarray,
2180
+ ) -> str:
2181
+ """'document_qa' is a tool that answers questions about images of documents
2182
+ or presentations. It first sends the image to a document-analysis model to obtain the document data,
2183
+ and then uses a language model to answer the question from that data. It returns the answer as text.
2184
+
2185
+ Parameters:
2186
+ prompt (str): The question to be answered about the document image
2187
+ image (np.ndarray): The document image to analyze
2188
+
2189
+ Returns:
2190
+ str: The answer to the question based on the document's context.
2191
+
2192
+ Example
2193
+ -------
2194
+ >>> document_qa(question, image)
2195
+ 'The answer to the question ...'
2196
+ """
2197
+
2198
+ image_file = numpy_to_bytes(image)
2199
+
2200
+ files = [("image", image_file)]
2201
+
2202
+ payload = {
2203
+ "model": "document-analysis",
2204
+ }
2205
+
2206
+ data: dict[str, Any] = send_inference_request(
2207
+ payload=payload,
2208
+ endpoint_name="document-analysis",
2209
+ files=files,
2210
+ v2=True,
2211
+ metadata_payload={"function_name": "document_qa"},
2212
+ )
2213
+
2214
+ def normalize(data: Any) -> Dict[str, Any]:
2215
+ if isinstance(data, Dict):
2216
+ if "bbox" in data:
2217
+ data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
2218
+ for key in data:
2219
+ data[key] = normalize(data[key])
2220
+ elif isinstance(data, List):
2221
+ for i in range(len(data)):
2222
+ data[i] = normalize(data[i])
2223
+ return data # type: ignore
2224
+
2225
+ data = normalize(data)
2226
+
2227
+ prompt = f"""
2228
+ Document Context:
2229
+ {data}\n
2230
+ Question: {prompt}\n
2231
+ Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
2232
+ """
2233
+
2234
+ lmm = AnthropicLMM()
2235
+ llm_output = lmm.generate(prompt=prompt)
2236
+ llm_output = cast(str, llm_output)
2237
+
2238
+ _display_tool_trace(
2239
+ document_qa.__name__,
2240
+ payload,
2241
+ llm_output,
2242
+ files,
2243
+ )
2244
+
2245
+ return llm_output
2246
+
2247
+
2177
2248
  # Utility and visualization functions
2178
2249
 
2179
2250
 
@@ -30,6 +30,8 @@ from nbclient.util import run_sync
30
30
  from nbformat.v4 import new_code_cell
31
31
  from pydantic import BaseModel, field_serializer
32
32
  from typing_extensions import Self
33
+ from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
34
+ from opentelemetry.context import get_current
33
35
 
34
36
  from vision_agent.utils.exceptions import (
35
37
  RemoteSandboxCreationError,
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
633
635
  self._new_kernel()
634
636
 
635
637
  def exec_cell(self, code: str) -> Execution:
636
- try:
637
- self.nb.cells.append(new_code_cell(code))
638
- cell = self.nb.cells[-1]
639
- self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
640
- return _parse_local_code_interpreter_outputs(self.nb.cells[-1].outputs)
641
- except CellTimeoutError as e:
642
- run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
643
- sleep(1)
644
- traceback_raw = traceback.format_exc().splitlines()
645
- return Execution.from_exception(e, traceback_raw)
646
- except DeadKernelError as e:
647
- self.restart_kernel()
648
- traceback_raw = traceback.format_exc().splitlines()
649
- return Execution.from_exception(e, traceback_raw)
650
- except Exception as e:
651
- traceback_raw = traceback.format_exc().splitlines()
652
- return Execution.from_exception(e, traceback_raw)
638
+ # Trace each exec_cell call with an OpenTelemetry span
639
+ tracer = get_tracer(__name__)
640
+ context = get_current()
641
+ with tracer.start_as_current_span(
642
+ "notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
643
+ ) as span:
644
+ try:
645
+ # Add code as span attribute
646
+ span.set_attribute("code", code)
647
+ span.set_attribute("cell_index", len(self.nb.cells))
648
+
649
+ self.nb.cells.append(new_code_cell(code))
650
+ cell = self.nb.cells[-1]
651
+ self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
652
+
653
+ result = _parse_local_code_interpreter_outputs(
654
+ self.nb.cells[-1].outputs
655
+ )
656
+ span.set_status(Status(StatusCode.OK))
657
+ return result
658
+ except CellTimeoutError as e:
659
+ run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
660
+ sleep(1)
661
+ span.set_status(Status(StatusCode.ERROR, str(e)))
662
+ span.record_exception(e)
663
+ traceback_raw = traceback.format_exc().splitlines()
664
+ return Execution.from_exception(e, traceback_raw)
665
+ except DeadKernelError as e:
666
+ self.restart_kernel()
667
+ span.set_status(Status(StatusCode.ERROR, str(e)))
668
+ span.record_exception(e)
669
+ traceback_raw = traceback.format_exc().splitlines()
670
+ return Execution.from_exception(e, traceback_raw)
671
+ except Exception as e:
672
+ span.set_status(Status(StatusCode.ERROR, str(e)))
673
+ span.record_exception(e)
674
+ traceback_raw = traceback.format_exc().splitlines()
675
+ return Execution.from_exception(e, traceback_raw)
653
676
 
654
677
  def upload_file(self, file_path: Union[str, Path]) -> Path:
655
678
  with open(file_path, "rb") as f:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.216
3
+ Version: 0.2.218
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
23
23
  Requires-Dist: numpy (>=1.21.0,<2.0.0)
24
24
  Requires-Dist: openai (>=1.0.0,<2.0.0)
25
25
  Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
26
+ Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
26
27
  Requires-Dist: pandas (>=2.0.0,<3.0.0)
27
28
  Requires-Dist: pillow (>=10.0.0,<11.0.0)
28
29
  Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
@@ -26,21 +26,21 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
26
26
  vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
27
27
  vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
28
28
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
29
- vision_agent/tools/__init__.py,sha256=qzAqY2WnRLoClz3qiNtupkLtvpPlcGa5ZUCIs21WS7k,2795
29
+ vision_agent/tools/__init__.py,sha256=Jdq34jMw_KuYwk4Wexqm4DRjuLLoL1Q8wukm0NBv1Tc,2812
30
30
  vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
31
31
  vision_agent/tools/planner_tools.py,sha256=tU1qz_VIQM_yPKDmuxjMWu68ZlAZ7ePWI1g7zswyWhI,13540
32
32
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
33
33
  vision_agent/tools/tool_utils.py,sha256=LAnrb_nY6PNVamqJahRN-J0cuOy4gsKvCtSuXJf0RsI,10075
34
- vision_agent/tools/tools.py,sha256=n6-UPaZ4XjF29_7EF5GRgx74GjiZ7HqZn4a1Aw-e4P0,94059
34
+ vision_agent/tools/tools.py,sha256=Xcm_9EQdDCR9X5FhIm7VJaTL0qWqhnJUVTRVrRtETrA,96112
35
35
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
36
36
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
37
37
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
38
- vision_agent/utils/execute.py,sha256=ktJX1gWBk4D_tXeWV5olGUMC4dU_Z6m5oSv-6Yu1O0w,28292
38
+ vision_agent/utils/execute.py,sha256=Qs-C9lnRBc3frUH_bmrwHLuJ9qjPykIytex8y4E0f7s,29356
39
39
  vision_agent/utils/image_utils.py,sha256=5uoYgXa6E0-lVrXR7K2XE7fe6r_n7pvK64HYQ50vG3w,12182
40
40
  vision_agent/utils/sim.py,sha256=f1emBQM8SmyVKSrhj0NHItnfMHSeTw-Nk2pw-0eBZ5c,7462
41
41
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
42
42
  vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
43
- vision_agent-0.2.216.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
- vision_agent-0.2.216.dist-info/METADATA,sha256=B88HzV_M0A12EmhiC-968LcdospsiOUUR-aTcZFTH8A,19071
45
- vision_agent-0.2.216.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
46
- vision_agent-0.2.216.dist-info/RECORD,,
43
+ vision_agent-0.2.218.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ vision_agent-0.2.218.dist-info/METADATA,sha256=Bh9yQRcNSytsUOIqztuXkUhSprPu-le7ncfb7owkc24,19122
45
+ vision_agent-0.2.218.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
46
+ vision_agent-0.2.218.dist-info/RECORD,,