vision-agent 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -33,6 +33,7 @@ from .tools import (
33
33
  depth_anything_v2,
34
34
  detr_segmentation,
35
35
  document_extraction,
36
+ document_qa,
36
37
  extract_frames_and_timestamps,
37
38
  florence2_ocr,
38
39
  florence2_phrase_grounding,
@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
2174
2174
  return data
2175
2175
 
2176
2176
 
2177
+ def document_qa(
2178
+ prompt: str,
2179
+ image: np.ndarray,
2180
+ ) -> str:
2181
+ """'document_qa' is a tool that can answer any questions about arbitrary
2182
+ images of documents or presentations. It answers by analyzing the contextual document data
2183
+ and then using a model to answer specific questions. It returns text as an answer to the question.
2184
+
2185
+ Parameters:
2186
+ prompt (str): The question to be answered about the document image
2187
+ image (np.ndarray): The document image to analyze
2188
+
2189
+ Returns:
2190
+ str: The answer to the question based on the document's context.
2191
+
2192
+ Example
2193
+ -------
2194
+ >>> document_qa(image, question)
2195
+ 'The answer to the question ...'
2196
+ """
2197
+
2198
+ image_file = numpy_to_bytes(image)
2199
+
2200
+ files = [("image", image_file)]
2201
+
2202
+ payload = {
2203
+ "model": "document-analysis",
2204
+ }
2205
+
2206
+ data: dict[str, Any] = send_inference_request(
2207
+ payload=payload,
2208
+ endpoint_name="document-analysis",
2209
+ files=files,
2210
+ v2=True,
2211
+ metadata_payload={"function_name": "document_qa"},
2212
+ )
2213
+
2214
+ def normalize(data: Any) -> Dict[str, Any]:
2215
+ if isinstance(data, Dict):
2216
+ if "bbox" in data:
2217
+ data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
2218
+ for key in data:
2219
+ data[key] = normalize(data[key])
2220
+ elif isinstance(data, List):
2221
+ for i in range(len(data)):
2222
+ data[i] = normalize(data[i])
2223
+ return data # type: ignore
2224
+
2225
+ data = normalize(data)
2226
+
2227
+ prompt = f"""
2228
+ Document Context:
2229
+ {data}\n
2230
+ Question: {prompt}\n
2231
+ Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
2232
+ """
2233
+
2234
+ lmm = AnthropicLMM()
2235
+ llm_output = lmm.generate(prompt=prompt)
2236
+ llm_output = cast(str, llm_output)
2237
+
2238
+ _display_tool_trace(
2239
+ document_qa.__name__,
2240
+ payload,
2241
+ llm_output,
2242
+ files,
2243
+ )
2244
+
2245
+ return llm_output
2246
+
2247
+
2177
2248
  # Utility and visualization functions
2178
2249
 
2179
2250
 
@@ -30,6 +30,8 @@ from nbclient.util import run_sync
30
30
  from nbformat.v4 import new_code_cell
31
31
  from pydantic import BaseModel, field_serializer
32
32
  from typing_extensions import Self
33
+ from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
34
+ from opentelemetry.context import get_current
33
35
 
34
36
  from vision_agent.utils.exceptions import (
35
37
  RemoteSandboxCreationError,
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
633
635
  self._new_kernel()
634
636
 
635
637
  def exec_cell(self, code: str) -> Execution:
636
- try:
637
- self.nb.cells.append(new_code_cell(code))
638
- cell = self.nb.cells[-1]
639
- self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
640
- return _parse_local_code_interpreter_outputs(self.nb.cells[-1].outputs)
641
- except CellTimeoutError as e:
642
- run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
643
- sleep(1)
644
- traceback_raw = traceback.format_exc().splitlines()
645
- return Execution.from_exception(e, traceback_raw)
646
- except DeadKernelError as e:
647
- self.restart_kernel()
648
- traceback_raw = traceback.format_exc().splitlines()
649
- return Execution.from_exception(e, traceback_raw)
650
- except Exception as e:
651
- traceback_raw = traceback.format_exc().splitlines()
652
- return Execution.from_exception(e, traceback_raw)
638
+ # track the exec_cell with opentelemetry trace
639
+ tracer = get_tracer(__name__)
640
+ context = get_current()
641
+ with tracer.start_as_current_span(
642
+ "notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
643
+ ) as span:
644
+ try:
645
+ # Add code as span attribute
646
+ span.set_attribute("code", code)
647
+ span.set_attribute("cell_index", len(self.nb.cells))
648
+
649
+ self.nb.cells.append(new_code_cell(code))
650
+ cell = self.nb.cells[-1]
651
+ self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
652
+
653
+ result = _parse_local_code_interpreter_outputs(
654
+ self.nb.cells[-1].outputs
655
+ )
656
+ span.set_status(Status(StatusCode.OK))
657
+ return result
658
+ except CellTimeoutError as e:
659
+ run_sync(self.nb_client.km.interrupt_kernel)() # type: ignore
660
+ sleep(1)
661
+ span.set_status(Status(StatusCode.ERROR, str(e)))
662
+ span.record_exception(e)
663
+ traceback_raw = traceback.format_exc().splitlines()
664
+ return Execution.from_exception(e, traceback_raw)
665
+ except DeadKernelError as e:
666
+ self.restart_kernel()
667
+ span.set_status(Status(StatusCode.ERROR, str(e)))
668
+ span.record_exception(e)
669
+ traceback_raw = traceback.format_exc().splitlines()
670
+ return Execution.from_exception(e, traceback_raw)
671
+ except Exception as e:
672
+ span.set_status(Status(StatusCode.ERROR, str(e)))
673
+ span.record_exception(e)
674
+ traceback_raw = traceback.format_exc().splitlines()
675
+ return Execution.from_exception(e, traceback_raw)
653
676
 
654
677
  def upload_file(self, file_path: Union[str, Path]) -> Path:
655
678
  with open(file_path, "rb") as f:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: vision-agent
3
- Version: 0.2.216
3
+ Version: 0.2.218
4
4
  Summary: Toolset for Vision Agent
5
5
  Author: Landing AI
6
6
  Author-email: dev@landing.ai
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
23
23
  Requires-Dist: numpy (>=1.21.0,<2.0.0)
24
24
  Requires-Dist: openai (>=1.0.0,<2.0.0)
25
25
  Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
26
+ Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
26
27
  Requires-Dist: pandas (>=2.0.0,<3.0.0)
27
28
  Requires-Dist: pillow (>=10.0.0,<11.0.0)
28
29
  Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)
@@ -26,21 +26,21 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
26
26
  vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
27
27
  vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
28
28
  vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
29
- vision_agent/tools/__init__.py,sha256=qzAqY2WnRLoClz3qiNtupkLtvpPlcGa5ZUCIs21WS7k,2795
29
+ vision_agent/tools/__init__.py,sha256=Jdq34jMw_KuYwk4Wexqm4DRjuLLoL1Q8wukm0NBv1Tc,2812
30
30
  vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
31
31
  vision_agent/tools/planner_tools.py,sha256=tU1qz_VIQM_yPKDmuxjMWu68ZlAZ7ePWI1g7zswyWhI,13540
32
32
  vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
33
33
  vision_agent/tools/tool_utils.py,sha256=LAnrb_nY6PNVamqJahRN-J0cuOy4gsKvCtSuXJf0RsI,10075
34
- vision_agent/tools/tools.py,sha256=n6-UPaZ4XjF29_7EF5GRgx74GjiZ7HqZn4a1Aw-e4P0,94059
34
+ vision_agent/tools/tools.py,sha256=Xcm_9EQdDCR9X5FhIm7VJaTL0qWqhnJUVTRVrRtETrA,96112
35
35
  vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
36
36
  vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
37
37
  vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
38
- vision_agent/utils/execute.py,sha256=ktJX1gWBk4D_tXeWV5olGUMC4dU_Z6m5oSv-6Yu1O0w,28292
38
+ vision_agent/utils/execute.py,sha256=Qs-C9lnRBc3frUH_bmrwHLuJ9qjPykIytex8y4E0f7s,29356
39
39
  vision_agent/utils/image_utils.py,sha256=5uoYgXa6E0-lVrXR7K2XE7fe6r_n7pvK64HYQ50vG3w,12182
40
40
  vision_agent/utils/sim.py,sha256=f1emBQM8SmyVKSrhj0NHItnfMHSeTw-Nk2pw-0eBZ5c,7462
41
41
  vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
42
42
  vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
43
- vision_agent-0.2.216.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
- vision_agent-0.2.216.dist-info/METADATA,sha256=B88HzV_M0A12EmhiC-968LcdospsiOUUR-aTcZFTH8A,19071
45
- vision_agent-0.2.216.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
46
- vision_agent-0.2.216.dist-info/RECORD,,
43
+ vision_agent-0.2.218.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
44
+ vision_agent-0.2.218.dist-info/METADATA,sha256=Bh9yQRcNSytsUOIqztuXkUhSprPu-le7ncfb7owkc24,19122
45
+ vision_agent-0.2.218.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
46
+ vision_agent-0.2.218.dist-info/RECORD,,