PyPI - vision-agent - Versions diffs - 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl - Mend

vision-agent 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

vision_agent/tools/__init__.py CHANGED Viewed

@@ -33,6 +33,7 @@ from .tools import (
     depth_anything_v2,
     detr_segmentation,
     document_extraction,
+    document_qa,
     extract_frames_and_timestamps,
     florence2_ocr,
     florence2_phrase_grounding,

vision_agent/tools/tools.py CHANGED Viewed

@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
     return data
+def document_qa(
+    prompt: str,
+    image: np.ndarray,
+) -> str:
+    """'document_qa' is a tool that can answer any questions about arbitrary
+    images of documents or presentations. It answers by analyzing the contextual document data
+    and then using a model to answer specific questions. It returns text as an answer to the question.
+    Parameters:
+        prompt (str): The question to be answered about the document image
+        image (np.ndarray): The document image to analyze
+    Returns:
+        str: The answer to the question based on the document's context.
+    Example
+    -------
+        >>> document_qa(image, question)
+        'The answer to the question ...'
+    """
+    image_file = numpy_to_bytes(image)
+    files = [("image", image_file)]
+    payload = {
+        "model": "document-analysis",
+    }
+    data: dict[str, Any] = send_inference_request(
+        payload=payload,
+        endpoint_name="document-analysis",
+        files=files,
+        v2=True,
+        metadata_payload={"function_name": "document_qa"},
+    )
+    def normalize(data: Any) -> Dict[str, Any]:
+        if isinstance(data, Dict):
+            if "bbox" in data:
+                data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
+            for key in data:
+                data[key] = normalize(data[key])
+        elif isinstance(data, List):
+            for i in range(len(data)):
+                data[i] = normalize(data[i])
+        return data  # type: ignore
+    data = normalize(data)
+    prompt = f"""
+    Document Context:
+    {data}\n
+    Question: {prompt}\n
+    Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
+    """
+    lmm = AnthropicLMM()
+    llm_output = lmm.generate(prompt=prompt)
+    llm_output = cast(str, llm_output)
+    _display_tool_trace(
+        document_qa.__name__,
+        payload,
+        llm_output,
+        files,
+    )
+    return llm_output
 # Utility and visualization functions

vision_agent/utils/execute.py CHANGED Viewed

@@ -30,6 +30,8 @@ from nbclient.util import run_sync
 from nbformat.v4 import new_code_cell
 from pydantic import BaseModel, field_serializer
 from typing_extensions import Self
+from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
+from opentelemetry.context import get_current
 from vision_agent.utils.exceptions import (
     RemoteSandboxCreationError,
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
         self._new_kernel()
     def exec_cell(self, code: str) -> Execution:
-        try:
-            self.nb.cells.append(new_code_cell(code))
-            cell = self.nb.cells[-1]
-            self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
-            return _parse_local_code_interpreter_outputs(self.nb.cells[-1].outputs)
-        except CellTimeoutError as e:
-            run_sync(self.nb_client.km.interrupt_kernel)()  # type: ignore
-            sleep(1)
-            traceback_raw = traceback.format_exc().splitlines()
-            return Execution.from_exception(e, traceback_raw)
-        except DeadKernelError as e:
-            self.restart_kernel()
-            traceback_raw = traceback.format_exc().splitlines()
-            return Execution.from_exception(e, traceback_raw)
-        except Exception as e:
-            traceback_raw = traceback.format_exc().splitlines()
-            return Execution.from_exception(e, traceback_raw)
+        # track the exec_cell with opentelemetry trace
+        tracer = get_tracer(__name__)
+        context = get_current()
+        with tracer.start_as_current_span(
+            "notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
+        ) as span:
+            try:
+                # Add code as span attribute
+                span.set_attribute("code", code)
+                span.set_attribute("cell_index", len(self.nb.cells))
+                self.nb.cells.append(new_code_cell(code))
+                cell = self.nb.cells[-1]
+                self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
+                result = _parse_local_code_interpreter_outputs(
+                    self.nb.cells[-1].outputs
+                )
+                span.set_status(Status(StatusCode.OK))
+                return result
+            except CellTimeoutError as e:
+                run_sync(self.nb_client.km.interrupt_kernel)()  # type: ignore
+                sleep(1)
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                traceback_raw = traceback.format_exc().splitlines()
+                return Execution.from_exception(e, traceback_raw)
+            except DeadKernelError as e:
+                self.restart_kernel()
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                traceback_raw = traceback.format_exc().splitlines()
+                return Execution.from_exception(e, traceback_raw)
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                traceback_raw = traceback.format_exc().splitlines()
+                return Execution.from_exception(e, traceback_raw)
     def upload_file(self, file_path: Union[str, Path]) -> Path:
         with open(file_path, "rb") as f:

{vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.216
+Version: 0.2.218
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
 Requires-Dist: numpy (>=1.21.0,<2.0.0)
 Requires-Dist: openai (>=1.0.0,<2.0.0)
 Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
+Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
 Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pillow (>=10.0.0,<11.0.0)
 Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)

{vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/RECORD RENAMED Viewed

@@ -26,21 +26,21 @@ vision_agent/fonts/default_font_ch_en.ttf,sha256=1YM0Z3XqLDjSNbF7ihQFSAIUdjF9m1r
 vision_agent/lmm/__init__.py,sha256=jyY1sJb_tYKg5-Wzs3p1lvwFkc-aUNZfMcLy3TOC4Zg,100
 vision_agent/lmm/lmm.py,sha256=x_nIyDNDZwq4-pfjnJTmcyyJZ2_B7TjkA5jZp88YVO8,17103
 vision_agent/lmm/types.py,sha256=ZEXR_ptBL0ZwDMTDYkgxUCmSZFmBYPQd2jreNzr_8UY,221
-vision_agent/tools/__init__.py,sha256=qzAqY2WnRLoClz3qiNtupkLtvpPlcGa5ZUCIs21WS7k,2795
+vision_agent/tools/__init__.py,sha256=Jdq34jMw_KuYwk4Wexqm4DRjuLLoL1Q8wukm0NBv1Tc,2812
 vision_agent/tools/meta_tools.py,sha256=TPeS7QWnc_PmmU_ndiDT03dXbQ5yDSP33E7U8cSj7Ls,28660
 vision_agent/tools/planner_tools.py,sha256=tU1qz_VIQM_yPKDmuxjMWu68ZlAZ7ePWI1g7zswyWhI,13540
 vision_agent/tools/prompts.py,sha256=V1z4YJLXZuUl_iZ5rY0M5hHc_2tmMEUKr0WocXKGt4E,1430
 vision_agent/tools/tool_utils.py,sha256=LAnrb_nY6PNVamqJahRN-J0cuOy4gsKvCtSuXJf0RsI,10075
-vision_agent/tools/tools.py,sha256=n6-UPaZ4XjF29_7EF5GRgx74GjiZ7HqZn4a1Aw-e4P0,94059
+vision_agent/tools/tools.py,sha256=Xcm_9EQdDCR9X5FhIm7VJaTL0qWqhnJUVTRVrRtETrA,96112
 vision_agent/tools/tools_types.py,sha256=8hYf2OZhI58gvf65KGaeGkt4EQ56nwLFqIQDPHioOBc,2339
 vision_agent/utils/__init__.py,sha256=7fMgbZiEwbNS0fBOS_hJI5PuEYBblw36zLi_UjUzvj4,244
 vision_agent/utils/exceptions.py,sha256=booSPSuoULF7OXRr_YbC4dtKt6gM_HyiFQHBuaW86C4,2052
-vision_agent/utils/execute.py,sha256=ktJX1gWBk4D_tXeWV5olGUMC4dU_Z6m5oSv-6Yu1O0w,28292
+vision_agent/utils/execute.py,sha256=Qs-C9lnRBc3frUH_bmrwHLuJ9qjPykIytex8y4E0f7s,29356
 vision_agent/utils/image_utils.py,sha256=5uoYgXa6E0-lVrXR7K2XE7fe6r_n7pvK64HYQ50vG3w,12182
 vision_agent/utils/sim.py,sha256=f1emBQM8SmyVKSrhj0NHItnfMHSeTw-Nk2pw-0eBZ5c,7462
 vision_agent/utils/type_defs.py,sha256=BE12s3JNQy36QvauXHjwyeffVh5enfcvd4vTzSwvEZI,1384
 vision_agent/utils/video.py,sha256=e1VwKhXzzlC5LcFMyrcQYrPnpnX4wxDpnQ-76sB4jgM,6001
-vision_agent-0.2.216.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-vision_agent-0.2.216.dist-info/METADATA,sha256=B88HzV_M0A12EmhiC-968LcdospsiOUUR-aTcZFTH8A,19071
-vision_agent-0.2.216.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
-vision_agent-0.2.216.dist-info/RECORD,,
+vision_agent-0.2.218.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+vision_agent-0.2.218.dist-info/METADATA,sha256=Bh9yQRcNSytsUOIqztuXkUhSprPu-le7ncfb7owkc24,19122
+vision_agent-0.2.218.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
+vision_agent-0.2.218.dist-info/RECORD,,

{vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/LICENSE RENAMED Viewed

File without changes

{vision_agent-0.2.216.dist-info → vision_agent-0.2.218.dist-info}/WHEEL RENAMED Viewed

File without changes

vision-agent 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl

vision-agent 0.2.216__py3-none-any.whl → 0.2.218__py3-none-any.whl