PyPI - vision-agent - Versions diffs - 0.2.216__tar.gz → 0.2.218__tar.gz - Mend

vision-agent 0.2.216.tar.gz → 0.2.218.tar.gz

Files changed (46) hide show

{vision_agent-0.2.216 → vision_agent-0.2.218}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.216
+Version: 0.2.218
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -23,6 +23,7 @@ Requires-Dist: nbformat (>=5.10.4,<6.0.0)
 Requires-Dist: numpy (>=1.21.0,<2.0.0)
 Requires-Dist: openai (>=1.0.0,<2.0.0)
 Requires-Dist: opencv-python (>=4.0.0,<5.0.0)
+Requires-Dist: opentelemetry-api (>=1.29.0,<2.0.0)
 Requires-Dist: pandas (>=2.0.0,<3.0.0)
 Requires-Dist: pillow (>=10.0.0,<11.0.0)
 Requires-Dist: pillow-heif (>=0.16.0,<0.17.0)

{vision_agent-0.2.216 → vision_agent-0.2.218}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.216"
+version = "0.2.218"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -47,6 +47,7 @@ av = "^11.0.0"
 libcst = "^1.5.0"
 matplotlib = "^3.9.2"
 scikit-learn = "^1.5.2"
+opentelemetry-api = "^1.29.0"
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"

{vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/__init__.py RENAMED Viewed

@@ -33,6 +33,7 @@ from .tools import (
     depth_anything_v2,
     detr_segmentation,
     document_extraction,
+    document_qa,
     extract_frames_and_timestamps,
     florence2_ocr,
     florence2_phrase_grounding,

{vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/tools/tools.py RENAMED Viewed

@@ -2174,6 +2174,77 @@ def document_extraction(image: np.ndarray) -> Dict[str, Any]:
     return data
+def document_qa(
+    prompt: str,
+    image: np.ndarray,
+) -> str:
+    """'document_qa' is a tool that can answer any questions about arbitrary
+    images of documents or presentations. It answers by analyzing the contextual document data
+    and then using a model to answer specific questions. It returns text as an answer to the question.
+    Parameters:
+        prompt (str): The question to be answered about the document image
+        image (np.ndarray): The document image to analyze
+    Returns:
+        str: The answer to the question based on the document's context.
+    Example
+    -------
+        >>> document_qa(question, image)
+        'The answer to the question ...'
+    """
+    image_file = numpy_to_bytes(image)
+    files = [("image", image_file)]
+    payload = {
+        "model": "document-analysis",
+    }
+    data: dict[str, Any] = send_inference_request(
+        payload=payload,
+        endpoint_name="document-analysis",
+        files=files,
+        v2=True,
+        metadata_payload={"function_name": "document_qa"},
+    )
+    def normalize(data: Any) -> Dict[str, Any]:
+        if isinstance(data, Dict):
+            if "bbox" in data:
+                data["bbox"] = normalize_bbox(data["bbox"], image.shape[:2])
+            for key in data:
+                data[key] = normalize(data[key])
+        elif isinstance(data, List):
+            for i in range(len(data)):
+                data[i] = normalize(data[i])
+        return data  # type: ignore
+    data = normalize(data)
+    prompt = f"""
+    Document Context:
+    {data}\n
+    Question: {prompt}\n
+    Please provide a clear, concise answer using only the information from the document. If the answer is not definitively contained in the document, say "I cannot find the answer in the provided document."
+    """
+    lmm = AnthropicLMM()
+    llm_output = lmm.generate(prompt=prompt)
+    llm_output = cast(str, llm_output)
+    _display_tool_trace(
+        document_qa.__name__,
+        payload,
+        llm_output,
+        files,
+    )
+    return llm_output
 # Utility and visualization functions

{vision_agent-0.2.216 → vision_agent-0.2.218}/vision_agent/utils/execute.py RENAMED Viewed

@@ -30,6 +30,8 @@ from nbclient.util import run_sync
 from nbformat.v4 import new_code_cell
 from pydantic import BaseModel, field_serializer
 from typing_extensions import Self
+from opentelemetry.trace import get_tracer, Status, StatusCode, SpanKind
+from opentelemetry.context import get_current
 from vision_agent.utils.exceptions import (
     RemoteSandboxCreationError,
@@ -633,23 +635,44 @@ Timeout: {self.timeout}"""
         self._new_kernel()
     def exec_cell(self, code: str) -> Execution:
-        try:
-            self.nb.cells.append(new_code_cell(code))
-            cell = self.nb.cells[-1]
-            self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
-            return _parse_local_code_interpreter_outputs(self.nb.cells[-1].outputs)
-        except CellTimeoutError as e:
-            run_sync(self.nb_client.km.interrupt_kernel)()  # type: ignore
-            sleep(1)
-            traceback_raw = traceback.format_exc().splitlines()
-            return Execution.from_exception(e, traceback_raw)
-        except DeadKernelError as e:
-            self.restart_kernel()
-            traceback_raw = traceback.format_exc().splitlines()
-            return Execution.from_exception(e, traceback_raw)
-        except Exception as e:
-            traceback_raw = traceback.format_exc().splitlines()
-            return Execution.from_exception(e, traceback_raw)
+        # track the exec_cell with opentelemetry trace
+        tracer = get_tracer(__name__)
+        context = get_current()
+        with tracer.start_as_current_span(
+            "notebook_cell_execution", kind=SpanKind.INTERNAL, context=context
+        ) as span:
+            try:
+                # Add code as span attribute
+                span.set_attribute("code", code)
+                span.set_attribute("cell_index", len(self.nb.cells))
+                self.nb.cells.append(new_code_cell(code))
+                cell = self.nb.cells[-1]
+                self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
+                result = _parse_local_code_interpreter_outputs(
+                    self.nb.cells[-1].outputs
+                )
+                span.set_status(Status(StatusCode.OK))
+                return result
+            except CellTimeoutError as e:
+                run_sync(self.nb_client.km.interrupt_kernel)()  # type: ignore
+                sleep(1)
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                traceback_raw = traceback.format_exc().splitlines()
+                return Execution.from_exception(e, traceback_raw)
+            except DeadKernelError as e:
+                self.restart_kernel()
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                traceback_raw = traceback.format_exc().splitlines()
+                return Execution.from_exception(e, traceback_raw)
+            except Exception as e:
+                span.set_status(Status(StatusCode.ERROR, str(e)))
+                span.record_exception(e)
+                traceback_raw = traceback.format_exc().splitlines()
+                return Execution.from_exception(e, traceback_raw)
     def upload_file(self, file_path: Union[str, Path]) -> Path:
         with open(file_path, "rb") as f: