PyPI - vision-agent - Versions diffs - 0.2.36__tar.gz → 0.2.38__tar.gz - Mend

vision-agent 0.2.36tar.gz → 0.2.38tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (39) hide show

{vision_agent-0.2.36 → vision_agent-0.2.38}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vision-agent
-Version: 0.2.36
+Version: 0.2.38
 Summary: Toolset for Vision Agent
 Author: Landing AI
 Author-email: dev@landing.ai
@@ -9,6 +9,8 @@ Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
+Requires-Dist: e2b (>=0.17.0,<0.18.0)
+Requires-Dist: e2b-code-interpreter (>=0.0.7,<0.0.8)
 Requires-Dist: ipykernel (>=6.29.4,<7.0.0)
 Requires-Dist: langsmith (>=0.1.58,<0.2.0)
 Requires-Dist: moviepy (>=1.0.0,<2.0.0)
@@ -24,6 +26,7 @@ Requires-Dist: requests (>=2.0.0,<3.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
 Requires-Dist: scipy (>=1.13.0,<1.14.0)
 Requires-Dist: tabulate (>=0.9.0,<0.10.0)
+Requires-Dist: tenacity (>=8.3.0,<9.0.0)
 Requires-Dist: tqdm (>=4.64.0,<5.0.0)
 Requires-Dist: typing_extensions (>=4.0.0,<5.0.0)
 Project-URL: Homepage, https://landing.ai

{vision_agent-0.2.36 → vision_agent-0.2.38}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"
 [tool.poetry]
 name = "vision-agent"
-version = "0.2.36"
+version = "0.2.38"
 description = "Toolset for Vision Agent"
 authors = ["Landing AI <dev@landing.ai>"]
 readme = "README.md"
@@ -34,6 +34,9 @@ nbformat = "^5.10.4"
 rich = "^13.7.1"
 langsmith = "^0.1.58"
 ipykernel = "^6.29.4"
+e2b = "^0.17.0"
+e2b-code-interpreter = "^0.0.7"
+tenacity = "^8.3.0"
 [tool.poetry.group.dev.dependencies]
 autoflake = "1.*"
@@ -93,4 +96,6 @@ module = [
     "openai.*",
     "sentence_transformers.*",
     "moviepy.*",
+    "e2b_code_interpreter.*",
+    "e2b.*",
 ]

vision_agent-0.2.38/vision_agent/agent/__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ from .agent import Agent
2	+ from .vision_agent import VisionAgent

{vision_agent-0.2.36 → vision_agent-0.2.38}/vision_agent/agent/agent_coder.py RENAMED Viewed

@@ -19,7 +19,7 @@ from vision_agent.agent.agent_coder_prompts import (
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM, OpenAILMM
 from vision_agent.tools import TOOL_DOCSTRING, UTILITIES_DOCSTRING
-from vision_agent.utils import Execute
+from vision_agent.utils import CodeInterpreterFactory
 IMPORT_HELPER = """
 import math
@@ -42,7 +42,7 @@ from vision_agent.tools import *
 """
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
-_EXECUTE = Execute()
+_EXECUTE = CodeInterpreterFactory.get_default_instance()
 _CONSOLE = Console()
@@ -94,8 +94,8 @@ def write_debug(question: str, code: str, feedback: str, model: LLM) -> str:
 def execute_tests(code: str, tests: str) -> Dict[str, Union[str, bool]]:
     full_code = f"{IMPORT_HELPER}\n{code}\n{tests}"
-    success, result = _EXECUTE.run_isolation(full_code)
-    return {"code": code, "result": result, "passed": success}
+    result = _EXECUTE.exec_isolation(full_code)
+    return {"code": code, "result": result.text(), "passed": result.success}
 def run_visual_tests(

{vision_agent-0.2.36 → vision_agent-0.2.38}/vision_agent/agent/data_interpreter.py RENAMED Viewed

@@ -26,11 +26,12 @@ from vision_agent.agent.data_interpreter_prompts import (
 )
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.tools import TOOL_DESCRIPTIONS, TOOLS_DF
-from vision_agent.utils import Execute, Sim
+from vision_agent.utils import CodeInterpreter, CodeInterpreterFactory, Execution, Sim
 logging.basicConfig(level=logging.INFO)
 _LOGGER = logging.getLogger(__name__)
 _MAX_TABULATE_COL_WIDTH = 80
+_EXECUTE = CodeInterpreterFactory.get_default_instance()
 _CONSOLE = Console()
@@ -163,12 +164,12 @@ def write_and_exec_code(
     code_writer_call: Callable[..., str],
     model: LLM,
     tool_info: str,
-    exec: Execute,
+    exec: CodeInterpreter,
     retrieved_ltm: str,
     log_progress: Callable[[Dict[str, Any]], None],
     max_retry: int = 3,
     verbosity: int = 0,
-) -> Tuple[bool, str, str, Dict[str, List[str]]]:
+) -> Tuple[bool, str, Execution, Dict[str, List[str]]]:
     success = False
     counter = 0
     reflection = ""
@@ -176,7 +177,8 @@ def write_and_exec_code(
     code = code_writer_call(
         user_req, subtask, retrieved_ltm, tool_info, orig_code, model
     )
-    success, result = exec.run_isolation(code)
+    result = exec.exec_isolation(code)
+    success = result.success
     if verbosity == 2:
         _CONSOLE.print(Syntax(code, "python", theme="gruvbox-dark", line_numbers=True))
         log_progress(
@@ -193,10 +195,10 @@ def write_and_exec_code(
         log_progress(
             {
                 "log": "Result:",
-                "result": str(result),
+                "result": result.to_json(),
             }
         )
-        _LOGGER.info(f"\tCode success: {success}, result: {str(result)}")
+        _LOGGER.info(f"\tCode success: {success}, result: {result.text(False)}")
     working_memory: Dict[str, List[str]] = {}
     while not success and counter < max_retry:
         if subtask not in working_memory:
@@ -210,13 +212,13 @@ def write_and_exec_code(
             )
         else:
             working_memory[subtask].append(
-                PREV_CODE_CONTEXT.format(code=code, result=result)
+                PREV_CODE_CONTEXT.format(code=code, result=result.text())
             )
         code, reflection = debug_code(
             user_req, subtask, retrieved_ltm, "\n".join(working_memory[subtask]), model
         )
-        success, result = exec.run_isolation(code)
+        result = exec.exec_isolation(code)
         counter += 1
         if verbosity == 2:
             _CONSOLE.print(
@@ -231,19 +233,21 @@ def write_and_exec_code(
             log_progress(
                 {
                     "log": "Result:",
-                    "result": result,
+                    "result": result.to_json(),
                 }
             )
-            _LOGGER.info(f"\tDebugging reflection: {reflection}, result: {result}")
+            _LOGGER.info(
+                f"\tDebugging reflection: {reflection}, result: {result.text(False)}"
+            )
         if success:
             working_memory[subtask].append(
                 PREV_CODE_CONTEXT_WITH_REFLECTION.format(
-                    reflection=reflection, code=code, result=result
+                    reflection=reflection, code=code, result=result.text()
                 )
             )
-    return success, code, result, working_memory
+    return result.success, code, result, working_memory
 @traceable(name="plan execution")
@@ -251,7 +255,7 @@ def run_plan(
     user_req: str,
     plan: List[Dict[str, Any]],
     coder: LLM,
-    exec: Execute,
+    exec: CodeInterpreter,
     code: str,
     tool_recommender: Sim,
     log_progress: Callable[[Dict[str, Any]], None],
@@ -316,10 +320,10 @@ def run_plan(
         log_progress(
             {
                 "log": "Result:",
-                "result": str(result),
+                "result": result.to_json(),
             }
         )
-        _LOGGER.info(f"\tCode success: {success} result: {str(result)}")
+        _LOGGER.info(f"\tCode success: {success} result: {result.text(False)}")
         task["success"] = success
         task["result"] = result
@@ -360,7 +364,7 @@ class DataInterpreter(Agent):
     ) -> None:
         self.planner = OpenAILLM(temperature=0.0, json_mode=True)
         self.coder = OpenAILLM(temperature=0.0)
-        self.exec = Execute(timeout=timeout)
+        self.exec = _EXECUTE
         self.report_progress_callback = report_progress_callback
         if tool_recommender is None:
             self.tool_recommender = Sim(TOOLS_DF, sim_key="desc")

{vision_agent-0.2.36 → vision_agent-0.2.38}/vision_agent/agent/vision_agent.py RENAMED Viewed

@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import Any, Callable, Dict, List, Optional, Union, cast
 from rich.console import Console
+from rich.style import Style
 from rich.syntax import Syntax
 from tabulate import tabulate
@@ -23,13 +24,13 @@ from vision_agent.agent.vision_agent_prompts import (
 )
 from vision_agent.llm import LLM, OpenAILLM
 from vision_agent.lmm import LMM, OpenAILMM
-from vision_agent.utils import Execute
+from vision_agent.utils import CodeInterpreterFactory, Execution
 from vision_agent.utils.sim import Sim
 logging.basicConfig(stream=sys.stdout)
 _LOGGER = logging.getLogger(__name__)
 _MAX_TABULATE_COL_WIDTH = 80
-_EXECUTE = Execute(600)
+_EXECUTE = CodeInterpreterFactory.get_default_instance()
 _CONSOLE = Console()
 _DEFAULT_IMPORT = "\n".join(T.__new_tools__)
@@ -157,28 +158,27 @@ def write_and_test_code(
             },
         }
     )
-    success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
+    result = _EXECUTE.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
     log_progress(
         {
             "type": "code",
-            "status": "completed" if success else "failed",
+            "status": "completed" if result.success else "failed",
             "payload": {
                 "code": code,
                 "test": test,
-                "result": result,
+                "result": result.to_json(),
             },
         }
     )
     if verbosity == 2:
-        _LOGGER.info("Initial code and tests:")
-        _CONSOLE.print(
-            Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
+        _print_code("Initial code and tests:", code, test)
+        _LOGGER.info(
+            f"Initial code execution result:\n{result.text(include_logs=False)}"
         )
-        _LOGGER.info(f"Initial result: {result}")
     count = 0
     new_working_memory = []
-    while not success and count < max_retries:
+    while not result.success and count < max_retries:
         log_progress(
             {
                 "type": "code",
@@ -188,7 +188,7 @@ def write_and_test_code(
         fixed_code_and_test = extract_json(
             debugger(
                 FIX_BUG.format(
-                    code=code, tests=test, result=result, feedback=working_memory
+                    code=code, tests=test, result=result.text(), feedback=working_memory
                 )
             )
         )
@@ -210,15 +210,15 @@ def write_and_test_code(
             {"code": f"{code}\n{test}", "feedback": fixed_code_and_test["reflections"]}
         )
-        success, result = _EXECUTE.run_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
+        result = _EXECUTE.exec_isolation(f"{_DEFAULT_IMPORT}\n{code}\n{test}")
         log_progress(
             {
                 "type": "code",
-                "status": "completed" if success else "failed",
+                "status": "completed" if result.success else "failed",
                 "payload": {
                     "code": code,
                     "test": test,
-                    "result": result,
+                    "result": result.to_json(),
                 },
             }
         )
@@ -226,30 +226,33 @@ def write_and_test_code(
             _LOGGER.info(
                 f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
             )
-            _CONSOLE.print(
-                Syntax(
-                    f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True
-                )
+            _print_code("Code and test after attempted fix:", code, test)
+            _LOGGER.info(
+                f"Code execution result after attempted fix: {result.text(include_logs=False)}"
             )
-            _LOGGER.info(f"Debug result: {result}")
         count += 1
     if verbosity >= 1:
-        _LOGGER.info("Final code and tests:")
-        _CONSOLE.print(
-            Syntax(f"{code}\n{test}", "python", theme="gruvbox-dark", line_numbers=True)
-        )
-        _LOGGER.info(f"Final Result: {result}")
+        _print_code("Final code and tests:", code, test)
     return {
         "code": code,
         "test": test,
-        "success": success,
+        "success": result.success,
         "test_result": result,
         "working_memory": new_working_memory,
     }
+def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
+    """Print a styled title followed by the code and, when given, the test.
+    Each section is introduced by a ``=`` banner and rendered with Python syntax
+    highlighting and line numbers on the module console.
+    """
+    rule = "=" * 30
+    sections = [(" Code ", code)]
+    if test:
+        # A missing or empty test produces no "Test" section at all.
+        sections.append((" Test ", test))
+    _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
+    for label, source in sections:
+        _CONSOLE.print(rule + label + rule)
+        _CONSOLE.print(
+            Syntax(source, "python", theme="gruvbox-dark", line_numbers=True)
+        )
 def retrieve_tools(
     plan: List[Dict[str, str]],
     tool_recommender: Sim,
@@ -279,8 +282,10 @@ def retrieve_tools(
             "payload": tool_list,
         }
     )
     if verbosity == 2:
-        _LOGGER.info(f"Tools: {tool_desc}")
+        tool_desc_str = "\n".join(tool_desc)
+        _LOGGER.info(f"Tools Description:\n{tool_desc_str}")
     tool_info_set = set(tool_info)
     return "\n\n".join(tool_info_set)
@@ -386,10 +391,11 @@ class VisionAgent(Agent):
                 and working memory of the agent.
         """
-        if len(chat) == 0:
+        if not chat:
             raise ValueError("Chat cannot be empty.")
         if media is not None:
+            media = _EXECUTE.upload_file(media)
             for chat_i in chat:
                 if chat_i["role"] == "user":
                     chat_i["content"] += f" Image name {media}"
@@ -497,7 +503,7 @@ class VisionAgent(Agent):
                 "payload": {
                     "code": code,
                     "test": test,
-                    "result": results["test_result"],
+                    "result": cast(Execution, results["test_result"]).to_json(),
                 },
             }
         )
@@ -513,4 +519,3 @@ class VisionAgent(Agent):
     def log_progress(self, data: Dict[str, Any]) -> None:
         if self.report_progress_callback is not None:
             self.report_progress_callback(data)
-        pass

{vision_agent-0.2.36 → vision_agent-0.2.38}/vision_agent/tools/tools.py RENAMED Viewed

@@ -198,7 +198,7 @@ def extract_frames(
 def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
     """'ocr' extracts text from an image. It returns a list of detected text, bounding
-    boxes, and confidence scores.
+    boxes, and confidence scores. The results are sorted from top-left to bottom right
     Parameters:
         image (np.ndarray): The image to extract text from.
@@ -211,7 +211,7 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
     -------
     >>> ocr(image)
     [
-        {'label': 'some text', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
+        {'label': 'hello world', 'bbox': [0.1, 0.11, 0.35, 0.4], 'score': 0.99},
     ]
     """
@@ -245,7 +245,8 @@ def ocr(image: np.ndarray) -> List[Dict[str, Any]]:
         box = normalize_bbox(box, image_size)
         output.append({"label": label, "bbox": box, "score": round(det["score"], 2)})
-    return output
+    ocr_results = sorted(output, key=lambda x: (x["bbox"][1], x["bbox"][0]))
+    return ocr_results
 def zero_shot_counting(image: np.ndarray) -> Dict[str, Any]:

vision_agent-0.2.38/vision_agent/utils/__init__.py ADDED Viewed

@@ -0,0 +1,10 @@
+from .execute import (
+    CodeInterpreter,
+    CodeInterpreterFactory,
+    Error,
+    Execution,
+    Logs,
+    Result,
+)
+from .sim import Sim, load_sim, merge_sim
+from .video import extract_frames_from_video

vision_agent-0.2.38/vision_agent/utils/execute.py ADDED Viewed

@@ -0,0 +1,556 @@
+import abc
+import atexit
+import copy
+import logging
+import os
+import platform
+import re
+import sys
+import tempfile
+import traceback
+from enum import Enum
+from io import IOBase
+from pathlib import Path
+from time import sleep
+from typing import IO, Any, Dict, Iterable, List, Optional, Union, cast
+import nbformat
+import tenacity
+from dotenv import load_dotenv
+from e2b.api.v2.client.exceptions import ServiceException
+from e2b_code_interpreter import CodeInterpreter as E2BCodeInterpreterImpl
+from e2b_code_interpreter import Execution as E2BExecution
+from e2b_code_interpreter import Result as E2BResult
+from nbclient import NotebookClient
+from nbclient import __version__ as nbclient_version
+from nbclient.exceptions import CellTimeoutError, DeadKernelError
+from nbclient.util import run_sync
+from nbformat.v4 import new_code_cell
+from pydantic import BaseModel, field_serializer
+from typing_extensions import Self
+load_dotenv()
+_LOGGER = logging.getLogger(__name__)
+class MimeType(str, Enum):
+    """
+    MIME types that ``Result`` looks up in a MIME-keyed output dict.
+    The class mixes in ``str``, so each member is itself a string equal to its
+    literal MIME value and can be used directly as a dict key.
+    """
+    TEXT_PLAIN = "text/plain"
+    TEXT_HTML = "text/html"
+    TEXT_MARKDOWN = "text/markdown"
+    IMAGE_SVG = "image/svg+xml"
+    IMAGE_PNG = "image/png"
+    IMAGE_JPEG = "image/jpeg"
+    APPLICATION_PDF = "application/pdf"
+    TEXT_LATEX = "text/latex"
+    APPLICATION_JSON = "application/json"
+    APPLICATION_JAVASCRIPT = "application/javascript"
+class Result:
+    """
+    Represents the data to be displayed as a result of executing a cell in a Jupyter notebook.
+    The result is similar to the structure returned by ipython kernel: https://ipython.readthedocs.io/en/stable/development/execution.html#execution-semantics
+    The result can contain multiple representations of the same data, such as text, images, plots, etc.
+    Each representation is stored as a string (or a dict for JSON) and every one of them is optional:
+    a representation that was not supplied is None.
+    The ``_repr_*_`` methods expose the stored representations so the object can be displayed in a Jupyter notebook.
+    """
+    text: Optional[str] = None
+    html: Optional[str] = None
+    markdown: Optional[str] = None
+    svg: Optional[str] = None
+    png: Optional[str] = None
+    jpeg: Optional[str] = None
+    pdf: Optional[str] = None
+    latex: Optional[str] = None
+    json: Optional[Dict[str, Any]] = None
+    javascript: Optional[str] = None
+    extra: Optional[Dict[str, Any]] = None
+    "Extra data that can be included. Not part of the standard types."
+    is_main_result: bool
+    "Whether this data is the result of the cell. Data can be produced by display calls of which can be multiple in a cell."
+    raw: Dict[str, str]
+    "Dictionary that maps MIME types to their corresponding string representations of the data."
+    def __init__(self, is_main_result: bool, data: Dict[str, Any]):
+        """
+        Build a result from a dict keyed by MIME type.
+        :param is_main_result: True when the data is the value of the cell itself rather than a display call.
+        :param data: Mapping from MIME type to representation. It is not modified.
+        """
+        self.is_main_result = is_main_result
+        self.raw = copy.deepcopy(data)
+        # Pop from a private shallow copy: popping from ``data`` itself would strip
+        # the entries out of the caller's dict (e.g. the cell output being parsed).
+        remaining = dict(data)
+        self.text = remaining.pop(MimeType.TEXT_PLAIN, None)
+        self.html = remaining.pop(MimeType.TEXT_HTML, None)
+        self.markdown = remaining.pop(MimeType.TEXT_MARKDOWN, None)
+        self.svg = remaining.pop(MimeType.IMAGE_SVG, None)
+        self.png = remaining.pop(MimeType.IMAGE_PNG, None)
+        self.jpeg = remaining.pop(MimeType.IMAGE_JPEG, None)
+        self.pdf = remaining.pop(MimeType.APPLICATION_PDF, None)
+        self.latex = remaining.pop(MimeType.TEXT_LATEX, None)
+        self.json = remaining.pop(MimeType.APPLICATION_JSON, None)
+        self.javascript = remaining.pop(MimeType.APPLICATION_JAVASCRIPT, None)
+        # Whatever was not a known MIME type is kept as extra data.
+        self.extra = remaining
+        # Only keeping the PNG representation if both PNG and JPEG are present
+        if self.png and self.jpeg:
+            self.jpeg = None
+    # Lets callers index by a name returned from formats(): raw MIME keys are
+    # looked up in ``raw`` first, anything else falls back to the attribute of that name.
+    def __getitem__(self, key: Any) -> Any:
+        return self.raw[key] if key in self.raw else getattr(self, key)
+    def __str__(self) -> str:
+        return repr(self)
+    def __repr__(self) -> str:
+        return str(self.raw)
+    def _repr_html_(self) -> Optional[str]:
+        """
+        Returns the HTML representation of the data.
+        """
+        return self.html
+    def _repr_markdown_(self) -> Optional[str]:
+        """
+        Returns the Markdown representation of the data.
+        """
+        return self.markdown
+    def _repr_svg_(self) -> Optional[str]:
+        """
+        Returns the SVG representation of the data.
+        """
+        return self.svg
+    def _repr_png_(self) -> Optional[str]:
+        """
+        Returns the base64 representation of the PNG data.
+        """
+        return self.png
+    def _repr_jpeg_(self) -> Optional[str]:
+        """
+        Returns the base64 representation of the JPEG data.
+        """
+        return self.jpeg
+    def _repr_pdf_(self) -> Optional[str]:
+        """
+        Returns the PDF representation of the data.
+        """
+        return self.pdf
+    def _repr_latex_(self) -> Optional[str]:
+        """
+        Returns the LaTeX representation of the data.
+        """
+        return self.latex
+    def _repr_json_(self) -> Optional[dict]:
+        """
+        Returns the JSON representation of the data.
+        """
+        return self.json
+    def _repr_javascript_(self) -> Optional[str]:
+        """
+        Returns the JavaScript representation of the data.
+        """
+        return self.javascript
+    def formats(self) -> Iterable[str]:
+        """
+        Returns all available formats of the result.
+        :return: Short names ("html", "png", ...) of the representations that are present,
+            followed by the keys of any extra data. "text" is never listed.
+        """
+        known_formats = (
+            "html",
+            "markdown",
+            "svg",
+            "png",
+            "jpeg",
+            "pdf",
+            "latex",
+            "json",
+            "javascript",
+        )
+        formats = [name for name in known_formats if getattr(self, name)]
+        if self.extra:
+            formats.extend(iter(self.extra))
+        return formats
+    @staticmethod
+    def from_e2b_result(result: E2BResult) -> "Result":  # type: ignore
+        """
+        Creates a Result object from an E2BResult object.
+        """
+        return Result(
+            is_main_result=result.is_main_result,
+            data=result.raw,
+        )
+class Logs(BaseModel):
+    """
+    Output written to stdout and stderr while code was executing (print statements, logs, warnings, subprocesses, etc.).
+    """
+    stdout: List[str] = []
+    "List of strings printed to stdout by prints, subprocesses, etc."
+    stderr: List[str] = []
+    "List of strings printed to stderr by prints, subprocesses, etc."
+    def __str__(self) -> str:
+        """Render both streams under ``stdout:`` / ``stderr:`` headings with escape and color codes removed."""
+        out_text, err_text = (
+            "\n".join(stream) for stream in (self.stdout, self.stderr)
+        )
+        combined = f"stdout:\n{out_text}\nstderr:\n{err_text}"
+        return _remove_escape_and_color_codes(combined)
+class Error(BaseModel):
+    """
+    Represents an error that occurred during the execution of a cell.
+    The error contains the name of the error, the value of the error, and the traceback.
+    """
+    name: str
+    "Name of the exception."
+    value: str
+    "Value of the exception."
+    traceback_raw: List[str]
+    "List of strings representing the traceback."
+    @property
+    def traceback(self) -> str:
+        """
+        Returns the traceback as a single newline-joined string with escape and color codes removed.
+        A property getter cannot receive arguments, so the cleaned form is the only one
+        available here; use ``traceback_raw`` for the unmodified lines.
+        """
+        return _remove_escape_and_color_codes("\n".join(self.traceback_raw))
+class Execution(BaseModel):
+    """
+    Represents the result of a cell execution.
+    """
+    class Config:
+        arbitrary_types_allowed = True
+    results: List[Result] = []
+    "List of the result of the cell (interactively interpreted last line), display calls (e.g. matplotlib plots)."
+    logs: Logs = Logs()
+    "Logs printed to stdout and stderr during execution."
+    error: Optional[Error] = None
+    "Error object if an error occurred, None otherwise."
+    def text(self, include_logs: bool = True) -> str:
+        """
+        Returns the text representation of this object, i.e. including the main result or the error traceback, optionally along with the logs (stdout, stderr).
+        :param include_logs: When True, the stdout entries followed by the stderr entries are placed before the result.
+        :return: The logs (if requested), a newline, then the error traceback if there is an error,
+            otherwise the text of the first main result. With neither, only the logs are returned.
+        """
+        # Join both streams with one separator so the last stdout entry is not
+        # fused onto the first stderr entry.
+        log_entries = self.logs.stdout + self.logs.stderr if include_logs else []
+        prefix = "\n".join(log_entries)
+        if self.error:
+            return prefix + "\n" + self.error.traceback
+        for res in self.results:
+            if res.is_main_result:
+                return prefix + "\n" + (res.text or "")
+        return prefix
+    @property
+    def success(self) -> bool:
+        """
+        Returns whether the execution was successful, i.e. no error was recorded.
+        """
+        return self.error is None
+    def to_json(self) -> str:
+        """
+        Returns the JSON representation of the Execution object, omitting fields that are None.
+        """
+        return self.model_dump_json(exclude_none=True)
+    @field_serializer("results", when_used="json")
+    def serialize_results(results: List[Result]) -> List[Dict[str, Union[str, bool]]]:  # type: ignore
+        """
+        Serializes the results to JSON.
+        This method is used by the Pydantic JSON encoder.
+        Each result becomes a dict of its available formats plus "text" and "is_main_result".
+        """
+        serialized = []
+        for result in results:
+            serialized_dict = {key: result[key] for key in result.formats()}
+            serialized_dict["text"] = result.text
+            serialized_dict["is_main_result"] = result.is_main_result
+            serialized.append(serialized_dict)
+        return serialized
+    @staticmethod
+    def from_exception(exec: Exception, traceback_raw: List[str]) -> "Execution":
+        """
+        Creates a failed Execution object from an exception and its traceback lines.
+        """
+        return Execution(
+            error=Error(
+                name=exec.__class__.__name__,
+                value=str(exec),
+                traceback_raw=traceback_raw,
+            )
+        )
+    @staticmethod
+    def from_e2b_execution(exec: E2BExecution) -> "Execution":  # type: ignore
+        """
+        Creates an Execution object from an E2BExecution object.
+        """
+        return Execution(
+            results=[Result.from_e2b_result(res) for res in exec.results],
+            logs=Logs(stdout=exec.logs.stdout, stderr=exec.logs.stderr),
+            error=(
+                Error(
+                    name=exec.error.name,
+                    value=exec.error.value,
+                    traceback_raw=exec.error.traceback_raw,
+                )
+                if exec.error
+                else None
+            ),
+        )
+class CodeInterpreter(abc.ABC):
+    """Code interpreter interface.
+    Subclasses provide ``close``, ``restart_kernel`` and ``exec_cell``; the base class
+    supplies context-manager support, ``exec_isolation`` and pass-through file transfer.
+    """
+    def __init__(self, timeout: int, *args: Any, **kwargs: Any) -> None:
+        # Extra positional/keyword arguments are accepted and ignored here.
+        self.timeout = timeout
+    def __enter__(self) -> Self:
+        return self
+    def __exit__(self, *exc_info: Any) -> None:
+        self.close()
+    def close(self, *args: Any, **kwargs: Any) -> None:
+        raise NotImplementedError()
+    def restart_kernel(self) -> None:
+        raise NotImplementedError()
+    def exec_cell(self, code: str) -> Execution:
+        raise NotImplementedError()
+    def exec_isolation(self, code: str) -> Execution:
+        """Restart the kernel, then execute ``code`` and return its Execution."""
+        self.restart_kernel()
+        return self.exec_cell(code)
+    def upload_file(self, file: Union[str, Path, IO]) -> str:
+        """Return ``file`` as a string path; nothing is transferred.
+        Raises AssertionError when given an open stream instead of a path.
+        """
+        # Default behavior is a no-op (for local code interpreter)
+        # typing.IO is not a base class of real file objects, so the check has to
+        # be made against io.IOBase for it to ever trigger.
+        assert not isinstance(
+            file, IOBase
+        ), "Don't pass IO objects to upload_file() of local interpreter"
+        return str(file)
+    def download_file(self, file_path: str) -> Path:
+        """Return ``file_path`` as a Path; nothing is transferred."""
+        # Default behavior is a no-op (for local code interpreter)
+        return Path(file_path)
+class E2BCodeInterpreter(CodeInterpreter):
+    """Code interpreter that delegates execution and file transfer to an E2B interpreter
+    created from the "va-sandbox" template."""
+    def __init__(self, *args: Any, **kwargs: Any) -> None:
+        super().__init__(*args, **kwargs)
+        assert os.getenv("E2B_API_KEY"), "E2B_API_KEY environment variable must be set"
+        self.interpreter = E2BCodeInterpreter._new_e2b_interpreter_impl(*args, **kwargs)
+        # Run a small probe cell so the interpreter's versions end up in the log.
+        result = self.exec_cell(
+            """
+import platform
+import sys
+import pkg_resources
+print(f"Python version: {sys.version}")
+print(f"OS version: {platform.system()} {platform.release()} ({platform.architecture()})")
+va_version = pkg_resources.get_distribution("vision-agent").version
+print(f"Vision Agent version: {va_version}")"""
+        )
+        sys_versions = "\n".join(result.logs.stdout)
+        _LOGGER.info(f"E2BCodeInterpreter initialized:\n{sys_versions}")
+    def close(self, *args: Any, **kwargs: Any) -> None:
+        self.interpreter.notebook.close()
+        self.interpreter.close()
+    def restart_kernel(self) -> None:
+        self.interpreter.notebook.restart_kernel()
+    def exec_cell(self, code: str) -> Execution:
+        execution = self.interpreter.notebook.exec_cell(code)
+        return Execution.from_e2b_execution(execution)
+    def upload_file(self, file: Union[str, Path, IO]) -> str:
+        """Upload a local file and return the path reported by the E2B interpreter.
+        :param file: A path to open in binary mode, or an already open stream.
+            A stream passed in is closed after the upload attempt.
+        """
+        if isinstance(file, (Path, str)):
+            # The handle is opened here, so it is closed here; a failing open()
+            # surfaces as its own error instead of being masked by cleanup code.
+            with open(file, "rb") as handle:
+                remote_path = cast(str, self.interpreter.upload_file(handle))
+        else:
+            assert isinstance(file, IOBase), f"Unexpected file type: {type(file)}"
+            try:
+                remote_path = cast(str, self.interpreter.upload_file(cast(IO, file)))
+            finally:
+                # Existing contract: a caller-supplied stream is closed afterwards.
+                file.close()
+        # Logged only after a successful upload, with the remote location.
+        _LOGGER.info(f"File ({file}) is uploaded to: {remote_path}")
+        return remote_path
+    def download_file(self, file_path: str) -> Path:
+        """Download ``file_path`` into a new temporary file and return that file's path.
+        The temporary file is not deleted automatically.
+        """
+        # Closing the handle flushes the content before the path is handed out.
+        with tempfile.NamedTemporaryFile(mode="w+b", delete=False) as file:
+            file.write(self.interpreter.download_file(file_path))
+        _LOGGER.info(f"File ({file_path}) is downloaded to: {file.name}")
+        return Path(file.name)
+    # Creation is retried with exponential backoff and jitter on ServiceException,
+    # giving up once 60 seconds have elapsed.
+    @staticmethod
+    @tenacity.retry(
+        wait=tenacity.wait_exponential_jitter(),
+        stop=tenacity.stop_after_delay(60),
+        retry=tenacity.retry_if_exception_type(ServiceException),
+    )
+    def _new_e2b_interpreter_impl(*args, **kwargs) -> E2BCodeInterpreterImpl:  # type: ignore
+        return E2BCodeInterpreterImpl(template="va-sandbox", *args, **kwargs)
+class LocalCodeInterpreter(CodeInterpreter):
+    """Code interpreter that runs cells through an nbclient NotebookClient on an in-memory notebook."""
+    def __init__(self, timeout: int = 600) -> None:
+        super().__init__(timeout=timeout)
+        self.nb = nbformat.v4.new_notebook()
+        self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
+        _LOGGER.info(
+            f"""Local code interpreter initialized
+Python version: {sys.version}
+OS version: {platform.system()} {platform.release()} ({platform.architecture()})
+nbclient version: {nbclient_version}
+nbformat version: {nbformat.__version__}
+Timeout: {self.timeout}"""
+        )
+    def _new_kernel(self) -> None:
+        """Start a kernel and its client unless a kernel client exists and reports alive."""
+        if self.nb_client.kc is None or not run_sync(self.nb_client.kc.is_alive)():  # type: ignore
+            self.nb_client.create_kernel_manager()
+            self.nb_client.start_new_kernel()
+            self.nb_client.start_new_kernel_client()
+    def close(self) -> None:
+        """Shut down the kernel if a kernel manager exists and is alive; otherwise do nothing."""
+        if self.nb_client.km is not None and run_sync(self.nb_client.km.is_alive)():  # type: ignore
+            run_sync(self.nb_client.km.shutdown_kernel)(now=True)
+            run_sync(self.nb_client.km.cleanup_resources)()
+            # Stop whichever of the stdin, heartbeat and control channels are still alive.
+            channels = [
+                self.nb_client.kc.stdin_channel,
+                self.nb_client.kc.hb_channel,
+                self.nb_client.kc.control_channel,
+            ]
+            for ch in channels:
+                if ch.is_alive():
+                    ch.stop()
+            # Drop both references so _new_kernel() sees that no kernel client exists.
+            self.nb_client.kc = None
+            self.nb_client.km = None
+    def restart_kernel(self) -> None:
+        """Close the current kernel, then start a new one on a fresh notebook and client."""
+        self.close()
+        self.nb = nbformat.v4.new_notebook()
+        self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
+        # NOTE(review): presumably gives the old kernel time to go away before the new one starts - confirm.
+        sleep(1)
+        self._new_kernel()
+    def exec_cell(self, code: str) -> Execution:
+        """Append ``code`` to the notebook as a new cell, execute it and return the parsed outputs.
+        Exceptions raised in the ``try`` body are caught and returned as a failed
+        Execution built from the exception and its formatted traceback.
+        """
+        try:
+            self.nb.cells.append(new_code_cell(code))
+            cell = self.nb.cells[-1]
+            self.nb_client.execute_cell(cell, len(self.nb.cells) - 1)
+            return _parse_local_code_interpreter_outputs(self.nb.cells[-1].outputs)
+        except CellTimeoutError as e:
+            # The cell timed out: interrupt the kernel before reporting the failure.
+            run_sync(self.nb_client.km.interrupt_kernel)()  # type: ignore
+            sleep(1)
+            traceback_raw = traceback.format_exc().splitlines()
+            return Execution.from_exception(e, traceback_raw)
+        except DeadKernelError as e:
+            # The kernel died: start a fresh one before reporting the failure.
+            self.restart_kernel()
+            traceback_raw = traceback.format_exc().splitlines()
+            return Execution.from_exception(e, traceback_raw)
+        except Exception as e:
+            traceback_raw = traceback.format_exc().splitlines()
+            return Execution.from_exception(e, traceback_raw)
+class CodeInterpreterFactory:
+    """Factory class for creating code interpreters.
+    Currently manages one cached default interpreter; could be extended to support
+    multiple code interpreters.
+    """
+    _instance_map: Dict[str, CodeInterpreter] = {}
+    _default_key = "default"
+    @staticmethod
+    def get_default_instance() -> CodeInterpreter:
+        """Return the cached default interpreter, creating and caching it on first use.
+        An E2B interpreter is created when CODE_SANDBOX_RUNTIME is "e2b" (its close()
+        is registered to run at exit); otherwise a local interpreter is created.
+        """
+        key = CodeInterpreterFactory._default_key
+        registry = CodeInterpreterFactory._instance_map
+        cached = registry.get(key)
+        if cached:
+            return cached
+        created: CodeInterpreter
+        use_e2b = os.getenv("CODE_SANDBOX_RUNTIME") == "e2b"
+        if not use_e2b:
+            created = LocalCodeInterpreter(timeout=600)
+        else:
+            created = E2BCodeInterpreter(timeout=600)
+            atexit.register(created.close)
+        registry[key] = created
+        return created
+def _parse_local_code_interpreter_outputs(outputs: List[Dict[str, Any]]) -> Execution:
+    """
+    Parse notebook cell outputs to Execution object.
+    Output types: https://nbformat.readthedocs.io/en/latest/format_description.html#code-cell-outputs
+    :param outputs: Output dicts of one executed cell, each carrying an "output_type" key.
+    :return: Execution holding the error (if any), the stdout/stderr stream texts and the
+        display/execute results in the order they appear.
+    :raises ValueError: If an output has an output type other than "error", "stream",
+        "display_data" or "execute_result".
+    """
+    execution = Execution()
+    for data in outputs:
+        output_type = data["output_type"]
+        if output_type == "error":
+            _LOGGER.debug("Cell finished execution with error")
+            execution.error = Error(
+                name=data["ename"],
+                value=data["evalue"],
+                traceback_raw=data["traceback"],
+            )
+        elif output_type == "stream":
+            # Streams with any other name are ignored.
+            if data["name"] == "stdout":
+                execution.logs.stdout.append(data["text"])
+            elif data["name"] == "stderr":
+                execution.logs.stderr.append(data["text"])
+        elif output_type == "display_data":
+            # Must be an equality test: a substring test would also accept
+            # fragments such as "data" or "display" instead of rejecting them.
+            result = Result(is_main_result=False, data=data["data"])
+            execution.results.append(result)
+        elif output_type == "execute_result":
+            result = Result(is_main_result=True, data=data["data"])
+            execution.results.append(result)
+        else:
+            raise ValueError(f"Unknown output type: {data['output_type']}")
+    return execution
+def _remove_escape_and_color_codes(input_str: str) -> str:
+    """Return ``input_str`` with every ``ESC[`` sequence ending in ``m`` or ``K`` removed."""
+    return re.sub(r"\x1b\[[0-9;]*[mK]", "", input_str)

vision_agent-0.2.36/vision_agent/agent/__init__.py DELETED Viewed

@@ -1,7 +0,0 @@
-from .agent import Agent
-from .agent_coder import AgentCoder
-from .data_interpreter import DataInterpreter
-from .easytool import EasyTool
-from .easytool_v2 import EasyToolV2
-from .reflexion import Reflexion
-from .vision_agent import VisionAgent

vision_agent-0.2.36/vision_agent/utils/__init__.py DELETED Viewed

@@ -1,3 +0,0 @@
-from .execute import Execute
-from .sim import Sim, load_sim, merge_sim
-from .video import extract_frames_from_video

vision_agent-0.2.36/vision_agent/utils/execute.py DELETED Viewed

@@ -1,107 +0,0 @@
-"""This code is adapted from MetaGPT's https://github.com/geekan/MetaGPT/blob/main/metagpt/actions/di/execute_nb_code.py
-"""
-import base64 as b64
-import io
-import re
-from time import sleep
-from typing import Dict, List, Tuple
-import nbformat
-from nbclient import NotebookClient
-from nbclient.exceptions import CellTimeoutError, DeadKernelError
-from nbclient.util import run_sync
-from nbformat import NotebookNode
-from nbformat.v4 import new_code_cell
-from PIL import Image
-def remove_escape_and_color_codes(input_str: str) -> str:
-    """Strip sequences of ESC, `[`, digits/semicolons and a final `m` or `K` from `input_str`."""
-    return re.sub(r"\x1b\[[0-9;]*[mK]", "", input_str)
-def parse_outputs(outputs: List[Dict]) -> Tuple[bool, str]:
-    """Flatten notebook cell outputs into a success flag and one text blob.
-    Args:
-        outputs: Output dicts of an executed cell, dispatched on "output_type".
-    Returns:
-        A `(success, text)` tuple. `success` is False as soon as any output is
-        an "error"; `text` is the comma-joined stream text, plain-text results
-        and cleaned tracebacks, in encounter order.
-    """
-    success, parsed_output = True, []
-    for output in outputs:
-        output_type = output["output_type"]
-        # TODO: add parse image data
-        if output_type == "stream":
-            parsed_output.append(output["text"])
-        elif output_type in ("execute_result", "text/plain"):
-            # nbformat reports a cell's value as "execute_result"; "text/plain" is
-            # a MIME key inside `data`, never an output type, so testing for it
-            # alone dropped every execute result. The legacy value stays accepted.
-            text = output["data"].get("text/plain")
-            if text is not None:
-                parsed_output.append(text)
-        elif output_type == "display_data":
-            if "image/png" in output["data"]:
-                image_bytes = b64.b64decode(output["data"]["image/png"])
-                # Side effect only: the decoded image is shown via PIL, nothing
-                # is added to the returned text.
-                Image.open(io.BytesIO(image_bytes)).show()
-        elif output_type == "error":
-            success = False
-            output_text = remove_escape_and_color_codes("\n".join(output["traceback"]))
-            parsed_output.append(output_text)
-    return success, ",".join(parsed_output)
-class Execute:
-    """Runs code strings as cells of an in-memory notebook through nbclient."""
-    def __init__(self, timeout: int = 600) -> None:
-        """Create an empty notebook and a NotebookClient bound to it.
-        Args:
-            timeout: Forwarded to NotebookClient as its `timeout`.
-        """
-        self.timeout = timeout
-        self.nb = nbformat.v4.new_notebook()
-        self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
-    def build(self) -> None:
-        """Start a kernel and kernel client unless a live client already exists."""
-        client = self.nb_client
-        if client.kc is not None and run_sync(client.kc.is_alive)():  # type: ignore
-            return
-        client.create_kernel_manager()
-        client.start_new_kernel()
-        client.start_new_kernel_client()
-    def terminate(self) -> None:
-        """Shut down a live kernel, stop its channels and clear the kc/km handles.
-        Does nothing when there is no kernel manager or its kernel is not alive.
-        """
-        client = self.nb_client
-        if client.km is None or not run_sync(client.km.is_alive)():  # type: ignore
-            return
-        run_sync(client.km.shutdown_kernel)(now=True)
-        run_sync(client.km.cleanup_resources)()
-        for channel in (
-            client.kc.stdin_channel,
-            client.kc.hb_channel,
-            client.kc.control_channel,
-        ):
-            if channel.is_alive():
-                channel.stop()
-        client.kc = None
-        client.km = None
-    def reset(self) -> None:
-        """Terminate the kernel, start over with a fresh notebook and rebuild."""
-        self.terminate()
-        self.nb = nbformat.v4.new_notebook()
-        self.nb_client = NotebookClient(self.nb, timeout=self.timeout)
-        # Pause for a second before starting the replacement kernel.
-        sleep(1)
-        self.build()
-    def run_cell(self, cell: NotebookNode, cell_index: int) -> Tuple[bool, str]:
-        """Execute `cell` and report `(success, text)`.
-        The text is parsed from the outputs of the notebook's last cell. A cell
-        timeout interrupts the kernel, a dead kernel triggers `reset()`; both
-        return False with a fixed message. Any other exception falls back to
-        parsing whatever outputs the last cell has.
-        """
-        try:
-            self.nb_client.execute_cell(cell, cell_index)
-            return parse_outputs(self.nb.cells[-1].outputs)
-        except CellTimeoutError:
-            run_sync(self.nb_client.km.interrupt_kernel)()  # type: ignore
-            sleep(1)
-            return False, "Cell execution timed out."
-        except DeadKernelError:
-            self.reset()
-            return False, "DeadKernelError"
-        except Exception:
-            # Execution failures are expected to be recorded in the cell outputs.
-            return parse_outputs(self.nb.cells[-1].outputs)
-    def add_code_cell(self, code: str) -> None:
-        """Append `code` to the notebook as a new code cell."""
-        new_cell = new_code_cell(code)
-        self.nb.cells.append(new_cell)
-    def run_additional(self, code: str) -> Tuple[bool, str]:
-        """Run `code` as a new cell on the current kernel, starting one if needed."""
-        self.build()
-        self.add_code_cell(code)
-        last_index = len(self.nb.cells) - 1
-        return self.run_cell(self.nb.cells[-1], last_index)
-    def run_isolation(self, code: str) -> Tuple[bool, str]:
-        """Run `code` as the only cell of a fresh notebook on a fresh kernel."""
-        self.reset()
-        self.add_code_cell(code)
-        last_index = len(self.nb.cells) - 1
-        return self.run_cell(self.nb.cells[-1], last_index)